2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
69 return (a&0xFFFF) + (b<<16);
/* ff_rem6[qp] == qp % 6 for 0 <= qp < 52 (the H.264 QP range); table lookup
 * avoids a runtime modulo in the dequantisation code.
 * Restored the closing brace dropped by the extraction. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* ff_div6[qp] == qp / 6 for 0 <= qp < 52 (the H.264 QP range); table lookup
 * avoids a runtime division in the dequantisation code.
 * Restored the closing brace dropped by the extraction. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/*
 * fill_caches(): populate the per-macroblock neighbour caches — intra
 * prediction modes, non-zero-coefficient counts, motion vectors, reference
 * indices, mvd (CABAC) and direct-mode flags — from the top/left/topleft/
 * topright neighbouring macroblocks, for decoding or (for_deblock) for the
 * deblocking filter.
 * NOTE(review): this extraction is missing many of the original source
 * lines (loop headers, else branches, closing braces) and has the original
 * file's line numbers fused into each line. The code below is reproduced
 * byte-for-byte; only comments were added. Do not assume it compiles as-is.
 */
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
88 int topleft_partition= -1;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses: topleft/top/topright/left. */
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: recompute neighbour addresses from the mb-pair geometry,
 * depending on the frame/field coding of each neighbouring pair. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
168 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbour addresses for later use
 * (e.g. fetch_diagonal_mv reads h->left_mb_xy). */
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
/* Neighbour mb types; a slice_table value of 255 appears to mark
 * "no macroblock there" — TODO confirm against the full source. */
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter fast path: reload this mb's own nnz bitmap / mvs / refs. */
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Neighbour types again, this time gated on same-slice membership. */
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra: sample-availability bitmasks, one bit per 4x4 position; bits are
 * cleared where a neighbour is missing or unusable under constrained
 * intra prediction. */
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: cache neighbour 4x4 prediction modes (scan8 layout). */
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero coefficient counts of the top/left neighbours (entropy-coding
 * contexts); 64 is stored where the neighbour count is unavailable. */
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern bits (CABAC contexts). */
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter: cache neighbour motion vectors and reference indices. */
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
373 h->mv_cache_clean[list]= 0;
/* top neighbour row (4 mvs + 2 refs). */
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* left neighbour column (2 mvs + 2 refs per half). */
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* topleft / topright single entries (only needed in some modes). */
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* padding entries outside the real 4x4 grid, marked unavailable. */
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: cache motion vector differences (mvd) of neighbours. */
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache direct-mode flags of neighbours. */
484 if(h->slice_type == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: per-cache-slot remapping between frame- and field-coded
 * neighbours — MAP_F2F halves/doubles the vertical mv component and
 * shifts the reference index accordingly. */
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame -> field direction: double ref, halve vertical mv. */
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
/* field -> frame direction: halve ref, double vertical mv. */
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
/* 8x8 DCT flag of neighbours, used by the deblocking filter. */
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/*
 * check_intra4x4_pred_mode(): when the top and/or left neighbour samples
 * are unavailable, remap the cached intra4x4 prediction modes through the
 * static top[]/left[] tables (DC variants fall back to one-sided DC) and
 * log an error for modes that genuinely require the missing neighbour.
 * NOTE(review): the loop headers, error-return paths and closing braces
 * are missing from this extraction; code reproduced verbatim, comments
 * only added.
 */
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* top samples missing: remap the four top-row 4x4 modes. */
573 if(!(h->top_samples_available&0x8000)){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* left samples missing: remap the four left-column 4x4 modes. */
585 if(!(h->left_samples_available&0x8000)){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
/*
 * check_intra_pred_mode(): validate/adjust a whole-block (16x16 luma or
 * chroma) intra prediction mode against neighbour availability, remapping
 * DC-type modes via the static top[]/left[] tables and logging an error
 * when the requested mode needs an unavailable neighbour or is out of
 * range.
 * NOTE(review): range check, remap assignments, returns and braces are
 * missing from this extraction; code reproduced verbatim, comments only
 * added.
 */
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if(!(h->left_samples_available&0x8000)){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/*
 * write_back_non_zero_count(): copy the non-zero-coefficient counts that
 * future neighbours will read (right column / bottom row, luma and chroma)
 * from the scan8-layout cache back into the per-mb table, and pack a
 * 16-bit "any nonzero" luma bitmap at offset 14 for the deblocking filter.
 * NOTE(review): blank lines, a chroma section header, the bitmap loop
 * header and closing braces are missing from this extraction; code
 * reproduced verbatim, comments only added.
 */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
/* luma right column and bottom row. */
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma entries (cb then cr, see the index pairs used in fill_caches). */
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
660 // store all luma nnzs, for deblocking
/* one bit per 4x4 luma block: set iff the cached count is nonzero. */
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/*
 * pred_non_zero_count(): predict the number of non-zero coefficients for
 * block n from the cached left/top neighbour counts (the CAVLC nC
 * derivation).
 * NOTE(review): the declaration of the accumulator summed from left+top
 * and the final return statement are missing from this extraction; code
 * reproduced verbatim, comments only added. The `i<64` test presumably
 * distinguishes "both neighbours available" (64 appears to be the
 * unavailable marker written by fill_caches) — confirm against the full
 * source.
 */
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
/* rounded average when both neighbours contributed. */
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * fetch_diagonal_mv(): select the "C" (diagonal) motion vector for the MV
 * prediction of the block at scan8 index i: normally the top-right
 * neighbour, falling back to the top-left one when top-right is
 * unavailable. Under MBAFF the neighbour may be coded in the opposite
 * frame/field mode, in which case SET_DIAG_MV fetches the mv directly
 * from the picture and rescales its vertical component and reference
 * index.
 * @return the reference index belonging to the chosen mv (also written
 *         through *C).
 * NOTE(review): several condition headers, the `mv` declaration and
 * closing braces are missing from this extraction; code reproduced
 * verbatim, comments only added.
 */
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scan8[0]-2 is a scratch cache slot used to return a synthesized mv. */
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: fetch mv/ref at 4x4 position (X4,Y4) from the current
 * picture, applying MV_OP to the vertical component and REF_OP to the
 * reference index to convert between frame and field units. */
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF (or no special case hit): take topright, else topleft. */
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
/*
 * pred_motion(): standard H.264 median motion-vector prediction for block
 * n: the prediction is the component-wise median of the left (A), top (B)
 * and diagonal (C) neighbour mvs, with the single-match and
 * only-left-available special cases handled per the spec.
 * NOTE(review): the declarations of C/mv, the single-match branches and
 * the closing braces are missing from this extraction; code reproduced
 * verbatim, comments only added.
 */
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
/* count how many neighbours use the same reference picture. */
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
785 }else if(top_ref==ref){
/* no match: left-only special case, otherwise plain median. */
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/*
 * pred_16x8_motion(): directional MV prediction for 16x8 partitions —
 * the top partition prefers the top neighbour's mv, the bottom partition
 * the left neighbour's, when that neighbour uses the same reference;
 * otherwise falls back to the generic median pred_motion().
 * NOTE(review): the partition-selection conditions, the *mx/*my
 * assignments, returns and braces are missing from this extraction; code
 * reproduced verbatim, comments only added.
 */
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top 16x8 partition: use top neighbour (B) when refs match. */
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom 16x8 partition: use left neighbour (A) when refs match. */
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: generic median prediction. */
837 pred_motion(h, n, 4, list, ref, mx, my);
/*
 * pred_8x16_motion(): directional MV prediction for 8x16 partitions —
 * the left partition prefers the left neighbour's mv, the right partition
 * the diagonal (top-right) neighbour's, when that neighbour uses the same
 * reference; otherwise falls back to the generic median pred_motion().
 * NOTE(review): the partition-selection conditions, the *mx/*my
 * assignments, returns and braces are missing from this extraction; code
 * reproduced verbatim, comments only added.
 */
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: use left neighbour (A) when refs match. */
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* right 8x16 partition: use the diagonal neighbour (C) when refs match. */
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
/* fallback: generic median prediction. */
874 pred_motion(h, n, 2, list, ref, mx, my);
/*
 * pred_pskip_motion(): MV prediction for P-skip macroblocks. Per the
 * spec, the skip mv is zero when either the top or left neighbour is
 * unavailable or has reference 0 with a zero mv; otherwise it is the
 * normal median prediction for a 16x16 partition against reference 0.
 * NOTE(review): the "*mx = *my = 0; return;" branch body and closing
 * braces are missing from this extraction; code reproduced verbatim,
 * comments only added.
 */
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* zero-mv conditions for skip (missing branch presumably sets *mx/*my
 * to 0 and returns — confirm against the full source). */
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
891 pred_motion(h, 0, 4, 0, 0, mx, my);
/*
 * direct_dist_scale_factor(): precompute the temporal-direct-mode
 * distance scale factors, one per list-0 reference: 256 when the POC
 * distance td is zero, otherwise clip(((tb*tx + 32) >> 6), -1024, 1023)
 * with tx = (16384 + |td|/2) / td — the H.264 temporal direct scaling.
 * Also duplicates each factor into the per-field table.
 * NOTE(review): variable declarations, else lines, the condition guarding
 * the field-table loop and closing braces are missing from this
 * extraction; code reproduced verbatim, comments only added.
 */
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* duplicate each frame factor for both fields of the pair. */
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/*
 * direct_ref_list_init(): snapshot the current picture's reference counts
 * and POCs (for later use as a colocated picture), then — for temporal
 * direct mode only — build map_col_to_list0[], mapping each reference of
 * the colocated (list-1[0]) picture to the list-0 index with the same
 * POC (0 when no match is found). Also derives the per-field variant of
 * the map.
 * NOTE(review): variable declarations, a return after the spatial-mode
 * early-out, break statements and closing braces are missing from this
 * extraction; code reproduced verbatim, comments only added.
 */
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* spatial direct (or non-B) needs no colocated-ref mapping. */
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
/* temporal direct: match colocated refs to list 0 by POC. */
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
/* per-field map: frame index j becomes field indices 2j / 2j+1. */
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/*
 * Fills h->mv_cache / h->ref_cache (and h->sub_mb_type) for a B-frame
 * macroblock coded in direct mode, reading the colocated data from
 * h->ref_list[1][0].  Dispatches between spatial direct prediction
 * (h->direct_spatial_mv_pred) and temporal direct prediction, and
 * compensates for frame/field coding mismatches between the current MB
 * and the colocated MB.
 * NOTE(review): this chunk is an elided excerpt -- several interior lines
 * (else-branches, closing braces, some declarations) are not visible here.
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
/* 8x8- and 4x4-granularity offsets of this MB in the colocated tables */
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* colocated list0/list1 motion vectors and reference indices */
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the partition size used for the direct prediction */
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
984 *mb_type |= MB_TYPE_DIRECT2;
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
990 if(h->direct_spatial_mv_pred){
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes negative (unavailable) refs lose the min */
1004 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1009 if(ref[0] < 0 && ref[1] < 0){
/* no valid neighbour in either list: use ref 0 with zero MV */
1010 ref[0] = ref[1] = 0;
1011 mv[0][0] = mv[0][1] =
1012 mv[1][0] = mv[1][1] = 0;
1014 for(list=0; list<2; list++){
1016 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1018 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list when only one ref is valid */
1024 *mb_type &= ~MB_TYPE_L1;
1025 sub_mb_type &= ~MB_TYPE_L1;
1026 }else if(ref[0] < 0){
1028 *mb_type &= ~MB_TYPE_L0;
1029 sub_mb_type &= ~MB_TYPE_L0;
/* current MB and colocated MB differ in frame/field coding: re-derive
 * the colocated block pointers for the matching field/frame pair */
1032 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1033 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1034 int mb_types_col[2];
1035 int b8_stride = h->b8_stride;
1036 int b4_stride = h->b_stride;
1038 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1040 if(IS_INTERLACED(*mb_type)){
/* current is field, colocated is frame: step back to the MB pair top */
1041 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1042 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1044 l1ref0 -= 2*b8_stride;
1045 l1ref1 -= 2*b8_stride;
1046 l1mv0 -= 4*b4_stride;
1047 l1mv1 -= 4*b4_stride;
/* current is frame, colocated is field: pick the field whose POC is
 * closest to the current picture */
1052 int cur_poc = s->current_picture_ptr->poc;
1053 int *col_poc = h->ref_list[1]->field_poc;
1054 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1055 int dy = 2*col_parity - (s->mb_y&1);
1057 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1058 l1ref0 += dy*b8_stride;
1059 l1ref1 += dy*b8_stride;
1060 l1mv0 += 2*dy*b4_stride;
1061 l1mv1 += 2*dy*b4_stride;
/* per-8x8 handling for the mismatched frame/field case */
1065 for(i8=0; i8<4; i8++){
1068 int xy8 = x8+y8*b8_stride;
1069 int xy4 = 3*x8+y8*b4_stride;
1072 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1074 h->sub_mb_type[i8] = sub_mb_type;
1076 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated block is "small motion" (ref 0, |mv| <= 1): zero the MV */
1078 if(!IS_INTRA(mb_types_col[y8])
1079 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1080 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1082 a= pack16to32(mv[0][0],mv[0][1]);
1084 b= pack16to32(mv[1][0],mv[1][1]);
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 b= pack16to32(mv[1][0],mv[1][1]);
1089 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1090 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* matched frame/field coding, whole-MB case */
1092 }else if(IS_16X16(*mb_type)){
1095 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1096 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1097 if(!IS_INTRA(mb_type_col)
1098 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1099 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264_build check: workaround for streams from old x264 versions */
1100 && (h->x264_build>33 || !h->x264_build)))){
1102 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 b= pack16to32(mv[1][0],mv[1][1]);
1109 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1110 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* matched frame/field coding, per-8x8 case */
1112 for(i8=0; i8<4; i8++){
1113 const int x8 = i8&1;
1114 const int y8 = i8>>1;
1116 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1118 h->sub_mb_type[i8] = sub_mb_type;
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1122 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1123 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1126 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1127 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1128 && (h->x264_build>33 || !h->x264_build)))){
1129 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1130 if(IS_SUB_8X8(sub_mb_type)){
1131 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1132 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1136 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* 4x4 granularity: zero only the 4x4 blocks with small colocated MVs */
1139 for(i4=0; i4<4; i4++){
1140 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1141 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1143 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1145 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1151 }else{ /* direct temporal mv pred */
1152 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1153 const int *dist_scale_factor = h->dist_scale_factor;
1156 if(IS_INTERLACED(*mb_type)){
/* field MB: use the field variants of the mapping/scaling tables */
1157 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1158 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1159 dist_scale_factor = h->dist_scale_factor_field;
1161 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1162 /* FIXME assumes direct_8x8_inference == 1 */
1163 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1164 int mb_types_col[2];
1167 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1168 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1169 | (*mb_type & MB_TYPE_INTERLACED);
1170 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1172 if(IS_INTERLACED(*mb_type)){
1173 /* frame to field scaling */
1174 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1175 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1177 l1ref0 -= 2*h->b8_stride;
1178 l1ref1 -= 2*h->b8_stride;
1179 l1mv0 -= 4*h->b_stride;
1180 l1mv1 -= 4*h->b_stride;
1184 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1185 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1187 *mb_type |= MB_TYPE_16x8;
1189 *mb_type |= MB_TYPE_8x8;
1191 /* field to frame scaling */
1192 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1193 * but in MBAFF, top and bottom POC are equal */
1194 int dy = (s->mb_y&1) ? 1 : 2;
1196 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1197 l1ref0 += dy*h->b8_stride;
1198 l1ref1 += dy*h->b8_stride;
1199 l1mv0 += 2*dy*h->b_stride;
1200 l1mv1 += 2*dy*h->b_stride;
1203 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1205 *mb_type |= MB_TYPE_16x16;
1207 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal scaling with frame/field mismatch (y_shift used to
 * map between frame and field vertical coordinates) */
1210 for(i8=0; i8<4; i8++){
1211 const int x8 = i8&1;
1212 const int y8 = i8>>1;
1214 const int16_t (*l1mv)[2]= l1mv0;
1216 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 h->sub_mb_type[i8] = sub_mb_type;
/* list1 ref is always 0 in temporal direct */
1220 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1221 if(IS_INTRA(mb_types_col[y8])){
1222 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1223 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1224 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1230 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1232 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1235 scale = dist_scale_factor[ref0];
1236 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1239 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1240 int my_col = (mv_col[1]<<y_shift)/2;
/* mv_l0 = (dist_scale_factor * mv_col + 128) >> 8; mv_l1 = mv_l0 - mv_col */
1241 int mx = (scale * mv_col[0] + 128) >> 8;
1242 int my = (scale * my_col + 128) >> 8;
1243 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1244 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col)){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1261 : map_col_to_list0[1][l1ref1[0]];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col)){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*h->b8_stride];
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/*
 * Copies the decoded per-MB motion data from the per-MB caches
 * (h->mv_cache, h->ref_cache, h->mvd_cache, h->sub_mb_type) back into the
 * frame-wide tables (s->current_picture.motion_val / ref_index,
 * h->mvd_table, h->direct_table).
 * NOTE(review): elided excerpt -- loop headers / closing braces between
 * some of the lines below are not visible in this chunk.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list0 as unused so neighbouring MBs predict correctly */
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row from the cache to the picture */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
/* CABAC keeps the MV difference table too; skipped MBs store zeros */
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B-slices with CABAC need the per-8x8 direct flags for context modelling */
1359 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Strips the NAL header byte and removes 0x000003 emulation-prevention
 * escapes, writing the de-escaped RBSP into h->rbsp_buffer when needed.
 * NOTE(review): elided excerpt -- some lines of the escape-scan/copy loop
 * are not visible in this chunk.
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
/* first byte of the NAL unit: nal_ref_idc (2 bits), nal_unit_type (5 bits) */
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* fast scan for the first 0x000003 (escape) / 0x0000 0x (startcode) */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
/* NOTE(review): body largely elided in this chunk -- only the signature
 * and a trace statement are visible here. */
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457 * idct tranforms the 16 dc values and dequantize them.
1458 * @param qp quantization parameter
/*
 * Inverse 4x4 transform of the 16 luma DC coefficients (stored at the DC
 * positions of the 16 4x4 blocks, hence the stride/offset tables),
 * followed by dequantization with rounding: (x*qmul + 128) >> 8.
 * NOTE(review): elided excerpt -- the temp-store lines of the first pass
 * and the loop headers are not visible here.
 */
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
/* horizontal pass: butterflies into temp[] */
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass + dequant, written back to the DC positions */
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * dct tranforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
/*
 * Forward counterpart of h264_luma_dc_dequant_idct_c: 4x4 transform of
 * the 16 luma DC values in place (encoder side); the final >>1 halves the
 * result instead of dequantizing.
 * NOTE(review): elided excerpt -- temp-store lines and loop headers are
 * not visible here.
 */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: butterflies into temp[] */
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterflies, halved, written back */
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * 2x2 inverse transform + dequantization of the chroma DC coefficients
 * (stored at the DC positions of the four chroma 4x4 blocks).
 * NOTE(review): elided excerpt -- the declaration of a..e and the line
 * computing e (presumably e = a - b; a = a + b or similar butterfly step)
 * are not visible here.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
/* butterfly + dequant; result scaled by qmul and >>7 */
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * Forward 2x2 transform of the chroma DC values (encoder side); same
 * layout as chroma_dc_dequant_idct_c but without dequantization.
 * NOTE(review): elided excerpt -- the declaration of a..e and the line
 * computing e are not visible here.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS table; t selects between
 * the Cb/Cr qp-index offsets. The &0xff guards the table index. */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale & 0xff];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Encoder-side quantization of one block.  The DC coefficient is handled
 * specially: with separate_dc it uses quant_coeff[qscale+18] and
 * QUANT_SHIFT-2, otherwise the regular table with QUANT_SHIFT+1.  The
 * threshold1/threshold2 trick classifies coefficients as zero / positive /
 * negative with a single unsigned compare.
 * @return index of the last non-zero coefficient
 * NOTE(review): elided excerpt -- some branch/else and loop lines between
 * the visible lines are missing from this chunk.
 */
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1594 const int * const quant_table= quant_coeff[qscale];
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
/* DC with separate_dc: dedicated table and smaller shift */
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1616 // last_non_zero = i;
/* DC without separate_dc: regular table, larger shift */
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1634 // last_non_zero = i;
/* AC coefficients in scan order */
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1654 level= (bias + level)>>QUANT_SHIFT;
1657 level= (bias - level)>>QUANT_SHIFT;
1666 return last_non_zero;
/*
 * Motion compensation for one partition from one prediction list:
 * computes the quarter-pel luma / eighth-pel chroma source positions,
 * falls back to ff_emulated_edge_mc when the reference area crosses the
 * picture border, then applies the given qpel/chroma MC functions.
 * NOTE(review): elided excerpt -- a few lines (e.g. the emu flag handling
 * and some field-MC lines) are not visible in this chunk.
 */
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
/* mx/my are in quarter-pel units; luma_xy selects the qpel interpolator */
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* fractional MVs need 3 extra border pixels for the 6-tap filter */
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Standard (non-weighted) MC for one partition: list0 prediction with the
 * "put" functions, then -- for bi-prediction -- list1 with the "avg"
 * functions so the two predictions are averaged into the destination.
 * NOTE(review): elided excerpt -- the lines switching qpix_op to qpix_avg
 * and the list1 guard are not all visible here.
 */
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition; offsets are in 8-pel units */
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
/* second list averages on top of the first */
1753 chroma_op= chroma_avg;
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
/*
 * Weighted-prediction MC for one partition.  Bi-directional case renders
 * list1 into a scratchpad and blends with the biweight functions
 * (implicit weights when use_weight == 2, explicit otherwise);
 * uni-directional case applies the explicit per-list weight/offset
 * in place after a normal MC.
 * NOTE(review): elided excerpt -- the branch structure between the
 * bi- and uni-directional paths is partly missing here.
 */
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 directly into dest, list1 into the scratchpad, then blend */
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1795 if(h->use_weight == 2){
/* implicit weighting: weights sum to 64, no offset */
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: normal MC, then weight in place */
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Dispatches one partition to weighted or standard MC.  Weighted MC is
 * used for explicit weighting (use_weight == 1) and for implicit
 * weighting when the implicit weight differs from the trivial 32/32
 * (which plain averaging already produces).
 */
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/*
 * Issues cache prefetches for the reference pixels this MB's first MV
 * points at, offset ahead so the data is resident when MC runs.
 * NOTE(review): elided excerpt -- the refn validity guard between the
 * declaration and the mx/my lines is not visible here.
 */
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, so one prefetch covers both via stride */
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Performs inter motion compensation for a whole macroblock: dispatches
 * to mc_part per partition according to mb_type (16x16 / 16x8 / 8x16 /
 * 8x8 with sub-partitions), selecting the matching qpel/chroma function
 * and weight-table entries for each partition size.
 * NOTE(review): elided excerpt -- a few loop headers / braces in the 8x8
 * sub-partition section are not visible here.
 */
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
/* two 16x8 halves: top (n=0) and bottom (n=8) */
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
/* two 8x16 halves: left (n=0) and right (n=4) */
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1903 assert(IS_8X8(mb_type));
/* per-8x8 sub-partitioning */
1906 const int sub_mb_type= h->sub_mb_type[i];
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1936 assert(IS_SUB_4X4(sub_mb_type));
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
/*
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before VLCs) from the static length/bits arrays; guarded by the
 * `done` flag so repeated calls are no-ops.
 * NOTE(review): elided excerpt -- the `done` guard body and some loop
 * headers are not visible here.
 */
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1959 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1960 &chroma_dc_coeff_token_len [0], 1, 1,
1961 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1964 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1965 &coeff_token_len [i][0], 1, 1,
1966 &coeff_token_bits[i][0], 1, 1, 1);
1970 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1971 &chroma_dc_total_zeros_len [i][0], 1, 1,
1972 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1974 for(i=0; i<15; i++){
1975 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1976 &total_zeros_len [i][0], 1, 1,
1977 &total_zeros_bits[i][0], 1, 1, 1);
/* run_before: six small tables plus the larger run7 table */
1981 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1982 &run_len [i][0], 1, 1,
1983 &run_bits[i][0], 1, 1, 1);
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1, 1);
/*
 * Frees all per-context dynamically allocated tables (prediction-mode,
 * CBP, MVD, direct, slice and mapping tables), the SPS/PPS buffers, and
 * the per-thread-context scratch buffers.  av_freep NULLs the pointers,
 * so the function is safe to call on a partially initialized context.
 */
1991 static void free_tables(H264Context *h){
1994 av_freep(&h->intra4x4_pred_mode);
1995 av_freep(&h->chroma_pred_mode_table);
1996 av_freep(&h->cbp_table);
1997 av_freep(&h->mvd_table[0]);
1998 av_freep(&h->mvd_table[1]);
1999 av_freep(&h->direct_table);
2000 av_freep(&h->non_zero_count);
2001 av_freep(&h->slice_table_base);
2002 h->slice_table= NULL;
2004 av_freep(&h->mb2b_xy);
2005 av_freep(&h->mb2b8_xy);
2007 for(i = 0; i < MAX_SPS_COUNT; i++)
2008 av_freep(h->sps_buffers + i);
2010 for(i = 0; i < MAX_PPS_COUNT; i++)
2011 av_freep(h->pps_buffers + i);
/* per-thread contexts own their own border/scratch buffers */
2013 for(i = 0; i < h->s.avctx->thread_count; i++) {
2014 hx = h->thread_context[i];
2016 av_freep(&hx->top_borders[1]);
2017 av_freep(&hx->top_borders[0]);
2018 av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables (one per scaling matrix, all 52 QPs)
 * from the PPS scaling matrices.  If both 8x8 matrices are identical the
 * second table aliases the first to save work.  Coefficients are stored
 * transposed when a non-C IDCT implementation is in use.
 */
2022 static void init_dequant8_coeff_table(H264Context *h){
         /* non-C idct expects transposed coefficient order */
2024     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2025     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2026     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2028     for(i=0; i<2; i++ ){
2029         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
             /* identical matrices: share table [0], skip recomputation
              * (the break/continue following this is elided in this excerpt) */
2030             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034         for(q=0; q<52; q++){
             /* qp = 6*shift + idx; the base coefficients repeat every 6 QPs */
2035             int shift = ff_div6[q];
2036             int idx = ff_rem6[q];
2038                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2039                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2040                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables (six scaling matrices, 52 QPs each)
 * from the PPS scaling matrices; tables with identical matrices alias an
 * earlier buffer.  Stored transposed for non-C IDCT implementations.
 */
2045 static void init_dequant4_coeff_table(H264Context *h){
2047     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2048     for(i=0; i<6; i++ ){
2049         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         /* reuse a previous buffer if matrix j equals matrix i
          * (the j loop header is elided in this excerpt) */
2051             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2052                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2059         for(q=0; q<52; q++){
             /* 4x4 path carries an extra +2 scale compared to the 8x8 path */
2060             int shift = ff_div6[q] + 2;
2061             int idx = ff_rem6[q];
2063                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2064                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2065                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS: always the 4x4
 * tables, the 8x8 tables only when 8x8 transform mode is enabled.  With
 * lossless transform bypass, qp==0 entries are forced to the identity
 * scale 1<<6 (the loops over i/x are elided in this excerpt).
 */
2070 static void init_dequant_tables(H264Context *h){
2072     init_dequant4_coeff_table(h);
2073     if(h->pps.transform_8x8_mode)
2074         init_dequant8_coeff_table(h);
2075     if(h->sps.transform_bypass){
             /* identity dequant for the lossless qp==0 bypass case */
2078             h->dequant4_coeff[i][0][x] = 1<<6;
2079             if(h->pps.transform_8x8_mode)
2082                 h->dequant8_coeff[i][0][x] = 1<<6;
2089  * needs width/height
2091 static int alloc_tables(H264Context *h){
2092     MpegEncContext * const s = &h->s;
         /* one extra MB row of slack for edge macroblocks */
2093     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2096     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2098     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2099     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2100     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2102     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2103     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2104     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2105     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
         /* -1 marks "no slice" so neighbor-availability tests fail safely */
2107     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2108     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2110     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2111     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
         /* precompute MB index -> 4x4-block / 8x8-block index mappings */
2112     for(y=0; y<s->mb_height; y++){
2113         for(x=0; x<s->mb_width; x++){
2114             const int mb_xy= x + y*s->mb_stride;
2115             const int b_xy = 4*x + 4*y*h->b_stride;
2116             const int b8_xy= 2*x + 2*y*h->b8_stride;
2118             h->mb2b_xy [mb_xy]= b_xy;
2119             h->mb2b8_xy[mb_xy]= b8_xy;
2123     s->obmc_scratchpad = NULL;
2125     if(!h->dequant4_coeff[0])
2126         init_dequant_tables(h);
2135  * Mimic alloc_tables(), but for every context thread.
2137 static void clone_tables(H264Context *dst, H264Context *src){
         /* the big per-MB tables are shared (read/write partitioned by slice),
          * not duplicated, across slice-thread contexts */
2138     dst->intra4x4_pred_mode     = src->intra4x4_pred_mode;
2139     dst->non_zero_count         = src->non_zero_count;
2140     dst->slice_table            = src->slice_table;
2141     dst->cbp_table              = src->cbp_table;
2142     dst->mb2b_xy                = src->mb2b_xy;
2143     dst->mb2b8_xy               = src->mb2b8_xy;
2144     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2145     dst->mvd_table[0]           = src->mvd_table[0];
2146     dst->mvd_table[1]           = src->mvd_table[1];
2147     dst->direct_table           = src->direct_table;
         /* scratchpad is per-thread; allocated lazily in frame_start() */
2149     dst->s.obmc_scratchpad = NULL;
2150     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155  * Allocate buffers which are not shared amongst multiple threads.
2157 static int context_init(H264Context *h){
         /* one row of saved border pixels per field: 16 luma + 8+8 chroma per MB */
2158     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2159     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
     /* fail label target of CHECKED_ALLOCZ (success return elided in this excerpt) */
2163     return -1; // free_tables will clean up for us
/**
 * Context setup shared by decoder (and encoder) init: copies basic
 * dimensions/codec id from the AVCodecContext, initializes intra
 * prediction, and installs flat (all-16) default scaling matrices.
 */
2166 static av_cold void common_init(H264Context *h){
2167     MpegEncContext * const s = &h->s;
2169     s->width = s->avctx->width;
2170     s->height = s->avctx->height;
2171     s->codec_id= s->avctx->codec->id;
2173     ff_h264_pred_init(&h->hpc, s->codec_id);
         /* -1 == "no PPS seen yet"; forces dequant table init on first PPS */
2175     h->dequant_coeff_pps= -1;
2176     s->unrestricted_mv=1;
2177     s->decode=1; //FIXME
         /* flat 16 == spec default (no scaling-list scaling) */
2179     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2180     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format, and detect AVC-format (avcC) extradata.
 * @return presumably 0 on success (return statements elided in this excerpt)
 */
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184     H264Context *h= avctx->priv_data;
2185     MpegEncContext * const s = &h->s;
2187     MPV_decode_defaults(s);
2192     s->out_format = FMT_H264;
2193     s->workaround_bugs= avctx->workaround_bugs;
2196 //    s->decode_mb= ff_h263_decode_mb;
2197     s->quarter_sample = 1;
         /* SVQ3 content is full-range ("J") 4:2:0 */
2200     if(avctx->codec_id == CODEC_ID_SVQ3)
2201         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203         avctx->pix_fmt= PIX_FMT_YUV420P;
         /* avcC extradata starts with configurationVersion == 1, as opposed
          * to an Annex-B start code — switches to length-prefixed NAL parsing */
2207     if(avctx->extradata_size > 0 && avctx->extradata &&
2208        *(char *)avctx->extradata == 1){
2215     h->thread_context[0] = h;
/**
 * Per-frame initialization: start the MPV frame and error resilience,
 * reset key_frame (IDR flags are OR'd in later), precompute per-block
 * destination offsets, lazily allocate the per-thread bipred scratchpad,
 * and reset the slice table when MBs may be touched out of order.
 * @return presumably 0 on success, negative on MPV_frame_start failure
 *         (return statements elided in this excerpt)
 */
2219 static int frame_start(H264Context *h){
2220     MpegEncContext * const s = &h->s;
2223     if(MPV_frame_start(s, s->avctx) < 0)
2225     ff_er_frame_start(s);
2227      * MPV_frame_start uses pict_type to derive key_frame.
2228      * This is incorrect for H.264; IDR markings must be used.
2229      * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230      * See decode_nal_units().
2232     s->current_picture_ptr->key_frame= 0;
2234     assert(s->linesize && s->uvlinesize);
     /* block_offset[0..23]: frame-order offsets; [24..47]: field-order
      * offsets (doubled line stride) — indexed by MB_FIELD in hl_decode */
2236     for(i=0; i<16; i++){
2237         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2241         h->block_offset[16+i]=
2242         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243         h->block_offset[24+16+i]=
2244         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247     /* can't be in alloc_tables because linesize isn't known there.
2248      * FIXME: redo bipred weight to not require extra buffer? */
2249     for(i = 0; i < s->avctx->thread_count; i++)
2250         if(!h->thread_context[i]->s.obmc_scratchpad)
2251             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253     /* some macroblocks will be accessed before they're available */
2254     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2259     // We mark the current picture as non reference after allocating it, so
2260     // that if we break out due to an error it can be released automatically
2261     // in the next MPV_frame_start().
2262     // SVQ3 as well as most other codecs have only last/next/current and thus
2263     // get released even with set reference, besides SVQ3 and others do not
2264     // mark frames as reference later "naturally".
2265     if(s->codec_id != CODEC_ID_SVQ3)
2266         s->current_picture_ptr->reference= 0;
/**
 * Save the bottom row and right column of the current (non-MBAFF)
 * macroblock into top_borders[]/left_border[] so the next MB row / MB can
 * use pre-deblocking pixels for intra prediction.  Chroma is skipped in
 * gray-only decoding.
 */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271     MpegEncContext * const s = &h->s;
         /* step back one line so index i*linesize addresses rows 0..15
          * (the src_y adjustment itself is elided in this excerpt) */
2275     src_cb -= uvlinesize;
2276     src_cr -= uvlinesize;
2278     // There are two lines saved, the line above the the top macroblock of a pair,
2279     // and the line above the bottom macroblock
2280     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2281     for(i=1; i<17; i++){
2282         h->left_border[i]= src_y[15+i*  linesize];
     /* save the MB's last luma row (16 bytes) as the top border for the row below */
2285     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2286     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2288     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2289         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
2290         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
             /* chroma columns (loop header elided in this excerpt) */
2292             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
2293             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2295         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2296         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg!=0) or copy the saved border pixels with/into the picture
 * around the current macroblock, so intra prediction reads pre-deblocking
 * neighbor pixels.  Called once before and once after intra reconstruction.
 * deblocking_filter==2 restricts the swap to same-slice neighbors.
 */
2300 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2301     MpegEncContext * const s = &h->s;
2308     if(h->deblocking_filter == 2) {
             /* filter only inside the slice: neighbor must share slice id */
2310         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2311         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313         deblock_left = (s->mb_x > 0);
2314         deblock_top =  (s->mb_y > 0);
     /* point at the top-left neighbor pixel of the MB */
2317     src_y  -=   linesize + 1;
2318     src_cb -= uvlinesize + 1;
2319     src_cr -= uvlinesize + 1;
2321 #define XCHG(a,b,t,xchg)\
     /* left column (skip row 0 when the top border is not swapped) */
2328         for(i = !deblock_top; i<17; i++){
2329             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2334         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2335         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2336         if(s->mb_x+1 < s->mb_width){
2337             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2341     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2343             for(i = !deblock_top; i<9; i++){
2344                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
2345                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2349             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2350             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): save the borders of a macroblock
 * PAIR.  Two lines are stored per component — one for each field —
 * in top_borders[0] and top_borders[1].
 */
2355 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2356     MpegEncContext * const s = &h->s;
     /* step back two lines so i*linesize addresses the pair's 32 luma rows */
2359     src_y  -= 2 *   linesize;
2360     src_cb -= 2 * uvlinesize;
2361     src_cr -= 2 * uvlinesize;
2363     // There are two lines saved, the line above the the top macroblock of a pair,
2364     // and the line above the bottom macroblock
2365     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2366     h->left_border[1]= h->top_borders[1][s->mb_x][15];
2367     for(i=2; i<34; i++){
2368         h->left_border[i]= src_y[15+i*  linesize];
2371     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
2372     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2373     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
2374     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2376     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2377         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
2378         h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
2379         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
2380         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2381         for(i=2; i<18; i++){
2382             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
2383             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2385         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2386         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2387         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2388         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/copy saved border pixels for a
 * macroblock PAIR (two interleaved field lines per component) so intra
 * prediction sees pre-deblocking neighbor pixels.
 */
2392 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2393     MpegEncContext * const s = &h->s;
2396     int deblock_left = (s->mb_x > 0);
     /* top neighbor exists only from the second MB-pair row on */
2397     int deblock_top  = (s->mb_y > 1);
2399     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2401     src_y  -= 2 *   linesize + 1;
2402     src_cb -= 2 * uvlinesize + 1;
2403     src_cr -= 2 * uvlinesize + 1;
2405 #define XCHG(a,b,t,xchg)\
     /* left column of the pair: 34 luma rows, first two skipped w/o top */
2412         for(i = (!deblock_top)<<1; i<34; i++){
2413             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2418         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2419         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2420         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2421         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2422         if(s->mb_x+1 < s->mb_width){
2423             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2424             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2428     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2430             for(i = (!deblock_top) << 1; i<18; i++){
2431                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
2432                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2436             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2437             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2439             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock into the current picture: compute destination
 * pointers, handle field (MBAFF) strides, then either copy I_PCM samples,
 * run intra prediction + IDCT, or motion compensation + IDCT, followed by
 * chroma reconstruction and, finally, in-loop deblocking.
 *
 * @param simple compile-time flag: 1 = fast path (frame MBs, H.264 only,
 *               no gray/encoder handling), 0 = full path.  Being
 *               av_always_inline, each value yields a specialized function.
 * NOTE(review): many interior lines (loop headers, else branches, closing
 * braces) are elided in this excerpt; comments below only describe what the
 * visible statements establish.
 */
2444 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2445     MpegEncContext * const s = &h->s;
2446     const int mb_x= s->mb_x;
2447     const int mb_y= s->mb_y;
2448     const int mb_xy= h->mb_xy;
2449     const int mb_type= s->current_picture.mb_type[mb_xy];
2450     uint8_t  *dest_y, *dest_cb, *dest_cr;
2451     int linesize, uvlinesize /*dct_offset*/;
2453     int *block_offset = &h->block_offset[0];
2454     const unsigned int bottom = mb_y & 1;
2455     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2456     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2457     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
     /* destination of this MB in the three picture planes */
2459     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2460     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2461     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2463     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2464     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     /* field macroblock: double strides, use field block_offset table,
      * and for the bottom field rewind dest to the field's first line */
2466     if (!simple && MB_FIELD) {
2467         linesize   = h->mb_linesize   = s->linesize * 2;
2468         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2469         block_offset = &h->block_offset[24];
2470         if(mb_y&1){ //FIXME move out of this func?
2471             dest_y -= s->linesize*15;
2472             dest_cb-= s->uvlinesize*7;
2473             dest_cr-= s->uvlinesize*7;
2477         for(list=0; list<h->list_count; list++){
2478             if(!USES_LIST(mb_type, list))
2480             if(IS_16X16(mb_type)){
2481                 int8_t *ref = &h->ref_cache[list][scan8[0]];
                 /* re-encode field refs so the deblock filter can compare them */
2482                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2484                 for(i=0; i<16; i+=4){
2485                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2486                     int ref = h->ref_cache[list][scan8[i]];
2488                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2494         linesize   = h->mb_linesize   = s->linesize;
2495         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2496 //        dct_offset = s->linesize * 16;
     /* choose the IDCT pair once, per transform size / bypass mode */
2499     if(transform_bypass){
2501         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2502     }else if(IS_8x8DCT(mb_type)){
2503         idct_dc_add = s->dsp.h264_idct8_dc_add;
2504         idct_add = s->dsp.h264_idct8_add;
2506         idct_dc_add = s->dsp.h264_idct_dc_add;
2507         idct_add = s->dsp.h264_idct_add;
     /* MBAFF intra: restore pre-deblock borders of the whole pair first */
2510     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2511        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2512         int mbt_y = mb_y&~1;
2513         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
2514         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2515         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2516         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2519     if (!simple && IS_INTRA_PCM(mb_type)) {
2522         // The pixels are stored in h->mb array in the same order as levels,
2523         // copy them in output in the correct order.
2524         for(i=0; i<16; i++) {
2525             for (y=0; y<4; y++) {
2526                 for (x=0; x<4; x++) {
2527                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2531         for(i=16; i<16+4; i++) {
2532             for (y=0; y<4; y++) {
2533                 for (x=0; x<4; x++) {
2534                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2538         for(i=20; i<20+4; i++) {
2539             for (y=0; y<4; y++) {
2540                 for (x=0; x<4; x++) {
2541                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2546         if(IS_INTRA(mb_type)){
             /* swap in pre-deblock borders around the intra prediction */
2547             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2548                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2550             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2551                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2552                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2555             if(IS_INTRA4x4(mb_type)){
2556                 if(simple || !s->encoding){
2557                     if(IS_8x8DCT(mb_type)){
2558                         for(i=0; i<16; i+=4){
2559                             uint8_t * const ptr= dest_y + block_offset[i];
2560                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2561                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2562                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2563                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                             /* nnz==1 with a DC-only block: cheap DC add path */
2565                                 if(nnz == 1 && h->mb[i*16])
2566                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2568                                     idct_add(ptr, h->mb + i*16, linesize);
2572                         for(i=0; i<16; i++){
2573                             uint8_t * const ptr= dest_y + block_offset[i];
2575                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                             /* modes needing top-right samples: synthesize them
                              * from the last available pixel when unavailable */
2578                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2579                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2580                                 assert(mb_y || linesize <= block_offset[i]);
2581                                 if(!topright_avail){
2582                                     tr= ptr[3 - linesize]*0x01010101;
2583                                     topright= (uint8_t*) &tr;
2585                                     topright= ptr + 4 - linesize;
2589                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2590                             nnz = h->non_zero_count_cache[ scan8[i] ];
2593                                     if(nnz == 1 && h->mb[i*16])
2594                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2596                                         idct_add(ptr, h->mb + i*16, linesize);
                                 /* SVQ3 uses its own idct/dequant */
2598                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2603                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2605                     if(!transform_bypass)
2606                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2608                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2610             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2611                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
         /* inter macroblock: motion compensation */
2613             hl_motion(h, dest_y, dest_cb, dest_cr,
2614                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2615                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2616                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     /* luma residual for non-intra4x4 MBs */
2620         if(!IS_INTRA4x4(mb_type)){
2622                 if(IS_INTRA16x16(mb_type)){
2623                     for(i=0; i<16; i++){
2624                         if(h->non_zero_count_cache[ scan8[i] ])
2625                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626                         else if(h->mb[i*16])
2627                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2630                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2631                     for(i=0; i<16; i+=di){
2632                         int nnz = h->non_zero_count_cache[ scan8[i] ];
2634                             if(nnz==1 && h->mb[i*16])
2635                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2637                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2642                 for(i=0; i<16; i++){
2643                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2644                         uint8_t * const ptr= dest_y + block_offset[i];
2645                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     /* chroma residual (skipped in gray-only mode) */
2651         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2652             uint8_t *dest[2] = {dest_cb, dest_cr};
2653             if(transform_bypass){
2654                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2656                 idct_add = s->dsp.h264_idct_add;
2657                 idct_dc_add = s->dsp.h264_idct_dc_add;
                 /* chroma DC uses intra/inter-specific dequant tables (1/4, 2/5) */
2658                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2659                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2662                 for(i=16; i<16+8; i++){
2663                     if(h->non_zero_count_cache[ scan8[i] ])
2664                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2665                     else if(h->mb[i*16])
2666                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2669                 for(i=16; i<16+8; i++){
2670                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2671                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2672                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
     /* in-loop deblocking; MBAFF deblocks a full pair once the bottom MB is done */
2678     if(h->deblocking_filter) {
2679         if (!simple && FRAME_MBAFF) {
2680             //FIXME try deblocking one mb at a time?
2681             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2682             const int mb_y= s->mb_y - 1;
2683             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2684             const int mb_xy= mb_x + mb_y*s->mb_stride;
2685             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
2686             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2687             if (!bottom) return;
2688             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2689             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2690             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2692             if(IS_INTRA(mb_type_top | mb_type_bottom))
2693                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2695             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
             /* temporarily retarget the context at the top MB of the pair */
2698             s->mb_y--; h->mb_xy -= s->mb_stride;
2699             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2700             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2701             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2702             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2703             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2705             s->mb_y++; h->mb_xy += s->mb_stride;
2706             tprintf(h->s.avctx, "call mbaff filter_mb\n");
2707             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2708             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2709             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2710             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2712             tprintf(h->s.avctx, "call filter_mb\n");
2713             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2714             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2715             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2721  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2723 static void hl_decode_mb_simple(H264Context *h){
     /* simple=1 lets the always-inline body fold away the uncommon paths */
2724     hl_decode_mb_internal(h, 1);
2728  * Process a macroblock; this handles edge cases, such as interlacing.
2730 static void av_noinline hl_decode_mb_complex(H264Context *h){
     /* simple=0 keeps the full (MBAFF/gray/SVQ3/PCM) code paths */
2731     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast (simple) or full
 * (complex) specialization of hl_decode_mb_internal(), based on features
 * of the current MB and build configuration.
 */
2734 static void hl_decode_mb(H264Context *h){
2735     MpegEncContext * const s = &h->s;
2736     const int mb_xy= h->mb_xy;
2737     const int mb_type= s->current_picture.mb_type[mb_xy];
         /* any of these features requires the complex path */
2738     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2739                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
         /* encoder-only contexts may skip reconstruction entirely */
2741     if(ENABLE_H264_ENCODER && !s->decode)
2745         hl_decode_mb_complex(h);
2746     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view in place: for the
 * bottom field, offset each plane by one line; double every linesize so
 * stepping a line skips the other field; mark the picture's reference
 * with the field parity.
 * NOTE(review): pic->reference is (re)assigned on every loop iteration;
 * harmless, but it could live outside the loop — confirm against the
 * unelided file before changing.
 */
2749 static void pic_as_field(Picture *pic, const int parity){
2751     for (i = 0; i < 4; ++i) {
2752         if (parity == PICT_BOTTOM_FIELD)
2753             pic->data[i] += pic->linesize[i];
2754         pic->reference = parity;
2755         pic->linesize[i] *= 2;
/**
 * Copy *src into *dest as a field picture if src is a reference for the
 * requested parity, adjusting pic_id by id_add (used to distinguish the
 * two fields of a pair).
 * @return 1 if a field was emitted, 0 otherwise (the copy and return
 *         statements are partly elided in this excerpt)
 */
2759 static int split_field_copy(Picture *dest, Picture *src,
2760                             int parity, int id_add){
2761     int match = !!(src->reference & parity);
2765         pic_as_field(dest, parity);
2767         dest->pic_id += id_add;
2774  * Split one reference list into field parts, interleaving by parity
2775  * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2776  * set to look at the actual start of data for that field.
2778  * @param dest output list
2779  * @param dest_len maximum number of fields to put in dest
2780  * @param src the source reference list containing fields and/or field pairs
2781  *            (aka short_ref/long_ref, or
2782  *            refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2783  * @param src_len number of Picture's in source (pairs and unmatched fields)
2784  * @param parity the parity of the picture being decoded/needing
2785  *        these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2786  * @return number of fields placed in dest
2788 static int split_field_half_ref_list(Picture *dest, int dest_len,
2789                                      Picture *src, int src_len, int parity){
     /* alternate between same-parity and opposite-parity candidates,
      * starting with same parity (spec 8.2.4.2.5 interleaving rule);
      * same_i/opp_i cursors are declared in elided lines */
2790     int same_parity   = 1;
2796     for (out_i = 0; out_i < dest_len; out_i += field_output) {
2797         if (same_parity && same_i < src_len) {
2798             field_output = split_field_copy(dest + out_i, src + same_i,
             /* a miss keeps looking in the same-parity stream */
2800             same_parity = !field_output;
2803         } else if (opp_i < src_len) {
2804             field_output = split_field_copy(dest + out_i, src + opp_i,
2805                                             PICT_FRAME - parity, 0);
2806             same_parity = field_output;
2818  * Split the reference frame list into a reference field list.
2819  * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2820  * The input list contains both reference field pairs and
2821  * unmatched reference fields; it is ordered as spec describes
2822  * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2823  * unmatched field pairs are also present. Conceptually this is equivalent
2824  * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2826  * @param dest output reference list where ordered fields are to be placed
2827  * @param dest_len max number of fields to place at dest
2828  * @param src source reference list, as described above
2829  * @param src_len number of pictures (pairs and unmatched fields) in src
2830  * @param parity parity of field being currently decoded
2831  *        (one of PICT_{TOP,BOTTOM}_FIELD)
2832  * @param long_i index into src array that holds first long reference picture,
2833  *        or src_len if no long refs present.
2835 static int split_field_ref_list(Picture *dest, int dest_len,
2836                                 Picture *src, int src_len,
2837                                 int parity, int long_i){
     /* short-term refs first [0, long_i), then long-term refs [long_i, src_len) */
2839     int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2843     i += split_field_half_ref_list(dest, dest_len, src + long_i,
2844                                    src_len - long_i, parity);
2849  * fills the default_ref_list.
2851 static int fill_default_ref_list(H264Context *h){
2852     MpegEncContext * const s = &h->s;
2854     int smallest_poc_greater_than_current = -1;
2856     Picture sorted_short_ref[32];
2857     Picture field_entry_list[2][32];
2858     Picture *frame_list[2];
     /* field pictures build an intermediate frame list first, then split it
      * into fields; frame pictures write straight into default_ref_list */
2860     if (FIELD_PICTURE) {
2861         structure_sel = PICT_FRAME;
2862         frame_list[0] = field_entry_list[0];
2863         frame_list[1] = field_entry_list[1];
2866         frame_list[0] = h->default_ref_list[0];
2867         frame_list[1] = h->default_ref_list[1];
2870     if(h->slice_type==FF_B_TYPE){
2877         /* sort frame according to poc in B slice */
2878         for(out_i=0; out_i<h->short_ref_count; out_i++){
2880             int best_poc=INT_MAX;
             /* selection sort: next-smallest poc above the previous limit
              * (limit/best_i bookkeeping partly elided in this excerpt) */
2882             for(i=0; i<h->short_ref_count; i++){
2883                 const int poc= h->short_ref[i]->poc;
2884                 if(poc > limit && poc < best_poc){
2890             assert(best_i != INT_MIN);
2893             sorted_short_ref[out_i]= *h->short_ref[best_i];
2894             tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
             /* remember where future (>= current poc) refs start */
2895             if (-1 == smallest_poc_greater_than_current) {
2896                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2897                     smallest_poc_greater_than_current = out_i;
2902         tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2904         // find the largest poc
         /* L0 walks past refs forward then future; L1 the reverse (step) */
2905         for(list=0; list<2; list++){
2908             int step= list ? -1 : 1;
2910             for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2912                 while(j<0 || j>= h->short_ref_count){
2913                     if(j != -99 && step == (list ? -1 : 1))
2916                     j= smallest_poc_greater_than_current + (step>>1);
2918                 sel = sorted_short_ref[j].reference | structure_sel;
2919                 if(sel != PICT_FRAME) continue;
2920                 frame_list[list][index  ]= sorted_short_ref[j];
2921                 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2923             short_len[list] = index;
             /* append long-term refs, pic_id = long-term index */
2925             for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2927                 if(h->long_ref[i] == NULL) continue;
2928                 sel = h->long_ref[i]->reference | structure_sel;
2929                 if(sel != PICT_FRAME) continue;
2931                 frame_list[ list ][index  ]= *h->long_ref[i];
2932                 frame_list[ list ][index++].pic_id= i;
2937         for(list=0; list<2; list++){
2939                 len[list] = split_field_ref_list(h->default_ref_list[list],
2943                                                  s->picture_structure,
2946             // swap the two first elements of L1 when L0 and L1 are identical
2947             if(list && len[0] > 1 && len[0] == len[1])
2948                 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2950                         FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2954             if(len[list] < h->ref_count[ list ])
2955                 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
     /* P/SP slices: single list, short-term refs in decode order, then long */
2962         for(i=0; i<h->short_ref_count; i++){
2964             sel = h->short_ref[i]->reference | structure_sel;
2965             if(sel != PICT_FRAME) continue;
2966             frame_list[0][index  ]= *h->short_ref[i];
2967             frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2970         for(i = 0; i < 16; i++){
2972             if(h->long_ref[i] == NULL) continue;
2973             sel = h->long_ref[i]->reference | structure_sel;
2974             if(sel != PICT_FRAME) continue;
2975             frame_list[0][index  ]= *h->long_ref[i];
2976             frame_list[0][index++].pic_id= i;
2980             index = split_field_ref_list(h->default_ref_list[0],
2981                                          h->ref_count[0], frame_list[0],
2982                                          index, s->picture_structure,
2985         if(index < h->ref_count[0])
2986             memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2989     for (i=0; i<h->ref_count[0]; i++) {
2990         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2992     if(h->slice_type==FF_B_TYPE){
2993         for (i=0; i<h->ref_count[1]; i++) {
2994             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
3001 static void print_short_term(H264Context *h);
3002 static void print_long_term(H264Context *h);
3005  * Extract structure information about the picture described by pic_num in
3006  * the current decoding context (frame or field). Note that pic_num is
3007  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3008  * @param pic_num picture number for which to extract structure information
3009  * @param structure one of PICT_XXX describing structure of picture
3011  * @return frame number (short term) or long term index of picture
3012  *         described by pic_num
3014 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3015     MpegEncContext * const s = &h->s;
3017     *structure = s->picture_structure;
         /* the field-picture pic_num parity test guarding this branch, and
          * the return expression, are elided in this excerpt */
3020             /* opposite field */
3021             *structure ^= PICT_FRAME;
/**
 * Parses the ref_pic_list_reordering() slice-header syntax and applies the
 * requested reordering to h->ref_list, which starts as a copy of
 * h->default_ref_list.
 * NOTE(review): several closing braces, break statements and local
 * declarations (frame_num, i, long_idx) were dropped by extraction;
 * the code below is left byte-identical to the chunk.
 */
3028 static int decode_ref_pic_list_reordering(H264Context *h){
3029 MpegEncContext * const s = &h->s;
3030 int list, index, pic_structure;
3032 print_short_term(h);
3034 if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before func
3036 for(list=0; list<h->list_count; list++){
/* Start from the default list; reordering edits it in place. */
3037 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag for this list */
3039 if(get_bits1(&s->gb)){
3040 int pred= h->curr_pic_num;
3042 for(index=0; ; index++){
3043 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3044 unsigned int pic_id;
3046 Picture *ref = NULL;
/* idc==3 terminates the reordering commands for this list. */
3048 if(reordering_of_pic_nums_idc==3)
3051 if(index >= h->ref_count[list]){
3052 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3056 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term reference, signalled as +/- difference
 * from the predicted picture number. */
3057 if(reordering_of_pic_nums_idc<2){
3058 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3061 if(abs_diff_pic_num > h->max_pic_num){
3062 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3066 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3067 else pred+= abs_diff_pic_num;
/* Wrap; max_pic_num is a power of two. */
3068 pred &= h->max_pic_num - 1;
3070 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list (newest first) for a picture with
 * matching frame_num and compatible field parity. */
3072 for(i= h->short_ref_count-1; i>=0; i--){
3073 ref = h->short_ref[i];
3074 assert(ref->reference);
3075 assert(!ref->long_ref);
3076 if(ref->data[0] != NULL &&
3077 ref->frame_num == frame_num &&
3078 (ref->reference & pic_structure) &&
3079 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long-term reference, addressed by long_term_pic_idx. */
3086 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3088 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3091 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3094 ref = h->long_ref[long_idx];
3095 assert(!(ref && !ref->reference));
3096 if(ref && (ref->reference & pic_structure)){
3097 ref->pic_id= pic_id;
3098 assert(ref->long_ref);
3106 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3107 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift trailing entries down and insert the found picture at 'index'. */
3109 for(i=index; i+1<h->ref_count[list]; i++){
3110 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3113 for(; i > index; i--){
3114 h->ref_list[list][i]= h->ref_list[list][i-1];
3116 h->ref_list[list][index]= *ref;
3118 pic_as_field(&h->ref_list[list][index], pic_structure);
3122 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* Replace still-empty entries with the current picture so later code never
 * dereferences a NULL frame (data[0]==NULL marks an empty slot). */
3128 for(list=0; list<h->list_count; list++){
3129 for(index= 0; index < h->ref_count[list]; index++){
3130 if(!h->ref_list[list][index].data[0])
3131 h->ref_list[list][index]= s->current_picture;
/* Set up direct-mode helpers for B slices. */
3135 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3136 direct_dist_scale_factor(h);
3137 direct_ref_list_init(h);
/**
 * Builds per-field reference entries for MBAFF decoding: for every frame in
 * ref_list[list][i], two field Pictures are synthesized at indices 16+2*i
 * (top) and 16+2*i+1 (bottom), and the prediction weights/offsets are
 * duplicated for them.
 * NOTE(review): the inner j-loops' headers and several closing braces were
 * dropped by extraction; code left byte-identical.
 */
3141 static void fill_mbaff_ref_list(H264Context *h){
3143 for(list=0; list<2; list++){ //FIXME try list_count
3144 for(i=0; i<h->ref_count[list]; i++){
3145 Picture *frame = &h->ref_list[list][i];
3146 Picture *field = &h->ref_list[list][16+2*i];
/* Top field: same data pointers as the frame, doubled line stride. */
3149 field[0].linesize[j] <<= 1;
3150 field[0].reference = PICT_TOP_FIELD;
/* Bottom field: copy of the top field, data advanced by one frame line. */
3151 field[1] = field[0];
3153 field[1].data[j] += frame->linesize[j];
3154 field[1].reference = PICT_BOTTOM_FIELD;
/* Both synthesized fields inherit the frame's explicit weights/offsets. */
3156 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3157 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3159 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3160 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Duplicate implicit weights along both axes of the [ref1][ref0] table. */
3164 for(j=0; j<h->ref_count[1]; j++){
3165 for(i=0; i<h->ref_count[0]; i++)
3166 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3167 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3168 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): per-list, per-reference luma and chroma weights/offsets.
 * Sets h->use_weight / h->use_weight_chroma when any weight differs from the
 * default (1<<log2_denom) or any offset is nonzero.
 * NOTE(review): local declarations (list, i, j), some assignments
 * (e.g. setting use_weight) and closing braces were dropped by extraction.
 */
3172 static int pred_weight_table(H264Context *h){
3173 MpegEncContext * const s = &h->s;
3175 int luma_def, chroma_def;
3178 h->use_weight_chroma= 0;
3179 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3180 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* Default weight equals the denominator, i.e. an effective factor of 1.0. */
3181 luma_def = 1<<h->luma_log2_weight_denom;
3182 chroma_def = 1<<h->chroma_log2_weight_denom;
3184 for(list=0; list<2; list++){
3185 for(i=0; i<h->ref_count[list]; i++){
3186 int luma_weight_flag, chroma_weight_flag;
3188 luma_weight_flag= get_bits1(&s->gb);
3189 if(luma_weight_flag){
3190 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3191 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3192 if( h->luma_weight[list][i] != luma_def
3193 || h->luma_offset[list][i] != 0)
/* Flag absent: fall back to the identity weight/offset. */
3196 h->luma_weight[list][i]= luma_def;
3197 h->luma_offset[list][i]= 0;
3200 chroma_weight_flag= get_bits1(&s->gb);
3201 if(chroma_weight_flag){
3204 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3205 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3206 if( h->chroma_weight[list][i][j] != chroma_def
3207 || h->chroma_offset[list][i][j] != 0)
3208 h->use_weight_chroma= 1;
3213 h->chroma_weight[list][i][j]= chroma_def;
3214 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry a second (list 1) weight table. */
3218 if(h->slice_type != FF_B_TYPE) break;
3220 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weighted-prediction table for B slices
 * (weighted_bipred_idc == 2): weights are derived from POC distances
 * between the current picture and each (ref0, ref1) pair.
 * NOTE(review): local declarations (ref0/ref1 etc.), the early-return body of
 * the trivial 1x1 case and several braces were dropped by extraction.
 */
3224 static void implicit_weight_table(H264Context *h){
3225 MpegEncContext * const s = &h->s;
3227 int cur_poc = s->current_picture_ptr->poc;
/* Trivial case: one ref per list and the current POC is exactly midway. */
3229 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3230 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3232 h->use_weight_chroma= 0;
3237 h->use_weight_chroma= 2;
/* Implicit mode always uses a fixed denominator of 2^5 = 64. */
3238 h->luma_log2_weight_denom= 5;
3239 h->chroma_log2_weight_denom= 5;
3241 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3242 int poc0 = h->ref_list[0][ref0].poc;
3243 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3244 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation. */
3245 int td = av_clip(poc1 - poc0, -128, 127);
3247 int tb = av_clip(cur_poc - poc0, -128, 127);
3248 int tx = (16384 + (FFABS(td) >> 1)) / td;
3249 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range factors fall back to the equal weight 32/32. */
3250 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3251 h->implicit_weight[ref0][ref1] = 32;
3253 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3255 h->implicit_weight[ref0][ref1] = 32;
3261 * Mark a picture as no longer needed for reference. The refmask
3262 * argument allows unreferencing of individual fields or the whole frame.
3263 * If the picture becomes entirely unreferenced, but is being held for
3264 * display purposes, it is marked as such.
3265 * @param refmask mask of fields to unreference; the mask is bitwise
3266 * anded with the reference marking of pic
3267 * @return non-zero if pic becomes entirely unreferenced (except possibly
3268 * for display purposes) zero if one of the fields remains in
3271 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* If any field bit survives the mask, the picture stays a reference. */
3273 if (pic->reference &= refmask) {
/* Fully unreferenced: if it is still queued for output, keep it alive by
 * tagging it DELAYED_PIC_REF instead of 0. */
3276 for(i = 0; h->delayed_pic[i]; i++)
3277 if(pic == h->delayed_pic[i]){
3278 pic->reference=DELAYED_PIC_REF;
3286 * instantaneous decoder refresh.
/* IDR: drop every long-term and short-term reference picture; the DPB
 * reference state is reset before decoding the IDR slice.
 * NOTE(review): the 'int i;' declaration and closing braces were dropped
 * by extraction. */
3288 static void idr(H264Context *h){
3291 for(i=0; i<16; i++){
3292 if (h->long_ref[i] != NULL) {
3293 unreference_pic(h, h->long_ref[i], 0);
3294 h->long_ref[i]= NULL;
3297 h->long_ref_count=0;
3299 for(i=0; i<h->short_ref_count; i++){
3300 unreference_pic(h, h->short_ref[i], 0);
3301 h->short_ref[i]= NULL;
3303 h->short_ref_count=0;
3306 /* forget old pics after a seek */
3307 static void flush_dpb(AVCodecContext *avctx){
3308 H264Context *h= avctx->priv_data;
/* Drop all pictures queued for delayed output. */
3310 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3311 if(h->delayed_pic[i])
3312 h->delayed_pic[i]->reference= 0;
3313 h->delayed_pic[i]= NULL;
/* Reset output-ordering state so the next picture is emitted normally. */
3315 h->outputed_poc= INT_MIN;
3317 if(h->s.current_picture_ptr)
3318 h->s.current_picture_ptr->reference= 0;
3319 h->s.first_field= 0;
/* Let the generic MPEG layer release its buffers too. */
3320 ff_mpeg_flush(avctx);
3324 * Find a Picture in the short term reference list by frame number.
3325 * @param frame_num frame number to search for
3326 * @param idx the index into h->short_ref where returned picture is found
3327 * undefined if no picture found.
3328 * @return pointer to the found picture, or NULL if no pic with the provided
3329 * frame number is found
3331 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3332 MpegEncContext * const s = &h->s;
/* Linear scan; the short-term list is small (<=16 entries). */
3335 for(i=0; i<h->short_ref_count; i++){
3336 Picture *pic= h->short_ref[i];
3337 if(s->avctx->debug&FF_DEBUG_MMCO)
3338 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3339 if(pic->frame_num == frame_num) {
3348 * Remove a picture from the short term reference list by its index in
3349 * that list. This does no checking on the provided index; it is assumed
3350 * to be valid. Other list entries are shifted down.
3351 * @param i index into h->short_ref of picture to remove.
3353 static void remove_short_at_index(H264Context *h, int i){
3354 assert(i >= 0 && i < h->short_ref_count);
3355 h->short_ref[i]= NULL;
/* Compact the list: move the entries after i down by one slot.
 * short_ref_count has already been decremented, so the count is correct. */
3356 if (--h->short_ref_count)
3357 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Remove a picture from the short-term list by frame number. */
3362 * @return the removed picture or NULL if an error occurs
3364 static Picture * remove_short(H264Context *h, int frame_num){
3365 MpegEncContext * const s = &h->s;
3369 if(s->avctx->debug&FF_DEBUG_MMCO)
3370 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
/* Locate by frame_num, then delete via the index-based helper. */
3372 pic = find_short(h, frame_num, &i);
3374 remove_short_at_index(h, i);
3380 * Remove a picture from the long term reference list by its index in
3381 * that list. This does no checking on the provided index; it is assumed
3382 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3383 * @param i index into h->long_ref of picture to remove.
3385 static void remove_long_at_index(H264Context *h, int i){
/* Long-term list is index-addressed (sparse), so no compaction is done. */
3386 h->long_ref[i]= NULL;
3387 h->long_ref_count--;
/* Remove the long-term reference stored at index i, if any. */
3392 * @return the removed picture or NULL if an error occurs
3394 static Picture * remove_long(H264Context *h, int i){
3397 pic= h->long_ref[i];
/* NOTE(review): the NULL check guarding this call appears to have been
 * dropped by extraction. */
3399 remove_long_at_index(h, i);
3405 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO is set. */
3407 static void print_short_term(H264Context *h) {
3409 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3410 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3411 for(i=0; i<h->short_ref_count; i++){
3412 Picture *pic= h->short_ref[i];
3413 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3419 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is set.
 * NOTE(review): the NULL check before dereferencing pic was dropped by
 * extraction. */
3421 static void print_long_term(H264Context *h) {
3423 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3424 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3425 for(i = 0; i < 16; i++){
3426 Picture *pic= h->long_ref[i];
3428 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3435 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO commands to the DPB reference lists, then handles
 * the implicit "sliding window" insertion of the current picture into the
 * short-term list, with special cases for the second field of a
 * complementary field pair (7.4.3, NOTE 2 and 3).
 * NOTE(review): local declarations (i, j, pic), many break statements,
 * case labels (e.g. MMCO_LONG, MMCO_RESET) and closing braces were dropped
 * by extraction; code left byte-identical to the chunk. */
3437 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3438 MpegEncContext * const s = &h->s;
3440 int current_ref_assigned=0;
3443 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3444 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3446 for(i=0; i<mmco_count; i++){
3447 int structure, frame_num, unref_pic;
3448 if(s->avctx->debug&FF_DEBUG_MMCO)
3449 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3451 switch(mmco[i].opcode){
/* Mark a short-term picture (or one of its fields) unused for reference. */
3452 case MMCO_SHORT2UNUSED:
3453 if(s->avctx->debug&FF_DEBUG_MMCO)
3454 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3455 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3456 pic = find_short(h, frame_num, &j);
3458 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3459 remove_short_at_index(h, j);
3460 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3461 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* Move a short-term picture to a long-term index. */
3463 case MMCO_SHORT2LONG:
3464 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3465 h->long_ref[mmco[i].long_arg]->frame_num ==
3466 mmco[i].short_pic_num / 2) {
3467 /* do nothing, we've already moved this field pair. */
/* In field mode, picture numbers are 2*frame_num(+1); shift back. */
3469 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* Evict whatever currently occupies the target long-term slot. */
3471 pic= remove_long(h, mmco[i].long_arg);
3472 if(pic) unreference_pic(h, pic, 0);
3474 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3475 if (h->long_ref[ mmco[i].long_arg ]){
3476 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3477 h->long_ref_count++;
/* Mark a long-term picture (or one of its fields) unused for reference. */
3481 case MMCO_LONG2UNUSED:
3482 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3483 pic = h->long_ref[j];
3485 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3486 remove_long_at_index(h, j);
3487 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3488 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* (MMCO_LONG) Assign the current picture to a long-term index. */
3492 if (FIELD_PICTURE && !s->first_field) {
3493 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3494 /* Just mark second field as referenced */
3496 } else if (s->current_picture_ptr->reference) {
3497 /* First field in pair is in short term list or
3498 * at a different long term index.
3499 * This is not allowed; see 7.4.3, notes 2 and 3.
3500 * Report the problem and keep the pair where it is,
3501 * and mark this field valid.
3503 av_log(h->s.avctx, AV_LOG_ERROR,
3504 "illegal long term reference assignment for second "
3505 "field in complementary field pair (first field is "
3506 "short term or has non-matching long index)\n");
3512 pic= remove_long(h, mmco[i].long_arg);
3513 if(pic) unreference_pic(h, pic, 0);
3515 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3516 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3517 h->long_ref_count++;
3520 s->current_picture_ptr->reference |= s->picture_structure;
3521 current_ref_assigned=1;
/* Truncate the long-term list to the new maximum index. */
3523 case MMCO_SET_MAX_LONG:
3524 assert(mmco[i].long_arg <= 16);
3525 // just remove the long term which index is greater than new max
3526 for(j = mmco[i].long_arg; j<16; j++){
3527 pic = remove_long(h, j);
3528 if (pic) unreference_pic(h, pic, 0);
/* (MMCO_RESET) Drop every reference, short and long. */
3532 while(h->short_ref_count){
3533 pic= remove_short(h, h->short_ref[0]->frame_num);
3534 if(pic) unreference_pic(h, pic, 0);
3536 for(j = 0; j < 16; j++) {
3537 pic= remove_long(h, j);
3538 if(pic) unreference_pic(h, pic, 0);
/* Second field of a pair whose first field is already a reference:
 * do not re-insert, just validate the existing entry. */
3545 if (!current_ref_assigned && FIELD_PICTURE &&
3546 !s->first_field && s->current_picture_ptr->reference) {
3548 /* Second field of complementary field pair; the first field of
3549 * which is already referenced. If short referenced, it
3550 * should be first entry in short_ref. If not, it must exist
3551 * in long_ref; trying to put it on the short list here is an
3552 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3554 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3555 /* Just mark the second field valid */
3556 s->current_picture_ptr->reference = PICT_FRAME;
3557 } else if (s->current_picture_ptr->long_ref) {
3558 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3559 "assignment for second field "
3560 "in complementary field pair "
3561 "(first field is long term)\n");
3564 * First field in reference, but not in any sensible place on our
3565 * reference lists. This shouldn't happen unless reference
3566 * handling somewhere else is wrong.
3570 current_ref_assigned = 1;
/* Default (sliding window): push the current picture onto the front of
 * the short-term list. */
3573 if(!current_ref_assigned){
3574 pic= remove_short(h, s->current_picture_ptr->frame_num);
3576 unreference_pic(h, pic, 0);
3577 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3580 if(h->short_ref_count)
3581 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3583 h->short_ref[0]= s->current_picture_ptr;
3584 h->short_ref[0]->long_ref=0;
3585 h->short_ref_count++;
3586 s->current_picture_ptr->reference |= s->picture_structure;
/* Safety net against corrupt streams: enforce sps.ref_frame_count by
 * discarding one picture (a long-term one first, if any). */
3589 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3591 /* We have too many reference frames, probably due to corrupted
3592 * stream. Need to discard one frame. Prevents overrun of the
3593 * short_ref and long_ref buffers.
3595 av_log(h->s.avctx, AV_LOG_ERROR,
3596 "number of reference frames exceeds max (probably "
3597 "corrupt input), discarding one\n");
3599 if (h->long_ref_count) {
3600 for (i = 0; i < 16; ++i)
3605 pic = h->long_ref[i];
3606 remove_long_at_index(h, i);
3608 pic = h->short_ref[h->short_ref_count - 1];
3609 remove_short_at_index(h, h->short_ref_count - 1);
3611 unreference_pic(h, pic, 0);
3614 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices this is the simplified no_output_of_prior_pics /
 * long_term_reference flags; otherwise either an explicit MMCO command list
 * or an implicit sliding-window command is synthesized.
 * NOTE(review): several lines (mmco_index bookkeeping, returns, braces)
 * were dropped by extraction; code left byte-identical.
 */
3619 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3620 MpegEncContext * const s = &h->s;
3623 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped to broken_link (0 -> -1). */
3624 s->broken_link= get_bits1(gb) -1;
3625 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3626 if(h->mmco[0].long_arg == -1)
3629 h->mmco[0].opcode= MMCO_LONG;
3633 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3634 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3635 MMCOOpcode opcode= get_ue_golomb(gb);
3637 h->mmco[i].opcode= opcode;
3638 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, converted to an absolute pic num. */
3639 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3640 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3641 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3645 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3646 unsigned int long_arg= get_ue_golomb(gb);
/* LONG2UNUSED in field mode may address individual fields (<32). */
3647 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3648 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3651 h->mmco[i].long_arg= long_arg;
3654 if(opcode > (unsigned)MMCO_LONG){
3655 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3658 if(opcode == MMCO_END)
/* Implicit marking: synthesize a sliding-window SHORT2UNUSED for the
 * oldest short-term picture once the DPB is full, except when the
 * current picture is the second field of an already-referenced pair. */
3663 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3665 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3666 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3667 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3668 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* In field mode both fields of the frame must be unreferenced. */
3670 if (FIELD_PICTURE) {
3671 h->mmco[0].short_pic_num *= 2;
3672 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3673 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three poc_type modes and stores the per-field and frame POCs in
 * s->current_picture_ptr.
 * NOTE(review): several lines (field_poc declaration, some resets in the
 * IDR branches, field_poc[0] assignment for poc_type 0, closing braces)
 * were dropped by extraction; code left byte-identical.
 */
3684 static int init_poc(H264Context *h){
3685 MpegEncContext * const s = &h->s;
3686 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* frame_num_offset accumulates max_frame_num each time frame_num wraps. */
3689 if(h->nal_unit_type == NAL_IDR_SLICE){
3690 h->frame_num_offset= 0;
3692 if(h->frame_num < h->prev_frame_num)
3693 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3695 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: explicit poc_lsb in the bitstream, MSB tracked by wrap
 * detection against the previous picture. */
3698 if(h->sps.poc_type==0){
3699 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3701 if(h->nal_unit_type == NAL_IDR_SLICE){
3706 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3707 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3708 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3709 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3711 h->poc_msb = h->prev_poc_msb;
3712 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3714 field_poc[1] = h->poc_msb + h->poc_lsb;
3715 if(s->picture_structure == PICT_FRAME)
3716 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num via the SPS offset cycle. */
3717 }else if(h->sps.poc_type==1){
3718 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3721 if(h->sps.poc_cycle_length != 0)
3722 abs_frame_num = h->frame_num_offset + h->frame_num;
3726 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3729 expected_delta_per_poc_cycle = 0;
3730 for(i=0; i < h->sps.poc_cycle_length; i++)
3731 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3733 if(abs_frame_num > 0){
3734 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3735 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3737 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3738 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3739 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3743 if(h->nal_ref_idc == 0)
3744 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3746 field_poc[0] = expectedpoc + h->delta_poc[0];
3747 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3749 if(s->picture_structure == PICT_FRAME)
3750 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC follows decoding order (2*frame_num, -1 for non-refs). */
3753 if(h->nal_unit_type == NAL_IDR_SLICE){
3756 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3757 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Store per-field POCs; a frame's POC is the minimum of its fields. */
3763 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3764 s->current_picture_ptr->field_poc[0]= field_poc[0];
3765 s->current_picture_ptr->poc = field_poc[0];
3767 if(s->picture_structure != PICT_TOP_FIELD) {
3768 s->current_picture_ptr->field_poc[1]= field_poc[1];
3769 s->current_picture_ptr->poc = field_poc[1];
3771 if(!FIELD_PICTURE || !s->first_field) {
3772 Picture *cur = s->current_picture_ptr;
3773 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3781 * initialize scan tables
/* Builds the zigzag/field scan tables, permuted to match the coefficient
 * layout expected by the selected (possibly SIMD) IDCT implementation:
 * the plain C IDCT uses the raw tables, otherwise elements are remapped
 * with the T() macros below. The *_q0 pointers always reference the
 * unpermuted tables, used for lossless (transform-bypass) blocks. */
3783 static void init_scan_tables(H264Context *h){
3784 MpegEncContext * const s = &h->s;
3786 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3787 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3788 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3790 for(i=0; i<16; i++){
/* 4x4 permutation: swap the two 2-bit halves of the index. */
3791 #define T(x) (x>>2) | ((x<<2) & 0xF)
3792 h->zigzag_scan[i] = T(zigzag_scan[i]);
3793 h-> field_scan[i] = T( field_scan[i]);
3797 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3798 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3799 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3800 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3801 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3803 for(i=0; i<64; i++){
/* 8x8 permutation: swap the two 3-bit halves of the index. */
3804 #define T(x) (x>>3) | ((x&7)<<3)
3805 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3806 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3807 h->field_scan8x8[i] = T(field_scan8x8[i]);
3808 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3812 if(h->sps.transform_bypass){ //FIXME same ugly
3813 h->zigzag_scan_q0 = zigzag_scan;
3814 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3815 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3816 h->field_scan_q0 = field_scan;
3817 h->field_scan8x8_q0 = field_scan8x8;
3818 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3820 h->zigzag_scan_q0 = h->zigzag_scan;
3821 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3822 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3823 h->field_scan_q0 = h->field_scan;
3824 h->field_scan8x8_q0 = h->field_scan8x8;
3825 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3830 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-picture decoding state (current picture, strides,
 * POC/frame_num trackers, reference lists, dequant tables) from the master
 * context into a worker thread's context before sliced decoding. */
3832 static void clone_slice(H264Context *dst, H264Context *src)
3834 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3835 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3836 dst->s.current_picture = src->s.current_picture;
3837 dst->s.linesize = src->s.linesize;
3838 dst->s.uvlinesize = src->s.uvlinesize;
3839 dst->s.first_field = src->s.first_field;
3841 dst->prev_poc_msb = src->prev_poc_msb;
3842 dst->prev_poc_lsb = src->prev_poc_lsb;
3843 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3844 dst->prev_frame_num = src->prev_frame_num;
3845 dst->short_ref_count = src->short_ref_count;
3847 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3848 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3849 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3850 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3852 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3853 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3857 * decodes a slice header.
3858 * This will also call MPV_common_init() and frame_start() as needed.
3860 * @param h h264context
3861 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3863 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3865 static int decode_slice_header(H264Context *h, H264Context *h0){
3866 MpegEncContext * const s = &h->s;
3867 MpegEncContext * const s0 = &h0->s;
3868 unsigned int first_mb_in_slice;
3869 unsigned int pps_id;
3870 int num_ref_idx_active_override_flag;
3871 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3872 unsigned int slice_type, tmp, i, j;
3873 int default_ref_list_done = 0;
3874 int last_pic_structure;
3876 s->dropable= h->nal_ref_idc == 0;
3878 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3879 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3880 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3882 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3883 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3886 first_mb_in_slice= get_ue_golomb(&s->gb);
3888 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3889 h0->current_slice = 0;
3890 if (!s0->first_field)
3891 s->current_picture_ptr= NULL;
3894 slice_type= get_ue_golomb(&s->gb);
3896 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3901 h->slice_type_fixed=1;
3903 h->slice_type_fixed=0;
3905 slice_type= slice_type_map[ slice_type ];
3906 if (slice_type == FF_I_TYPE
3907 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3908 default_ref_list_done = 1;
3910 h->slice_type= slice_type;
3911 h->slice_type_nos= slice_type & 3;
3913 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3914 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3915 av_log(h->s.avctx, AV_LOG_ERROR,
3916 "B picture before any references, skipping\n");
3920 pps_id= get_ue_golomb(&s->gb);
3921 if(pps_id>=MAX_PPS_COUNT){
3922 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3925 if(!h0->pps_buffers[pps_id]) {
3926 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3929 h->pps= *h0->pps_buffers[pps_id];
3931 if(!h0->sps_buffers[h->pps.sps_id]) {
3932 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3935 h->sps = *h0->sps_buffers[h->pps.sps_id];
3937 if(h == h0 && h->dequant_coeff_pps != pps_id){
3938 h->dequant_coeff_pps = pps_id;
3939 init_dequant_tables(h);
3942 s->mb_width= h->sps.mb_width;
3943 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3945 h->b_stride= s->mb_width*4;
3946 h->b8_stride= s->mb_width*2;
3948 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3949 if(h->sps.frame_mbs_only_flag)
3950 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3952 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3954 if (s->context_initialized
3955 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3957 return -1; // width / height changed during parallelized decoding
3961 if (!s->context_initialized) {
3963 return -1; // we cant (re-)initialize context during parallel decoding
3964 if (MPV_common_init(s) < 0)
3968 init_scan_tables(h);
3971 for(i = 1; i < s->avctx->thread_count; i++) {
3973 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3974 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3975 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3978 init_scan_tables(c);
3982 for(i = 0; i < s->avctx->thread_count; i++)
3983 if(context_init(h->thread_context[i]) < 0)
3986 s->avctx->width = s->width;
3987 s->avctx->height = s->height;
3988 s->avctx->sample_aspect_ratio= h->sps.sar;
3989 if(!s->avctx->sample_aspect_ratio.den)
3990 s->avctx->sample_aspect_ratio.den = 1;
3992 if(h->sps.timing_info_present_flag){
3993 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3994 if(h->x264_build > 0 && h->x264_build < 44)
3995 s->avctx->time_base.den *= 2;
3996 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3997 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4001 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4004 h->mb_aff_frame = 0;
4005 last_pic_structure = s0->picture_structure;
4006 if(h->sps.frame_mbs_only_flag){
4007 s->picture_structure= PICT_FRAME;
4009 if(get_bits1(&s->gb)) { //field_pic_flag
4010 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4012 s->picture_structure= PICT_FRAME;
4013 h->mb_aff_frame = h->sps.mb_aff;
4017 if(h0->current_slice == 0){
4018 /* See if we have a decoded first field looking for a pair... */
4019 if (s0->first_field) {
4020 assert(s0->current_picture_ptr);
4021 assert(s0->current_picture_ptr->data[0]);
4022 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4024 /* figure out if we have a complementary field pair */
4025 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4027 * Previous field is unmatched. Don't display it, but let it
4028 * remain for reference if marked as such.
4030 s0->current_picture_ptr = NULL;
4031 s0->first_field = FIELD_PICTURE;
4034 if (h->nal_ref_idc &&
4035 s0->current_picture_ptr->reference &&
4036 s0->current_picture_ptr->frame_num != h->frame_num) {
4038 * This and previous field were reference, but had
4039 * different frame_nums. Consider this field first in
4040 * pair. Throw away previous field except for reference
4043 s0->first_field = 1;
4044 s0->current_picture_ptr = NULL;
4047 /* Second field in complementary pair */
4048 s0->first_field = 0;
4053 /* Frame or first field in a potentially complementary pair */
4054 assert(!s0->current_picture_ptr);
4055 s0->first_field = FIELD_PICTURE;
4058 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4059 s0->first_field = 0;
4066 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4068 assert(s->mb_num == s->mb_width * s->mb_height);
4069 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4070 first_mb_in_slice >= s->mb_num){
4071 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4074 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4075 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4076 if (s->picture_structure == PICT_BOTTOM_FIELD)
4077 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4078 assert(s->mb_y < s->mb_height);
4080 if(s->picture_structure==PICT_FRAME){
4081 h->curr_pic_num= h->frame_num;
4082 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4084 h->curr_pic_num= 2*h->frame_num + 1;
4085 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4088 if(h->nal_unit_type == NAL_IDR_SLICE){
4089 get_ue_golomb(&s->gb); /* idr_pic_id */
4092 if(h->sps.poc_type==0){
4093 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4095 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4096 h->delta_poc_bottom= get_se_golomb(&s->gb);
4100 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4101 h->delta_poc[0]= get_se_golomb(&s->gb);
4103 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4104 h->delta_poc[1]= get_se_golomb(&s->gb);
4109 if(h->pps.redundant_pic_cnt_present){
4110 h->redundant_pic_count= get_ue_golomb(&s->gb);
4113 //set defaults, might be overriden a few line later
4114 h->ref_count[0]= h->pps.ref_count[0];
4115 h->ref_count[1]= h->pps.ref_count[1];
4117 if(h->slice_type_nos != FF_I_TYPE){
4118 if(h->slice_type == FF_B_TYPE){
4119 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4120 if(FIELD_PICTURE && h->direct_spatial_mv_pred)
4121 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF + spatial direct mode is not implemented\n");
4123 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4125 if(num_ref_idx_active_override_flag){
4126 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4127 if(h->slice_type==FF_B_TYPE)
4128 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4130 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4131 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4132 h->ref_count[0]= h->ref_count[1]= 1;
4136 if(h->slice_type == FF_B_TYPE)
4143 if(!default_ref_list_done){
4144 fill_default_ref_list(h);
4147 if(decode_ref_pic_list_reordering(h) < 0)
4150 if( (h->pps.weighted_pred && (h->slice_type_nos == FF_P_TYPE ))
4151 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4152 pred_weight_table(h);
4153 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4154 implicit_weight_table(h);
4159 decode_ref_pic_marking(h0, &s->gb);
4162 fill_mbaff_ref_list(h);
4164 h->ref2frm[1][0]= h->ref2frm[1][1]= -1;
4167 h->ref2frm[j][ 1]= -1;
4169 h->ref2frm[j][i+2]= 4*h->ref_list[j][i].frame_num
4170 +(h->ref_list[j][i].reference&3);
4173 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4174 tmp = get_ue_golomb(&s->gb);
4176 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4179 h->cabac_init_idc= tmp;
4182 h->last_qscale_diff = 0;
4183 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4185 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4189 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4190 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4191 //FIXME qscale / qp ... stuff
4192 if(h->slice_type == FF_SP_TYPE){
4193 get_bits1(&s->gb); /* sp_for_switch_flag */
4195 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4196 get_se_golomb(&s->gb); /* slice_qs_delta */
4199 h->deblocking_filter = 1;
4200 h->slice_alpha_c0_offset = 0;
4201 h->slice_beta_offset = 0;
4202 if( h->pps.deblocking_filter_parameters_present ) {
4203 tmp= get_ue_golomb(&s->gb);
4205 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4208 h->deblocking_filter= tmp;
4209 if(h->deblocking_filter < 2)
4210 h->deblocking_filter^= 1; // 1<->0
4212 if( h->deblocking_filter ) {
4213 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4214 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4218 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4219 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4220 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4221 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4222 h->deblocking_filter= 0;
4224 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4225 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4226 /* Cheat slightly for speed:
4227 Do not bother to deblock across slices. */
4228 h->deblocking_filter = 2;
4230 h0->max_contexts = 1;
4231 if(!h0->single_decode_warning) {
4232 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4233 h0->single_decode_warning = 1;
4236 return 1; // deblocking switched inside frame
4241 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4242 slice_group_change_cycle= get_bits(&s->gb, ?);
4245 h0->last_slice_type = slice_type;
4246 h->slice_num = ++h0->current_slice;
4248 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4249 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4251 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4252 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4254 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4256 av_get_pict_type_char(h->slice_type),
4257 pps_id, h->frame_num,
4258 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4259 h->ref_count[0], h->ref_count[1],
4261 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4263 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4264 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix syntax element: the count of leading zero
 * bits before the terminating '1' bit in the bitstream cache.
 * NOTE(review): some lines of this function are elided here; the return
 * statement is not visible — presumably returns log-1 (the zero-run
 * length), as logged below. Confirm against the full source.
 */
4274 static inline int get_level_prefix(GetBitContext *gb){
4278 OPEN_READER(re, gb);
4279 UPDATE_CACHE(re, gb);
4280 buf=GET_CACHE(re, gb);
// log = 32 - floor(log2(buf)) = index (1-based) of the first set bit
4282 log= 32 - av_log2(buf);
4284 print_bin(buf>>(32-log), log);
4285 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume the zero run plus its terminating '1' bit
4288 LAST_SKIP_BITS(re, gb, log);
4289 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current
 * macroblock: each 8x8 sub-partition must be an 8x8 block, and direct
 * sub-partitions additionally require sps.direct_8x8_inference_flag.
 * NOTE(review): loop header over i and the return are elided here.
 */
4294 static inline int get_dct8x8_allowed(H264Context *h){
4297 if(!IS_SUB_8X8(h->sub_mb_type[i])
4298 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4305 * decodes a residual block.
4306 * @param n block index
4307 * @param scantable scantable
4308 * @param max_coeff number of coefficients in the block
4309 * @return <0 if an error occurred
 * @param gb bitstream to read from
 * @param block output coefficient array (written in scan order)
 * @param qmul dequantization table, or NULL to store raw levels
 * (CAVLC residual parsing per H.264 clause 9.2: coeff_token, trailing
 * ones, level prefix/suffix, total_zeros, run_before.)
4311 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4312 MpegEncContext * const s = &h->s;
4313 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4315 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4317 //FIXME put trailing_onex into the context
// --- coeff_token: selects VLC table by block type / predicted nnz ---
4319 if(n == CHROMA_DC_BLOCK_INDEX){
4320 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4321 total_coeff= coeff_token>>2;
4323 if(n == LUMA_DC_BLOCK_INDEX){
4324 total_coeff= pred_non_zero_count(h, 0);
4325 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4326 total_coeff= coeff_token>>2;
4328 total_coeff= pred_non_zero_count(h, n);
4329 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4330 total_coeff= coeff_token>>2;
4331 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4335 //FIXME set last_non_zero?
// reject corrupt streams claiming more coefficients than the block holds
4339 if(total_coeff > (unsigned)max_coeff) {
4340 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// --- trailing ones: one sign bit each, |level| == 1 ---
4344 trailing_ones= coeff_token&3;
4345 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4346 assert(total_coeff<=16);
4348 for(i=0; i<trailing_ones; i++){
4349 level[i]= 1 - 2*get_bits1(gb);
// --- first non-trailing level: suffix_length starts at 0 or 1 ---
4353 int level_code, mask;
4354 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4355 int prefix= get_level_prefix(gb);
4357 //first coefficient has suffix_length equal to 0 or 1
4358 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4360 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4362 level_code= (prefix<<suffix_length); //part
4363 }else if(prefix==14){
4365 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4367 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape coding with (prefix-3)-bit suffix
4369 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4370 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4372 level_code += (1<<(prefix-3))-4096;
4375 if(trailing_ones < 3) level_code += 2;
// map unsigned level_code to a signed level (even -> +, odd -> -)
4380 mask= -(level_code&1);
4381 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4384 //remaining coefficients have suffix_length > 0
4385 for(;i<total_coeff;i++) {
4386 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4387 prefix = get_level_prefix(gb);
4389 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4391 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4393 level_code += (1<<(prefix-3))-4096;
4395 mask= -(level_code&1);
4396 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// grow suffix_length once the magnitude exceeds the table threshold
4397 if(level_code > suffix_limit[suffix_length])
// --- total_zeros: skipped when the block is already full ---
4402 if(total_coeff == max_coeff)
4405 if(n == CHROMA_DC_BLOCK_INDEX)
4406 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4408 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels into scan positions; two variants below:
//     qmul == NULL stores raw levels, otherwise dequantize on the fly ---
4411 coeff_num = zeros_left + total_coeff - 1;
4412 j = scantable[coeff_num];
4414 block[j] = level[0];
4415 for(i=1;i<total_coeff;i++) {
4418 else if(zeros_left < 7){
4419 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4421 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4423 zeros_left -= run_before;
4424 coeff_num -= 1 + run_before;
4425 j= scantable[ coeff_num ];
// dequantizing variant: (level * qmul[j] + 32) >> 6
4430 block[j] = (level[0] * qmul[j] + 32)>>6;
4431 for(i=1;i<total_coeff;i++) {
4434 else if(zeros_left < 7){
4435 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4437 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4439 zeros_left -= run_before;
4440 coeff_num -= 1 + run_before;
4441 j= scantable[ coeff_num ];
4443 block[j]= (level[i] * qmul[j] + 32)>>6;
// run_before values summing past zeros_left indicate a corrupt stream
4448 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair where the
 * flag is not present in the bitstream: inherit the interlaced state of
 * the left neighbour if it is in the same slice, else the top neighbour.
 */
4455 static void predict_field_decoding_flag(H264Context *h){
4456 MpegEncContext * const s = &h->s;
4457 const int mb_xy= h->mb_xy;
4458 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4459 ? s->current_picture.mb_type[mb_xy-1]
4460 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4461 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4463 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4467 * decodes a P_SKIP or B_SKIP macroblock
 * Sets up motion/reference caches, writes back motion and macroblock
 * state; no residual is present for skipped macroblocks.
4469 static void decode_mb_skip(H264Context *h){
4470 MpegEncContext * const s = &h->s;
4471 const int mb_xy= h->mb_xy;
// skipped MBs carry no coefficients
4474 memset(h->non_zero_count[mb_xy], 0, 16);
4475 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4478 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: derive motion with direct prediction
4480 if( h->slice_type == FF_B_TYPE )
4482 // just for fill_caches. pred_direct_motion will set the real mb_type
4483 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4485 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4486 pred_direct_motion(h, &mb_type);
4487 mb_type|= MB_TYPE_SKIP;
// P_SKIP: predicted 16x16 motion on list 0, ref index 0
4492 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4494 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4495 pred_pskip_motion(h, &mx, &my);
4496 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4497 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// commit per-macroblock state to the current picture
4500 write_back_motion(h, mb_type);
4501 s->current_picture.mb_type[mb_xy]= mb_type;
4502 s->current_picture.qscale_table[mb_xy]= s->qscale;
4503 h->slice_table[ mb_xy ]= h->slice_num;
4504 h->prev_mb_skipped= 1;
4508 * decodes a macroblock
4509 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 * CAVLC macroblock-layer parser: skip run, mb_type, intra prediction
 * modes or inter motion data, coded_block_pattern, mb_qp_delta and
 * residuals. Writes decoded state into h/s caches and current_picture.
4511 static int decode_mb_cavlc(H264Context *h){
4512 MpegEncContext * const s = &h->s;
4514 int partition_count;
4515 unsigned int mb_type, cbp;
4516 int dct8x8_allowed= h->pps.transform_8x8_mode;
4518 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4520 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4522 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4523 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4525 if(h->slice_type_nos != FF_I_TYPE){
4526 if(s->mb_skip_run==-1)
4527 s->mb_skip_run= get_ue_golomb(&s->gb);
4529 if (s->mb_skip_run--) {
// in MBAFF, the field flag of a skipped top MB must be read or predicted
4530 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4531 if(s->mb_skip_run==0)
4532 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4534 predict_field_decoding_flag(h);
4541 if( (s->mb_y&1) == 0 )
4542 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4544 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4546 h->prev_mb_skipped= 0;
// --- mb_type: per-slice-type lookup tables; intra types fall through ---
4548 mb_type= get_ue_golomb(&s->gb);
4549 if(h->slice_type == FF_B_TYPE){
4551 partition_count= b_mb_type_info[mb_type].partition_count;
4552 mb_type= b_mb_type_info[mb_type].type;
4555 goto decode_intra_mb;
4557 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4559 partition_count= p_mb_type_info[mb_type].partition_count;
4560 mb_type= p_mb_type_info[mb_type].type;
4563 goto decode_intra_mb;
4566 assert(h->slice_type == FF_I_TYPE);
4569 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4573 cbp= i_mb_type_info[mb_type].cbp;
4574 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4575 mb_type= i_mb_type_info[mb_type].type;
4579 mb_type |= MB_TYPE_INTERLACED;
4581 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw byte-aligned samples, no prediction or transform ---
4583 if(IS_INTRA_PCM(mb_type)){
4586 // We assume these blocks are very rare so we do not optimize it.
4587 align_get_bits(&s->gb);
4589 // The pixels are stored in the same order as levels in h->mb array.
4590 for(y=0; y<16; y++){
4591 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4592 for(x=0; x<16; x++){
4593 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4594 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4598 const int index= 256 + 4*(y&3) + 32*(y>>2);
4600 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4601 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4605 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4607 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4608 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4612 // In deblocking, the quantizer is 0
4613 s->current_picture.qscale_table[mb_xy]= 0;
4614 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4615 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4616 // All coeffs are present
4617 memset(h->non_zero_count[mb_xy], 16, 16);
4619 s->current_picture.mb_type[mb_xy]= mb_type;
// NOTE(review): doubled here and halved again at the end — presumably
// the MBAFF per-field reference count adjustment; confirm in full source
4624 h->ref_count[0] <<= 1;
4625 h->ref_count[1] <<= 1;
4628 fill_caches(h, mb_type, 0);
// --- intra macroblocks: 4x4/16x16 luma + chroma prediction modes ---
4631 if(IS_INTRA(mb_type)){
4633 // init_top_left_availability(h);
4634 if(IS_INTRA4x4(mb_type)){
4637 if(dct8x8_allowed && get_bits1(&s->gb)){
4638 mb_type |= MB_TYPE_8x8DCT;
4642 // fill_intra4x4_pred_table(h);
4643 for(i=0; i<16; i+=di){
4644 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem mode, skip the predicted one
4646 if(!get_bits1(&s->gb)){
4647 const int rem_mode= get_bits(&s->gb, 3);
4648 mode = rem_mode + (rem_mode >= mode);
4652 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4654 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4656 write_back_intra_pred_mode(h);
4657 if( check_intra4x4_pred_mode(h) < 0)
4660 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4661 if(h->intra16x16_pred_mode < 0)
4665 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4668 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: sub_mb_type, ref indices, per-sub-block MVs ---
4669 }else if(partition_count==4){
4670 int i, j, sub_partition_count[4], list, ref[2][4];
4672 if(h->slice_type == FF_B_TYPE){
4674 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4675 if(h->sub_mb_type[i] >=13){
4676 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4679 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4680 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4682 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4683 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4684 pred_direct_motion(h, &mb_type);
4685 h->ref_cache[0][scan8[4]] =
4686 h->ref_cache[1][scan8[4]] =
4687 h->ref_cache[0][scan8[12]] =
4688 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4691 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4693 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4694 if(h->sub_mb_type[i] >=4){
4695 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4698 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4699 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4703 for(list=0; list<h->list_count; list++){
4704 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4706 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4707 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4708 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4710 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4722 dct8x8_allowed = get_dct8x8_allowed(h);
4724 for(list=0; list<h->list_count; list++){
4726 if(IS_DIRECT(h->sub_mb_type[i])) {
4727 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4730 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4731 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4733 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4734 const int sub_mb_type= h->sub_mb_type[i];
4735 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4736 for(j=0; j<sub_partition_count[i]; j++){
4738 const int index= 4*i + block_width*j;
4739 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4740 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4741 mx += get_se_golomb(&s->gb);
4742 my += get_se_golomb(&s->gb);
4743 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate the MV across the cells covered by the sub-partition shape
4745 if(IS_SUB_8X8(sub_mb_type)){
4747 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4749 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4750 }else if(IS_SUB_8X4(sub_mb_type)){
4751 mv_cache[ 1 ][0]= mx;
4752 mv_cache[ 1 ][1]= my;
4753 }else if(IS_SUB_4X8(sub_mb_type)){
4754 mv_cache[ 8 ][0]= mx;
4755 mv_cache[ 8 ][1]= my;
4757 mv_cache[ 0 ][0]= mx;
4758 mv_cache[ 0 ][1]= my;
4761 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- direct macroblock: all motion derived, nothing to read ---
4767 }else if(IS_DIRECT(mb_type)){
4768 pred_direct_motion(h, &mb_type);
4769 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter partitions ---
4771 int list, mx, my, i;
4772 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4773 if(IS_16X16(mb_type)){
4774 for(list=0; list<h->list_count; list++){
4776 if(IS_DIR(mb_type, 0, list)){
4777 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4778 if(val >= h->ref_count[list]){
4779 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4783 val= LIST_NOT_USED&0xFF;
4784 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4786 for(list=0; list<h->list_count; list++){
4788 if(IS_DIR(mb_type, 0, list)){
4789 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4790 mx += get_se_golomb(&s->gb);
4791 my += get_se_golomb(&s->gb);
4792 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4794 val= pack16to32(mx,my);
4797 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4800 else if(IS_16X8(mb_type)){
4801 for(list=0; list<h->list_count; list++){
4804 if(IS_DIR(mb_type, i, list)){
4805 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4806 if(val >= h->ref_count[list]){
4807 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4811 val= LIST_NOT_USED&0xFF;
4812 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4815 for(list=0; list<h->list_count; list++){
4818 if(IS_DIR(mb_type, i, list)){
4819 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4820 mx += get_se_golomb(&s->gb);
4821 my += get_se_golomb(&s->gb);
4822 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4824 val= pack16to32(mx,my);
4827 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4831 assert(IS_8X16(mb_type));
4832 for(list=0; list<h->list_count; list++){
4835 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4836 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4837 if(val >= h->ref_count[list]){
4838 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4842 val= LIST_NOT_USED&0xFF;
4843 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4846 for(list=0; list<h->list_count; list++){
4849 if(IS_DIR(mb_type, i, list)){
4850 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4851 mx += get_se_golomb(&s->gb);
4852 my += get_se_golomb(&s->gb);
4853 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4855 val= pack16to32(mx,my);
4858 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4864 if(IS_INTER(mb_type))
4865 write_back_motion(h, mb_type);
// --- coded_block_pattern (not coded for Intra16x16; derived from mb_type) ---
4867 if(!IS_INTRA16x16(mb_type)){
4868 cbp= get_ue_golomb(&s->gb);
4870 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4874 if(IS_INTRA4x4(mb_type))
4875 cbp= golomb_to_intra4x4_cbp[cbp];
4877 cbp= golomb_to_inter_cbp[cbp];
4881 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4882 if(get_bits1(&s->gb))
4883 mb_type |= MB_TYPE_8x8DCT;
4885 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: qp delta, scan selection, luma then chroma ---
4887 if(cbp || IS_INTRA16x16(mb_type)){
4888 int i8x8, i4x4, chroma_idx;
4890 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4891 const uint8_t *scan, *scan8x8, *dc_scan;
4893 // fill_non_zero_count_cache(h);
4895 if(IS_INTERLACED(mb_type)){
4896 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4897 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4898 dc_scan= luma_dc_field_scan;
4900 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4901 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4902 dc_scan= luma_dc_zigzag_scan;
4905 dquant= get_se_golomb(&s->gb);
4907 if( dquant > 25 || dquant < -26 ){
4908 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qp wraps modulo 52 per the spec
4912 s->qscale += dquant;
4913 if(((unsigned)s->qscale) > 51){
4914 if(s->qscale<0) s->qscale+= 52;
4915 else s->qscale-= 52;
4918 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4919 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4920 if(IS_INTRA16x16(mb_type)){
4921 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4922 return -1; //FIXME continue if partitioned and other return -1 too
4925 assert((cbp&15) == 0 || (cbp&15) == 15);
4928 for(i8x8=0; i8x8<4; i8x8++){
4929 for(i4x4=0; i4x4<4; i4x4++){
4930 const int index= i4x4 + 4*i8x8;
// AC blocks of Intra16x16 skip the DC coefficient (scan + 1, 15 coeffs)
4931 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4937 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4940 for(i8x8=0; i8x8<4; i8x8++){
4941 if(cbp & (1<<i8x8)){
4942 if(IS_8x8DCT(mb_type)){
4943 DCTELEM *buf = &h->mb[64*i8x8];
4945 for(i4x4=0; i4x4<4; i4x4++){
4946 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4947 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4950 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4951 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4953 for(i4x4=0; i4x4<4; i4x4++){
4954 const int index= i4x4 + 4*i8x8;
4956 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4962 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4963 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (2x2, no dequant table) then chroma AC blocks
4969 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4970 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4976 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4977 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4978 for(i4x4=0; i4x4<4; i4x4++){
4979 const int index= 16 + 4*chroma_idx + i4x4;
4980 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4986 uint8_t * const nnz= &h->non_zero_count_cache[0];
4987 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4988 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4991 uint8_t * const nnz= &h->non_zero_count_cache[0];
4992 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4993 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4994 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4996 s->current_picture.qscale_table[mb_xy]= s->qscale;
4997 write_back_non_zero_count(h);
// undo the earlier ref_count doubling
5000 h->ref_count[0] >>= 1;
5001 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC for an MBAFF pair.
 * Context (0..2) = number of available same-slice left/top neighbour
 * pairs that are field-coded; state offset 70.
 */
5007 static int decode_cabac_field_decoding_flag(H264Context *h) {
5008 MpegEncContext * const s = &h->s;
5009 const int mb_x = s->mb_x;
// top macroblock of the pair (even row)
5010 const int mb_y = s->mb_y & ~1;
5011 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5012 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5014 unsigned int ctx = 0;
5016 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5019 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5023 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base base index into cabac_state for this slice type
 * @param intra_slice nonzero in I slices (neighbour-derived context)
 * @return 0 for I4x4, 25 for I_PCM, otherwise 1..24 encoding the
 *         I16x16 variant (pred mode, cbp_luma, cbp_chroma).
 */
5026 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5027 uint8_t *state= &h->cabac_state[ctx_base];
5031 MpegEncContext * const s = &h->s;
5032 const int mba_xy = h->left_mb_xy[0];
5033 const int mbb_xy = h->top_mb_xy;
// context increment: count non-I4x4 same-slice neighbours
5035 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5037 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5039 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5040 return 0; /* I4x4 */
5043 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5044 return 0; /* I4x4 */
5047 if( get_cabac_terminate( &h->cabac ) )
5048 return 25; /* PCM */
5050 mb_type = 1; /* I16x16 */
5051 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5052 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5053 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// two final bits select the intra16x16 prediction mode
5054 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5055 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on slice type:
 * I slices go straight to the intra decoder; P slices use states 14..17;
 * B slices build a variable-length code from states 27+ctx / 27+3..5.
 */
5059 static int decode_cabac_mb_type( H264Context *h ) {
5060 MpegEncContext * const s = &h->s;
5062 if( h->slice_type == FF_I_TYPE ) {
5063 return decode_cabac_intra_mb_type(h, 3, 1);
5064 } else if( h->slice_type == FF_P_TYPE ) {
5065 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5067 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5068 /* P_L0_D16x16, P_8x8 */
5069 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5071 /* P_L0_D8x16, P_L0_D16x8 */
5072 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// intra type inside a P slice: offset by the 5 P types
5075 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5077 } else if( h->slice_type == FF_B_TYPE ) {
5078 const int mba_xy = h->left_mb_xy[0];
5079 const int mbb_xy = h->top_mb_xy;
// context: count non-direct same-slice neighbours
5083 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5085 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5088 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5089 return 0; /* B_Direct_16x16 */
5091 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5092 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit code disambiguates the remaining B types
5095 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5096 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5097 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5098 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5100 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5101 else if( bits == 13 ) {
5102 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5103 } else if( bits == 14 )
5104 return 11; /* B_L1_L0_8x16 */
5105 else if( bits == 15 )
5106 return 22; /* B_8x8 */
// one more bit extends the code for the remaining bi-prediction types
5108 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5109 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5111 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * Context = count of available same-slice, non-skipped neighbours;
 * state base 11 (P) or 24 via the +13 B-slice offset below.
 * In MBAFF mode the neighbour indices are adjusted for field/frame
 * pairing before the context is computed.
 */
5116 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5117 MpegEncContext * const s = &h->s;
5121 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5122 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// use the bottom MB of a left frame pair when we are a field MB
5125 && h->slice_table[mba_xy] == h->slice_num
5126 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5127 mba_xy += s->mb_stride;
5129 mbb_xy = mb_xy - s->mb_stride;
5131 && h->slice_table[mbb_xy] == h->slice_num
5132 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5133 mbb_xy -= s->mb_stride;
5135 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5137 int mb_xy = h->mb_xy;
5139 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5142 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5144 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5147 if( h->slice_type == FF_B_TYPE )
5149 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes the intra4x4 prediction mode with CABAC: one bit (state 68)
 * selects "use predicted mode"; otherwise three bits (state 69) form
 * rem_intra4x4_pred_mode, skipping the predicted mode.
 */
5152 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5155 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// 3-bit remaining mode, LSB first
5158 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5159 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5160 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// skip over the predicted mode so all 9 modes stay reachable
5162 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC (states 64..67): a
 * neighbour-context bit followed by up to two truncated-unary bits.
 */
5168 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5169 const int mba_xy = h->left_mb_xy[0];
5170 const int mbb_xy = h->top_mb_xy;
5174 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5175 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5178 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5181 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5184 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5186 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC (states 73..76).
 * Each bit's context derives from the corresponding 8x8 neighbour bits
 * of the left/top macroblocks (or previously decoded bits of this MB);
 * -1 (all bits set) marks an unavailable neighbour.
 */
5192 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5193 int cbp_b, cbp_a, ctx, cbp = 0;
5195 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5196 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5198 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5199 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5200 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5201 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5202 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5203 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5204 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5205 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern with CABAC (states 77+):
 * returns 0 (none), 1 (DC only) or 2 (DC+AC), with contexts taken from
 * the neighbours' chroma cbp (bits 4-5 of left/top cbp).
 */
5208 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5212 cbp_a = (h->left_cbp>>4)&0x03;
5213 cbp_b = (h-> top_cbp>>4)&0x03;
// first bin: any chroma coefficients at all?
5216 if( cbp_a > 0 ) ctx++;
5217 if( cbp_b > 0 ) ctx += 2;
5218 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// second bin: AC coefficients too?
5222 if( cbp_a == 2 ) ctx++;
5223 if( cbp_b == 2 ) ctx += 2;
5224 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC (states 60+): unary-coded magnitude,
 * then mapped to a signed value (even val -> positive, odd -> negative).
 */
5226 static int decode_cabac_mb_dqp( H264Context *h) {
// context depends on whether the previous MB changed qp
5230 if( h->last_qscale_diff != 0 )
5233 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5239 if(val > 102) //prevent infinite loop
// odd magnitudes map to negative deltas
5246 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (0..3) with CABAC (states 21..23).
 * NOTE(review): the return statements between the bins are elided here.
 */
5248 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5249 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5251 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5253 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (0..12) with CABAC (states 36..39):
 * direct, then L0/L1 8x8, then progressively finer partitions.
 */
5257 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5259 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5260 return 0; /* B_Direct_8x8 */
5261 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5262 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5264 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5265 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5266 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// two more bins refine the remaining 8x4/4x8/bi variants
5269 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5270 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag; the context (399..401) is chosen by
 * how many neighbouring MBs already use the 8x8 transform.
 */
5274 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5275 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index for block n of the given list as a unary
 * CABAC code (contexts 54+). The initial context depends on the left
 * and top cached reference indices; in B slices, neighbours coded in
 * direct mode do not count as "uses a nonzero ref".
 */
5278 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5279 int refa = h->ref_cache[list][scan8[n] - 1];
5280 int refb = h->ref_cache[list][scan8[n] - 8];
5284 if( h->slice_type == FF_B_TYPE) {
5285 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5287 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
/* unary decode; elided lines update ref and ctx inside the loop */
5296 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound for corrupt streams; real limit would be ref_count */
5302 if(ref >= 32 /*h->ref_list[list]*/){
5303 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5304 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion-vector-difference component (l==0: x, ctxbase 40;
 * l==1: y, ctxbase 47). The first context is picked from the summed
 * absolute mvd of the left/top neighbours; the magnitude is coded as
 * truncated unary (up to 9) plus an exp-Golomb bypass suffix, and the
 * sign is a final bypass bin.
 */
5310 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5311 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5312 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5313 int ctxbase = (l == 0) ? 40 : 47;
5318 else if( amvd > 32 )
5323 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* truncated unary prefix, at most 9 context-coded bins */
5328 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb style bypass suffix for large magnitudes */
5336 while( get_cabac_bypass( &h->cabac ) ) {
5340 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5345 if( get_cabac_bypass( &h->cabac ) )
/* sign bin: returns +mvd or -mvd */
5349 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context increment for block idx of
 * category cat: looks up the non-zero status of the left and top
 * neighbouring blocks (from the cbp cache for DC categories, from
 * non_zero_count_cache for AC/4x4 blocks) and returns ctx + 4*cat so
 * the caller can index cabac_state[85 + ...].
 * NOTE(review): the branching on cat/is_dc is partially elided here.
 */
5352 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* DC categories: non-zero status lives in bit 8 of the cached cbp */
5358 nza = h->left_cbp&0x100;
5359 nzb = h-> top_cbp&0x100;
/* chroma DC per-component status from cbp bits 6..7 */
5361 nza = (h->left_cbp>>(6+idx))&0x01;
5362 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC blocks */
5366 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5367 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* luma AC / 4x4 blocks */
5369 assert(cat == 1 || cat == 2);
5370 nza = h->non_zero_count_cache[scan8[idx] - 1];
5371 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5381 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * last_significant_coeff_flag of 8x8 blocks; shared with the asm
 * significance decoder, hence DECLARE_ASM_CONST. */
5384 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5385 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5386 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5387 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5388 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one residual block with CABAC:
 *   1. coded_block_flag (skipped for cat 5 / 8x8 luma, which has none),
 *   2. significance map (significant/last flags per scan position),
 *   3. coefficient levels, decoded in reverse scan order, with optional
 *      dequantisation via qmul (qmul==NULL for DC blocks).
 * is_dc and cat are compile-time constants at each call site (the
 * function is always_inline), so the is_dc/cat branches fold away.
 * NOTE(review): this excerpt elides some lines (variable declarations,
 * several closing braces, parts of the level loop); comments describe
 * only the visible code.
 */
5391 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context base offsets per category, [0]=frame coding, [1]=field */
5392 static const int significant_coeff_flag_offset[2][6] = {
5393 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5394 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5396 static const int last_coeff_flag_offset[2][6] = {
5397 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5398 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5400 static const int coeff_abs_level_m1_offset[6] = {
5401 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position context offsets for 8x8 significance flags */
5403 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5404 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5405 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5406 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5407 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5408 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5409 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5410 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5411 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5413 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5414 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5415 * map node ctx => cabac ctx for level=1 */
5416 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5417 /* map node ctx => cabac ctx for level>1 */
5418 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5419 static const uint8_t coeff_abs_level_transition[2][8] = {
5420 /* update node ctx after decoding a level=1 */
5421 { 1, 2, 3, 3, 4, 5, 6, 7 },
5422 /* update node ctx after decoding a level>1 */
5423 { 4, 4, 4, 4, 5, 6, 7, 7 }
5429 int coeff_count = 0;
5432 uint8_t *significant_coeff_ctx_base;
5433 uint8_t *last_coeff_ctx_base;
5434 uint8_t *abs_level_m1_ctx_base;
/* copy the CABAC state into a stack-local struct so the compiler can
 * keep it in registers through the hot loops */
5437 #define CABAC_ON_STACK
5439 #ifdef CABAC_ON_STACK
5442 cc.range = h->cabac.range;
5443 cc.low = h->cabac.low;
5444 cc.bytestream= h->cabac.bytestream;
5446 #define CC &h->cabac
5450 /* cat: 0-> DC 16x16 n = 0
5451 * 1-> AC 16x16 n = luma4x4idx
5452 * 2-> Luma4x4 n = luma4x4idx
5453 * 3-> DC Chroma n = iCbCr
5454 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5455 * 5-> Luma8x8 n = 4 * luma8x8idx
5458 /* read coded block flag */
5459 if( is_dc || cat != 5 ) {
5460 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients: record zero nnz and write the state back */
5463 h->non_zero_count_cache[scan8[16+n]] = 0;
5465 h->non_zero_count_cache[scan8[n]] = 0;
5468 #ifdef CABAC_ON_STACK
5469 h->cabac.range = cc.range ;
5470 h->cabac.low = cc.low ;
5471 h->cabac.bytestream= cc.bytestream;
5477 significant_coeff_ctx_base = h->cabac_state
5478 + significant_coeff_flag_offset[MB_FIELD][cat];
5479 last_coeff_ctx_base = h->cabac_state
5480 + last_coeff_flag_offset[MB_FIELD][cat];
5481 abs_level_m1_ctx_base = h->cabac_state
5482 + coeff_abs_level_m1_offset[cat];
5484 if( !is_dc && cat == 5 ) {
/* significance map: significant flag per position, then a "last" flag
 * that terminates the scan; the final position is implicit */
5485 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5486 for(last= 0; last < coefs; last++) { \
5487 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5488 if( get_cabac( CC, sig_ctx )) { \
5489 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5490 index[coeff_count++] = last; \
5491 if( get_cabac( CC, last_ctx ) ) { \
5497 if( last == max_coeff -1 ) {\
5498 index[coeff_count++] = last;\
5500 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 has hand-written asm versions of the significance scan */
5501 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5502 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5504 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5506 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5508 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5511 assert(coeff_count > 0);
/* record "has coefficients" in cbp cache / nnz cache per category */
5515 h->cbp_table[h->mb_xy] |= 0x100;
5517 h->cbp_table[h->mb_xy] |= 0x40 << n;
5520 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5522 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5524 assert( cat == 1 || cat == 2 );
5525 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* level decoding, reverse scan order; node_ctx tracks the history of
 * level==1 vs level>1 decisions (see tables above) */
5529 while( coeff_count-- ) {
5530 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5532 int j= scantable[index[coeff_count]];
5534 if( get_cabac( CC, ctx ) == 0 ) {
5535 node_ctx = coeff_abs_level_transition[0][node_ctx];
5537 block[j] = get_cabac_bypass_sign( CC, -1);
/* non-DC: dequantise |level|==1 via qmul */
5539 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5543 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5544 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* truncated unary up to 15, then exp-Golomb bypass suffix */
5546 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5550 if( coeff_abs >= 15 ) {
5552 while( get_cabac_bypass( CC ) ) {
5558 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5564 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5566 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
/* flush the stack-local CABAC state back into the context */
5570 #ifdef CABAC_ON_STACK
5571 h->cabac.range = cc.range ;
5572 h->cabac.low = cc.low ;
5573 h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal. In the normal build
 * (!CONFIG_SMALL) two specialised copies are instantiated (DC vs
 * non-DC) so the always_inline body is optimised for each case and
 * decode_cabac_residual() dispatches on cat; the CONFIG_SMALL build
 * presumably calls the internal function directly with a computed
 * is_dc (the #else/#endif lines are elided from this excerpt). */
5578 #ifndef CONFIG_SMALL
5579 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5580 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5583 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5584 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5588 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5590 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
/* cat 0 (luma DC) and cat 3 (chroma DC) are the DC categories */
5592 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5593 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Computes h->top_mb_xy / h->left_mb_xy[0] for the current MB.
 * The simple frame-coded case is mb_xy - stride / mb_xy - 1; the MBAFF
 * branch adjusts for field/frame pairing of the current, top and left
 * MB pairs (see per-line comments below).
 */
5597 static inline void compute_mb_neighbors(H264Context *h)
5599 MpegEncContext * const s = &h->s;
5600 const int mb_xy = h->mb_xy;
5601 h->top_mb_xy = mb_xy - s->mb_stride;
5602 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are chosen per MB *pair* */
5604 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5605 const int top_pair_xy = pair_xy - s->mb_stride;
5606 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5607 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5608 const int curr_mb_frame_flag = !MB_FIELD;
5609 const int bottom = (s->mb_y & 1);
5611 ? !curr_mb_frame_flag // bottom macroblock
5612 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5614 h->top_mb_xy -= s->mb_stride;
5616 if (left_mb_frame_flag != curr_mb_frame_flag) {
5617 h->left_mb_xy[0] = pair_xy - 1;
5619 } else if (FIELD_PICTURE) {
/* in field pictures rows are interleaved, so the vertical
 * neighbour is two stored rows up */
5620 h->top_mb_xy -= s->mb_stride;
5626 * decodes a macroblock
5627 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CABAC macroblock parser: skip flags and MBAFF field flags,
 * mb_type, IPCM passthrough, intra prediction modes or inter motion
 * info, cbp, optional 8x8-transform flag, qp delta and all residuals.
 * NOTE(review): this excerpt elides many lines (declarations, braces,
 * else-branches); comments below annotate only the visible code. */
5629 static int decode_mb_cabac(H264Context *h) {
5630 MpegEncContext * const s = &h->s;
5632 int mb_type, partition_count, cbp = 0;
5633 int dct8x8_allowed= h->pps.transform_8x8_mode;
5635 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5637 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5639 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (only in P/B slices) --- */
5640 if( h->slice_type_nos != FF_I_TYPE ) {
5642 /* a skipped mb needs the aff flag from the following mb */
5643 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5644 predict_field_decoding_flag(h);
5645 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5646 skip = h->next_mb_skipped;
5648 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5649 /* read skip flags */
5651 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5652 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5653 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5654 if(h->next_mb_skipped)
5655 predict_field_decoding_flag(h);
5657 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB CABAC-related state */
5662 h->cbp_table[mb_xy] = 0;
5663 h->chroma_pred_mode_table[mb_xy] = 0;
5664 h->last_qscale_diff = 0;
/* --- field/frame decoding flag for non-skipped MBAFF MBs --- */
5671 if( (s->mb_y&1) == 0 )
5673 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5675 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5677 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5679 compute_mb_neighbors(h);
5680 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5681 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5685 if( h->slice_type == FF_B_TYPE ) {
5687 partition_count= b_mb_type_info[mb_type].partition_count;
5688 mb_type= b_mb_type_info[mb_type].type;
5691 goto decode_intra_mb;
5693 } else if( h->slice_type == FF_P_TYPE ) {
5695 partition_count= p_mb_type_info[mb_type].partition_count;
5696 mb_type= p_mb_type_info[mb_type].type;
5699 goto decode_intra_mb;
5702 assert(h->slice_type == FF_I_TYPE);
5704 partition_count = 0;
5705 cbp= i_mb_type_info[mb_type].cbp;
5706 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5707 mb_type= i_mb_type_info[mb_type].type;
5710 mb_type |= MB_TYPE_INTERLACED;
5712 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow, bypassing CABAC --- */
5714 if(IS_INTRA_PCM(mb_type)) {
5718 // We assume these blocks are very rare so we do not optimize it.
5719 // FIXME The two following lines get the bitstream position in the cabac
5720 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5721 ptr= h->cabac.bytestream;
5722 if(h->cabac.low&0x1) ptr--;
5724 if(h->cabac.low&0x1FF) ptr--;
5727 // The pixels are stored in the same order as levels in h->mb array.
5728 for(y=0; y<16; y++){
5729 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5730 for(x=0; x<16; x++){
5731 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5732 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5736 const int index= 256 + 4*(y&3) + 32*(y>>2);
5738 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5739 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5743 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5745 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5746 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw bytes */
5750 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5752 // All blocks are present
5753 h->cbp_table[mb_xy] = 0x1ef;
5754 h->chroma_pred_mode_table[mb_xy] = 0;
5755 // In deblocking, the quantizer is 0
5756 s->current_picture.qscale_table[mb_xy]= 0;
5757 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5758 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5759 // All coeffs are present
5760 memset(h->non_zero_count[mb_xy], 16, 16);
5761 s->current_picture.mb_type[mb_xy]= mb_type;
5762 h->last_qscale_diff = 0;
/* MBAFF pair decodes with doubled (per-field) reference counts */
5767 h->ref_count[0] <<= 1;
5768 h->ref_count[1] <<= 1;
5771 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5773 if( IS_INTRA( mb_type ) ) {
5775 if( IS_INTRA4x4( mb_type ) ) {
5776 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5777 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 intra: one mode per 8x8 block, replicated over the 4x4 cache */
5778 for( i = 0; i < 16; i+=4 ) {
5779 int pred = pred_intra_mode( h, i );
5780 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5781 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5784 for( i = 0; i < 16; i++ ) {
5785 int pred = pred_intra_mode( h, i );
5786 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5788 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5791 write_back_intra_pred_mode(h);
5792 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5794 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5795 if( h->intra16x16_pred_mode < 0 ) return -1;
5797 h->chroma_pred_mode_table[mb_xy] =
5798 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5800 pred_mode= check_intra_pred_mode( h, pred_mode );
5801 if( pred_mode < 0 ) return -1;
5802 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 sub-partitioned MB --- */
5803 } else if( partition_count == 4 ) {
5804 int i, j, sub_partition_count[4], list, ref[2][4];
5806 if( h->slice_type == FF_B_TYPE ) {
5807 for( i = 0; i < 4; i++ ) {
5808 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5809 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5810 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5812 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5813 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5814 pred_direct_motion(h, &mb_type);
5815 h->ref_cache[0][scan8[4]] =
5816 h->ref_cache[1][scan8[4]] =
5817 h->ref_cache[0][scan8[12]] =
5818 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5819 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5820 for( i = 0; i < 4; i++ )
5821 if( IS_DIRECT(h->sub_mb_type[i]) )
5822 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5826 for( i = 0; i < 4; i++ ) {
5827 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5828 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5829 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 sub-block, then spread to cache */
5833 for( list = 0; list < h->list_count; list++ ) {
5834 for( i = 0; i < 4; i++ ) {
5835 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5836 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5837 if( h->ref_count[list] > 1 )
5838 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5844 h->ref_cache[list][ scan8[4*i]+1 ]=
5845 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5850 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition; mvd decoded relative to the
 * predicted motion, mv/mvd caches filled per partition shape */
5852 for(list=0; list<h->list_count; list++){
5854 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5855 if(IS_DIRECT(h->sub_mb_type[i])){
5856 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5860 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5861 const int sub_mb_type= h->sub_mb_type[i];
5862 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5863 for(j=0; j<sub_partition_count[i]; j++){
5866 const int index= 4*i + block_width*j;
5867 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5868 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5869 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5871 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5872 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5873 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5875 if(IS_SUB_8X8(sub_mb_type)){
5877 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5879 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5882 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5884 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5885 }else if(IS_SUB_8X4(sub_mb_type)){
5886 mv_cache[ 1 ][0]= mx;
5887 mv_cache[ 1 ][1]= my;
5889 mvd_cache[ 1 ][0]= mx - mpx;
5890 mvd_cache[ 1 ][1]= my - mpy;
5891 }else if(IS_SUB_4X8(sub_mb_type)){
5892 mv_cache[ 8 ][0]= mx;
5893 mv_cache[ 8 ][1]= my;
5895 mvd_cache[ 8 ][0]= mx - mpx;
5896 mvd_cache[ 8 ][1]= my - mpy;
5898 mv_cache[ 0 ][0]= mx;
5899 mv_cache[ 0 ][1]= my;
5901 mvd_cache[ 0 ][0]= mx - mpx;
5902 mvd_cache[ 0 ][1]= my - mpy;
/* sub-block not predicted from this list: zero the caches */
5905 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5906 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5907 p[0] = p[1] = p[8] = p[9] = 0;
5908 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct MB: motion fully inferred, mvd is zero --- */
5912 } else if( IS_DIRECT(mb_type) ) {
5913 pred_direct_motion(h, &mb_type);
5914 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5915 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5916 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter 16x16 / 16x8 / 8x16 partitions --- */
5918 int list, mx, my, i, mpx, mpy;
5919 if(IS_16X16(mb_type)){
5920 for(list=0; list<h->list_count; list++){
5921 if(IS_DIR(mb_type, 0, list)){
5922 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5923 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5925 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5927 for(list=0; list<h->list_count; list++){
5928 if(IS_DIR(mb_type, 0, list)){
5929 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5931 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5932 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5933 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5935 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5936 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5938 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5941 else if(IS_16X8(mb_type)){
5942 for(list=0; list<h->list_count; list++){
5944 if(IS_DIR(mb_type, i, list)){
5945 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5946 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5948 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5951 for(list=0; list<h->list_count; list++){
5953 if(IS_DIR(mb_type, i, list)){
5954 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5955 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5956 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5957 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5959 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5960 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5962 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5963 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5968 assert(IS_8X16(mb_type));
5969 for(list=0; list<h->list_count; list++){
5971 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5972 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5973 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5975 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5978 for(list=0; list<h->list_count; list++){
5980 if(IS_DIR(mb_type, i, list)){
5981 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5982 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5983 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5985 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5986 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5987 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5989 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5990 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5997 if( IS_INTER( mb_type ) ) {
5998 h->chroma_pred_mode_table[mb_xy] = 0;
5999 write_back_motion( h, mb_type );
/* --- cbp (intra16x16 carries its cbp in the mb_type) --- */
6002 if( !IS_INTRA16x16( mb_type ) ) {
6003 cbp = decode_cabac_mb_cbp_luma( h );
6004 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6007 h->cbp_table[mb_xy] = h->cbp = cbp;
6009 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6010 if( decode_cabac_mb_transform_size( h ) )
6011 mb_type |= MB_TYPE_8x8DCT;
6013 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: qp delta then all coded blocks --- */
6015 if( cbp || IS_INTRA16x16( mb_type ) ) {
6016 const uint8_t *scan, *scan8x8, *dc_scan;
6017 const uint32_t *qmul;
6020 if(IS_INTERLACED(mb_type)){
6021 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6022 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6023 dc_scan= luma_dc_field_scan;
6025 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6026 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6027 dc_scan= luma_dc_zigzag_scan;
6030 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6031 if( dqp == INT_MIN ){
6032 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale into 0..51 as the spec's modular qp arithmetic */
6036 if(((unsigned)s->qscale) > 51){
6037 if(s->qscale<0) s->qscale+= 52;
6038 else s->qscale-= 52;
6040 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
6041 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6043 if( IS_INTRA16x16( mb_type ) ) {
6045 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6046 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6049 qmul = h->dequant4_coeff[0][s->qscale];
6050 for( i = 0; i < 16; i++ ) {
6051 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6052 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6055 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6059 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6060 if( cbp & (1<<i8x8) ) {
6061 if( IS_8x8DCT(mb_type) ) {
6062 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6063 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6065 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6066 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6067 const int index = 4*i8x8 + i4x4;
6068 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6070 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6071 //STOP_TIMER("decode_residual")
6075 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6076 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6083 for( c = 0; c < 2; c++ ) {
6084 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6085 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6091 for( c = 0; c < 2; c++ ) {
6092 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6093 for( i = 0; i < 4; i++ ) {
6094 const int index = 16 + 4 * c + i;
6095 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6096 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
/* no chroma AC coded: clear the chroma nnz cache entries */
6100 uint8_t * const nnz= &h->non_zero_count_cache[0];
6101 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6102 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not intra16x16: everything is zero */
6105 uint8_t * const nnz= &h->non_zero_count_cache[0];
6106 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6107 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6108 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6109 h->last_qscale_diff = 0;
6112 s->current_picture.qscale_table[mb_xy]= s->qscale;
6113 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling from above */
6116 h->ref_count[0] >>= 1;
6117 h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge (16 pixels high).
 * bS<4: delegates to the dsp clipped filter with per-4-pixel tc0;
 * bS==4 (intra edge): applies the strong filter inline, choosing the
 * 3-tap or full smoothing per the alpha/beta thresholds.
 */
6124 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6126 const int index_a = qp + h->slice_alpha_c0_offset;
6127 const int alpha = (alpha_table+52)[index_a];
6128 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 disables filtering for that 4-pixel group */
6133 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6134 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6136 /* 16px edge length, because bS=4 is triggered by being at
6137 * the edge of an intra MB, so all 4 bS are the same */
6138 for( d = 0; d < 16; d++ ) {
6139 const int p0 = pix[-1];
6140 const int p1 = pix[-2];
6141 const int p2 = pix[-3];
6143 const int q0 = pix[0];
6144 const int q1 = pix[1];
6145 const int q2 = pix[2];
6147 if( FFABS( p0 - q0 ) < alpha &&
6148 FFABS( p1 - p0 ) < beta &&
6149 FFABS( q1 - q0 ) < beta ) {
/* strong filtering only for small p0-q0 steps */
6151 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6152 if( FFABS( p2 - p0 ) < beta)
6154 const int p3 = pix[-4];
6156 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6157 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6158 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6161 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6163 if( FFABS( q2 - q0 ) < beta)
6165 const int q3 = pix[3];
6167 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6168 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6169 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6172 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large step: only p0/q0 are smoothed */
6176 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6177 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6179 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge: bS<4 uses the clipped dsp chroma
 * filter (tc biased by +1, 0 disables), bS==4 the intra chroma filter.
 */
6185 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6187 const int index_a = qp + h->slice_alpha_c0_offset;
6188 const int alpha = (alpha_table+52)[index_a];
6189 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6194 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6195 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6197 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the vertical luma edge of an MBAFF macroblock row by row:
 * each of the 16 rows may belong to a different field, so bS (8
 * entries) and qp (2 entries) are selected per row instead of per
 * 4-pixel group as in filter_mb_edgev.
 */
6201 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6203 for( i = 0; i < 16; i++, pix += stride) {
6209 int bS_index = (i >> 1);
6212 bS_index |= (i & 1);
6215 if( bS[bS_index] == 0 ) {
/* qp choice: per-field halves when MB_FIELD, else alternating rows */
6219 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6220 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6221 alpha = (alpha_table+52)[index_a];
6222 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6224 if( bS[bS_index] < 4 ) {
/* normal (clipped) filtering */
6225 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6226 const int p0 = pix[-1];
6227 const int p1 = pix[-2];
6228 const int p2 = pix[-3];
6229 const int q0 = pix[0];
6230 const int q1 = pix[1];
6231 const int q2 = pix[2];
6233 if( FFABS( p0 - q0 ) < alpha &&
6234 FFABS( p1 - p0 ) < beta &&
6235 FFABS( q1 - q0 ) < beta ) {
6239 if( FFABS( p2 - p0 ) < beta ) {
6240 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6243 if( FFABS( q2 - q0 ) < beta ) {
6244 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6248 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6249 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6250 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6251 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: strong (intra-edge) filtering, same maths as
 * filter_mb_edgev but applied per single row */
6254 const int p0 = pix[-1];
6255 const int p1 = pix[-2];
6256 const int p2 = pix[-3];
6258 const int q0 = pix[0];
6259 const int q1 = pix[1];
6260 const int q2 = pix[2];
6262 if( FFABS( p0 - q0 ) < alpha &&
6263 FFABS( p1 - p0 ) < beta &&
6264 FFABS( q1 - q0 ) < beta ) {
6266 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6267 if( FFABS( p2 - p0 ) < beta)
6269 const int p3 = pix[-4];
6271 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6272 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6273 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6276 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6278 if( FFABS( q2 - q0 ) < beta)
6280 const int q3 = pix[3];
6282 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6283 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6284 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6287 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6291 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6292 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6294 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the vertical chroma edge of an MBAFF macroblock, one of 8
 * rows at a time, with per-row bS/qp selection (chroma analogue of
 * filter_mb_mbaff_edgev).
 */
6299 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6301 for( i = 0; i < 8; i++, pix += stride) {
6309 if( bS[bS_index] == 0 ) {
6313 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6314 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6315 alpha = (alpha_table+52)[index_a];
6316 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6318 if( bS[bS_index] < 4 ) {
/* clipped chroma filtering: only p0/q0 are modified */
6319 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6320 const int p0 = pix[-1];
6321 const int p1 = pix[-2];
6322 const int q0 = pix[0];
6323 const int q1 = pix[1];
6325 if( FFABS( p0 - q0 ) < alpha &&
6326 FFABS( p1 - p0 ) < beta &&
6327 FFABS( q1 - q0 ) < beta ) {
6328 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6330 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6331 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6332 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: unclipped strong chroma filtering */
6335 const int p0 = pix[-1];
6336 const int p1 = pix[-2];
6337 const int q0 = pix[0];
6338 const int q1 = pix[1];
6340 if( FFABS( p0 - q0 ) < alpha &&
6341 FFABS( p1 - p0 ) < beta &&
6342 FFABS( q1 - q0 ) < beta ) {
6344 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6345 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6346 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge — same structure as
 * filter_mb_edgev but addressing neighbours via stride (pix_next)
 * instead of +/-1, and using the vertical dsp filter.
 */
6352 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6354 const int index_a = qp + h->slice_alpha_c0_offset;
6355 const int alpha = (alpha_table+52)[index_a];
6356 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6357 const int pix_next = stride;
/* tc[i] = -1 disables filtering for that 4-pixel group */
6362 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6363 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6365 /* 16px edge length, see filter_mb_edgev */
6366 for( d = 0; d < 16; d++ ) {
6367 const int p0 = pix[-1*pix_next];
6368 const int p1 = pix[-2*pix_next];
6369 const int p2 = pix[-3*pix_next];
6370 const int q0 = pix[0];
6371 const int q1 = pix[1*pix_next];
6372 const int q2 = pix[2*pix_next];
6374 if( FFABS( p0 - q0 ) < alpha &&
6375 FFABS( p1 - p0 ) < beta &&
6376 FFABS( q1 - q0 ) < beta ) {
6378 const int p3 = pix[-4*pix_next];
6379 const int q3 = pix[ 3*pix_next];
6381 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6382 if( FFABS( p2 - p0 ) < beta) {
6384 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6385 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6386 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6389 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6391 if( FFABS( q2 - q0 ) < beta) {
6393 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6394 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6395 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6398 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large p0-q0 step: only smooth the two edge pixels */
6402 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6403 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6405 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/*
 * Deblock one horizontal chroma edge via the DSP routines.
 * Chroma tc convention is tc0 + 1 (0 disables the group); the _intra
 * variant is the strong filter used when bS == 4 (the selecting branch
 * is elided in this extract).
 */
6412 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6414 const int index_a = qp + h->slice_alpha_c0_offset;
6415 const int alpha = (alpha_table+52)[index_a];
6416 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6421 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6422 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6424 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Fast deblocking path for one macroblock.
 * Falls back to the fully general filter_mb() for the first MB of a
 * row/column, for per-MB chroma QP differences, when the slice-local
 * deblock mode (deblocking_filter == 2) crosses a slice boundary, or
 * when no accelerated h264_loop_filter_strength exists.
 */
6428 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6429 MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first MB row is row 1, not row 0 */
6430 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6432 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6436 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6438 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6439 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6440 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
/* this fast path does not handle MBAFF */
6443 assert(!FRAME_MBAFF);
6445 mb_type = s->current_picture.mb_type[mb_xy];
6446 qp = s->current_picture.qscale_table[mb_xy];
6447 qp0 = s->current_picture.qscale_table[mb_xy-1];
6448 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6449 qpc = get_chroma_qp( h, 0, qp );
6450 qpc0 = get_chroma_qp( h, 0, qp0 );
6451 qpc1 = get_chroma_qp( h, 0, qp1 );
/* edge QP is the rounded average of the two adjacent macroblock QPs */
6452 qp0 = (qp + qp0 + 1) >> 1;
6453 qp1 = (qp + qp1 + 1) >> 1;
6454 qpc0 = (qpc + qpc0 + 1) >> 1;
6455 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP the filter provably changes nothing -> skip the whole MB */
6456 qp_thresh = 15 - h->slice_alpha_c0_offset;
6457 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6458 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed strengths: 4 on MB edges (3 horizontally in fields), 3 inside */
6461 if( IS_INTRA(mb_type) ) {
6462 int16_t bS4[4] = {4,4,4,4};
6463 int16_t bS3[4] = {3,3,3,3};
6464 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only edges 0 and 2 exist */
6465 if( IS_8x8DCT(mb_type) ) {
6466 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6467 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6468 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6469 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6471 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6472 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6473 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6474 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6475 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6476 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6477 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6478 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma has half resolution: only edges 0 and 2 */
6480 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6481 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6482 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6483 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6484 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6485 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6486 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6487 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute strengths in bulk, 8-byte aligned so they can be
 * written as one uint64_t (four int16_t) per edge */
6490 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6491 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6493 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6495 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: which interior edges can reuse the previous mv-based bS */
6497 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6498 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6499 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6500 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6502 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6503 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6504 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6505 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbours force maximum strength on the shared MB edge */
6507 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6508 bSv[0][0] = 0x0004000400040004ULL;
6509 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6510 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
 * cross-MB QP (qp0/qp1), interior edges use this MB's QP */
6512 #define FILTER(hv,dir,edge)\
6513 if(bSv[dir][edge]) {\
6514 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6516 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6517 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6523 } else if( IS_8x8DCT(mb_type) ) {
/*
 * Fully general deblocking of one macroblock (handles MBAFF, field
 * pictures, per-edge boundary-strength derivation from nnz/refs/mvs).
 * NOTE(review): elided extract -- several declarations/braces are
 * missing between the embedded line numbers.
 */
6542 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6543 MpegEncContext * const s = &h->s;
6544 const int mb_xy= mb_x + mb_y*s->mb_stride;
6545 const int mb_type = s->current_picture.mb_type[mb_xy];
/* vertical mv threshold for bS: 2 (half-pel units) for interlaced, else 4 */
6546 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6547 int first_vertical_edge_done = 0;
6550 //for sufficiently low qp, filtering wouldn't do anything
6551 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6553 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6554 int qp = s->current_picture.qscale_table[mb_xy];
6556 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6557 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6563 // left mb is in picture
6564 && h->slice_table[mb_xy-1] != 255
6565 // and current and left pair do not have the same interlaced type
6566 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6567 // and left mb is in the same slice if deblocking_filter == 2
6568 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6569 /* First vertical edge is different in MBAFF frames
6570 * There are 8 different bS to compute and 2 different Qp
6572 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6573 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6578 int mb_qp, mbn0_qp, mbn1_qp;
6580 first_vertical_edge_done = 1;
6582 if( IS_INTRA(mb_type) )
6583 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6585 for( i = 0; i < 8; i++ ) {
/* pick top/bottom left neighbour depending on field/frame coding of the pair */
6586 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6588 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6590 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6591 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6592 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* edge QPs: average of current MB QP and each left-pair MB QP,
 * separately for luma (qp), Cb (bqp) and Cr (rqp) */
6599 mb_qp = s->current_picture.qscale_table[mb_xy];
6600 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6601 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6602 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6603 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6604 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6605 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6606 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6607 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6608 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6609 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6610 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6611 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6614 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6615 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6616 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6617 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6618 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6620 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6621 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour across edge 0 (left for dir 0, top for dir 1) */
6624 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6625 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 == outside the picture: skip edge 0 */
6626 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6628 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6629 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6630 // how often to recheck mv-based bS when iterating between edges
6631 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6632 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6633 // how often to recheck mv-based bS when iterating along each edge
6634 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6636 if (first_vertical_edge_done) {
6638 first_vertical_edge_done = 0;
6641 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6644 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6645 && !IS_INTERLACED(mb_type)
6646 && IS_INTERLACED(mbm_type)
6648 // This is a special case in the norm where the filtering must
6649 // be done twice (one each of the field) even if we are in a
6650 // frame macroblock.
6652 static const int nnz_idx[4] = {4,5,6,3};
6653 unsigned int tmp_linesize = 2 * linesize;
6654 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6655 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* one pass per field of the interlaced top neighbour pair */
6660 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6661 if( IS_INTRA(mb_type) ||
6662 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6663 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6665 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6666 for( i = 0; i < 4; i++ ) {
6667 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6668 mbn_nnz[nnz_idx[i]] != 0 )
6674 // Do not use s->qscale as luma quantizer because it has not the same
6675 // value in IPCM macroblocks.
6676 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6677 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6678 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6679 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6680 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6681 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6682 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6683 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: derive bS[0..3] for each 4-pixel edge */
6690 for( edge = start; edge < edges; edge++ ) {
6691 /* mbn_xy: neighbor macroblock */
6692 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6693 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform has no odd interior edges */
6697 if( (edge&1) && IS_8x8DCT(mb_type) )
6700 if( IS_INTRA(mb_type) ||
6701 IS_INTRA(mbn_type) ) {
6704 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6705 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6714 bS[0] = bS[1] = bS[2] = bS[3] = value;
6719 if( edge & mask_edge ) {
6720 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mixed field/frame pair in MBAFF: fixed strength 1 */
6723 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6724 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition on both sides: compute bS once */
6727 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6728 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6729 int bn_idx= b_idx - (dir ? 8:1);
6731 int xn= h->slice_type == FF_B_TYPE && h->ref2frm[0][h->ref_cache[0][b_idx]+2] != h->ref2frm[0][h->ref_cache[0][bn_idx]+2];
6733 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
/* bS = 1 if refs differ or any mv component differs by >= 1 luma sample */
6735 v |= h->ref2frm[l][h->ref_cache[l][b_idx]+2] != h->ref2frm[ln][h->ref_cache[ln][bn_idx]+2] ||
6736 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6737 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6739 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4-pixel sub-edge */
6745 for( i = 0; i < 4; i++ ) {
6746 int x = dir == 0 ? edge : i;
6747 int y = dir == 0 ? i : edge;
6748 int b_idx= 8 + 4 + x + 8*y;
6749 int bn_idx= b_idx - (dir ? 8:1);
/* coded residual on either side -> bS 2 */
6751 if( h->non_zero_count_cache[b_idx] != 0 ||
6752 h->non_zero_count_cache[bn_idx] != 0 ) {
6757 int xn= h->slice_type == FF_B_TYPE && h->ref2frm[0][h->ref_cache[0][b_idx]+2] != h->ref2frm[0][h->ref_cache[0][bn_idx]+2];
6759 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6761 if( h->ref2frm[l][h->ref_cache[l][b_idx]+2] != h->ref2frm[ln][h->ref_cache[ln][bn_idx]+2] ||
6762 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6763 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four strengths zero -> nothing to filter on this edge */
6771 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6776 // Do not use s->qscale as luma quantizer because it has not the same
6777 // value in IPCM macroblocks.
6778 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6779 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6780 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6781 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6783 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma is filtered only on even luma edges (half resolution) */
6784 if( (edge&1) == 0 ) {
6785 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6786 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6787 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6788 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6791 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6792 if( (edge&1) == 0 ) {
6793 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6794 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6795 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6796 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * Decode all macroblocks of one slice, dispatching to the CABAC or
 * CAVLC entropy decoder.  Reports decoded/errored regions to the error
 * concealment via ff_er_add_slice().
 */
6803 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6804 MpegEncContext * const s = &h->s;
/* with data partitioning only AC errors/ends are meaningful for ER */
6805 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6809 if( h->pps.cabac ) {
/* CABAC payload starts byte-aligned after cabac_alignment_one_bit */
6813 align_get_bits( &s->gb );
6816 ff_init_cabac_states( &h->cabac);
6817 ff_init_cabac_decoder( &h->cabac,
6818 s->gb.buffer + get_bits_count(&s->gb)/8,
6819 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6820 /* calculate pre-state */
/* initialize all 460 context models from the spec's init tables,
 * scaled by the slice QP */
6821 for( i= 0; i < 460; i++ ) {
6823 if( h->slice_type == FF_I_TYPE )
6824 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6826 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: LSB is the most-probable-symbol bit */
6829 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6831 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6836 int ret = decode_mb_cabac(h);
6838 //STOP_TIMER("decode_mb_cabac")
6840 if(ret>=0) hl_decode_mb(h);
/* MBAFF: the pair partner MB is decoded immediately after */
6842 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6845 if(ret>=0) ret = decode_mb_cabac(h);
6847 if(ret>=0) hl_decode_mb(h);
6850 eos = get_cabac_terminate( &h->cabac );
/* bytestream may legally be read up to 2 bytes past the end (carry) */
6852 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6853 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6854 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6858 if( ++s->mb_x >= s->mb_width ) {
6860 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6862 if(FIELD_OR_MBAFF_PICTURE) {
6867 if( eos || s->mb_y >= s->mb_height ) {
6868 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6869 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6876 int ret = decode_mb_cavlc(h);
6878 if(ret>=0) hl_decode_mb(h);
6880 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6882 ret = decode_mb_cavlc(h);
6884 if(ret>=0) hl_decode_mb(h);
6889 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6890 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6895 if(++s->mb_x >= s->mb_width){
6897 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6899 if(FIELD_OR_MBAFF_PICTURE) {
6902 if(s->mb_y >= s->mb_height){
6903 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end of bitstream == clean slice end; otherwise flag an error */
6905 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6906 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6910 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mb_skip_run may still cover MBs after the last coded bit */
6917 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6918 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6919 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6920 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6924 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below appears to be disabled/testing code
 * (it calls decode_mb() and passes s->gb by value to get_bits_count,
 * unlike the live paths above which pass &s->gb) */
6933 for(;s->mb_y < s->mb_height; s->mb_y++){
6934 for(;s->mb_x < s->mb_width; s->mb_x++){
6935 int ret= decode_mb(h);
6940 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6941 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6946 if(++s->mb_x >= s->mb_width){
6948 if(++s->mb_y >= s->mb_height){
6949 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6950 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6954 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* FIX(review): removed two stray '?' characters that corrupted this line
 * ("s->?gb", "s->gb?."); restored form matches the sibling check on the
 * following line. */
6961 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
/* end-of-bitstream bookkeeping for the (apparently disabled) decode_mb loop */
6962 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6963 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6967 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6974 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6977 return -1; //not reached
/*
 * Parse a user_data_unregistered SEI payload of 'size' bytes:
 * 16-byte UUID followed by free-form text.  Recognizes the x264
 * version string to set h->x264_build (used for bug workarounds).
 */
6980 static int decode_unregistered_user_data(H264Context *h, int size){
6981 MpegEncContext * const s = &h->s;
/* 16 bytes UUID + up to 256 bytes of payload text */
6982 uint8_t user_data[16+256];
/* read at most sizeof(user_data)-1 bytes so the buffer can be
 * NUL-terminated (terminator assignment elided in this extract) */
6988 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6989 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6993 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6994 if(e==1 && build>=0)
6995 h->x264_build= build;
6997 if(s->avctx->debug & FF_DEBUG_BUGS)
6998 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes beyond the local buffer */
7001 skip_bits(&s->gb, 8);
/*
 * Parse all SEI messages in the current NAL unit.  Each message has a
 * type and a size, both coded as sequences of 0xFF bytes plus a final
 * byte (ff_coding per the spec).
 */
7006 static int decode_sei(H264Context *h){
7007 MpegEncContext * const s = &h->s;
/* need at least 2 more bytes (type + size) for another message */
7009 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate 255 per 0xFF prefix byte, then add the final byte */
7014 type+= show_bits(&s->gb, 8);
7015 }while(get_bits(&s->gb, 8) == 255);
7019 size+= show_bits(&s->gb, 8);
7020 }while(get_bits(&s->gb, 8) == 255);
7024 if(decode_unregistered_user_data(h, size) < 0)
/* unsupported SEI types are skipped wholesale */
7028 skip_bits(&s->gb, 8*size);
7031 //FIXME check bits here
7032 align_get_bits(&s->gb);
/*
 * Parse hrd_parameters() (H.264 Annex E).  All fields are read to keep
 * the bitstream position correct; none are stored in the SPS here.
 */
7038 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7039 MpegEncContext * const s = &h->s;
7041 cpb_count = get_ue_golomb(&s->gb) + 1;
7042 get_bits(&s->gb, 4); /* bit_rate_scale */
7043 get_bits(&s->gb, 4); /* cpb_size_scale */
7044 for(i=0; i<cpb_count; i++){
7045 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7046 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7047 get_bits1(&s->gb); /* cbr_flag */
7049 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7050 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7051 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7052 get_bits(&s->gb, 5); /* time_offset_length */
/*
 * Parse vui_parameters() (H.264 Annex E): sample aspect ratio, video
 * signal info, timing, HRD, and bitstream restrictions.  Stores only
 * the fields the decoder uses (sar, timing, num_reorder_frames, ...);
 * the rest are read and discarded.
 */
7055 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7056 MpegEncContext * const s = &h->s;
7057 int aspect_ratio_info_present_flag;
7058 unsigned int aspect_ratio_idc;
7059 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7061 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7063 if( aspect_ratio_info_present_flag ) {
7064 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7065 if( aspect_ratio_idc == EXTENDED_SAR ) {
7066 sps->sar.num= get_bits(&s->gb, 16);
7067 sps->sar.den= get_bits(&s->gb, 16);
/* otherwise look up the predefined aspect-ratio table */
7068 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7069 sps->sar= pixel_aspect[aspect_ratio_idc];
7071 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7078 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7080 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7081 get_bits1(&s->gb); /* overscan_appropriate_flag */
7084 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7085 get_bits(&s->gb, 3); /* video_format */
7086 get_bits1(&s->gb); /* video_full_range_flag */
7087 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7088 get_bits(&s->gb, 8); /* colour_primaries */
7089 get_bits(&s->gb, 8); /* transfer_characteristics */
7090 get_bits(&s->gb, 8); /* matrix_coefficients */
7094 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7095 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7096 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7099 sps->timing_info_present_flag = get_bits1(&s->gb);
7100 if(sps->timing_info_present_flag){
7101 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7102 sps->time_scale = get_bits_long(&s->gb, 32);
7103 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7106 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7107 if(nal_hrd_parameters_present_flag)
7108 decode_hrd_parameters(h, sps);
7109 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7110 if(vcl_hrd_parameters_present_flag)
7111 decode_hrd_parameters(h, sps);
7112 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7113 get_bits1(&s->gb); /* low_delay_hrd_flag */
7114 get_bits1(&s->gb); /* pic_struct_present_flag */
7116 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7117 if(sps->bitstream_restriction_flag){
7118 unsigned int num_reorder_frames;
7119 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7120 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7121 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7122 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7123 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7124 num_reorder_frames= get_ue_golomb(&s->gb);
7125 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity bound before storing; protects delayed-pic arrays downstream */
7127 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7128 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7132 sps->num_reorder_frames= num_reorder_frames;
/*
 * Parse one scaling list (16 or 64 coefficients) in zig-zag order.
 * If the list is absent, 'fallback_list' is used; if the first delta
 * yields 0, the spec-default 'jvt_list' is used.  Deltas are coded as
 * signed Exp-Golomb relative to the previous value (DPCM, mod 256).
 */
7138 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7139 const uint8_t *jvt_list, const uint8_t *fallback_list){
7140 MpegEncContext * const s = &h->s;
7141 int i, last = 8, next = 8;
7142 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7143 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7144 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7146 for(i=0;i<size;i++){
7148 next = (last + get_se_golomb(&s->gb)) & 0xff;
7149 if(!i && !next){ /* matrix not written, we use the preset one */
7150 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next == 0 means "repeat previous value" for the rest of the list */
7153 last = factors[scan[i]] = next ? next : last;
/*
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rule (spec 7.3.2.1.1.1): a PPS falls back to the SPS
 * matrices when the SPS carried any; otherwise to the flat defaults.
 * List order per the spec is Y, Cb, Cr (intra), then Y, Cb, Cr (inter).
 */
7157 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7158 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7159 MpegEncContext * const s = &h->s;
7160 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7161 const uint8_t *fallback[4] = {
7162 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7163 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7164 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7165 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7167 if(get_bits1(&s->gb)){
7168 sps->scaling_matrix_present |= is_sps;
7169 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7170 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cb (spec list order: Y, Cb, Cr)
7171 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cr
7172 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7173 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cb
7174 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cr
7175 if(is_sps || pps->transform_8x8_mode){
7176 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7177 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7179 } else if(fallback_sps) {
/* no matrices in this PPS: inherit everything from the SPS */
7180 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7181 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
* Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
* (range-checks 'id' against 'max'; allocates a zeroed structure on first
* use; logs and fails on out-of-range id or allocation failure)
7189 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7190 const size_t size, const char *name)
7193 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7198 vec[id] = av_mallocz(size);
7200 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/*
 * Parse a seq_parameter_set RBSP into h->sps_buffers[sps_id].
 * Validates poc_cycle_length, ref_frame_count, picture dimensions and
 * cropping before accepting them.
 */
7205 static inline int decode_seq_parameter_set(H264Context *h){
7206 MpegEncContext * const s = &h->s;
7207 int profile_idc, level_idc;
7208 unsigned int sps_id, tmp, mb_width, mb_height;
7212 profile_idc= get_bits(&s->gb, 8);
7213 get_bits1(&s->gb); //constraint_set0_flag
7214 get_bits1(&s->gb); //constraint_set1_flag
7215 get_bits1(&s->gb); //constraint_set2_flag
7216 get_bits1(&s->gb); //constraint_set3_flag
7217 get_bits(&s->gb, 4); // reserved
7218 level_idc= get_bits(&s->gb, 8);
7219 sps_id= get_ue_golomb(&s->gb);
7221 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7225 sps->profile_idc= profile_idc;
7226 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices */
7228 if(sps->profile_idc >= 100){ //high profile
7229 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7230 get_bits1(&s->gb); //residual_color_transform_flag
7231 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7232 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7233 sps->transform_bypass = get_bits1(&s->gb);
7234 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7236 sps->scaling_matrix_present = 0;
7238 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7239 sps->poc_type= get_ue_golomb(&s->gb);
7241 if(sps->poc_type == 0){ //FIXME #define
7242 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7243 } else if(sps->poc_type == 1){//FIXME #define
7244 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7245 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7246 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7247 tmp= get_ue_golomb(&s->gb);
/* bound by the fixed-size offset_for_ref_frame array */
7249 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7250 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7253 sps->poc_cycle_length= tmp;
7255 for(i=0; i<sps->poc_cycle_length; i++)
7256 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7257 }else if(sps->poc_type != 2){
7258 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7262 tmp= get_ue_golomb(&s->gb);
7263 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7264 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7267 sps->ref_frame_count= tmp;
7268 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7269 mb_width= get_ue_golomb(&s->gb) + 1;
7270 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_* multiplications against overflow before the size check */
7271 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7272 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7273 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7276 sps->mb_width = mb_width;
7277 sps->mb_height= mb_height;
7279 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7280 if(!sps->frame_mbs_only_flag)
7281 sps->mb_aff= get_bits1(&s->gb);
7285 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7287 #ifndef ALLOW_INTERLACE
7289 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7291 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7292 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7294 sps->crop= get_bits1(&s->gb);
7296 sps->crop_left = get_ue_golomb(&s->gb);
7297 sps->crop_right = get_ue_golomb(&s->gb);
7298 sps->crop_top = get_ue_golomb(&s->gb);
7299 sps->crop_bottom= get_ue_golomb(&s->gb);
7300 if(sps->crop_left || sps->crop_top){
7301 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* NOTE(review): this reads h->sps (the currently active SPS) rather than
 * 'sps' (the one being parsed) -- looks like it should be
 * sps->frame_mbs_only_flag; verify against the spec/later versions */
7303 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7304 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7310 sps->crop_bottom= 0;
7313 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7314 if( sps->vui_parameters_present_flag )
7315 decode_vui_parameters(h, sps);
7317 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7318 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7319 sps_id, sps->profile_idc, sps->level_idc,
7321 sps->ref_frame_count,
7322 sps->mb_width, sps->mb_height,
7323 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7324 sps->direct_8x8_inference_flag ? "8B8" : "",
7325 sps->crop_left, sps->crop_right,
7326 sps->crop_top, sps->crop_bottom,
7327 sps->vui_parameters_present_flag ? "VUI" : ""
/*
 * Precompute the luma-QP -> chroma-QP mapping table for chroma plane
 * 't' with chroma_qp_index_offset 'index'.
 * NOTE(review): the loop bound is 255, so table entry 255 is never
 * written here -- verify against the declared size of
 * chroma_qp_table[t][] (the '& 0xff' mask suggests a 256-entry table).
 */
build_qp_table(PPS *pps, int t, int index)
7337 for(i = 0; i < 255; i++)
7338 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) RBSP.
 * Reads entropy-coding mode, slice-group (FMO) count, reference counts,
 * initial QP values, weighting/deblocking flags and, if extra bits remain,
 * the optional 8x8-transform + scaling-matrix extension; finally rebuilds
 * the chroma QP lookup tables via build_qp_table().
 * NOTE(review): lines were elided in this extract -- error returns and
 * closing braces between the numbered lines are not visible here.
 * @param bit_length length of the RBSP payload in bits, used to detect the
 *                   optional trailing extension fields
 */
7341 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7342 MpegEncContext * const s = &h->s;
7343 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7346 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
// seq_parameter_set_id: must reference an SPS that was already decoded
7350 tmp= get_ue_golomb(&s->gb);
7351 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7352 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7357 pps->cabac= get_bits1(&s->gb);
7358 pps->pic_order_present= get_bits1(&s->gb);
7359 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// Flexible Macroblock Ordering (slice groups > 1) is not implemented;
// the commented table below mirrors the spec syntax that would be parsed.
7360 if(pps->slice_group_count > 1 ){
7361 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7362 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7363 switch(pps->mb_slice_group_map_type){
7366 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7367 | run_length[ i ] |1 |ue(v) |
7372 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7374 | top_left_mb[ i ] |1 |ue(v) |
7375 | bottom_right_mb[ i ] |1 |ue(v) |
7383 | slice_group_change_direction_flag |1 |u(1) |
7384 | slice_group_change_rate_minus1 |1 |ue(v) |
7389 | slice_group_id_cnt_minus1 |1 |ue(v) |
7390 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7392 | slice_group_id[ i ] |1 |u(v) |
7397 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7398 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// sanity-limit the default reference counts; fall back to 1/1 on overflow
7399 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7400 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7401 pps->ref_count[0]= pps->ref_count[1]= 1;
7405 pps->weighted_pred= get_bits1(&s->gb);
7406 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// pic_init_qp/qs are coded as signed offsets from 26 per the spec
7407 pps->init_qp= get_se_golomb(&s->gb) + 26;
7408 pps->init_qs= get_se_golomb(&s->gb) + 26;
7409 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7410 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7411 pps->constrained_intra_pred= get_bits1(&s->gb);
7412 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7414 pps->transform_8x8_mode= 0;
7415 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// default scaling matrices are flat (all 16) until overridden below
7416 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7417 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// optional trailing fields (High profile): present iff bits remain in the RBSP
7419 if(get_bits_count(&s->gb) < bit_length){
7420 pps->transform_8x8_mode= get_bits1(&s->gb);
7421 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7422 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7424 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7427 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
// only build a second table when the two offsets actually differ
7428 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7429 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7430 h->pps.chroma_qp_diff= 1;
7432 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7434 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7435 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7436 pps_id, pps->sps_id,
7437 pps->cabac ? "CABAC" : "CAVLC",
7438 pps->slice_group_count,
7439 pps->ref_count[0], pps->ref_count[1],
7440 pps->weighted_pred ? "weighted" : "",
7441 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7442 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7443 pps->constrained_intra_pred ? "CONSTR" : "",
7444 pps->redundant_pic_cnt_present ? "REDU" : "",
7445 pps->transform_8x8_mode ? "8x8DCT" : ""
7453 * Call decode_slice() for each context.
7455 * @param h h264 master context
7456 * @param context_count number of contexts to execute
7458 static void execute_decode_slices(H264Context *h, int context_count){
7459 MpegEncContext * const s = &h->s;
7460 AVCodecContext * const avctx= s->avctx;
// single-context fast path: decode directly on the master context
7464 if(context_count == 1) {
7465 decode_slice(avctx, h);
// multi-context path: seed each worker context, then run them via
// avctx->execute (the application-provided thread dispatcher)
7467 for(i = 1; i < context_count; i++) {
7468 hx = h->thread_context[i];
7469 hx->s.error_resilience = avctx->error_resilience;
7470 hx->s.error_count = 0;
7473 avctx->execute(avctx, (void *)decode_slice,
7474 (void **)h->thread_context, NULL, context_count);
7476 /* pull back stuff from slices to master context */
// position/state is taken from the LAST context, which decoded furthest
7477 hx = h->thread_context[context_count - 1];
7478 s->mb_x = hx->s.mb_x;
7479 s->mb_y = hx->s.mb_y;
7480 s->dropable = hx->s.dropable;
7481 s->picture_structure = hx->s.picture_structure;
// error counts, by contrast, are accumulated across all worker contexts
7482 for(i = 1; i < context_count; i++)
7483 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both bitstream framings visible below: length-prefixed units
 * (h->is_avc, prefix size h->nal_length_size) and Annex-B 00 00 01 start
 * codes. Slice NALs are dispatched to per-thread contexts and executed in
 * batches of h->max_contexts via execute_decode_slices(); SPS/PPS/SEI are
 * parsed on the master context.
 * NOTE(review): lines were elided in this extract -- loop framing, error
 * returns and closing braces between the numbered lines are not visible.
 * @param buf      input bitstream (not modified)
 * @param buf_size number of valid bytes in buf
 * @return number of bytes consumed (negative on error, per the visible callers)
 */
7488 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7489 MpegEncContext * const s = &h->s;
7490 AVCodecContext * const avctx= s->avctx;
7492 H264Context *hx; ///< thread context
7493 int context_count = 0;
7495 h->max_contexts = avctx->thread_count;
// debug hex dump of the buffer head (elided guards around it not visible)
7498 for(i=0; i<50; i++){
7499 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// unless decoding in CHUNKS mode, a new call starts a new access unit
7502 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7503 h->current_slice = 0;
7504 if (!s->first_field)
7505 s->current_picture_ptr= NULL;
7517 if(buf_index >= buf_size) break;
// AVC framing: big-endian NAL size prefix of h->nal_length_size bytes
7519 for(i = 0; i < h->nal_length_size; i++)
7520 nalsize = (nalsize << 8) | buf[buf_index++];
7521 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7526 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7531 // start code prefix search
7532 for(; buf_index + 3 < buf_size; buf_index++){
7533 // This should always succeed in the first iteration.
7534 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7538 if(buf_index+3 >= buf_size) break;
7543 hx = h->thread_context[context_count];
7545 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7546 if (ptr==NULL || dst_length < 0){
// strip trailing zero bytes; length guard MUST come first, otherwise
// ptr[-1] is read out of bounds when dst_length is 0
7549 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7551 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7553 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7554 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7557 if (h->is_avc && (nalsize != consumed)){
7558 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7562 buf_index += consumed;
// honor hurry_up / skip_frame: drop non-reference NALs when asked to
7564 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7565 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7570 switch(hx->nal_unit_type){
7572 if (h->nal_unit_type != NAL_IDR_SLICE) {
7573 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7576 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7578 init_get_bits(&hx->s.gb, ptr, bit_length);
7580 hx->inter_gb_ptr= &hx->s.gb;
7581 hx->s.data_partitioning = 0;
7583 if((err = decode_slice_header(hx, h)))
7586 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// only count this slice for decoding if no skip/discard policy rejects it
7587 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7588 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7589 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7590 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7591 && avctx->skip_frame < AVDISCARD_ALL)
// data-partitioned slices: partition A carries the header, B/C the
// intra/inter residual, read through separate bit readers
7595 init_get_bits(&hx->s.gb, ptr, bit_length);
7597 hx->inter_gb_ptr= NULL;
7598 hx->s.data_partitioning = 1;
7600 err = decode_slice_header(hx, h);
7603 init_get_bits(&hx->intra_gb, ptr, bit_length);
7604 hx->intra_gb_ptr= &hx->intra_gb;
7607 init_get_bits(&hx->inter_gb, ptr, bit_length);
7608 hx->inter_gb_ptr= &hx->inter_gb;
7610 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7611 && s->context_initialized
7613 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7614 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7615 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7616 && avctx->skip_frame < AVDISCARD_ALL)
7620 init_get_bits(&s->gb, ptr, bit_length);
7624 init_get_bits(&s->gb, ptr, bit_length);
7625 decode_seq_parameter_set(h);
7627 if(s->flags& CODEC_FLAG_LOW_DELAY)
7630 if(avctx->has_b_frames < 2)
7631 avctx->has_b_frames= !s->low_delay;
7634 init_get_bits(&s->gb, ptr, bit_length);
7636 decode_picture_parameter_set(h, bit_length);
7640 case NAL_END_SEQUENCE:
7641 case NAL_END_STREAM:
7642 case NAL_FILLER_DATA:
7644 case NAL_AUXILIARY_SLICE:
7647 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// flush a full batch of slice contexts through the thread dispatcher
7650 if(context_count == h->max_contexts) {
7651 execute_decode_slices(h, context_count);
7656 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7658 /* Slice could not be decoded in parallel mode, copy down
7659 * NAL unit stuff to context 0 and restart. Note that
7660 * rbsp_buffer is not transfered, but since we no longer
7661 * run in parallel mode this should not be an issue. */
7662 h->nal_unit_type = hx->nal_unit_type;
7663 h->nal_ref_idc = hx->nal_ref_idc;
// decode any slices left over from an incomplete batch
7669 execute_decode_slices(h, context_count);
7674 * returns the number of bytes consumed for building the current frame
/**
 * @param pos      byte position reached in the input buffer
 * @param buf_size total size of the input buffer
 */
7676 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// in TRUNCATED mode part of the buffer may belong to the next frame and
// is held in the parse context, so subtract what was carried over
7677 if(s->flags&CODEC_FLAG_TRUNCATED){
7678 pos -= s->parse_context.last_index;
7679 if(pos<0) pos=0; // FIXME remove (unneeded?)
7683 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
// if within 10 bytes of the end, claim the whole buffer was consumed
7684 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decode one input packet and possibly output one frame.
 * Parses avcC extradata on the first call for AVC-framed input, feeds the
 * packet to decode_nal_units(), then reorders decoded pictures by POC
 * through h->delayed_pic[] before handing one out in display order.
 * NOTE(review): lines were elided in this extract -- error returns and
 * closing braces between the numbered lines are not visible here.
 * @param data      AVFrame* output
 * @param data_size set to sizeof(AVFrame) when a picture is returned
 * @return bytes consumed (via get_consumed_bytes)
 */
7690 static int decode_frame(AVCodecContext *avctx,
7691 void *data, int *data_size,
7692 const uint8_t *buf, int buf_size)
7694 H264Context *h = avctx->priv_data;
7695 MpegEncContext *s = &h->s;
7696 AVFrame *pict = data;
7699 s->flags= avctx->flags;
7700 s->flags2= avctx->flags2;
// TRUNCATED mode: accumulate input until a full frame boundary is found
7702 if(s->flags&CODEC_FLAG_TRUNCATED){
7703 const int next= ff_h264_find_frame_end(h, buf, buf_size);
7704 assert((buf_size > 0) || (next == END_NOT_FOUND));
7706 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7708 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7711 /* no supplementary picture */
// empty packet at end of stream: drain the delayed-picture queue
7712 if (buf_size == 0) {
7716 //FIXME factorize this with the output code below
7717 out = h->delayed_pic[0];
// pick the lowest-POC delayed picture up to the next keyframe
7719 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7720 if(h->delayed_pic[i]->poc < out->poc){
7721 out = h->delayed_pic[i];
7725 for(i=out_idx; h->delayed_pic[i]; i++)
7726 h->delayed_pic[i] = h->delayed_pic[i+1];
7729 *data_size = sizeof(AVFrame);
7730 *pict= *(AVFrame*)out;
// one-time avcC extradata parsing for length-prefixed (MP4-style) input
7736 if(h->is_avc && !h->got_avcC) {
7737 int i, cnt, nalsize;
7738 unsigned char *p = avctx->extradata;
7739 if(avctx->extradata_size < 7) {
7740 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7744 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7747 /* sps and pps in the avcC always have length coded with 2 bytes,
7748 so put a fake nal_length_size = 2 while parsing them */
7749 h->nal_length_size = 2;
7750 // Decode sps from avcC
7751 cnt = *(p+5) & 0x1f; // Number of sps
7753 for (i = 0; i < cnt; i++) {
7754 nalsize = AV_RB16(p) + 2;
7755 if(decode_nal_units(h, p, nalsize) < 0) {
7756 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7761 // Decode pps from avcC
7762 cnt = *(p++); // Number of pps
7763 for (i = 0; i < cnt; i++) {
7764 nalsize = AV_RB16(p) + 2;
// NOTE(review): pps uses "!= nalsize" while sps above uses "< 0" --
// presumably intentional strictness difference; confirm before changing.
7765 if(decode_nal_units(h, p, nalsize) != nalsize) {
7766 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7771 // Now store right nal length size, that will be use to parse all other nals
7772 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7773 // Do not reparse avcC
// Annex-B streams may carry SPS/PPS in extradata; parse it once up front
7777 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7778 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7782 buf_index=decode_nal_units(h, buf, buf_size);
7786 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7787 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7788 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// picture completed (or CHUNKS mode finished the last MB row)
7792 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7793 Picture *out = s->current_picture_ptr;
7794 Picture *cur = s->current_picture_ptr;
7795 int i, pics, cross_idr, out_of_order, out_idx;
7799 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7800 s->current_picture_ptr->pict_type= s->pict_type;
// roll POC/frame_num state forward and apply reference picture marking
7802 h->prev_frame_num_offset= h->frame_num_offset;
7803 h->prev_frame_num= h->frame_num;
7805 h->prev_poc_msb= h->poc_msb;
7806 h->prev_poc_lsb= h->poc_lsb;
7807 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7811 * FIXME: Error handling code does not seem to support interlaced
7812 * when slices span multiple rows
7813 * The ff_er_add_slice calls don't work right for bottom
7814 * fields; they cause massive erroneous error concealing
7815 * Error marking covers both fields (top and bottom).
7816 * This causes a mismatched s->error_count
7817 * and a bad error table. Further, the error count goes to
7818 * INT_MAX when called for bottom field, because mb_y is
7819 * past end by one (callers fault) and resync_mb_y != 0
7820 * causes problems for the first MB line, too.
7827 if (s->first_field) {
7828 /* Wait for second field. */
7832 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7833 /* Derive top_field_first from field pocs. */
7834 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7836 //FIXME do something with unavailable reference frames
7838 /* Sort B-frames into display order */
7840 if(h->sps.bitstream_restriction_flag
7841 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7842 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// without a bitstream restriction we must assume the worst-case delay
7846 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7847 && !h->sps.bitstream_restriction_flag){
7848 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7853 while(h->delayed_pic[pics]) pics++;
7855 assert(pics <= MAX_DELAYED_PIC_COUNT);
// queue the current picture; keep it alive while it waits for output
7857 h->delayed_pic[pics++] = cur;
7858 if(cur->reference == 0)
7859 cur->reference = DELAYED_PIC_REF;
7862 for(i=0; h->delayed_pic[i]; i++)
7863 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// choose the lowest-POC picture up to the next keyframe as output
7866 out = h->delayed_pic[0];
7868 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7869 if(h->delayed_pic[i]->poc < out->poc){
7870 out = h->delayed_pic[i];
7874 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// grow has_b_frames adaptively when out-of-order output is detected
7876 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7878 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7880 ((!cross_idr && out->poc > h->outputed_poc + 2)
7881 || cur->pict_type == FF_B_TYPE)))
7884 s->avctx->has_b_frames++;
// pop the chosen picture from the delay queue once it may be released
7887 if(out_of_order || pics > s->avctx->has_b_frames){
7888 out->reference &= ~DELAYED_PIC_REF;
7889 for(i=out_idx; h->delayed_pic[i]; i++)
7890 h->delayed_pic[i] = h->delayed_pic[i+1];
7892 if(!out_of_order && pics > s->avctx->has_b_frames){
7893 *data_size = sizeof(AVFrame);
7895 h->outputed_poc = out->poc;
7896 *pict= *(AVFrame*)out;
7898 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7903 assert(pict->data[0] || !*data_size);
7904 ff_print_debug_info(s, pict);
7905 //printf("out %d\n", (int)pict->data[0]);
7908 /* Return the Picture timestamp as the frame number */
7909 /* we subtract 1 because it is added on utils.c */
7910 avctx->frame_number = s->picture_number - 1;
7912 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbours, where "available" means the neighbour belongs to the same
 * slice (slice_table entry equals h->slice_num).
 * Index layout per the offsets used below: 0 = top-left, 1 = top,
 * 2 = top-right, 3 = left; 4 and 5 are hardwired (see FIXMEs).
 */
7915 static inline void fill_mb_avail(H264Context *h){
7916 MpegEncContext * const s = &h->s;
7917 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// top-left: needs a column to the left as well as the row above
7920 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7921 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
// top-right: needs a column to the right of the current one
7922 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7928 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7929 h->mb_avail[4]= 1; //FIXME move out
7930 h->mb_avail[5]= 0; //FIXME move out
/*
 * Standalone self-test scaffolding (normally compiled under #ifdef TEST):
 * exercises the exp-Golomb coders round-trip, the 4x4 (I)DCT, the
 * quantizer, and the NAL escaping/unescaping layer.
 * NOTE(review): lines were elided in this extract -- the main() opening,
 * loop framing and closing braces are not all visible here.
 */
7938 #define SIZE (COUNT*40)
7944 // int int_temp[10000];
7946 AVCodecContext avctx;
7948 dsputil_init(&dsp, &avctx);
// --- round-trip test: unsigned exp-Golomb write then read back ---
7950 init_put_bits(&pb, temp, SIZE);
7951 printf("testing unsigned exp golomb\n");
7952 for(i=0; i<COUNT; i++){
7954 set_ue_golomb(&pb, i);
7955 STOP_TIMER("set_ue_golomb");
7957 flush_put_bits(&pb);
7959 init_get_bits(&gb, temp, 8*SIZE);
7960 for(i=0; i<COUNT; i++){
7963 s= show_bits(&gb, 24);
7966 j= get_ue_golomb(&gb);
7968 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7971 STOP_TIMER("get_ue_golomb");
// --- round-trip test: signed exp-Golomb, values centered on zero ---
7975 init_put_bits(&pb, temp, SIZE);
7976 printf("testing signed exp golomb\n");
7977 for(i=0; i<COUNT; i++){
7979 set_se_golomb(&pb, i - COUNT/2);
7980 STOP_TIMER("set_se_golomb");
7982 flush_put_bits(&pb);
7984 init_get_bits(&gb, temp, 8*SIZE);
7985 for(i=0; i<COUNT; i++){
7988 s= show_bits(&gb, 24);
7991 j= get_se_golomb(&gb);
7992 if(j != i - COUNT/2){
7993 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7996 STOP_TIMER("get_se_golomb");
// --- DCT/IDCT reconstruction error measurement on random blocks ---
8000 printf("testing 4x4 (I)DCT\n");
8003 uint8_t src[16], ref[16];
8004 uint64_t error= 0, max_error=0;
8006 for(i=0; i<COUNT; i++){
8008 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8009 for(j=0; j<16; j++){
8010 ref[j]= random()%255;
8011 src[j]= random()%255;
8014 h264_diff_dct_c(block, src, ref, 4);
// scale coefficients to match the IDCT's expected normalization
8017 for(j=0; j<16; j++){
8018 // printf("%d ", block[j]);
8019 block[j]= block[j]*4;
8020 if(j&1) block[j]= (block[j]*4 + 2)/5;
8021 if(j&4) block[j]= (block[j]*4 + 2)/5;
8025 s->dsp.h264_idct_add(ref, block, 4);
8026 /* for(j=0; j<16; j++){
8027 printf("%d ", ref[j]);
8031 for(j=0; j<16; j++){
8032 int diff= FFABS(src[j] - ref[j]);
8035 max_error= FFMAX(max_error, diff);
8038 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8039 printf("testing quantizer\n");
8040 for(qp=0; qp<52; qp++){
8042 src1_block[i]= src2_block[i]= random()%255;
// --- NAL escaping round-trip: encode_nal must be undone by decode_nal ---
8045 printf("Testing NAL layer\n");
8047 uint8_t bitstream[COUNT];
8048 uint8_t nal[COUNT*2];
8050 memset(&h, 0, sizeof(H264Context));
8052 for(i=0; i<COUNT; i++){
// build a random non-zero bitstream, then punch in zero runs to force
// emulation-prevention escaping
8060 for(j=0; j<COUNT; j++){
8061 bitstream[j]= (random() % 255) + 1;
8064 for(j=0; j<zeros; j++){
8065 int pos= random() % COUNT;
8066 while(bitstream[pos] == 0){
8075 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8077 printf("encoding failed\n");
8081 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8085 if(out_length != COUNT){
8086 printf("incorrect length %d %d\n", out_length, COUNT);
8090 if(consumed != nal_length){
8091 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8095 if(memcmp(bitstream, out, COUNT)){
8096 printf("mismatch\n");
8102 printf("Testing RBSP\n");
/**
 * Codec close callback: release per-context allocations.
 * Frees both NAL unescaping buffers and the decoder's lookup tables.
 */
8110 static av_cold int decode_end(AVCodecContext *avctx)
8112 H264Context *h = avctx->priv_data;
8113 MpegEncContext *s = &h->s;
8115 av_freep(&h->rbsp_buffer[0]);
8116 av_freep(&h->rbsp_buffer[1]);
8117 free_tables(h); //FIXME cleanup init stuff perhaps
8120 // memset(h, 0, sizeof(H264Context));
8126 AVCodec h264_decoder = {
8130 sizeof(H264Context),
8135 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8137 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),