2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC VLC state. Each VLC gets a statically sized table so init_vlc can use
 * INIT_VLC_USE_NEW_STATIC-style fixed storage rather than heap allocation.
 * NOTE(review): this extract appears to be line-sampled; some declarations
 * (e.g. a `static VLC run7_vlc;` to pair with run7_vlc_table) may have been
 * dropped — confirm against the complete file. */
/* coeff_token: 4 tables selected by the nC (neighbour nnz) context;
 * table sizes 520/332/280/256 match coeff_token_vlc_tables' flat storage. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token for chroma DC blocks (separate, smaller code table). */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros: one table per possible total_coeff value (1..15). */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros for chroma DC (total_coeff 1..3). */
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before: one table per zeros-left context (1..6); runs of 7+ use the
 * separate run7 table below. */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for SVQ3 helpers, the loop filter, and reference
 * management implemented later in the file. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one 32-bit word in memory (byte) order:
 * on big-endian hosts `a` goes in the high half, otherwise `b` does, so the
 * result can be stored with a single 32-bit write.
 * NOTE(review): the `#else`, `#endif` and closing brace of the original appear
 * to have been dropped by this line-sampled extract — both return statements
 * below belong to opposite sides of the WORDS_BIGENDIAN conditional. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* Lookup tables avoiding div/mod in QP scaling: for qp in [0,51],
 * ff_rem6[qp] == qp % 6 and ff_div6[qp] == qp / 6 (values visible below).
 * NOTE(review): the closing `};` lines of both arrays, and the initializer
 * rows of left_block_options, appear to be missing from this extract. */
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-MBAFF-configuration remapping of which sub-blocks of the left
 * neighbour MB feed the caches; selected in fill_caches below. */
97 static const int left_block_options[4][8]={
/* Fills the per-macroblock neighbour caches (intra4x4 pred modes, non-zero
 * coefficient counts, mv/ref caches, CABAC mvd caches, B-direct flags, cbp)
 * from the picture-level arrays, resolving top/left/topleft/topright
 * neighbours — including the MBAFF field/frame special cases.
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero when called by the deblocking filter, which
 *                    needs slightly different (cross-slice) neighbour rules.
 * NOTE(review): this extract is line-sampled — many statements, loop headers
 * and closing braces of the original function are missing here. Code left
 * byte-identical; comments only. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: the row above and the MB to the left. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF path: neighbours are resolved per MB *pair*; each *_mb_frame_flag
 * says whether that pair is coded frame (1) or field (0). */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
/* Shift the top / topleft / topright indices by one MB row when the
 * field/frame coding of the current pair differs from the neighbour pair. */
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
/* Left neighbour: when field/frame coding differs, both left_xy entries point
 * into the left pair and left_block selects which sub-rows to read. */
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: neighbours from other slices are valid as long as they
 * were decoded (slice_table entry < 255 == "decoded"). */
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblocking fast path for inter MBs: reload the current MB's own nnz
 * bits and mv/ref data straight from the picture arrays. */
185 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
187 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
189 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
190 for(list=0; list<h->list_count; list++){
191 if(USES_LIST(mb_type,list)){
192 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
193 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
194 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
195 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
201 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
202 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
204 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
205 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
207 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
208 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decoding path: neighbours count only when they belong to this slice. */
213 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
214 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
215 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
216 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
217 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample-availability bitmasks: start with everything
 * available, then clear bits for each missing/non-intra neighbour. */
220 if(IS_INTRA(mb_type)){
221 h->topleft_samples_available=
222 h->top_samples_available=
223 h->left_samples_available= 0xFFFF;
224 h->topright_samples_available= 0xEEEA;
226 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
227 h->topleft_samples_available= 0xB3FF;
228 h->top_samples_available= 0x33FF;
229 h->topright_samples_available= 0x26EA;
232 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available&= 0xDF5F;
234 h->left_samples_available&= 0x5F5F;
238 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
239 h->topleft_samples_available&= 0x7FFF;
241 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
242 h->topright_samples_available&= 0xFBFF;
/* intra4x4 prediction-mode cache from the top/left neighbours (`pred` for
 * unavailable ones — presumably DC or -1 depending on availability; the
 * lines computing it are missing from this extract). */
244 if(IS_INTRA4x4(mb_type)){
245 if(IS_INTRA4x4(top_type)){
246 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
247 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
248 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
249 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
257 h->intra4x4_pred_mode_cache[4+8*0]=
258 h->intra4x4_pred_mode_cache[5+8*0]=
259 h->intra4x4_pred_mode_cache[6+8*0]=
260 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 if(IS_INTRA4x4(left_type[i])){
264 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
265 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
273 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
274 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-coefficient-count cache from the top neighbour's stored nnz
 * (64 marks "unavailable/assume coded" for CAVLC, 0 for inter CABAC). */
289 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
303 h->non_zero_count_cache[4+8*0]=
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
311 h->non_zero_count_cache[1+8*3]=
312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Same for the two left neighbours (left_block remaps rows for MBAFF). */
316 for (i=0; i<2; i++) {
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern caches (CABAC context derivation). */
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion-vector and reference-index caches for inter/direct MBs:
 * top row, left column, then topleft/topright corners. */
356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
358 for(list=0; list<h->list_count; list++){
359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
367 h->mv_cache_clean[list]= 0;
369 if(USES_LIST(top_type, list)){
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Corners are only needed by spatial-direct and the deblocker. */
405 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
408 if(USES_LIST(topleft_type, list)){
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
418 if(USES_LIST(topright_type, list)){
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Padding cache slots used only as out-of-MB sentinels by prediction. */
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
434 h->ref_cache[list][scan8[4 ]] =
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: mvd (motion-vector-difference) caches for context modelling. */
443 /* XXX beurk, Load mvd */
444 if(USES_LIST(top_type, list)){
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
456 if(USES_LIST(left_type[0], list)){
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
464 if(USES_LIST(left_type[1], list)){
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flag cache (1 = neighbour 8x8 block was direct). */
478 if(h->slice_type_nos == FF_B_TYPE){
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
498 if(IS_DIRECT(left_type[1]))
499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF mv rescaling: MAP_F2F is applied to every cached neighbour slot;
 * the first expansion converts frame-coded neighbours to field units
 * (ref doubled, vertical mv halved), the second does the inverse.
 * No comments inside the macro bodies — the trailing `\` continuations
 * must stay contiguous. */
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
520 #define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
529 #define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
/* 0, 1 or 2 of the top/left neighbours use the 8x8 transform — used to pick
 * deblocking/CABAC context for transform_size_8x8_flag. */
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the bottom-row and right-column intra4x4 prediction modes of the
 * current MB from the 8-wide cache back to the per-MB array, so the next
 * MB can read them as its top/left neighbours.
 * NOTE(review): the final entry ([7]) and the closing brace appear to have
 * been dropped by this line-sampled extract. */
546 static inline void write_back_intra_pred_mode(H264Context *h){
547 const int mb_xy= h->mb_xy;
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Validates each cached intra4x4 mode against sample availability: the
 * `top`/`left` tables remap a mode to its legal replacement (e.g. DC ->
 * LEFT_DC/TOP_DC) or to a negative "invalid" marker, which is reported via
 * av_log. Returns nonzero/negative on error — the exact return statements
 * are missing from this line-sampled extract; confirm against the full file. */
561 static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* 0x8000 = availability bit for the first luma 4x4 column/row of samples. */
567 if(!(h->top_samples_available&0x8000)){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
579 if(!(h->left_samples_available&0x8000)){
581 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
583 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
586 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 } //FIXME cleanup like next
595 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a whole-MB (16x16 luma /
 * 8x8 chroma) prediction mode passed in `mode`: remaps DC variants when
 * neighbours are unavailable, logs and fails on impossible modes.
 * NOTE(review): the range check, remap assignments and return statements are
 * missing from this line-sampled extract — only the error paths are visible. */
597 static inline int check_intra_pred_mode(H264Context *h, int mode){
598 MpegEncContext * const s = &h->s;
599 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
600 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
603 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
607 if(!(h->top_samples_available&0x8000)){
610 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 if(!(h->left_samples_available&0x8000)){
618 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 * gets the predicted intra4x4 prediction mode.
/* Standard H.264 most-probable-mode derivation for 4x4 block n: the minimum
 * of the left and top neighbours' modes, falling back to DC_PRED when either
 * neighbour is unavailable (negative cache value). The final `return min;`
 * appears to be missing from this line-sampled extract. */
629 static inline int pred_intra_mode(H264Context *h, int n){
630 const int index8= scan8[n];
631 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
632 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
633 const int min= FFMIN(left, top);
635 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
637 if(min<0) return DC_PRED;
/* Writes the current MB's right-column/bottom-row non-zero coefficient
 * counts from the 8-wide cache back into h->non_zero_count[mb_xy], for use
 * as the top/left neighbours of subsequent MBs; additionally packs a 16-bit
 * luma nnz bitmap at offset [14] for the MBAFF deblocking fast path. */
641 static inline void write_back_non_zero_count(H264Context *h){
642 const int mb_xy= h->mb_xy;
/* Luma edges. */
644 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
645 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
646 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
647 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
648 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
649 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
650 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma edges (Cb then Cr). */
652 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
653 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
654 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
656 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
657 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
658 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
661 // store all luma nnzs, for deblocking
/* One bit per 4x4 luma block: set iff that block has any nonzero coeff. */
664 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
665 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
/* CAVLC nC derivation: averages (rounding up) the left and top neighbours'
 * nnz counts; the value 64 marks an unavailable neighbour, so i<64 detects
 * "at least one neighbour present". The lines combining left/top into `i`
 * and the return are missing from this line-sampled extract. */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the top-right (diagonal) neighbour of block
 * i and sets *C to its motion vector, falling back to the top-left neighbour
 * when top-right is unavailable (H.264 MV prediction rule). Contains MBAFF
 * special cases that fetch the neighbour directly from the picture arrays
 * and rescale it between field and frame units via SET_DIAG_MV.
 * NOTE(review): this extract is line-sampled; several lines of the MBAFF
 * conditions and the non-MBAFF early path are missing. Code left untouched. */
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 is used to return a synthesized (rescaled) MV. */
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV(MV_OP, REF_OP, X4, Y4): read mv/ref at 4x4 coords (X4,Y4) of
 * the current picture, apply MV_OP to the vertical component and REF_OP to
 * the ref index (field<->frame rescale), and return from the caller. */
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Common (non-MBAFF) path: use top-right if available, else top-left. */
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
/* H.264 median MV prediction (spec 8.4.1.3): uses left (A), top (B) and
 * diagonal (C) neighbours. Median of the three when 2+ neighbours share the
 * target ref; the single matching neighbour's MV when exactly one does.
 * NOTE(review): several branch bodies are missing from this line-sampled
 * extract (e.g. the left_ref==ref / diagonal_ref==ref assignments). */
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
/* No match: left-only fallback, otherwise median of whatever is available. */
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
/* 16x8 partitions: top partition prefers the top neighbour (B), bottom
 * partition prefers the left neighbour (A), when that neighbour uses the
 * same reference; otherwise falls through to the generic median predictor.
 * NOTE(review): the partition-selection `if`s and early returns are missing
 * from this line-sampled extract. */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction over the full-width partition. */
831 pred_motion(h, n, 4, list, ref, mx, my);
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
/* 8x16 partitions: left partition prefers the left neighbour (A), right
 * partition prefers the diagonal neighbour (C), when the references match;
 * otherwise falls through to the generic median predictor.
 * NOTE(review): the partition-selection `if`s and early returns are missing
 * from this line-sampled extract. */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
/* Fallback: generic median prediction over the half-width partition. */
868 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks (spec 8.4.1.1): the MV is (0,0) when
 * either the top or left neighbour is unavailable, or when either one is a
 * zero MV referencing picture 0; otherwise the ordinary 16x16 list-0
 * prediction applies. The zero-assignment lines between the condition and
 * the fallback are missing from this line-sampled extract. */
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
879 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct-mode distance scale factors (spec 8.4.1.2.3):
 * for each list-0 reference, dist_scale_factor[i] = clip3(-1024, 1023,
 * (tb*tx + 32) >> 6) with tx = (16384 + |td|/2) / td, where td/tb are the
 * clipped POC distances. Long-term references and td==0 use the neutral
 * value 256 (== 1.0 in this fixed-point scale). A field-mode copy of the
 * table is produced at the end (the guarding condition for that loop is
 * missing from this line-sampled extract — presumably FRAME_MBAFF). */
890 static inline void direct_dist_scale_factor(H264Context * const h){
891 MpegEncContext * const s = &h->s;
892 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
893 const int poc1 = h->ref_list[1][0].poc;
895 for(i=0; i<h->ref_count[0]; i++){
896 int poc0 = h->ref_list[0][i].poc;
897 int td = av_clip(poc1 - poc0, -128, 127);
898 if(td == 0 || h->ref_list[0][i].long_ref){
899 h->dist_scale_factor[i] = 256;
901 int tb = av_clip(poc - poc0, -128, 127);
902 int tx = (16384 + (FFABS(td) >> 1)) / td;
903 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Duplicate each entry for the two fields. */
907 for(i=0; i<h->ref_count[0]; i++){
908 h->dist_scale_factor_field[2*i] =
909 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Prepares direct-mode reference mapping: records this picture's per-field
 * ref_count/ref_poc (POC encoded as 4*frame_num + reference bits), then for
 * temporal direct builds map_col_to_list0, which maps each reference of the
 * co-located (list-1[0]) picture onto the current slice's list-0 index with
 * the same POC. Missing frames map to 0 as a bogus placeholder.
 * NOTE(review): line-sampled extract — some loop closings and the guard
 * before the field-map loop are missing. */
913 static inline void direct_ref_list_init(H264Context * const h){
914 MpegEncContext * const s = &h->s;
915 Picture * const ref1 = &h->ref_list[1][0];
916 Picture * const cur = s->current_picture_ptr;
918 int sidx= s->picture_structure&1;
919 int ref1sidx= ref1->reference&1;
920 for(list=0; list<2; list++){
921 cur->ref_count[sidx][list] = h->ref_count[list];
922 for(j=0; j<h->ref_count[list]; j++)
923 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* Frame pictures: both field slots carry the same lists. */
925 if(s->picture_structure == PICT_FRAME){
926 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
927 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
/* Only temporal direct in B slices needs the col->list0 map. */
929 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
931 for(list=0; list<2; list++){
932 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
933 int poc = ref1->ref_poc[ref1sidx][list][i];
934 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
935 poc= (poc&~3) + s->picture_structure;
936 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
937 for(j=0; j<h->ref_count[list]; j++)
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 h->map_col_to_list0[list][i] = j;
/* Field variant: each frame index j expands to field indices 2j / 2j+1. */
945 for(list=0; list<2; list++){
946 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
947 j = h->map_col_to_list0[list][i];
948 h->map_col_to_list0_field[list][2*i] = 2*j;
949 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Derive motion vectors and reference indices for a B_DIRECT macroblock
 * (or the direct 8x8 partitions of a B_8x8 MB, when is_b8x8), filling
 * h->mv_cache / h->ref_cache / h->sub_mb_type and refining *mb_type.
 * Handles both spatial and temporal direct modes, plus every MBAFF
 * frame/field combination between the current MB and the colocated MB
 * in ref_list[1][0]. Several branch headers/closers are elided in this
 * view; the original control flow should be confirmed against the file. */
955 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
956 MpegEncContext * const s = &h->s;
957 int b8_stride = h->b8_stride;
958 int b4_stride = h->b_stride;
959 int mb_xy = h->mb_xy;
/* l1mv0/l1mv1, l1ref0/l1ref1: motion vectors and reference indices of the
 * colocated macroblock in the first list-1 reference picture. */
961 const int16_t (*l1mv0)[2], (*l1mv1)[2];
962 const int8_t *l1ref0, *l1ref1;
963 const int is_b8x8 = IS_8X8(*mb_type);
964 unsigned int sub_mb_type;
967 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- Locate the colocated MB, compensating for frame/field mismatches. --- */
969 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
970 if(h->ref_list[1][0].reference == PICT_FRAME){ // AFL/AFR/FR/FL -> AFL
971 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL
/* Choose the colocated field with the smaller POC distance to the current
 * picture, then point at that field's MB in the MBAFF pair. */
972 int cur_poc = s->current_picture_ptr->poc;
973 int *col_poc = h->ref_list[1]->field_poc;
974 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
975 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
976 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
977 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
978 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
979 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* NOTE(review): lines between 979 and 983 are elided; this advance into the
 * lower half presumably belongs to an odd-row branch — verify in the file. */
983 l1mv0 += 2*b4_stride;
984 l1mv1 += 2*b4_stride;
988 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
/* fieldoff is -1 or +1: step one MB row to the opposite-parity field. */
989 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
990 mb_xy += s->mb_stride*fieldoff;
993 }else{ // AFL/AFR/FR/FL -> AFR/FR
994 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* A field MB maps onto a frame-coded MB pair: fetch both pair members. */
995 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
996 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
997 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1000 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1001 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1002 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1004 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1005 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1007 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1008 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1010 }else{ // AFR/FR -> AFR/FR
/* Same coding structure: colocated MB is at the same address. */
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1014 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1015 /* FIXME save sub mb types from previous frames (or derive from MVs)
1016 * so we know exactly what block size to use */
1017 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1018 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1019 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* Common fetch of the (possibly adjusted) colocated MB's motion data. */
1030 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1031 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1032 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1033 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* ================= spatial direct prediction ================= */
1036 if(h->direct_spatial_mv_pred){
1041 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1043 /* ref = min(neighbors) */
1044 for(list=0; list<2; list++){
1045 int refa = h->ref_cache[list][scan8[0] - 1];
1046 int refb = h->ref_cache[list][scan8[0] - 8];
1047 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1048 if(refc == PART_NOT_AVAILABLE)
1049 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* Unsigned compare makes negative (unavailable) refs sort last. */
1050 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1055 if(ref[0] < 0 && ref[1] < 0){
/* No valid neighbor in either list: zero refs and zero MVs per spec. */
1056 ref[0] = ref[1] = 0;
1057 mv[0][0] = mv[0][1] =
1058 mv[1][0] = mv[1][1] = 0;
1060 for(list=0; list<2; list++){
1062 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1064 mv[list][0] = mv[list][1] = 0;
/* Drop the unused prediction list when only one list has a valid ref
 * (surrounding conditions elided in this view). */
1070 *mb_type &= ~MB_TYPE_L1;
1071 sub_mb_type &= ~MB_TYPE_L1;
1072 }else if(ref[0] < 0){
1074 *mb_type &= ~MB_TYPE_L0;
1075 sub_mb_type &= ~MB_TYPE_L0;
/* Frame/field mismatch: iterate 8x8 blocks against the two colocated MBs. */
1078 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1079 for(i8=0; i8<4; i8++){
1082 int xy8 = x8+y8*b8_stride;
1083 int xy4 = 3*x8+y8*b4_stride;
1086 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1088 h->sub_mb_type[i8] = sub_mb_type;
1090 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1091 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Colocated block is "moving slowly" (|mv| <= 1 against ref 0): spec says
 * the corresponding spatial-direct MV components become zero. */
1092 if(!IS_INTRA(mb_type_col[y8])
1093 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1094 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1096 a= pack16to32(mv[0][0],mv[0][1]);
1098 b= pack16to32(mv[1][0],mv[1][1]);
1100 a= pack16to32(mv[0][0],mv[0][1]);
1101 b= pack16to32(mv[1][0],mv[1][1]);
1103 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1104 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1106 }else if(IS_16X16(*mb_type)){
/* Whole-MB case: one fill per list. */
1109 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1110 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1111 if(!IS_INTRA(mb_type_col[0])
1112 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1113 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 had a bug in this rule; detect via SEI version. */
1114 && (h->x264_build>33 || !h->x264_build)))){
1116 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1120 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1123 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* General 8x8-partition case (else-branch header elided in this view). */
1126 for(i8=0; i8<4; i8++){
1127 const int x8 = i8&1;
1128 const int y8 = i8>>1;
1130 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1132 h->sub_mb_type[i8] = sub_mb_type;
1134 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1135 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1136 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1137 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* col_zero_flag: zero out MVs where the colocated block is static. */
1140 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1141 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1142 && (h->x264_build>33 || !h->x264_build)))){
1143 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1144 if(IS_SUB_8X8(sub_mb_type)){
1145 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1146 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1148 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1150 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1153 for(i4=0; i4<4; i4++){
1154 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1155 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1157 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1159 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ================= temporal direct prediction ================= */
1165 }else{ /* direct temporal mv pred */
1166 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1167 const int *dist_scale_factor = h->dist_scale_factor;
/* Field MB in an MBAFF frame: switch to the field-pair lookup tables. */
1169 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1170 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1171 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1172 dist_scale_factor = h->dist_scale_factor_field;
1174 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1175 /* FIXME assumes direct_8x8_inference == 1 */
1179 if(IS_INTERLACED(*mb_type)){
1180 /* frame to field scaling */
1182 ref_shift= FRAME_MBAFF ? 0 : 1;
/* field-to-frame case (branch header and y_shift lines elided here). */
1185 ref_shift= FRAME_MBAFF ? 2 : 1;
1188 for(i8=0; i8<4; i8++){
1189 const int x8 = i8&1;
1190 const int y8 = i8>>1;
1192 const int16_t (*l1mv)[2]= l1mv0;
1194 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1196 h->sub_mb_type[i8] = sub_mb_type;
/* Temporal direct always predicts list 1 from ref index 0. */
1198 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1199 if(IS_INTRA(mb_type_col[y8])){
1200 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1201 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1202 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* Map the colocated reference into the current list 0 (parity-aware). */
1206 ref0 = l1ref0[x8 + y8*b8_stride];
1208 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1210 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1213 scale = dist_scale_factor[ref0];
1214 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1217 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* Vertical MV is rescaled for the frame/field resolution change. */
1218 int my_col = (mv_col[1]<<y_shift)/2;
1219 int mx = (scale * mv_col[0] + 128) >> 8;
1220 int my = (scale * my_col + 128) >> 8;
/* list1 MV = list0 MV - colocated MV (standard temporal direct pair). */
1221 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1222 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1228 /* one-to-one mv scaling */
1230 if(IS_16X16(*mb_type)){
1233 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1234 if(IS_INTRA(mb_type_col[0])){
/* Non-intra path (else-branch header elided in this view). */
1237 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1238 : map_col_to_list0[1][l1ref1[0]];
1239 const int scale = dist_scale_factor[ref0];
1240 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1242 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1243 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1245 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1246 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1248 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1249 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1250 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1252 for(i8=0; i8<4; i8++){
1253 const int x8 = i8&1;
1254 const int y8 = i8>>1;
1256 const int16_t (*l1mv)[2]= l1mv0;
1258 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1260 h->sub_mb_type[i8] = sub_mb_type;
1261 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1262 if(IS_INTRA(mb_type_col[0])){
1263 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1264 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1265 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1269 ref0 = l1ref0[x8 + y8*h->b8_stride];
1271 ref0 = map_col_to_list0[0][ref0];
1273 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1276 scale = dist_scale_factor[ref0];
1278 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1279 if(IS_SUB_8X8(sub_mb_type)){
1280 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1281 int mx = (scale * mv_col[0] + 128) >> 8;
1282 int my = (scale * mv_col[1] + 128) >> 8;
1283 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1284 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4-granularity scaling (else-branch header elided in this view). */
1286 for(i4=0; i4<4; i4++){
1287 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1288 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1289 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1290 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1291 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1292 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copy the per-MB decode caches (mv_cache / ref_cache / mvd_cache /
 * sub_mb_type) back into the frame-wide tables of the current picture,
 * so neighboring MBs and later frames can read them. */
1299 static inline void write_back_motion(H264Context *h, int mb_type){
1300 MpegEncContext * const s = &h->s;
/* Top-left 4x4 (b_xy) and 8x8 (b8_xy) block coordinates of this MB. */
1301 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1302 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1305 if(!USES_LIST(mb_type, 0))
1306 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1308 for(list=0; list<h->list_count; list++){
1310 if(!USES_LIST(mb_type, list))
/* Copy 4 MVs (two uint64 halves) per row from the cache rows (stride 8). */
1314 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1315 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC needs the MV differences for context modelling of later MBs. */
1317 if( h->pps.cabac ) {
1318 if(IS_SKIP(mb_type))
1319 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1322 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1323 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 block, taken from the 4 partition corners. */
1328 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1329 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1330 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1331 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1332 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* Record per-8x8 direct flags for CABAC B-slice context (sub block 0 is
 * handled in lines elided from this view). */
1336 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1337 if(IS_8X8(mb_type)){
1338 uint8_t *direct_table = &h->direct_table[b8_xy];
1339 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1340 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1341 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1347 * Decodes a network abstraction layer unit.
1348 * @param consumed is the number of bytes used as input
1349 * @param length is the length of the array
1350 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1351 * @returns decoded bytes, might be src+1 if no escapes
/* Parse a NAL unit header and remove emulation-prevention bytes
 * (the 0x000003 escape sequences) from the payload. Returns a pointer to
 * the unescaped RBSP: either src+1 directly when no escapes were found,
 * or an internal reallocated buffer (h->rbsp_buffer) otherwise. */
1353 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1358 // src[0]&0x80; //forbidden bit
/* NAL header byte: 2-bit nal_ref_idc and 5-bit nal_unit_type. */
1359 h->nal_ref_idc= src[0]>>5;
1360 h->nal_unit_type= src[0]&0x1F;
1364 for(i=0; i<length; i++)
1365 printf("%2X ", src[i]);
/* Fast scan (2 bytes at a time) for the first 0x00 00 0[0-3] pattern. */
1367 for(i=0; i+1<length; i+=2){
1368 if(src[i]) continue;
1369 if(i>0 && src[i-1]==0) i--;
1370 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1372 /* startcode, so we must be past the end */
1379 if(i>=length-1){ //no escaped 0
1380 *dst_length= length;
1381 *consumed= length+1; //+1 for the header
1385 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* NOTE(review): av_fast_realloc result is not NULL-checked here — on OOM
 * dst would be NULL; verify callers/later code guard against this. */
1386 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1387 dst= h->rbsp_buffer[bufidx];
1393 //printf("decoding esc\n");
1396 //remove escapes (very rare 1:2^22)
1397 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1398 if(src[si+2]==3){ //escape
1403 }else //next start code
1407 dst[di++]= src[si++];
1411 *consumed= si + 1;//+1 for the header
1412 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1417 * identifies the exact end of the bitstream
1418 * @return the length of the trailing, or 0 if damaged
/* Locate the rbsp_stop_one_bit in the last payload byte; the body past the
 * trace below is elided in this view (see the doc comment above: returns
 * the trailing length, or 0 if the bitstream is damaged). */
1420 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1424 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1434 * IDCT transforms the 16 dc values and dequantizes them.
1435 * @param qp quantization parameter
/* Inverse 4x4 Hadamard transform of the 16 luma DC coefficients followed by
 * dequantization with qmul; results are written back in place at the DC
 * positions of the 16 spatial 4x4 blocks (hence the stride*{0,2,8,10} and
 * x/y_offset layout). Two separable passes: rows into temp[], then columns. */
1437 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1440 int temp[16]; //FIXME check if this is a good idea
1441 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1442 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1444 //memset(block, 64, 2*256);
/* Pass 1: butterfly each row of DC values into temp[] (loop header elided). */
1447 const int offset= y_offset[i];
1448 const int z0= block[offset+stride*0] + block[offset+stride*4];
1449 const int z1= block[offset+stride*0] - block[offset+stride*4];
1450 const int z2= block[offset+stride*1] - block[offset+stride*5];
1451 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Pass 2: butterfly the columns and apply dequant with rounding (+128>>8). */
1460 const int offset= x_offset[i];
1461 const int z0= temp[4*0+i] + temp[4*2+i];
1462 const int z1= temp[4*0+i] - temp[4*2+i];
1463 const int z2= temp[4*1+i] - temp[4*3+i];
1464 const int z3= temp[4*1+i] + temp[4*3+i];
1466 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1467 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1468 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1469 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1475 * DCT transforms the 16 dc values.
1476 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
 * mirror of h264_luma_dc_dequant_idct_c above; the final >>1 normalizes
 * per the H.264 DC transform definition. In-place over the DC positions. */
1478 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1479 // const int qmul= dequant_coeff[qp][0];
1481 int temp[16]; //FIXME check if this is a good idea
1482 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1483 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Pass 1: row butterflies into temp[] (loop header elided in this view). */
1486 const int offset= y_offset[i];
1487 const int z0= block[offset+stride*0] + block[offset+stride*4];
1488 const int z1= block[offset+stride*0] - block[offset+stride*4];
1489 const int z2= block[offset+stride*1] - block[offset+stride*5];
1490 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Pass 2: column butterflies, halved on output. */
1499 const int offset= x_offset[i];
1500 const int z0= temp[4*0+i] + temp[4*2+i];
1501 const int z1= temp[4*0+i] - temp[4*2+i];
1502 const int z2= temp[4*1+i] - temp[4*3+i];
1503 const int z3= temp[4*1+i] + temp[4*3+i];
1505 block[stride*0 +offset]= (z0 + z3)>>1;
1506 block[stride*2 +offset]= (z1 + z2)>>1;
1507 block[stride*8 +offset]= (z1 - z2)>>1;
1508 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the 4 chroma DC
 * coefficients, in place. Intermediate value e is computed in lines elided
 * from this view (the classic 2x2 butterfly on a,b,c,d). */
1516 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1517 const int stride= 16*2;
1518 const int xStride= 16;
/* Gather the 4 DC values from their block positions. */
1521 a= block[stride*0 + xStride*0];
1522 b= block[stride*0 + xStride*1];
1523 c= block[stride*1 + xStride*0];
1524 d= block[stride*1 + xStride*1];
/* Write back transformed + dequantized values ( *qmul then >>7 ). */
1531 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1532 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1533 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1534 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side),
 * in place; intermediate e comes from butterfly lines elided in this view. */
1538 static void chroma_dc_dct_c(DCTELEM *block){
1539 const int stride= 16*2;
1540 const int xStride= 16;
/* Gather the 4 DC values. */
1543 a= block[stride*0 + xStride*0];
1544 b= block[stride*0 + xStride*1];
1545 c= block[stride*1 + xStride*0];
1546 d= block[stride*1 + xStride*1];
/* Store the transformed values without scaling. */
1553 block[stride*0 + xStride*0]= (a+c);
1554 block[stride*0 + xStride*1]= (e+b);
1555 block[stride*1 + xStride*0]= (a-c);
1556 block[stride*1 + xStride*1]= (e-b);
1561 * gets the chroma qp.
/* Map a luma qscale to the chroma QP for chroma plane t via the per-PPS
 * lookup table (presumably pre-built with chroma_qp_index_offset applied —
 * verify where pps.chroma_qp_table is initialized). */
1563 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1564 return h->pps.chroma_qp_table[t][qscale];
1567 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1568 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Encoder quantization of a 16-coefficient block: applies quant_coeff[qscale]
 * with an intra/inter dead-zone bias, optionally handling the DC coefficient
 * with a separate (shifted) quantizer. Returns the index of the last
 * non-zero coefficient in scan order. Threshold1/2 implement a single
 * unsigned range check that skips near-zero levels cheaply. */
1569 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1571 const int * const quant_table= quant_coeff[qscale];
/* Dead zone: intra gets 1/3 rounding bias, inter 1/6. */
1572 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1573 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1574 const unsigned int threshold2= (threshold1<<1);
/* separate_dc path A (luma DC, lower effective shift) — branch header elided. */
1580 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1581 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1582 const unsigned int dc_threshold2= (dc_threshold1<<1);
/* qscale+18: separate DC quant table row — verify against quant_coeff layout. */
1584 int level= block[0]*quant_coeff[qscale+18][0];
1585 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1587 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1590 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1593 // last_non_zero = i;
/* separate_dc path B (chroma DC, higher effective shift). */
1598 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1599 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1600 const unsigned int dc_threshold2= (dc_threshold1<<1);
1602 int level= block[0]*quant_table[0];
1603 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1605 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1608 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1611 // last_non_zero = i;
/* AC loop over the scan order (loop header elided in this view). */
1624 const int j= scantable[i];
1625 int level= block[j]*quant_table[j];
1627 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1628 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1629 if(((unsigned)(level+threshold1))>threshold2){
1631 level= (bias + level)>>QUANT_SHIFT;
1634 level= (bias - level)>>QUANT_SHIFT;
1643 return last_non_zero;
/* Motion compensation for one partition in one direction (one list):
 * quarter-pel luma interpolation via qpix_op plus eighth-pel chroma via
 * chroma_op, with edge emulation when the motion vector reaches outside
 * the padded picture. n selects the partition (scan8 index), delta is the
 * byte offset of the second half for non-square partitions. */
1646 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1647 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1648 int src_x_offset, int src_y_offset,
1649 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1650 MpegEncContext * const s = &h->s;
/* mx/my in quarter-pel units including the partition's position offset. */
1651 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1652 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* luma_xy selects one of the 16 quarter-pel interpolation filters. */
1653 const int luma_xy= (mx&3) + ((my&3)<<2);
1654 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1655 uint8_t * src_cb, * src_cr;
1656 int extra_width= h->emu_edge_width;
1657 int extra_height= h->emu_edge_height;
1659 const int full_mx= mx>>2;
1660 const int full_my= my>>2;
1661 const int pic_width = 16*s->mb_width;
1662 const int pic_height = 16*s->mb_height >> MB_FIELD;
1664 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* Subpel filters read 2 extra pixels on each side; shrink the safe region. */
1667 if(mx&7) extra_width -= 3;
1668 if(my&7) extra_height -= 3;
/* Out-of-picture read: interpolate from an emulated-edge copy instead. */
1670 if( full_mx < 0-extra_width
1671 || full_my < 0-extra_height
1672 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1673 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1674 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1675 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1679 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* Non-square partitions do a second call shifted by delta. */
1681 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Luma-only decode: skip all chroma work. */
1684 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1687 // chroma offset when predicting from a field of opposite parity
1688 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1689 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1691 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1692 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* Chroma edge emulation (enclosing if(emu) headers elided in this view). */
1695 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1696 src_cb= s->edge_emu_buffer;
1698 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1702 src_cr= s->edge_emu_buffer;
1704 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: put from list 0 (if
 * list0), then average in list 1 (if list1) by switching the operator set
 * from *_put to *_avg between the two calls. */
1707 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1708 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1709 int x_offset, int y_offset,
1710 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1711 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1712 int list0, int list1){
1713 MpegEncContext * const s = &h->s;
1714 qpel_mc_func *qpix_op= qpix_put;
1715 h264_chroma_mc_func chroma_op= chroma_put;
/* Advance destinations to this partition; offsets are in chroma (8-pel)
 * units, hence the factor 2 for luma. */
1717 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1718 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1719 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* Convert to absolute picture coordinates. */
1720 x_offset += 8*s->mb_x;
1721 y_offset += 8*(s->mb_y >> MB_FIELD);
1724 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1725 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1726 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1727 qpix_op, chroma_op);
/* Switch to averaging so the list-1 prediction blends with list 0
 * (qpix_op switch line elided in this view). */
1730 chroma_op= chroma_avg;
1734 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1735 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1736 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1737 qpix_op, chroma_op);
/* Weighted-prediction motion compensation for one partition. Bidirectional:
 * predict both lists (list 1 into a scratch buffer) and blend with either
 * implicit weights (use_weight==2) or explicit per-reference weights and
 * offsets. Unidirectional: predict, then apply the single-list weight. */
1741 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1742 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1743 int x_offset, int y_offset,
1744 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1745 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1746 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1747 int list0, int list1){
1748 MpegEncContext * const s = &h->s;
/* Same partition-offset arithmetic as mc_part_std. */
1750 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1751 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1752 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1753 x_offset += 8*s->mb_x;
1754 y_offset += 8*(s->mb_y >> MB_FIELD);
/* Bidirectional branch (header elided in this view). */
1757 /* don't optimize for luma-only case, since B-frames usually
1758 * use implicit weights => chroma too. */
1759 uint8_t *tmp_cb = s->obmc_scratchpad;
1760 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1761 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1762 int refn0 = h->ref_cache[0][ scan8[n] ];
1763 int refn1 = h->ref_cache[1][ scan8[n] ];
/* List 0 straight to dest, list 1 into the scratch buffers. */
1765 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1766 dest_y, dest_cb, dest_cr,
1767 x_offset, y_offset, qpix_put, chroma_put);
1768 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1769 tmp_y, tmp_cb, tmp_cr,
1770 x_offset, y_offset, qpix_put, chroma_put);
1772 if(h->use_weight == 2){
/* Implicit weighting: weights sum to 64 (log2 denom 5), no offset. */
1773 int weight0 = h->implicit_weight[refn0][refn1];
1774 int weight1 = 64 - weight0;
1775 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1776 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1777 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* Explicit weighting (else-branch header elided in this view). */
1779 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1780 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1781 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1782 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1783 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1784 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1785 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1786 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1787 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Unidirectional path: single prediction then in-place explicit weight. */
1790 int list = list1 ? 1 : 0;
1791 int refn = h->ref_cache[list][ scan8[n] ];
1792 Picture *ref= &h->ref_list[list][refn];
1793 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1794 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1795 qpix_put, chroma_put);
1797 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1798 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1799 if(h->use_weight_chroma){
1800 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1801 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1802 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1803 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch motion compensation for a partition: the weighted path is taken
 * for explicit weighting (use_weight==1), or for implicit weighting when
 * the bidirectional implicit weight differs from the trivial 32/32 split;
 * otherwise the cheaper standard put/avg path is used. */
1808 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1809 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1810 int x_offset, int y_offset,
1811 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1812 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1813 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1814 int list0, int list1){
1815 if((h->use_weight==2 && list0 && list1
1816 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1817 || h->use_weight==1)
/* weight_op/weight_avg index 0 = luma for this size, 3 = matching chroma. */
1818 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1819 x_offset, y_offset, qpix_put, chroma_put,
1820 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1822 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1823 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1826 static inline void prefetch_motion(H264Context *h, int list){
1827 /* fetch pixels for estimated mv 4 macroblocks ahead
1828 * optimized for 64byte cache lines */
1829 MpegEncContext * const s = &h->s;
1830 const int refn = h->ref_cache[list][scan8[0]];
/* Estimated source position: this MB's MV applied 4 MBs to the right
 * (+16*4 pel, hence the +64 in the offsets below). */
1832 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1833 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1834 uint8_t **src= h->ref_list[list][refn].data;
1835 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1836 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma planes are contiguous: prefetching src[1] with stride
 * src[2]-src[1] touches both Cb and Cr. */
1837 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1838 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Top-level inter motion compensation for one macroblock: walks the
 * partition tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and
 * issues one mc_part() call per partition with the operator set matching
 * the partition size. Prefetches list-0 data first, list-1 data last. */
1842 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1843 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1844 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1845 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1846 MpegEncContext * const s = &h->s;
1847 const int mb_xy= h->mb_xy;
1848 const int mb_type= s->current_picture.mb_type[mb_xy];
1850 assert(IS_INTER(mb_type));
1852 prefetch_motion(h, 0);
1854 if(IS_16X16(mb_type)){
1855 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1856 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1857 &weight_op[0], &weight_avg[0],
1858 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1859 }else if(IS_16X8(mb_type)){
/* Two 16x8 halves: delta=8 is the second 8-pel-wide chroma row offset. */
1860 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1861 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1862 &weight_op[1], &weight_avg[1],
1863 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1864 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1865 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1866 &weight_op[1], &weight_avg[1],
1867 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1868 }else if(IS_8X16(mb_type)){
/* Two 8x16 halves: delta is a full-row offset for the lower 8 lines. */
1869 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1870 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1871 &weight_op[2], &weight_avg[2],
1872 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1873 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1874 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1875 &weight_op[2], &weight_avg[2],
1876 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 mode: iterate the four sub-MBs (loop header elided in this view). */
1880 assert(IS_8X8(mb_type));
1883 const int sub_mb_type= h->sub_mb_type[i];
1885 int x_offset= (i&1)<<2;
1886 int y_offset= (i&2)<<1;
1888 if(IS_SUB_8X8(sub_mb_type)){
1889 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1890 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1891 &weight_op[3], &weight_avg[3],
1892 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1893 }else if(IS_SUB_8X4(sub_mb_type)){
1894 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1895 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1896 &weight_op[4], &weight_avg[4],
1897 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1898 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1899 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1900 &weight_op[4], &weight_avg[4],
1901 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1902 }else if(IS_SUB_4X8(sub_mb_type)){
1903 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1904 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1905 &weight_op[5], &weight_avg[5],
1906 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1907 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1908 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1909 &weight_op[5], &weight_avg[5],
1910 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* 4x4 fallback: four quarter blocks (inner loop header elided). */
1913 assert(IS_SUB_4X4(sub_mb_type));
1915 int sub_x_offset= x_offset + 2*(j&1);
1916 int sub_y_offset= y_offset + (j&2);
1917 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1918 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1919 &weight_op[6], &weight_avg[6],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1926 prefetch_motion(h, 1);
/* One-time initialization of the static CAVLC VLC decoding tables:
 * coeff_token (4 variants + chroma-DC), total_zeros (luma + chroma-DC),
 * and run (run_before, 6 small tables + run7).  All tables live in the
 * preallocated static storage declared at the top of the file and are
 * built with INIT_VLC_USE_NEW_STATIC so no heap allocation happens.
 * NOTE(review): this listing is missing source lines (the embedded
 * numbering jumps) — e.g. the loop headers for the coeff_token and
 * run table loops are not visible.  Comments below describe only the
 * visible code. */
1929 static av_cold void decode_init_vlc(void){
1930     static int done = 0;
         // Chroma DC coeff_token: 4*5 codes into a fixed 256-entry table.
1937         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1938         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1939         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1940                  &chroma_dc_coeff_token_len [0], 1, 1,
1941                  &chroma_dc_coeff_token_bits[0], 1, 1,
1942                  INIT_VLC_USE_NEW_STATIC);
         // The four luma coeff_token tables are packed back-to-back in
         // coeff_token_vlc_tables; `offset` tracks the packing position.
         // (Loop header over `i` is on a missing line.)
1946             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1947             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1948             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1949                      &coeff_token_len [i][0], 1, 1,
1950                      &coeff_token_bits[i][0], 1, 1,
1951                      INIT_VLC_USE_NEW_STATIC);
1952             offset += coeff_token_vlc_tables_size[i];
1955          * This is a one time safety check to make sure that
1956          * the packed static coeff_token_vlc table sizes
1957          * were initialized correctly.
1959         assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
         // Chroma DC total_zeros: 3 tables, 4 symbols each.
         // (Loop header over `i` is on a missing line.)
1962             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1963             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1964             init_vlc(&chroma_dc_total_zeros_vlc[i],
1965                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1966                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1967                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1968                      INIT_VLC_USE_NEW_STATIC);
         // Luma total_zeros: one table per possible total_coeff value (1..15).
1970         for(i=0; i<15; i++){
1971             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1972             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1973             init_vlc(&total_zeros_vlc[i],
1974                      TOTAL_ZEROS_VLC_BITS, 16,
1975                      &total_zeros_len [i][0], 1, 1,
1976                      &total_zeros_bits[i][0], 1, 1,
1977                      INIT_VLC_USE_NEW_STATIC);
         // run_before tables for zeros_left 1..6 (loop header on a missing line).
1981             run_vlc[i].table = run_vlc_tables[i];
1982             run_vlc[i].table_allocated = run_vlc_tables_size;
1983             init_vlc(&run_vlc[i],
1985                      &run_len [i][0], 1, 1,
1986                      &run_bits[i][0], 1, 1,
1987                      INIT_VLC_USE_NEW_STATIC);
         // Separate table for zeros_left > 6, reusing row 6 of run_len/run_bits.
         // NOTE(review): the comma operator at the end of line 1989 is
         // original FFmpeg code (harmless, statement-sequencing).
1989         run7_vlc.table = run7_vlc_table,
1990         run7_vlc.table_allocated = run7_vlc_table_size;
1991         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1992                  &run_len [6][0], 1, 1,
1993                  &run_bits[6][0], 1, 1,
1994                  INIT_VLC_USE_NEW_STATIC);
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the SPS/PPS parameter-set buffers and the per-thread scratch
 * buffers of every slice-thread context.
 * NOTE(review): fragmented listing — declarations of `i`/`hx` and some
 * braces fall on missing lines. */
1998 static void free_tables(H264Context *h){
2001     av_freep(&h->intra4x4_pred_mode);
2002     av_freep(&h->chroma_pred_mode_table);
2003     av_freep(&h->cbp_table);
2004     av_freep(&h->mvd_table[0]);
2005     av_freep(&h->mvd_table[1]);
2006     av_freep(&h->direct_table);
2007     av_freep(&h->non_zero_count);
2008     av_freep(&h->slice_table_base);
     // slice_table points into slice_table_base with an offset; clear it
     // so it cannot dangle after the base buffer is freed.
2009     h->slice_table= NULL;
2011     av_freep(&h->mb2b_xy);
2012     av_freep(&h->mb2b8_xy);
2014     for(i = 0; i < MAX_SPS_COUNT; i++)
2015         av_freep(h->sps_buffers + i);
2017     for(i = 0; i < MAX_PPS_COUNT; i++)
2018         av_freep(h->pps_buffers + i);
     // Per-thread buffers: these are allocated per context (context_init /
     // frame_start), not shared, so each thread context frees its own.
2020     for(i = 0; i < h->s.avctx->thread_count; i++) {
2021         hx = h->thread_context[i];
2023         av_freep(&hx->top_borders[1]);
2024         av_freep(&hx->top_borders[0]);
2025         av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per QP 0..51) for both
 * scaling matrices.  If the two PPS 8x8 scaling matrices are identical,
 * table 1 aliases table 0 instead of being recomputed.  When the
 * platform IDCT is not the C reference one, coefficients are stored
 * transposed to match that IDCT's input layout ("FIXME ugly" is
 * original).
 * NOTE(review): fragmented listing — the inner loop over `x` and
 * several braces are on missing lines. */
2029 static void init_dequant8_coeff_table(H264Context *h){
2031     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2032     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2033     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2035     for(i=0; i<2; i++ ){
         // Alias table 1 to table 0 when the scaling matrices match.
2036         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2037             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2041         for(q=0; q<52; q++){
             // q = 6*shift + idx decomposition via the div6/rem6 lookup tables.
2042             int shift = ff_div6[q];
2043             int idx = ff_rem6[q];
             // Base coeff * scaling matrix entry, scaled by 2^shift;
             // index is transposed when a non-C IDCT is in use.
2045                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2046                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2047                     h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (QP 0..51) for all six scaling
 * matrices (Intra/Inter x Y/Cb/Cr).  Matrices that compare equal to an
 * earlier one share that earlier table instead of being recomputed.
 * As in the 8x8 variant, entries are stored transposed for non-C IDCTs.
 * NOTE(review): fragmented listing — the `j` loop header, the `x` loop
 * and several braces are on missing lines. */
2052 static void init_dequant4_coeff_table(H264Context *h){
2054     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2055     for(i=0; i<6; i++ ){
2056         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         // Deduplicate: reuse an earlier matrix j identical to matrix i.
2058             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2059                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2066         for(q=0; q<52; q++){
             // +2 extra shift relative to the 8x8 case (4x4 scaling convention).
2067             int shift = ff_div6[q] + 2;
2068             int idx = ff_rem6[q];
2070                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2071                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2072                     h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS: always the 4x4
 * tables, the 8x8 tables only when the PPS enables 8x8 transforms.
 * With transform_bypass (lossless), QP 0 entries are forced to the
 * neutral value 1<<6 so dequantization becomes an identity.
 * NOTE(review): fragmented listing — the loops over `i`/`x` surrounding
 * lines 2085/2089 are on missing lines. */
2077 static void init_dequant_tables(H264Context *h){
2079     init_dequant4_coeff_table(h);
2080     if(h->pps.transform_8x8_mode)
2081         init_dequant8_coeff_table(h);
2082     if(h->sps.transform_bypass){
             // Neutral dequant factor for lossless mode at QP 0.
2085                 h->dequant4_coeff[i][0][x] = 1<<6;
2086         if(h->pps.transform_8x8_mode)
2089                     h->dequant8_coeff[i][0][x] = 1<<6;
2096 * needs width/height
/* Allocate the per-stream tables that depend on picture dimensions
 * (hence "needs width/height"): intra prediction modes, nnz counts,
 * slice table, cbp, mvd, direct-mode table, and the mb-to-block index
 * maps.  Returns 0 on success; CHECKED_ALLOCZ presumably jumps to a
 * cleanup path on failure (not visible in this fragmented listing).
 * NOTE(review): listing is missing lines (e.g. the `fail:` label and
 * return statements). */
2098 static int alloc_tables(H264Context *h){
2099     MpegEncContext * const s = &h->s;
     // +1 row of macroblocks for the out-of-frame top neighbors.
2100     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2103     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2105     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2106     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2107     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2109     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2110     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2111     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2112     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     // -1 marks "no slice" so neighbor-availability checks fail safely;
     // slice_table is offset into the base so negative neighbors are valid.
2114     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
2115     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2117     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2118     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
     // Precompute macroblock index -> 4x4-block / 8x8-block index maps.
2119     for(y=0; y<s->mb_height; y++){
2120         for(x=0; x<s->mb_width; x++){
2121             const int mb_xy= x + y*s->mb_stride;
2122             const int b_xy = 4*x + 4*y*h->b_stride;
2123             const int b8_xy= 2*x + 2*y*h->b8_stride;
2125             h->mb2b_xy [mb_xy]= b_xy;
2126             h->mb2b8_xy[mb_xy]= b8_xy;
     // Scratchpad is allocated lazily in frame_start() (needs linesize).
2130     s->obmc_scratchpad = NULL;
2132     if(!h->dequant4_coeff[0])
2133         init_dequant_tables(h);
2142 * Mimic alloc_tables(), but for every context thread.
/* Mimic alloc_tables() for a slice-thread context: instead of
 * allocating, point dst's table pointers at src's shared tables.
 * Only the obmc_scratchpad stays per-thread (allocated lazily in
 * frame_start()); the prediction function table is (re)initialized
 * per context. */
2144 static void clone_tables(H264Context *dst, H264Context *src){
2145     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2146     dst->non_zero_count           = src->non_zero_count;
2147     dst->slice_table              = src->slice_table;
2148     dst->cbp_table                = src->cbp_table;
2149     dst->mb2b_xy                  = src->mb2b_xy;
2150     dst->mb2b8_xy                 = src->mb2b8_xy;
2151     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2152     dst->mvd_table[0]             = src->mvd_table[0];
2153     dst->mvd_table[1]             = src->mvd_table[1];
2154     dst->direct_table             = src->direct_table;
     // Not shared: each thread gets its own scratchpad, created on demand.
2156     dst->s.obmc_scratchpad = NULL;
2157     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2162 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate buffers which are NOT shared amongst slice threads: the two
 * top-border rows (16 luma + 8+8 chroma bytes per macroblock column).
 * On allocation failure CHECKED_ALLOCZ presumably branches to the
 * return -1 path; free_tables() cleans up.
 * NOTE(review): fragmented listing — the success return and the `fail:`
 * label are on missing lines. */
2164 static int context_init(H264Context *h){
2165     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2166     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2170     return -1; // free_tables will clean up for us
/* Initialization common to decoder (and encoder) setup: copy the
 * dimensions/codec id from the AVCodecContext, init the prediction
 * function pointers, and default both scaling-matrix sets to the flat
 * value 16 (no scaling) until an SPS/PPS overrides them. */
2173 static av_cold void common_init(H264Context *h){
2174     MpegEncContext * const s = &h->s;
2176     s->width = s->avctx->width;
2177     s->height = s->avctx->height;
2178     s->codec_id= s->avctx->codec->id;
2180     ff_h264_pred_init(&h->hpc, s->codec_id);
     // -1 = "no PPS seen yet"; forces dequant tables to be (re)built.
2182     h->dequant_coeff_pps= -1;
2183     s->unrestricted_mv=1;
2184     s->decode=1; //FIXME
     // Flat default scaling matrices (value 16 == unity scaling).
2186     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2187     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback for the H.264 decoder: set up the MpegEncContext
 * defaults, choose the output pixel format (SVQ3 uses full-range JPEG
 * YUV), and detect AVC-style ("mp4"-packaged) extradata by its leading
 * version byte 1.
 * NOTE(review): fragmented listing — e.g. the common_init()/
 * decode_init_vlc() calls and the return are on missing lines. */
2190 static av_cold int decode_init(AVCodecContext *avctx){
2191     H264Context *h= avctx->priv_data;
2192     MpegEncContext * const s = &h->s;
2194     MPV_decode_defaults(s);
2199     s->out_format = FMT_H264;
2200     s->workaround_bugs= avctx->workaround_bugs;
2203 //    s->decode_mb= ff_h263_decode_mb;
2204     s->quarter_sample = 1;
2207     if(avctx->codec_id == CODEC_ID_SVQ3)
2208         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2210         avctx->pix_fmt= PIX_FMT_YUV420P;
     // extradata starting with byte 1 => AVCDecoderConfigurationRecord
     // (length-prefixed NAL units instead of Annex-B start codes).
2214     if(avctx->extradata_size > 0 && avctx->extradata &&
2215        *(char *)avctx->extradata == 1){
2222     h->thread_context[0] = h;
     // INT_MIN = "nothing output yet" sentinel for output POC ordering.
2223     h->outputed_poc = INT_MIN;
/* Per-frame setup: start the MPV frame and error resilience, reset the
 * IDR/key-frame flag, precompute the block offsets for frame and field
 * (second half of the array) layouts, lazily allocate the per-thread
 * bipred scratchpad, and reset state that later code relies on (slice
 * table for MBAFF/threads, reference flag, field POCs).
 * NOTE(review): fragmented listing — error return paths and several
 * loop headers are on missing lines. */
2227 static int frame_start(H264Context *h){
2228     MpegEncContext * const s = &h->s;
2231     if(MPV_frame_start(s, s->avctx) < 0)
2233     ff_er_frame_start(s);
2235      * MPV_frame_start uses pict_type to derive key_frame.
2236      * This is incorrect for H.264; IDR markings must be used.
2237      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2238      * See decode_nal_units().
2240     s->current_picture_ptr->key_frame= 0;
2242     assert(s->linesize && s->uvlinesize);
     // block_offset[0..23]: frame-coded layout; [24..47]: field-coded
     // layout (doubled line stride between block rows).
2244     for(i=0; i<16; i++){
2245         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2246         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2249         h->block_offset[16+i]=
2250         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2251         h->block_offset[24+16+i]=
2252         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2255     /* can't be in alloc_tables because linesize isn't known there.
2256      * FIXME: redo bipred weight to not require extra buffer? */
2257     for(i = 0; i < s->avctx->thread_count; i++)
2258         if(!h->thread_context[i]->s.obmc_scratchpad)
2259             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2261     /* some macroblocks will be accessed before they're available */
2262     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2263         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2265 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2267     // We mark the current picture as non-reference after allocating it, so
2268     // that if we break out due to an error it can be released automatically
2269     // in the next MPV_frame_start().
2270     // SVQ3 as well as most other codecs have only last/next/current and thus
2271     // get released even with set reference, besides SVQ3 and others do not
2272     // mark frames as reference later "naturally".
2273     if(s->codec_id != CODEC_ID_SVQ3)
2274         s->current_picture_ptr->reference= 0;
     // INT_MAX = "POC not yet decoded" sentinel for both fields.
2276     s->current_picture_ptr->field_poc[0]=
2277     s->current_picture_ptr->field_poc[1]= INT_MAX;
2278     assert(s->current_picture_ptr->long_ref==0);
/* Save the borders of the just-decoded macroblock (rightmost column
 * into left_border, bottom row into top_borders[0]) so the deblocking
 * filter can run on the *unfiltered* neighbor pixels when processing
 * the next macroblock.  Chroma borders are skipped in gray-only mode.
 * NOTE(review): fragmented listing — e.g. the `src_y` adjustment and
 * the chroma loop header are on missing lines. */
2283 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2284     MpegEncContext * const s = &h->s;
2288     src_cb -= uvlinesize;
2289     src_cr -= uvlinesize;
2291     // There are two lines saved, the line above the the top macroblock of a pair,
2292     // and the line above the bottom macroblock
     // Preserve the corner pixel before top_borders is overwritten below.
2293     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2294     for(i=1; i<17; i++){
2295         h->left_border[i]= src_y[15+i*  linesize];
     // Save the bottom luma row (16 bytes, two 64-bit stores).
2298     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2299     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2301     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         // Chroma corners (Cb at +16, Cr at +24 in the border buffer).
2302         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
2303         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2305             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
2306             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2308         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2309         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the macroblock's top/left border
 * pixels with the saved unfiltered copies, so intra prediction sees
 * unfiltered neighbors while the frame buffer keeps filtered pixels.
 * With deblocking_filter==2 the swap only applies across slice
 * boundaries (same-slice neighbors are checked via slice_table).
 * NOTE(review): fragmented listing — the XCHG macro body and several
 * conditionals are on missing lines. */
2313 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2314     MpegEncContext * const s = &h->s;
2321     if(h->deblocking_filter == 2) {
         // Filter-across-slices disabled: only swap when the neighbor
         // belongs to the same slice.
2323         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2324         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2326         deblock_left = (s->mb_x > 0);
2327         deblock_top =  (s->mb_y > 0);
     // Step back one row/column so index 0 addresses the border pixels.
2330     src_y  -=   linesize + 1;
2331     src_cb -= uvlinesize + 1;
2332     src_cr -= uvlinesize + 1;
2334 #define XCHG(a,b,t,xchg)\
2341         for(i = !deblock_top; i<17; i++){
2342             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2347         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2348         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
         // Top-right neighbor pixels of the next macroblock column.
2349         if(s->mb_x+1 < s->mb_width){
2350             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2354     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2356             for(i = !deblock_top; i<9; i++){
2357                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
2358                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2362             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2363             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): save the borders of a whole
 * macroblock *pair* (32 luma rows / 16 chroma rows on the left, the
 * bottom two rows into top_borders[0] and top_borders[1]).
 * NOTE(review): fragmented listing — the chroma copy loop's closing
 * brace and other lines are missing. */
2368 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2369     MpegEncContext * const s = &h->s;
2372     src_y  -= 2 *   linesize;
2373     src_cb -= 2 * uvlinesize;
2374     src_cr -= 2 * uvlinesize;
2376     // There are two lines saved, the line above the the top macroblock of a pair,
2377     // and the line above the bottom macroblock
2378     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2379     h->left_border[1]= h->top_borders[1][s->mb_x][15];
2380     for(i=2; i<34; i++){
2381         h->left_border[i]= src_y[15+i*  linesize];
     // Bottom two luma rows of the pair (rows 32 and 33 of the shifted src).
2384     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
2385     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2386     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
2387     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2389     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         // Chroma corner pixels for both border rows (Cb at +16, Cr at +24).
2390         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
2391         h->left_border[34+ 1  ]= h->top_borders[1][s->mb_x][16+7];
2392         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
2393         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2394         for(i=2; i<18; i++){
2395             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
2396             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2398         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2399         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2400         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2401         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): swap/restore the border pixels of
 * a macroblock pair with the saved unfiltered copies (both
 * top_borders rows, 34 left-border luma entries, 2x18 chroma).
 * NOTE(review): fragmented listing — the XCHG macro body and several
 * braces are on missing lines. */
2405 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2406     MpegEncContext * const s = &h->s;
     // mb_y > 1 because the pair occupies two macroblock rows.
2409     int deblock_left = (s->mb_x > 0);
2410     int deblock_top  = (s->mb_y > 1);
2412     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2414     src_y  -= 2 *   linesize + 1;
2415     src_cb -= 2 * uvlinesize + 1;
2416     src_cr -= 2 * uvlinesize + 1;
2418 #define XCHG(a,b,t,xchg)\
2425         for(i = (!deblock_top)<<1; i<34; i++){
2426             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2431         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2432         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2433         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2434         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2435         if(s->mb_x+1 < s->mb_width){
2436             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2437             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2441     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2443             for(i = (!deblock_top) << 1; i<18; i++){
2444                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
2445                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2449             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2450             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2451             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2452             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction
 * or motion compensation, IDCT/residual addition for luma and chroma,
 * and deblocking.  `simple` is a compile-time flag: the simple variant
 * assumes progressive H.264 without PCM/gray/encoding special cases,
 * letting the always-inline expansion drop those branches.
 * NOTE(review): heavily fragmented listing — many lines (conditionals,
 * braces, else-branches) are missing; comments below describe only the
 * visible code. */
2457 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2458     MpegEncContext * const s = &h->s;
2459     const int mb_x= s->mb_x;
2460     const int mb_y= s->mb_y;
2461     const int mb_xy= h->mb_xy;
2462     const int mb_type= s->current_picture.mb_type[mb_xy];
2463     uint8_t  *dest_y, *dest_cb, *dest_cr;
2464     int linesize, uvlinesize /*dct_offset*/;
2466     int *block_offset = &h->block_offset[0];
2467     const unsigned int bottom = mb_y & 1;
2468     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2469     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2470     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
     // Destination pointers for this macroblock in the current picture.
2472     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2473     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2474     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2476     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2477     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     // Field macroblock in MBAFF: double strides, use the field block
     // offsets, and for the bottom field re-anchor dest to the top row.
2479     if (!simple && MB_FIELD) {
2480         linesize   = h->mb_linesize = s->linesize * 2;
2481         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2482         block_offset = &h->block_offset[24];
2483         if(mb_y&1){ //FIXME move out of this function?
2484             dest_y -= s->linesize*15;
2485             dest_cb-= s->uvlinesize*7;
2486             dest_cr-= s->uvlinesize*7;
         // Rewrite ref_cache so references encode field parity
         // ((16+ref)^(mb_y&1)) for the field MC paths.
2490         for(list=0; list<h->list_count; list++){
2491             if(!USES_LIST(mb_type, list))
2493             if(IS_16X16(mb_type)){
2494                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2495                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2497                 for(i=0; i<16; i+=4){
2498                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2499                     int ref = h->ref_cache[list][scan8[i]];
2501                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
     // Frame macroblock: normal strides.
2507         linesize   = h->mb_linesize   = s->linesize;
2508         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2509 //        dct_offset = s->linesize * 16;
     // Pick the IDCT/add functions: bypass (lossless) just adds pixels,
     // otherwise 8x8 or 4x4 IDCT with a fast DC-only variant.
2512     if(transform_bypass){
2514         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2515     }else if(IS_8x8DCT(mb_type)){
2516         idct_dc_add = s->dsp.h264_idct8_dc_add;
2517         idct_add = s->dsp.h264_idct8_add;
2519         idct_dc_add = s->dsp.h264_idct_dc_add;
2520         idct_add = s->dsp.h264_idct_add;
     // MBAFF + deblocking + intra: swap in unfiltered pair borders before
     // prediction (restored/refiltered later).
2523     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2524        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2525         int mbt_y = mb_y&~1;
2526         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
2527         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2528         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2529         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
     // PCM macroblock: raw samples copied straight from h->mb.
2532     if (!simple && IS_INTRA_PCM(mb_type)) {
2533         for (i=0; i<16; i++) {
2534             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2536         for (i=0; i<8; i++) {
2537             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2538             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2541         if(IS_INTRA(mb_type)){
2542             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2543                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2545             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2546                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2547                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
             // Intra 4x4 / 8x8 luma: predict each sub-block then add its
             // residual immediately (prediction depends on reconstructed
             // neighbors).
2550             if(IS_INTRA4x4(mb_type)){
2551                 if(simple || !s->encoding){
2552                     if(IS_8x8DCT(mb_type)){
2553                         for(i=0; i<16; i+=4){
2554                             uint8_t * const ptr= dest_y + block_offset[i];
2555                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2556                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2557                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2558                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                             // nnz==1 with a DC coeff => cheap DC-only add.
2560                                 if(nnz == 1 && h->mb[i*16])
2561                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2563                                     idct_add(ptr, h->mb + i*16, linesize);
2567                     for(i=0; i<16; i++){
2568                         uint8_t * const ptr= dest_y + block_offset[i];
2570                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                         // Down-left modes need the top-right samples; when
                         // unavailable, replicate the last top pixel.
2573                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2574                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2575                             assert(mb_y || linesize <= block_offset[i]);
2576                             if(!topright_avail){
2577                                 tr= ptr[3 - linesize]*0x01010101;
2578                                 topright= (uint8_t*) &tr;
2580                                 topright= ptr + 4 - linesize;
2584                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2585                         nnz = h->non_zero_count_cache[ scan8[i] ];
2588                                 if(nnz == 1 && h->mb[i*16])
2589                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2591                                     idct_add(ptr, h->mb + i*16, linesize);
2593                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
             // Intra 16x16: predict whole luma block, then dequant+IDCT the
             // DC plane (H.264) or the SVQ3 equivalent.
2598                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2600                     if(!transform_bypass)
2601                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2603                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2605             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2606                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
         // Inter macroblock: full motion compensation.
2608             hl_motion(h, dest_y, dest_cb, dest_cr,
2609                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2610                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2611                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     // Luma residual for non-intra4x4 macroblocks (intra4x4 added its
     // residual during prediction above).
2615         if(!IS_INTRA4x4(mb_type)){
2617             if(IS_INTRA16x16(mb_type)){
2618                 for(i=0; i<16; i++){
2619                     if(h->non_zero_count_cache[ scan8[i] ])
2620                         idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2621                     else if(h->mb[i*16])
2622                         idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2625                 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2626                 for(i=0; i<16; i+=di){
2627                     int nnz = h->non_zero_count_cache[ scan8[i] ];
2629                         if(nnz==1 && h->mb[i*16])
2630                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2632                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
             // SVQ3 residual path.
2637                 for(i=0; i<16; i++){
2638                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2639                         uint8_t * const ptr= dest_y + block_offset[i];
2640                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     // Chroma residual (skipped in gray-only decoding).
2646         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2647             uint8_t *dest[2] = {dest_cb, dest_cr};
2648             if(transform_bypass){
2649                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2651                 idct_add = s->dsp.h264_idct_add;
2652                 idct_dc_add = s->dsp.h264_idct_dc_add;
                 // Chroma DC uses separate QPs and Intra/Inter dequant tables.
2653                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2654                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2657                 for(i=16; i<16+8; i++){
2658                     if(h->non_zero_count_cache[ scan8[i] ])
2659                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2660                     else if(h->mb[i*16])
2661                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2664                 for(i=16; i<16+8; i++){
2665                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2666                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2667                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
     // Deblocking.  MBAFF: filter the whole pair once the bottom MB is
     // done (note the early return for the top MB); otherwise filter this
     // macroblock directly with the fast path.
2673     if(h->deblocking_filter) {
2674         if (!simple && FRAME_MBAFF) {
2675             //FIXME try deblocking one mb at a time?
2676             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2677             const int mb_y = s->mb_y - 1;
2678             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2679             const int mb_xy= mb_x + mb_y*s->mb_stride;
2680             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
2681             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2682             if (!bottom) return;
2683             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2684             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2685             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2687             if(IS_INTRA(mb_type_top | mb_type_bottom))
2688                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2690             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
             // Temporarily step back to the top MB of the pair to filter it.
2693             s->mb_y--; h->mb_xy -= s->mb_stride;
2694             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2695             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2696             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2697             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2698             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
             // Then the bottom MB.
2700             s->mb_y++; h->mb_xy += s->mb_stride;
2701             tprintf(h->s.avctx, "call mbaff filter_mb\n");
2702             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2703             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2704             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2705             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2707             tprintf(h->s.avctx, "call filter_mb\n");
2708             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2709             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2710             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2711             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2712             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2718 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal(h, 1) with simple=1 constant-folds
 * away the MBAFF/PCM/gray/SVQ3 branches via the always_inline body. */
2720 static void hl_decode_mb_simple(H264Context *h){
2721     hl_decode_mb_internal(h, 1);
2725 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: simple=0 keeps all edge cases (interlacing, PCM, gray, …);
 * av_noinline keeps this cold copy out of the hot caller. */
2727 static void av_noinline hl_decode_mb_complex(H264Context *h){
2728     hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant,
 * depending on whether any uncommon feature (MBAFF, field MB, PCM,
 * non-H.264 codec, gray-only, encoding, size-optimized build) is active.
 * NOTE(review): fragmented listing — the line assigning/testing
 * `is_complex` before line 2742 is missing. */
2731 static void hl_decode_mb(H264Context *h){
2732     MpegEncContext * const s = &h->s;
2733     const int mb_xy= h->mb_xy;
2734     const int mb_type= s->current_picture.mb_type[mb_xy];
2735     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2736                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
     // Encoder-only contexts with decode disabled skip reconstruction.
2738     if(ENABLE_H264_ENCODER && !s->decode)
2742         hl_decode_mb_complex(h);
2743     else hl_decode_mb_simple(h);
/* Convert a frame Picture in place into a single-field view: for the
 * bottom field, offset each plane by one line; double the linesizes so
 * stepping one "row" skips the opposite field; set reference/poc to the
 * requested parity.
 * NOTE(review): fragmented listing — the declaration of `i` and the
 * loop's closing brace are on missing lines. */
2746 static void pic_as_field(Picture *pic, const int parity){
2748     for (i = 0; i < 4; ++i) {
2749         if (parity == PICT_BOTTOM_FIELD)
2750             pic->data[i] += pic->linesize[i];
2751         pic->reference = parity;
2752         pic->linesize[i] *= 2;
2754     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy `src` into `dest` as the requested field (or whole frame),
 * returning whether src's reference flags matched the requested parity.
 * NOTE(review): fragmented listing — the `*dest = *src` copy and the
 * return are on missing lines; `id_add` is applied to pic_id of the
 * copied entry. */
2757 static int split_field_copy(Picture *dest, Picture *src,
2758                             int parity, int id_add){
2759     int match = !!(src->reference & parity);
2763         if(parity != PICT_FRAME){
2764             pic_as_field(dest, parity);
2766             dest->pic_id += id_add;
/* Append, to `def`, field/frame entries built from the `in` array of
 * candidate reference pictures, alternating between the selected field
 * parity (`sel`) and its opposite (`sel^3`) per H.264's field reference
 * list construction.  pic_id is the long-term index for long refs, the
 * frame_num for short refs.
 * NOTE(review): fragmented listing — the declarations of i[]/index and
 * the return are on missing lines. */
2773 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2777     while(i[0]<len || i[1]<len){
         // Advance each cursor to the next candidate whose reference
         // flags include the wanted parity.
2778         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2780         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2783             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2784             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2787             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2788             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort `src` pictures by POC into `sorted`, taking only POCs
 * strictly beyond `limit` in the direction `dir` (dir=0: descending
 * POCs below limit; dir=1: ascending POCs above limit) — used to order
 * B-slice reference candidates around the current POC.
 * NOTE(review): fragmented listing — declarations of i/out_i/best_poc,
 * the outer loop and the return are on missing lines. */
2795 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2800         best_poc= dir ? INT_MIN : INT_MAX;
2802         for(i=0; i<len; i++){
2803             const int poc= src[i]->poc;
             // Candidate must be past `limit` and better than the best so far,
             // with both comparisons direction-flipped by `dir`.
2804             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2806                 sorted[out_i]= src[i];
2809         if(best_poc == (dir ? INT_MIN : INT_MAX))
2811         limit= sorted[out_i++]->poc - dir;
2817 * fills the default_ref_list.
/* Build the default reference picture lists per H.264 8.2.4.2:
 * for B slices, short-term refs sorted by POC around the current POC
 * (list0: before then after; list1: after then before) followed by
 * long-term refs, with list1's first two entries swapped if it would
 * otherwise equal list0; for P slices, short-term by frame_num order
 * then long-term.  Unused tail entries are zeroed.
 * NOTE(review): fragmented listing — `lens[]` bookkeeping and the
 * return are on missing lines. */
2819 static int fill_default_ref_list(H264Context *h){
2820     MpegEncContext * const s = &h->s;
2823     if(h->slice_type_nos==FF_B_TYPE){
2824         Picture *sorted[32];
         // Field decoding: compare against the POC of the current field.
2829             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2831             cur_poc= s->current_picture_ptr->poc;
2833         for(list= 0; list<2; list++){
2834             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2835             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2837             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2838             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2841             if(len < h->ref_count[list])
2842                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
         // Spec rule: if list1 == list0 and has >1 entry, swap its first two.
2846         if(lens[0] == lens[1] && lens[1] > 1){
2847             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2849                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
         // P/SP slices: single list, short-term refs then long-term.
2852         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2853         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2855         if(len < h->ref_count[0])
2856             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
     // Debug trace of the constructed lists.
2859     for (i=0; i<h->ref_count[0]; i++) {
2860         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2862     if(h->slice_type_nos==FF_B_TYPE){
2863         for (i=0; i<h->ref_count[1]; i++) {
2864             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2871 static void print_short_term(H264Context *h);
2872 static void print_long_term(H264Context *h);
2875 * Extract structure information about the picture described by pic_num in
2876 * the current decoding context (frame or field). Note that pic_num is
2877 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2878 * @param pic_num picture number for which to extract structure information
2879 * @param structure one of PICT_XXX describing structure of picture
2881 * @return frame number (short term) or long term index of picture
2882 * described by pic_num
/* See the doc comment above: split a field pic_num into its structure
 * (same/opposite field parity, derived from the low bit on a missing
 * line) and its frame_num / long-term index (the returned value, on a
 * missing line).
 * NOTE(review): fragmented listing — the parity test and return are
 * not visible. */
2884 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2885     MpegEncContext * const s = &h->s;
2887     *structure = s->picture_structure;
2890             /* opposite field */
2891             *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command
 * (idc 0/1: short-term via abs_diff_pic_num; idc 2: long-term index;
 * idc 3: end) locate the referenced picture and rotate it into position
 * `index`.  Missing references are zeroed/patched with the current
 * picture.  Finally derive the direct-mode helper tables.  Returns 0 on
 * success, -1 on bitstream errors (error paths partially on missing
 * lines).
 * NOTE(review): fragmented listing — several braces, declarations and
 * returns are not visible. */
2898 static int decode_ref_pic_list_reordering(H264Context *h){
2899     MpegEncContext * const s = &h->s;
2900     int list, index, pic_structure;
2902     print_short_term(h);
2905     for(list=0; list<h->list_count; list++){
2906         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
         // ref_pic_list_reordering_flag_l0/l1
2908         if(get_bits1(&s->gb)){
2909             int pred= h->curr_pic_num;
2911             for(index=0; ; index++){
2912                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2913                 unsigned int pic_id;
2915                 Picture *ref = NULL;
2917                 if(reordering_of_pic_nums_idc==3)
2920                 if(index >= h->ref_count[list]){
2921                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2925                 if(reordering_of_pic_nums_idc<3){
2926                     if(reordering_of_pic_nums_idc<2){
                         // Short-term: pic_num predicted differentially.
2927                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2930                         if(abs_diff_pic_num > h->max_pic_num){
2931                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2935                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2936                         else                                pred+= abs_diff_pic_num;
2937                         pred &= h->max_pic_num - 1;
2939                         frame_num = pic_num_extract(h, pred, &pic_structure);
                         // Search newest-first among short-term refs.
2941                         for(i= h->short_ref_count-1; i>=0; i--){
2942                             ref = h->short_ref[i];
2943                             assert(ref->reference);
2944                             assert(!ref->long_ref);
2946                                 ref->frame_num == frame_num &&
2947                                 (ref->reference & pic_structure)
                         // Long-term: direct index lookup.
2955                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2957                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2960                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2963                         ref = h->long_ref[long_idx];
2964                         assert(!(ref && !ref->reference));
2965                         if(ref && (ref->reference & pic_structure)){
2966                             ref->pic_id= pic_id;
2967                             assert(ref->long_ref);
2975                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2976                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                         // Rotate: shift entries down to `index`, then insert
                         // the located reference there (spec 8.2.4.3).
2978                         for(i=index; i+1<h->ref_count[list]; i++){
2979                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2982                         for(; i > index; i--){
2983                             h->ref_list[list][i]= h->ref_list[list][i-1];
2985                         h->ref_list[list][index]= *ref;
2987                             pic_as_field(&h->ref_list[list][index], pic_structure);
2991                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
     // Patch any still-empty list entries to avoid NULL derefs later.
2997     for(list=0; list<h->list_count; list++){
2998         for(index= 0; index < h->ref_count[list]; index++){
2999             if(!h->ref_list[list][index].data[0]){
3000                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
3001                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
3006     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3007         direct_dist_scale_factor(h);
3008     direct_ref_list_init(h);
/*
 * Build the per-field reference entries (indices 16+2*i / 16+2*i+1) used by
 * MBAFF decoding from the frame entries of h->ref_list, and duplicate the
 * explicit/implicit weights accordingly.
 * NOTE(review): loop headers over data planes (j) and some braces are
 * missing from this extract.
 */
3012 static void fill_mbaff_ref_list(H264Context *h){
3014 for(list=0; list<2; list++){ //FIXME try list_count
3015 for(i=0; i<h->ref_count[list]; i++){
3016 Picture *frame = &h->ref_list[list][i];
3017 Picture *field = &h->ref_list[list][16+2*i];
/* Field views use doubled line stride over the frame's planes. */
3020 field[0].linesize[j] <<= 1;
3021 field[0].reference = PICT_TOP_FIELD;
3022 field[1] = field[0];
/* Bottom field starts one (frame) line below the top field. */
3024 field[1].data[j] += frame->linesize[j];
3025 field[1].reference = PICT_BOTTOM_FIELD;
/* Copy luma/chroma weights and offsets to both field entries. */
3027 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3028 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3030 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3031 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Replicate the implicit weight table along both axes for field refs. */
3035 for(j=0; j<h->ref_count[1]; j++){
3036 for(i=0; i<h->ref_count[0]; i++)
3037 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3038 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3039 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/*
 * Parse the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): per-list, per-reference luma and chroma weights/offsets.
 * Sets h->use_weight / h->use_weight_chroma when any weight differs from
 * the default (1<<log2_denom with zero offset).
 * NOTE(review): chroma component loops and some closing braces are missing
 * from this extract.
 */
3043 static int pred_weight_table(H264Context *h){
3044 MpegEncContext * const s = &h->s;
3046 int luma_def, chroma_def;
3049 h->use_weight_chroma= 0;
3050 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3051 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* "Neutral" weight values: anything else enables weighted prediction. */
3052 luma_def = 1<<h->luma_log2_weight_denom;
3053 chroma_def = 1<<h->chroma_log2_weight_denom;
3055 for(list=0; list<2; list++){
3056 for(i=0; i<h->ref_count[list]; i++){
3057 int luma_weight_flag, chroma_weight_flag;
3059 luma_weight_flag= get_bits1(&s->gb);
3060 if(luma_weight_flag){
3061 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3062 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3063 if( h->luma_weight[list][i] != luma_def
3064 || h->luma_offset[list][i] != 0)
3067 h->luma_weight[list][i]= luma_def;
3068 h->luma_offset[list][i]= 0;
3072 chroma_weight_flag= get_bits1(&s->gb);
3073 if(chroma_weight_flag){
3076 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3077 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3078 if( h->chroma_weight[list][i][j] != chroma_def
3079 || h->chroma_offset[list][i][j] != 0)
3080 h->use_weight_chroma= 1;
3085 h->chroma_weight[list][i][j]= chroma_def;
3086 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry weights for list 1. */
3091 if(h->slice_type_nos != FF_B_TYPE) break;
3093 h->use_weight= h->use_weight || h->use_weight_chroma;
/*
 * Compute implicit bi-prediction weights (weighted_bipred_idc==2) from POC
 * distances, per H.264 8.4.2.3.2: weights derive from the distance scale
 * factor clipped to [-64,128], else fall back to equal weights (32/32).
 * NOTE(review): several guard conditions (e.g. td==0, equal long-term
 * checks) and braces are missing from this extract.
 */
3097 static void implicit_weight_table(H264Context *h){
3098 MpegEncContext * const s = &h->s;
3100 int cur_poc = s->current_picture_ptr->poc;
/* Single-ref symmetric case: current POC midway between the two refs. */
3102 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3103 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3105 h->use_weight_chroma= 0;
3110 h->use_weight_chroma= 2;
3111 h->luma_log2_weight_denom= 5;
3112 h->chroma_log2_weight_denom= 5;
3114 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3115 int poc0 = h->ref_list[0][ref0].poc;
3116 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3117 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor per the spec's fixed-point derivation. */
3118 int td = av_clip(poc1 - poc0, -128, 127);
3120 int tb = av_clip(cur_poc - poc0, -128, 127);
3121 int tx = (16384 + (FFABS(td) >> 1)) / td;
3122 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3123 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3124 h->implicit_weight[ref0][ref1] = 32;
3126 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3128 h->implicit_weight[ref0][ref1] = 32;
3134 * Mark a picture as no longer needed for reference. The refmask
3135 * argument allows unreferencing of individual fields or the whole frame.
3136 * If the picture becomes entirely unreferenced, but is being held for
3137 * display purposes, it is marked as such.
3138 * @param refmask mask of fields to unreference; the mask is bitwise
3139 * anded with the reference marking of pic
3140 * @return non-zero if pic becomes entirely unreferenced (except possibly
3141 * for display purposes) zero if one of the fields remains in
3144 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* If any field bit survives the mask, the picture is still referenced. */
3146 if (pic->reference &= refmask) {
/* Otherwise: if it is still queued for display, keep it alive with the
 * DELAYED_PIC_REF sentinel rather than freeing it. */
3149 for(i = 0; h->delayed_pic[i]; i++)
3150 if(pic == h->delayed_pic[i]){
3151 pic->reference=DELAYED_PIC_REF;
/* NOTE(review): the return statements and the 'int i' declaration are
 * missing from this extract. */
3159 * instantaneous decoder refresh.
/* Handle an IDR: drop every long-term and short-term reference and reset
 * the frame-number prediction state, per H.264 8.2.5.1. */
3161 static void idr(H264Context *h){
3164 for(i=0; i<16; i++){
3165 remove_long(h, i, 0);
3167 assert(h->long_ref_count==0);
3169 for(i=0; i<h->short_ref_count; i++){
/* refmask 0 fully unreferences each short-term picture. */
3170 unreference_pic(h, h->short_ref[i], 0);
3171 h->short_ref[i]= NULL;
3173 h->short_ref_count=0;
3174 h->prev_frame_num= 0;
3175 h->prev_frame_num_offset= 0;
3180 /* forget old pics after a seek */
3181 static void flush_dpb(AVCodecContext *avctx){
3182 H264Context *h= avctx->priv_data;
/* Drop every queued output picture; they are stale after a seek. */
3184 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3185 if(h->delayed_pic[i])
3186 h->delayed_pic[i]->reference= 0;
3187 h->delayed_pic[i]= NULL;
3189 h->outputed_poc= INT_MIN;
3191 if(h->s.current_picture_ptr)
3192 h->s.current_picture_ptr->reference= 0;
3193 h->s.first_field= 0;
/* Also flush the generic mpegvideo picture buffers. */
3194 ff_mpeg_flush(avctx);
3198 * Find a Picture in the short term reference list by frame number.
3199 * @param frame_num frame number to search for
3200 * @param idx the index into h->short_ref where returned picture is found
3201 * undefined if no picture found.
3202 * @return pointer to the found picture, or NULL if no pic with the provided
3203 * frame number is found
3205 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3206 MpegEncContext * const s = &h->s;
/* Linear scan; the short-term list is small (<=16 entries). */
3209 for(i=0; i<h->short_ref_count; i++){
3210 Picture *pic= h->short_ref[i];
3211 if(s->avctx->debug&FF_DEBUG_MMCO)
3212 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3213 if(pic->frame_num == frame_num) {
/* NOTE(review): the match branch (store *idx, return pic) and the final
 * NULL return are missing from this extract. */
3222 * Remove a picture from the short term reference list by its index in
3223 * that list. This does no checking on the provided index; it is assumed
3224 * to be valid. Other list entries are shifted down.
3225 * @param i index into h->short_ref of picture to remove.
3227 static void remove_short_at_index(H264Context *h, int i){
3228 assert(i >= 0 && i < h->short_ref_count);
3229 h->short_ref[i]= NULL;
/* Compact the list: memmove handles the overlapping copy of tail entries. */
3230 if (--h->short_ref_count)
3231 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3236 * @return the removed picture or NULL if an error occurs
/* Remove (or partially unreference, via ref_mask) a short-term reference
 * identified by frame_num; the list entry is dropped only when the picture
 * becomes entirely unreferenced. */
3238 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3239 MpegEncContext * const s = &h->s;
3243 if(s->avctx->debug&FF_DEBUG_MMCO)
3244 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3246 pic = find_short(h, frame_num, &i);
3248 if(unreference_pic(h, pic, ref_mask))
3249 remove_short_at_index(h, i);
/* NOTE(review): declarations of 'pic'/'i', the NULL-check on pic, and the
 * return statement are missing from this extract. */
3256 * Remove a picture from the long term reference list by its index in
3258 * @return the removed picture or NULL if an error occurs
3260 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3263 pic= h->long_ref[i];
/* Only clear the slot once the picture is entirely unreferenced. */
3265 if(unreference_pic(h, pic, ref_mask)){
3266 assert(h->long_ref[i]->long_ref == 1);
3267 h->long_ref[i]->long_ref= 0;
3268 h->long_ref[i]= NULL;
3269 h->long_ref_count--;
/* NOTE(review): the 'pic' declaration, the NULL guard on the slot, and the
 * return statement are missing from this extract. */
3277 * print short term list
/* Debug helper: dump the short-term reference list when FF_DEBUG_MMCO is
 * enabled; no effect otherwise. */
3279 static void print_short_term(H264Context *h) {
3281 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3282 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3283 for(i=0; i<h->short_ref_count; i++){
3284 Picture *pic= h->short_ref[i];
3285 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3291 * print long term list
/* Debug helper: dump all 16 long-term slots when FF_DEBUG_MMCO is enabled;
 * the per-slot NULL check is missing from this extract. */
3293 static void print_long_term(H264Context *h) {
3295 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3297 for(i = 0; i < 16; i++){
3298 Picture *pic= h->long_ref[i];
3300 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3307 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO list (or the implicit sliding-window marking when
 * mmco_count==0 in the complete file) to the short/long reference lists,
 * then handles second-field marking and DPB overflow. See H.264 8.2.5.
 * NOTE(review): break statements, some case labels (MMCO_LONG, implicit
 * sliding-window path) and braces are missing from this extract. */
3309 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3310 MpegEncContext * const s = &h->s;
3312 int current_ref_assigned=0;
3315 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3316 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3318 for(i=0; i<mmco_count; i++){
3319 int structure, frame_num;
3320 if(s->avctx->debug&FF_DEBUG_MMCO)
3321 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* Resolve the short-term picture referenced by SHORT2UNUSED/SHORT2LONG
 * ahead of the switch so both cases share the lookup. */
3323 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3324 || mmco[i].opcode == MMCO_SHORT2LONG){
3325 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3326 pic = find_short(h, frame_num, &j);
3328 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3329 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3330 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3335 switch(mmco[i].opcode){
3336 case MMCO_SHORT2UNUSED:
3337 if(s->avctx->debug&FF_DEBUG_MMCO)
3338 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure^PICT_FRAME unreferences only the addressed field parity. */
3339 remove_short(h, frame_num, structure ^ PICT_FRAME);
3341 case MMCO_SHORT2LONG:
/* Move a short-term ref to long-term slot long_arg, evicting any
 * different occupant of that slot first. */
3342 if (h->long_ref[mmco[i].long_arg] != pic)
3343 remove_long(h, mmco[i].long_arg, 0);
3345 remove_short_at_index(h, j);
3346 h->long_ref[ mmco[i].long_arg ]= pic;
3347 if (h->long_ref[ mmco[i].long_arg ]){
3348 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3349 h->long_ref_count++;
3352 case MMCO_LONG2UNUSED:
3353 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3354 pic = h->long_ref[j];
3356 remove_long(h, j, structure ^ PICT_FRAME);
3357 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3358 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3361 // Comment below left from previous code as it is an interresting note.
3362 /* First field in pair is in short term list or
3363 * at a different long term index.
3364 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3365 * Report the problem and keep the pair where it is,
3366 * and mark this field valid.
/* MMCO_LONG: mark the current picture itself as long-term at long_arg. */
3369 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3370 remove_long(h, mmco[i].long_arg, 0);
3372 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3373 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3374 h->long_ref_count++;
3377 s->current_picture_ptr->reference |= s->picture_structure;
3378 current_ref_assigned=1;
3380 case MMCO_SET_MAX_LONG:
3381 assert(mmco[i].long_arg <= 16);
3382 // just remove the long term which index is greater than new max
3383 for(j = mmco[i].long_arg; j<16; j++){
3384 remove_long(h, j, 0);
/* MMCO_RESET (label missing in this extract): drop everything. */
3388 while(h->short_ref_count){
3389 remove_short(h, h->short_ref[0]->frame_num, 0);
3391 for(j = 0; j < 16; j++) {
3392 remove_long(h, j, 0);
3394 s->current_picture_ptr->poc=
3395 s->current_picture_ptr->field_poc[0]=
3396 s->current_picture_ptr->field_poc[1]=
3400 s->current_picture_ptr->frame_num= 0;
3406 if (!current_ref_assigned) {
3407 /* Second field of complementary field pair; the first field of
3408 * which is already referenced. If short referenced, it
3409 * should be first entry in short_ref. If not, it must exist
3410 * in long_ref; trying to put it on the short list here is an
3411 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3413 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3414 /* Just mark the second field valid */
3415 s->current_picture_ptr->reference = PICT_FRAME;
3416 } else if (s->current_picture_ptr->long_ref) {
3417 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3418 "assignment for second field "
3419 "in complementary field pair "
3420 "(first field is long term)\n");
/* Otherwise insert the current picture at the head of short_ref. */
3422 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3424 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3427 if(h->short_ref_count)
3428 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3430 h->short_ref[0]= s->current_picture_ptr;
3431 h->short_ref_count++;
3432 s->current_picture_ptr->reference |= s->picture_structure;
/* DPB overflow recovery: discard one reference to stay within bounds. */
3436 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3438 /* We have too many reference frames, probably due to corrupted
3439 * stream. Need to discard one frame. Prevents overrun of the
3440 * short_ref and long_ref buffers.
3442 av_log(h->s.avctx, AV_LOG_ERROR,
3443 "number of reference frames exceeds max (probably "
3444 "corrupt input), discarding one\n");
3446 if (h->long_ref_count && !h->short_ref_count) {
3447 for (i = 0; i < 16; ++i)
3452 remove_long(h, i, 0);
3454 pic = h->short_ref[h->short_ref_count - 1];
3455 remove_short(h, pic->frame_num, 0);
3459 print_short_term(h);
/*
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[]/
 * h->mmco_index. For IDR slices this synthesizes the long-term-reference
 * flag; otherwise it reads the adaptive MMCO list or sets up the implicit
 * sliding-window operation.
 * NOTE(review): mmco_index assignments, returns and several braces are
 * missing from this extract.
 */
3464 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3465 MpegEncContext * const s = &h->s;
3469 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped to broken_link (-1/0). */
3470 s->broken_link= get_bits1(gb) -1;
3472 h->mmco[0].opcode= MMCO_LONG;
3473 h->mmco[0].long_arg= 0;
3477 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3478 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3479 MMCOOpcode opcode= get_ue_golomb(gb);
3481 h->mmco[i].opcode= opcode;
3482 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute pic num (mod max_pic_num). */
3483 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3484 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3485 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3489 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3490 unsigned int long_arg= get_ue_golomb(gb);
/* LONG2UNUSED in field mode may address up to 32 field pic nums. */
3491 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3492 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3495 h->mmco[i].long_arg= long_arg;
3498 if(opcode > (unsigned)MMCO_LONG){
3499 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3502 if(opcode == MMCO_END)
3507 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* Implicit sliding window: when the DPB is full, synthesize a
 * SHORT2UNUSED for the oldest short-term reference. */
3509 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3510 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3511 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3512 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3514 if (FIELD_PICTURE) {
3515 h->mmco[0].short_pic_num *= 2;
3516 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3517 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/*
 * Derive the picture order count (POC) of the current picture per H.264
 * 8.2.1, handling all three poc_type modes, and store per-field POCs on
 * the current Picture.
 * NOTE(review): field_poc declaration/field_poc[0] assignment (poc_type 0),
 * the abs_frame_num=0 branch, the poc_type 2 details and the return are
 * missing from this extract.
 */
3527 static int init_poc(H264Context *h){
3528 MpegEncContext * const s = &h->s;
3529 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3531 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped relative to the previous picture → bump the offset. */
3533 h->frame_num_offset= h->prev_frame_num_offset;
3534 if(h->frame_num < h->prev_frame_num)
3535 h->frame_num_offset += max_frame_num;
3537 if(h->sps.poc_type==0){
3538 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Standard MSB wrap detection on poc_lsb (8.2.1.1). */
3540 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3541 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3542 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3543 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3545 h->poc_msb = h->prev_poc_msb;
3546 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3548 field_poc[1] = h->poc_msb + h->poc_lsb;
3549 if(s->picture_structure == PICT_FRAME)
3550 field_poc[1] += h->delta_poc_bottom;
3551 }else if(h->sps.poc_type==1){
3552 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3555 if(h->sps.poc_cycle_length != 0)
3556 abs_frame_num = h->frame_num_offset + h->frame_num;
3560 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3563 expected_delta_per_poc_cycle = 0;
3564 for(i=0; i < h->sps.poc_cycle_length; i++)
3565 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3567 if(abs_frame_num > 0){
3568 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3569 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3571 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3572 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3573 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3577 if(h->nal_ref_idc == 0)
3578 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3580 field_poc[0] = expectedpoc + h->delta_poc[0];
3581 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3583 if(s->picture_structure == PICT_FRAME)
3584 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC derived directly from frame_num (details elided here). */
3586 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Publish per-field POCs; a pure field picture only sets its own parity. */
3595 if(s->picture_structure != PICT_BOTTOM_FIELD)
3596 s->current_picture_ptr->field_poc[0]= field_poc[0];
3597 if(s->picture_structure != PICT_TOP_FIELD)
3598 s->current_picture_ptr->field_poc[1]= field_poc[1];
3599 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3606 * initialize scan tables
/* Build the zigzag/field scan tables, permuted to match the (possibly
 * SIMD-specific) IDCT's coefficient layout; the _q0 variants always use
 * the unpermuted tables for lossless (transform-bypass) blocks. */
3608 static void init_scan_tables(H264Context *h){
3609 MpegEncContext * const s = &h->s;
/* C IDCT → identity permutation; otherwise apply T() to each index. */
3611 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3612 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3613 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3615 for(i=0; i<16; i++){
3616 #define T(x) (x>>2) | ((x<<2) & 0xF)
3617 h->zigzag_scan[i] = T(zigzag_scan[i]);
3618 h-> field_scan[i] = T( field_scan[i]);
3622 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3623 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3624 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3625 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3626 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3628 for(i=0; i<64; i++){
/* 8x8 permutation: swap the row/column halves of the 6-bit index. */
3629 #define T(x) (x>>3) | ((x&7)<<3)
3630 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3631 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3632 h->field_scan8x8[i] = T(field_scan8x8[i]);
3633 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3637 if(h->sps.transform_bypass){ //FIXME same ugly
3638 h->zigzag_scan_q0 = zigzag_scan;
3639 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3640 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3641 h->field_scan_q0 = field_scan;
3642 h->field_scan8x8_q0 = field_scan8x8;
3643 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3645 h->zigzag_scan_q0 = h->zigzag_scan;
3646 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3647 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3648 h->field_scan_q0 = h->field_scan;
3649 h->field_scan8x8_q0 = h->field_scan8x8;
3650 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3655 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-picture decoding state (current picture, POC
 * prediction state, reference lists and dequant tables) from the master
 * context into a worker thread's context before slice decoding. */
3657 static void clone_slice(H264Context *dst, H264Context *src)
3659 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3660 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3661 dst->s.current_picture = src->s.current_picture;
3662 dst->s.linesize = src->s.linesize;
3663 dst->s.uvlinesize = src->s.uvlinesize;
3664 dst->s.first_field = src->s.first_field;
3666 dst->prev_poc_msb = src->prev_poc_msb;
3667 dst->prev_poc_lsb = src->prev_poc_lsb;
3668 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3669 dst->prev_frame_num = src->prev_frame_num;
3670 dst->short_ref_count = src->short_ref_count;
/* Pointer arrays only — the Pictures themselves are shared, not copied. */
3672 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3673 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3674 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3675 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3677 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3678 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3682 * decodes a slice header.
3683 * This will also call MPV_common_init() and frame_start() as needed.
3685 * @param h h264context
3686 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3688 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): this extract drops many lines throughout (returns, braces,
 * several parse steps); the sequence below is the slice_header() syntax
 * order but is not complete as shown. */
3690 static int decode_slice_header(H264Context *h, H264Context *h0){
3691 MpegEncContext * const s = &h->s;
3692 MpegEncContext * const s0 = &h0->s;
3693 unsigned int first_mb_in_slice;
3694 unsigned int pps_id;
3695 int num_ref_idx_active_override_flag;
3696 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3697 unsigned int slice_type, tmp, i, j;
3698 int default_ref_list_done = 0;
3699 int last_pic_structure;
3701 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheap 2-tap qpel filters for non-reference frames. */
3703 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3704 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3705 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3707 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3708 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3711 first_mb_in_slice= get_ue_golomb(&s->gb);
3713 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3714 h0->current_slice = 0;
3715 if (!s0->first_field)
3716 s->current_picture_ptr= NULL;
3719 slice_type= get_ue_golomb(&s->gb);
3721 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* Values >=5 mean "type fixed for the whole picture" (value-5 applies). */
3726 h->slice_type_fixed=1;
3728 h->slice_type_fixed=0;
3730 slice_type= slice_type_map[ slice_type ];
3731 if (slice_type == FF_I_TYPE
3732 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3733 default_ref_list_done = 1;
3735 h->slice_type= slice_type;
3736 h->slice_type_nos= slice_type & 3;
3738 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3739 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3740 av_log(h->s.avctx, AV_LOG_ERROR,
3741 "B picture before any references, skipping\n");
3745 pps_id= get_ue_golomb(&s->gb);
3746 if(pps_id>=MAX_PPS_COUNT){
3747 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3750 if(!h0->pps_buffers[pps_id]) {
3751 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3754 h->pps= *h0->pps_buffers[pps_id];
3756 if(!h0->sps_buffers[h->pps.sps_id]) {
3757 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3760 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* Rebuild dequant tables only when the active PPS actually changed. */
3762 if(h == h0 && h->dequant_coeff_pps != pps_id){
3763 h->dequant_coeff_pps = pps_id;
3764 init_dequant_tables(h);
3767 s->mb_width= h->sps.mb_width;
3768 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3770 h->b_stride= s->mb_width*4;
3771 h->b8_stride= s->mb_width*2;
/* Apply SPS cropping to derive the displayed width/height. */
3773 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3774 if(h->sps.frame_mbs_only_flag)
3775 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3777 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3779 if (s->context_initialized
3780 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3782 return -1; // width / height changed during parallelized decoding
3786 if (!s->context_initialized) {
3788 return -1; // we cant (re-)initialize context during parallel decoding
3789 if (MPV_common_init(s) < 0)
3793 init_scan_tables(h);
/* Set up one H264Context per worker thread, sharing the MpegEncContext
 * thread contexts allocated by MPV_common_init(). */
3796 for(i = 1; i < s->avctx->thread_count; i++) {
3798 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3799 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3800 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3803 init_scan_tables(c);
3807 for(i = 0; i < s->avctx->thread_count; i++)
3808 if(context_init(h->thread_context[i]) < 0)
3811 s->avctx->width = s->width;
3812 s->avctx->height = s->height;
3813 s->avctx->sample_aspect_ratio= h->sps.sar;
3814 if(!s->avctx->sample_aspect_ratio.den)
3815 s->avctx->sample_aspect_ratio.den = 1;
3817 if(h->sps.timing_info_present_flag){
3818 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Work around old x264 builds (<44) that wrote a halved time_scale. */
3819 if(h->x264_build > 0 && h->x264_build < 44)
3820 s->avctx->time_base.den *= 2;
3821 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3822 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3826 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3829 h->mb_aff_frame = 0;
3830 last_pic_structure = s0->picture_structure;
3831 if(h->sps.frame_mbs_only_flag){
3832 s->picture_structure= PICT_FRAME;
3834 if(get_bits1(&s->gb)) { //field_pic_flag
3835 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3837 s->picture_structure= PICT_FRAME;
3838 h->mb_aff_frame = h->sps.mb_aff;
3841 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3843 if(h0->current_slice == 0){
/* Frame-num gap: synthesize missing reference frames so later pictures
 * can still resolve their references (lossy concealment). */
3844 while(h->frame_num != h->prev_frame_num &&
3845 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3846 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3848 h->prev_frame_num++;
3849 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3850 s->current_picture_ptr->frame_num= h->prev_frame_num;
3851 execute_ref_pic_marking(h, NULL, 0);
3854 /* See if we have a decoded first field looking for a pair... */
3855 if (s0->first_field) {
3856 assert(s0->current_picture_ptr);
3857 assert(s0->current_picture_ptr->data[0]);
3858 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3860 /* figure out if we have a complementary field pair */
3861 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3863 * Previous field is unmatched. Don't display it, but let it
3864 * remain for reference if marked as such.
3866 s0->current_picture_ptr = NULL;
3867 s0->first_field = FIELD_PICTURE;
3870 if (h->nal_ref_idc &&
3871 s0->current_picture_ptr->reference &&
3872 s0->current_picture_ptr->frame_num != h->frame_num) {
3874 * This and previous field were reference, but had
3875 * different frame_nums. Consider this field first in
3876 * pair. Throw away previous field except for reference
3879 s0->first_field = 1;
3880 s0->current_picture_ptr = NULL;
3883 /* Second field in complementary pair */
3884 s0->first_field = 0;
3889 /* Frame or first field in a potentially complementary pair */
3890 assert(!s0->current_picture_ptr);
3891 s0->first_field = FIELD_PICTURE;
3894 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3895 s0->first_field = 0;
3902 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3904 assert(s->mb_num == s->mb_width * s->mb_height);
3905 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3906 first_mb_in_slice >= s->mb_num){
3907 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3910 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3911 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3912 if (s->picture_structure == PICT_BOTTOM_FIELD)
3913 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3914 assert(s->mb_y < s->mb_height);
/* Field pictures use doubled pic nums (2*frame_num+1 for the own parity). */
3916 if(s->picture_structure==PICT_FRAME){
3917 h->curr_pic_num= h->frame_num;
3918 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3920 h->curr_pic_num= 2*h->frame_num + 1;
3921 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3924 if(h->nal_unit_type == NAL_IDR_SLICE){
3925 get_ue_golomb(&s->gb); /* idr_pic_id */
3928 if(h->sps.poc_type==0){
3929 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3931 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3932 h->delta_poc_bottom= get_se_golomb(&s->gb);
3936 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3937 h->delta_poc[0]= get_se_golomb(&s->gb);
3939 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3940 h->delta_poc[1]= get_se_golomb(&s->gb);
3945 if(h->pps.redundant_pic_cnt_present){
3946 h->redundant_pic_count= get_ue_golomb(&s->gb);
3949 //set defaults, might be overridden a few lines later
3950 h->ref_count[0]= h->pps.ref_count[0];
3951 h->ref_count[1]= h->pps.ref_count[1];
3953 if(h->slice_type_nos != FF_I_TYPE){
3954 if(h->slice_type_nos == FF_B_TYPE){
3955 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3957 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3959 if(num_ref_idx_active_override_flag){
3960 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3961 if(h->slice_type_nos==FF_B_TYPE)
3962 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned-wrap trick: catches both 0 and >32 in one comparison. */
3964 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3965 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3966 h->ref_count[0]= h->ref_count[1]= 1;
3970 if(h->slice_type_nos == FF_B_TYPE)
3977 if(!default_ref_list_done){
3978 fill_default_ref_list(h);
3981 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3984 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3985 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3986 pred_weight_table(h);
3987 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3988 implicit_weight_table(h);
3993 decode_ref_pic_marking(h0, &s->gb);
3996 fill_mbaff_ref_list(h);
3998 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3999 tmp = get_ue_golomb(&s->gb);
4001 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4004 h->cabac_init_idc= tmp;
4007 h->last_qscale_diff = 0;
4008 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4010 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4014 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4015 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4016 //FIXME qscale / qp ... stuff
4017 if(h->slice_type == FF_SP_TYPE){
4018 get_bits1(&s->gb); /* sp_for_switch_flag */
4020 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4021 get_se_golomb(&s->gb); /* slice_qs_delta */
4024 h->deblocking_filter = 1;
4025 h->slice_alpha_c0_offset = 0;
4026 h->slice_beta_offset = 0;
4027 if( h->pps.deblocking_filter_parameters_present ) {
4028 tmp= get_ue_golomb(&s->gb);
4030 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4033 h->deblocking_filter= tmp;
/* Bitstream 0 means "filter on", 1 means "off" — invert to a boolean. */
4034 if(h->deblocking_filter < 2)
4035 h->deblocking_filter^= 1; // 1<->0
4037 if( h->deblocking_filter ) {
4038 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4039 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4043 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4044 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4045 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4046 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4047 h->deblocking_filter= 0;
/* Cross-slice deblocking cannot be parallelized across contexts. */
4049 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4050 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4051 /* Cheat slightly for speed:
4052 Do not bother to deblock across slices. */
4053 h->deblocking_filter = 2;
4055 h0->max_contexts = 1;
4056 if(!h0->single_decode_warning) {
4057 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4058 h0->single_decode_warning = 1;
4061 return 1; // deblocking switched inside frame
/* NOTE(review): in the complete file the two lines below are inside an
 * '#if 0' FMO block (the '?' bit count is an upstream placeholder); the
 * preprocessor guards were dropped by this extract — do not "fix" the '?'
 * without restoring the guards. */
4066 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4067 slice_group_change_cycle= get_bits(&s->gb, ?);
4070 h0->last_slice_type = slice_type;
4071 h->slice_num = ++h0->current_slice;
/* Precompute ref-index → frame-number mapping used by the loop filter. */
4074 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4078 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4079 +(h->ref_list[j][i].reference&3);
4082 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4083 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4085 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4086 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4088 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4090 av_get_pict_type_char(h->slice_type),
4091 pps_id, h->frame_num,
4092 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4093 h->ref_count[0], h->ref_count[1],
4095 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4097 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4098 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Reads the CAVLC "level_prefix" syntax element: a unary code whose value is
 * the number of leading zero bits before the first 1 bit (H.264 clause 9.2.2).
 * NOTE(review): this file is a sampled excerpt — the declarations of buf/log
 * and the trailing return statement are elided (embedded numbering jumps
 * 4108→4112 and stops at 4123). The debug log below prints log-1, which is
 * presumably the value returned — confirm against the full source. */
4108 static inline int get_level_prefix(GetBitContext *gb){
4112     OPEN_READER(re, gb);
4113     UPDATE_CACHE(re, gb);
         /* buf holds the next up-to-32 bits, MSB-aligned */
4114     buf=GET_CACHE(re, gb);
         /* position of the first set bit from the MSB, counted as bits consumed */
4116     log= 32 - av_log2(buf);
4118     print_bin(buf>>(32-log), log);
4119     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
         /* consume the zeros plus the terminating 1 bit */
4122     LAST_SKIP_BITS(re, gb, log);
4123     CLOSE_READER(re, gb);
/* Decides whether the 8x8 transform may be used for the current macroblock:
 * every 8x8 sub-partition must be a full SUB_8X8 block, and DIRECT
 * sub-partitions additionally require sps.direct_8x8_inference_flag.
 * NOTE(review): excerpt — the loop header over i and the return statements
 * are elided here (numbering jumps 4128→4131). */
4128 static inline int get_dct8x8_allowed(H264Context *h){
4131         if(!IS_SUB_8X8(h->sub_mb_type[i])
4132            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4139  * decodes a residual block.
4140  * @param n block index
4141  * @param scantable scantable
4142  * @param max_coeff number of coefficients in the block
4143  * @return <0 if an error occurred
/* CAVLC residual decoding (H.264 clause 9.2): coeff_token, trailing-one
 * signs, adaptive level decoding, total_zeros and run_before.
 * NOTE(review): excerpt — several branch/closing lines are elided
 * (embedded numbering has gaps); code below is kept byte-identical. */
4145 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4146     MpegEncContext * const s = &h->s;
     /* maps predicted non-zero count (0..16) to one of the 4 coeff_token VLC tables */
4147     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4149     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4151     //FIXME put trailing_onex into the context
     /* coeff_token packs (total_coeff<<2)|trailing_ones; chroma DC has its own
      * VLC, the others select a table from the neighbour-predicted count */
4153     if(n == CHROMA_DC_BLOCK_INDEX){
4154         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4155         total_coeff= coeff_token>>2;
4157         if(n == LUMA_DC_BLOCK_INDEX){
4158             total_coeff= pred_non_zero_count(h, 0);
4159             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4160             total_coeff= coeff_token>>2;
4162             total_coeff= pred_non_zero_count(h, n);
4163             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4164             total_coeff= coeff_token>>2;
4165             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4169     //FIXME set last_non_zero?
4173     if(total_coeff > (unsigned)max_coeff) {
4174         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4178     trailing_ones= coeff_token&3;
4179     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4180     assert(total_coeff<=16);
     /* trailing ones: only a sign bit each (0 -> +1, 1 -> -1) */
4182     for(i=0; i<trailing_ones; i++){
4183         level[i]= 1 - 2*get_bits1(gb);
4187         int level_code, mask;
4188         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4189         int prefix= get_level_prefix(gb);
4191         //first coefficient has suffix_length equal to 0 or 1
4192         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4194             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4196             level_code= (prefix<<suffix_length); //part
4197         }else if(prefix==14){
4199             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4201             level_code= prefix + get_bits(gb, 4); //part
     /* prefix >= 15: escape coding with a (prefix-3)-bit suffix */
4203             level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4204             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4206                 level_code += (1<<(prefix-3))-4096;
4209         if(trailing_ones < 3) level_code += 2;
     /* fold level_code into a signed level: even -> positive, odd -> negative */
4214         mask= -(level_code&1);
4215         level[i]= (((2+level_code)>>1) ^ mask) - mask;
4218         //remaining coefficients have suffix_length > 0
4219         for(;i<total_coeff;i++) {
         /* suffix_length grows once a level exceeds its threshold */
4220             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4221             prefix = get_level_prefix(gb);
4223                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4225                 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4227                     level_code += (1<<(prefix-3))-4096;
4229             mask= -(level_code&1);
4230             level[i]= (((2+level_code)>>1) ^ mask) - mask;
4231             if(level_code > suffix_limit[suffix_length])
4236     if(total_coeff == max_coeff)
     /* total_zeros: number of zeros interleaved before the last coefficient */
4239         if(n == CHROMA_DC_BLOCK_INDEX)
4240             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4242             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     /* scatter levels back to block[] positions via the scantable; this first
      * branch is the qmul==NULL (no dequant) path — see 4264 for the other */
4245     coeff_num = zeros_left + total_coeff - 1;
4246     j = scantable[coeff_num];
4248         block[j] = level[0];
4249         for(i=1;i<total_coeff;i++) {
4252             else if(zeros_left < 7){
4253                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4255                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4257             zeros_left -= run_before;
4258             coeff_num -= 1 + run_before;
4259             j= scantable[ coeff_num ];
     /* dequantizing path: scale each level by qmul[j] with rounding */
4264         block[j] = (level[0] * qmul[j] + 32)>>6;
4265         for(i=1;i<total_coeff;i++) {
4268             else if(zeros_left < 7){
4269                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4271                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4273             zeros_left -= run_before;
4274             coeff_num -= 1 + run_before;
4275             j= scantable[ coeff_num ];
4277             block[j]= (level[i] * qmul[j] + 32)>>6;
     /* bitstream error: runs consumed more zeros than total_zeros declared */
4282         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For MBAFF: when the field/frame decoding flag is not coded (e.g. skip runs),
 * infer it from the left neighbour if that belongs to this slice, otherwise
 * from the top neighbour. NOTE(review): excerpt — the final ternary arm
 * (fallback when neither neighbour matches, line 4296) is elided. */
4289 static void predict_field_decoding_flag(H264Context *h){
4290     MpegEncContext * const s = &h->s;
4291     const int mb_xy= h->mb_xy;
4292     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4293                 ? s->current_picture.mb_type[mb_xy-1]
4294                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4295                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4297     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4301  * decodes a P_SKIP or B_SKIP macroblock
/* Skipped MBs carry no residual: clear the non-zero-count state, synthesize
 * the motion (direct prediction for B, P-skip prediction for P) and write
 * the reconstructed MB state back. NOTE(review): excerpt — some lines
 * (mb_type initialisation, MBAFF condition around 4312, etc.) are elided. */
4303 static void decode_mb_skip(H264Context *h){
4304     MpegEncContext * const s = &h->s;
4305     const int mb_xy= h->mb_xy;
     /* no coded coefficients in a skipped MB */
4308     memset(h->non_zero_count[mb_xy], 0, 16);
4309     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4312         mb_type|= MB_TYPE_INTERLACED;
4314     if( h->slice_type_nos == FF_B_TYPE )
4316         // just for fill_caches. pred_direct_motion will set the real mb_type
4317         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4319         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4320         pred_direct_motion(h, &mb_type);
4321         mb_type|= MB_TYPE_SKIP;
     /* P_SKIP: one 16x16 partition using list0 ref 0 with predicted MV */
4326         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4328         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4329         pred_pskip_motion(h, &mx, &my);
4330         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4331         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4334     write_back_motion(h, mb_type);
4335     s->current_picture.mb_type[mb_xy]= mb_type;
4336     s->current_picture.qscale_table[mb_xy]= s->qscale;
4337     h->slice_table[ mb_xy ]= h->slice_num;
4338     h->prev_mb_skipped= 1;
4342  * decodes a macroblock
4343  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CAVLC macroblock decode: skip-run handling, mb_type, intra pred modes
 * or inter motion/refs, CBP, qscale delta and all residual blocks.
 * NOTE(review): excerpt — many lines are elided throughout (embedded
 * numbering has gaps); code kept byte-identical, comments only added. */
4345 static int decode_mb_cavlc(H264Context *h){
4346     MpegEncContext * const s = &h->s;
4348     int partition_count;
4349     unsigned int mb_type, cbp;
4350     int dct8x8_allowed= h->pps.transform_8x8_mode;
4352     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4354     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4356     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4357     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
     /* ---- skip-run handling (P/B slices only) ---- */
4359     if(h->slice_type_nos != FF_I_TYPE){
4360         if(s->mb_skip_run==-1)
4361             s->mb_skip_run= get_ue_golomb(&s->gb);
4363         if (s->mb_skip_run--) {
4364             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4365                 if(s->mb_skip_run==0)
4366                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4368                     predict_field_decoding_flag(h);
4375             if( (s->mb_y&1) == 0 )
4376                 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4379     h->prev_mb_skipped= 0;
     /* ---- mb_type: remap the slice-relative code to the common tables ---- */
4381     mb_type= get_ue_golomb(&s->gb);
4382     if(h->slice_type_nos == FF_B_TYPE){
4384             partition_count= b_mb_type_info[mb_type].partition_count;
4385             mb_type= b_mb_type_info[mb_type].type;
4388             goto decode_intra_mb;
4390     }else if(h->slice_type_nos == FF_P_TYPE){
4392             partition_count= p_mb_type_info[mb_type].partition_count;
4393             mb_type= p_mb_type_info[mb_type].type;
4396             goto decode_intra_mb;
4399        assert(h->slice_type_nos == FF_I_TYPE);
4400         if(h->slice_type == FF_SI_TYPE && mb_type)
4404             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4408         cbp= i_mb_type_info[mb_type].cbp;
4409         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4410         mb_type= i_mb_type_info[mb_type].type;
4414         mb_type |= MB_TYPE_INTERLACED;
4416     h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- PCM macroblock: raw samples, bypass prediction/transform ---- */
4418     if(IS_INTRA_PCM(mb_type)){
4421         // We assume these blocks are very rare so we do not optimize it.
4422         align_get_bits(&s->gb);
4424         // The pixels are stored in the same order as levels in h->mb array.
4425         for(x=0; x < (CHROMA ? 384 : 256); x++){
4426             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4429         // In deblocking, the quantizer is 0
4430         s->current_picture.qscale_table[mb_xy]= 0;
4431         // All coeffs are present
4432         memset(h->non_zero_count[mb_xy], 16, 16);
4434         s->current_picture.mb_type[mb_xy]= mb_type;
     /* MBAFF field MBs address twice the refs (top/bottom fields) */
4439         h->ref_count[0] <<= 1;
4440         h->ref_count[1] <<= 1;
4443     fill_caches(h, mb_type, 0);
     /* ---- intra prediction modes ---- */
4446     if(IS_INTRA(mb_type)){
4448 //            init_top_left_availability(h);
4449         if(IS_INTRA4x4(mb_type)){
4452             if(dct8x8_allowed && get_bits1(&s->gb)){
4453                 mb_type |= MB_TYPE_8x8DCT;
4457 //                fill_intra4x4_pred_table(h);
4458             for(i=0; i<16; i+=di){
4459                 int mode= pred_intra_mode(h, i);
4461                 if(!get_bits1(&s->gb)){
                    /* rem_intra4x4_pred_mode skips over the predicted mode */
4462                     const int rem_mode= get_bits(&s->gb, 3);
4463                     mode = rem_mode + (rem_mode >= mode);
4467                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4469                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4471             write_back_intra_pred_mode(h);
4472             if( check_intra4x4_pred_mode(h) < 0)
4475             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4476             if(h->intra16x16_pred_mode < 0)
4480             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4483             h->chroma_pred_mode= pred_mode;
     /* ---- 8x8 sub-partitioned inter MB ---- */
4485     }else if(partition_count==4){
4486         int i, j, sub_partition_count[4], list, ref[2][4];
4488         if(h->slice_type_nos == FF_B_TYPE){
4490                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4491                 if(h->sub_mb_type[i] >=13){
4492                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4495                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4496                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4498             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4499                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4500                 pred_direct_motion(h, &mb_type);
4501                 h->ref_cache[0][scan8[4]] =
4502                 h->ref_cache[1][scan8[4]] =
4503                 h->ref_cache[0][scan8[12]] =
4504                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4507             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4509                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4510                 if(h->sub_mb_type[i] >=4){
4511                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4514                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4515                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
        /* reference indices for each 8x8 partition, per list */
4519         for(list=0; list<h->list_count; list++){
4520             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4522                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4523                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4524                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4526                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4538             dct8x8_allowed = get_dct8x8_allowed(h);
        /* motion vectors for each sub-partition */
4540         for(list=0; list<h->list_count; list++){
4542                 if(IS_DIRECT(h->sub_mb_type[i])) {
4543                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4546                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4547                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4549                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4550                     const int sub_mb_type= h->sub_mb_type[i];
4551                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4552                     for(j=0; j<sub_partition_count[i]; j++){
4554                         const int index= 4*i + block_width*j;
4555                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4556                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4557                         mx += get_se_golomb(&s->gb);
4558                         my += get_se_golomb(&s->gb);
4559                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
                        /* replicate the MV to every 4x4 cell the partition covers */
4561                         if(IS_SUB_8X8(sub_mb_type)){
4563                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4565                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4566                         }else if(IS_SUB_8X4(sub_mb_type)){
4567                             mv_cache[ 1 ][0]= mx;
4568                             mv_cache[ 1 ][1]= my;
4569                         }else if(IS_SUB_4X8(sub_mb_type)){
4570                             mv_cache[ 8 ][0]= mx;
4571                             mv_cache[ 8 ][1]= my;
4573                             mv_cache[ 0 ][0]= mx;
4574                             mv_cache[ 0 ][1]= my;
4577                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4583     }else if(IS_DIRECT(mb_type)){
4584         pred_direct_motion(h, &mb_type);
4585         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
4587         int list, mx, my, i;
4588          //FIXME we should set ref_idx_l? to 0 if we use that later ...
4589         if(IS_16X16(mb_type)){
4590             for(list=0; list<h->list_count; list++){
4592                     if(IS_DIR(mb_type, 0, list)){
4593                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4594                         if(val >= h->ref_count[list]){
4595                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4599                    val= LIST_NOT_USED&0xFF;
4600                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4602             for(list=0; list<h->list_count; list++){
4604                 if(IS_DIR(mb_type, 0, list)){
4605                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4606                     mx += get_se_golomb(&s->gb);
4607                     my += get_se_golomb(&s->gb);
4608                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4610                     val= pack16to32(mx,my);
4613                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4616         else if(IS_16X8(mb_type)){
4617             for(list=0; list<h->list_count; list++){
4620                         if(IS_DIR(mb_type, i, list)){
4621                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4622                             if(val >= h->ref_count[list]){
4623                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4627                             val= LIST_NOT_USED&0xFF;
4628                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4631             for(list=0; list<h->list_count; list++){
4634                     if(IS_DIR(mb_type, i, list)){
4635                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4636                         mx += get_se_golomb(&s->gb);
4637                         my += get_se_golomb(&s->gb);
4638                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4640                         val= pack16to32(mx,my);
4643                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4647             assert(IS_8X16(mb_type));
4648             for(list=0; list<h->list_count; list++){
4651                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4652                             val= get_te0_golomb(&s->gb, h->ref_count[list]);
4653                             if(val >= h->ref_count[list]){
4654                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4658                             val= LIST_NOT_USED&0xFF;
4659                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4662             for(list=0; list<h->list_count; list++){
4665                     if(IS_DIR(mb_type, i, list)){
4666                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4667                         mx += get_se_golomb(&s->gb);
4668                         my += get_se_golomb(&s->gb);
4669                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4671                         val= pack16to32(mx,my);
4674                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4680     if(IS_INTER(mb_type))
4681         write_back_motion(h, mb_type);
     /* ---- coded block pattern (I16x16 derives it from mb_type) ---- */
4683     if(!IS_INTRA16x16(mb_type)){
4684         cbp= get_ue_golomb(&s->gb);
4686             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4691             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4692             else                     cbp= golomb_to_inter_cbp   [cbp];
4694             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4695             else                     cbp= golomb_to_inter_cbp_gray[cbp];
4700     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4701         if(get_bits1(&s->gb)){
4702             mb_type |= MB_TYPE_8x8DCT;
4703             h->cbp_table[mb_xy]= cbp;
4706     s->current_picture.mb_type[mb_xy]= mb_type;
     /* ---- residuals: luma DC/AC then chroma DC/AC ---- */
4708     if(cbp || IS_INTRA16x16(mb_type)){
4709         int i8x8, i4x4, chroma_idx;
4711         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4712         const uint8_t *scan, *scan8x8, *dc_scan;
4714 //        fill_non_zero_count_cache(h);
4716         if(IS_INTERLACED(mb_type)){
4717             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4718             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4719             dc_scan= luma_dc_field_scan;
4721             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4722             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4723             dc_scan= luma_dc_zigzag_scan;
4726         dquant= get_se_golomb(&s->gb);
4728         if( dquant > 25 || dquant < -26 ){
4729             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
        /* qscale wraps modulo 52 per the spec's mb_qp_delta semantics */
4733         s->qscale += dquant;
4734         if(((unsigned)s->qscale) > 51){
4735             if(s->qscale<0) s->qscale+= 52;
4736             else            s->qscale-= 52;
4739         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4740         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4741         if(IS_INTRA16x16(mb_type)){
4742             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4743                 return -1; //FIXME continue if partitioned and other return -1 too
4746             assert((cbp&15) == 0 || (cbp&15) == 15);
4749                 for(i8x8=0; i8x8<4; i8x8++){
4750                     for(i4x4=0; i4x4<4; i4x4++){
4751                         const int index= i4x4 + 4*i8x8;
                        /* I16x16 AC blocks: 15 coeffs, DC position skipped (scan + 1) */
4752                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4758                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4761             for(i8x8=0; i8x8<4; i8x8++){
4762                 if(cbp & (1<<i8x8)){
4763                     if(IS_8x8DCT(mb_type)){
4764                         DCTELEM *buf = &h->mb[64*i8x8];
4766                         for(i4x4=0; i4x4<4; i4x4++){
4767                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4768                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4771                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4772                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4774                         for(i4x4=0; i4x4<4; i4x4++){
4775                             const int index= i4x4 + 4*i8x8;
4777                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4783                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4784                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4790             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4791                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4797             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4798                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4799                 for(i4x4=0; i4x4<4; i4x4++){
4800                     const int index= 16 + 4*chroma_idx + i4x4;
4801                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4807             uint8_t * const nnz= &h->non_zero_count_cache[0];
4808             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4809             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4812         uint8_t * const nnz= &h->non_zero_count_cache[0];
4813         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4814         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4815         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4817     s->current_picture.qscale_table[mb_xy]= s->qscale;
4818     write_back_non_zero_count(h);
     /* undo the MBAFF ref_count doubling from 4439/4440 */
4821         h->ref_count[0] >>= 1;
4822         h->ref_count[1] >>= 1;
/* CABAC mb_field_decoding_flag: context 70..72 selected by how many of the
 * left/top MB-pair neighbours (same slice) are field-coded.
 * NOTE(review): excerpt — the ctx increment lines after each if are elided. */
4828 static int decode_cabac_field_decoding_flag(H264Context *h) {
4829     MpegEncContext * const s = &h->s;
4830     const int mb_x = s->mb_x;
     /* address the top MB of the current MB pair */
4831     const int mb_y = s->mb_y & ~1;
4832     const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4833     const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4835     unsigned int ctx = 0;
4837     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4840     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4844     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* CABAC intra mb_type: returns 0 for I_4x4, 25 for I_PCM, or 1..24 for an
 * I_16x16 variant encoded as 1 + cbp_luma*12 + cbp_chroma*4 + pred_mode bits.
 * ctx_base selects the state offset (differs for I/P/B slices); intra_slice
 * enables the neighbour-derived context. NOTE(review): excerpt — the ctx
 * increments and some closing braces are elided. */
4847 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4848     uint8_t *state= &h->cabac_state[ctx_base];
4852         MpegEncContext * const s = &h->s;
4853         const int mba_xy = h->left_mb_xy[0];
4854         const int mbb_xy = h->top_mb_xy;
     /* context grows for each same-slice neighbour that is not I_4x4 */
4856         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4858         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4860         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4861             return 0;   /* I4x4 */
4864         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4865             return 0;   /* I4x4 */
     /* terminate symbol distinguishes PCM from I16x16 */
4868     if( get_cabac_terminate( &h->cabac ) )
4869         return 25;  /* PCM */
4871     mb_type = 1; /* I16x16 */
4872     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4873     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4874         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4875     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4876     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* CABAC mb_type decoding, dispatching on slice type: I slices delegate to
 * decode_cabac_intra_mb_type; P slices decode a small binarization tree;
 * B slices decode the full B mb_type tree (states 27..31).
 * NOTE(review): excerpt — ctx increments and some braces are elided. */
4880 static int decode_cabac_mb_type( H264Context *h ) {
4881     MpegEncContext * const s = &h->s;
4883     if( h->slice_type_nos == FF_I_TYPE ) {
4884         return decode_cabac_intra_mb_type(h, 3, 1);
4885     } else if( h->slice_type_nos == FF_P_TYPE ) {
4886         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4888             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4889                 /* P_L0_D16x16, P_8x8 */
4890                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4892                 /* P_L0_D8x16, P_L0_D16x8 */
4893                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4896             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4898     } else if( h->slice_type_nos == FF_B_TYPE ) {
4899         const int mba_xy = h->left_mb_xy[0];
4900         const int mbb_xy = h->top_mb_xy;
        /* context from non-direct same-slice neighbours */
4904         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4906         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4909         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4910             return 0; /* B_Direct_16x16 */
4912         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4913             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
        /* 4-bit suffix distinguishes the remaining B types */
4916         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4917         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4918         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4919         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4921             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4922         else if( bits == 13 ) {
4923             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4924         } else if( bits == 14 )
4925             return 11; /* B_L1_L0_8x16 */
4926         else if( bits == 15 )
4927             return 22; /* B_8x8 */
4929         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4930         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4932         /* TODO SI/SP frames? */
/* CABAC mb_skip_flag: context 0..2 from non-skipped same-slice left/top
 * neighbours; B slices use a separate state group (presumably ctx += 13 on
 * the elided line after 4968 — confirm against full source). MBAFF needs
 * special neighbour addressing across MB pairs. */
4937 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4938     MpegEncContext * const s = &h->s;
4942     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4943         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
        /* pick the neighbour in the same field parity when field-coded */
4946             && h->slice_table[mba_xy] == h->slice_num
4947             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4948             mba_xy += s->mb_stride;
4950             mbb_xy = mb_xy - s->mb_stride;
4952                 && h->slice_table[mbb_xy] == h->slice_num
4953                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4954                 mbb_xy -= s->mb_stride;
4956             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4958         int mb_xy = h->mb_xy;
4960         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4963     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4965     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4968     if( h->slice_type_nos == FF_B_TYPE )
4970     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC intra4x4 prediction mode: prev_intra4x4_pred_mode_flag (state 68)
 * selects the predicted mode; otherwise a 3-bit rem_intra4x4_pred_mode
 * (state 69) is read and remapped to skip the predicted mode.
 * NOTE(review): excerpt — the early "return pred_mode" and final returns
 * are elided. */
4973 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4976     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
     /* 3-bit fixed-length code, LSB first */
4979     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4980     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4981     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
     /* skip over the predicted mode in the remaining-mode numbering */
4983     if( mode >= pred_mode )
/* CABAC intra_chroma_pred_mode: truncated unary (max 3) with the first bin's
 * context (64..66) derived from which neighbours use a non-DC chroma mode.
 * NOTE(review): excerpt — ctx increments and intermediate returns elided. */
4989 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4990     const int mba_xy = h->left_mb_xy[0];
4991     const int mbb_xy = h->top_mb_xy;
4995     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4996     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4999     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5002     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
     /* remaining bins of the truncated unary code share state 64+3 */
5005     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5007     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC luma CBP: four bins (one per 8x8 block, states 73..76), each context
 * built from whether the spatially adjacent 8x8 blocks (from the left/top
 * MB's cbp or already-decoded bits of this cbp) were coded. */
5013 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5014     int cbp_b, cbp_a, ctx, cbp = 0;
     /* -1 marks "neighbour unavailable" so the !(… & bit) tests read as coded */
5016     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5017     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
5019     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5020     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5021     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5022     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5023     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5024     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5025     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5026     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC chroma CBP: 0 (none), 1 (DC only) or 2 (DC+AC). Two bins with
 * contexts (states 77+) derived from the neighbours' chroma cbp nibbles. */
5029 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5033     cbp_a = (h->left_cbp>>4)&0x03;
5034     cbp_b = (h-> top_cbp>>4)&0x03;
5037     if( cbp_a > 0 ) ctx++;
5038     if( cbp_b > 0 ) ctx += 2;
5039     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
     /* second bin: neighbours with full (DC+AC) chroma cbp shift the context */
5043     if( cbp_a == 2 ) ctx++;
5044     if( cbp_b == 2 ) ctx += 2;
5045     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC mb_qp_delta: unary code (states 60+, first-bin context depends on
 * whether the previous MB had a non-zero delta), then mapped back to a
 * signed delta. NOTE(review): excerpt — the ctx schedule inside the loop
 * and the positive-value return are elided; only the odd->negative mapping
 * at 5067 is visible. */
5047 static int decode_cabac_mb_dqp( H264Context *h) {
5051     if( h->last_qscale_diff != 0 )
5054     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5060         if(val > 102) //prevent infinite loop
     /* odd unary values map to negative deltas */
5067         return -(val + 1)/2;
/* CABAC P-slice sub_mb_type: small binarization tree over states 21..23.
 * NOTE(review): excerpt — the leaf return statements are elided. */
5069 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5070     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5072     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5074     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC B-slice sub_mb_type: binarization tree over states 36..39 covering
 * B_Direct_8x8 (0) through the 4x4 variants.
 * NOTE(review): excerpt — the base assignments to `type` between branches
 * are elided. */
5078 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5080     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5081         return 0;   /* B_Direct_8x8 */
5082     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5083         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5085     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5086         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5087             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5090     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5091     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag: one bin whose context (states 399..401)
 * counts how many neighbours already use the 8x8 transform. */
5095 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5096     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC ref_idx: unary code (states 54+) with the first-bin context built
 * from the left/top 4x4 blocks' reference indices; direct-coded neighbours
 * are excluded in B slices. Capped at 32 to guard against corrupt streams.
 * NOTE(review): excerpt — ctx construction lines and the ctx updates inside
 * the while loop are elided. */
5099 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5100     int refa = h->ref_cache[list][scan8[n] - 1];
5101     int refb = h->ref_cache[list][scan8[n] - 8];
5105     if( h->slice_type_nos == FF_B_TYPE) {
5106         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5108         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5117     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5123     if(ref >= 32 /*h->ref_list[list]*/){ //FIXME we should check the real number of entries, but we don't know that
5124         av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5125         return 0; //FIXME we should return -1 and check the return everywhere
/* CABAC motion vector difference for component l (0=x ctxbase 40, 1=y 47):
 * first-bin context from the summed magnitudes of the neighbours' mvds,
 * unary prefix up to 9, exp-golomb suffix in bypass mode, then a bypass
 * sign bit. NOTE(review): excerpt — several ctx/return lines are elided;
 * the >32/>2 amvd thresholds select among contexts 0..2. */
5131 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5132     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5133                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5134     int ctxbase = (l == 0) ? 40 : 47;
5139     else if( amvd > 32 )
5144     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
     /* unary part: up to 9 context-coded bins */
5149     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
     /* exp-golomb suffix, bypass-coded */
5157         while( get_cabac_bypass( &h->cabac ) ) {
5161             av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5166             if( get_cabac_bypass( &h->cabac ) )
5170     return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context for the coded_block_flag of block (cat, idx): nza/nzb are the
 * left/top neighbours' coded status (from cbp bits for DC categories, from
 * the non_zero_count cache for AC/luma4x4), folded into ctx + 4*cat.
 * NOTE(review): excerpt — the branching structure around the assignments
 * and the ctx computation from nza/nzb are partly elided. */
5173 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5179             nza = h->left_cbp&0x100;
5180             nzb = h-> top_cbp&0x100;
5182             nza = (h->left_cbp>>(6+idx))&0x01;
5183             nzb = (h-> top_cbp>>(6+idx))&0x01;
5187             nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5188             nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5190             assert(cat == 1 || cat == 2);
5191             nza = h->non_zero_count_cache[scan8[idx] - 1];
5192             nzb = h->non_zero_count_cache[scan8[idx] - 8];
5202     return ctx + 4 * cat;
/* Maps each 8x8-block scan position (0..62) to the context offset used for
 * its last_significant_coeff_flag (H.264 table 9-43 ctxIdxInc for 8x8). */
5205 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5206     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5207     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5208     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5209     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block (DC or AC, 4x4 or 8x8) with CABAC:
 * 1) coded_block_flag, 2) significance map, 3) level magnitudes + signs,
 * dequantizing with 'qmul' on the fly (qmul == NULL for DC blocks).
 * 'is_dc' is a compile-time constant so the _dc/_nondc specializations
 * fold away the dead branches.
 * NOTE(review): partial extract — several original lines are missing. */
5212 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* Context base offsets per block category; row [1] is for field (MBAFF)
 * coded macroblocks, which use a separate context set. */
5213     static const int significant_coeff_flag_offset[2][6] = {
5214       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5215       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5217     static const int last_coeff_flag_offset[2][6] = {
5218       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5219       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5221     static const int coeff_abs_level_m1_offset[6] = {
5222         227+0, 227+10, 227+20, 227+30, 227+39, 426
/* Per-position significance context offsets for 8x8 blocks
 * ([0] = frame coding, [1] = field coding). */
5224     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5225       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5226         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5227         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5228        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5229       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5230         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5231         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5232         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5234     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5235      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5236      * map node ctx => cabac ctx for level=1 */
5237     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5238     /* map node ctx => cabac ctx for level>1 */
5239     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5240     static const uint8_t coeff_abs_level_transition[2][8] = {
5241     /* update node ctx after decoding a level=1 */
5242         { 1, 2, 3, 3, 4, 5, 6, 7 },
5243     /* update node ctx after decoding a level>1 */
5244         { 4, 4, 4, 4, 5, 6, 7, 7 }
5250     int coeff_count = 0;
5253     uint8_t *significant_coeff_ctx_base;
5254     uint8_t *last_coeff_ctx_base;
5255     uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC state into a local struct so the hot loops work on
 * stack (register-friendly) storage; written back before returning. */
5258 #define CABAC_ON_STACK
5260 #ifdef CABAC_ON_STACK
5263     cc.range     = h->cabac.range;
5264     cc.low       = h->cabac.low;
5265     cc.bytestream= h->cabac.bytestream;
5267 #define CC &h->cabac
5271     /* cat: 0-> DC 16x16  n = 0
5272      *      1-> AC 16x16  n = luma4x4idx
5273      *      2-> Luma4x4   n = luma4x4idx
5274      *      3-> DC Chroma n = iCbCr
5275      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5276      *      5-> Luma8x8   n = 4 * luma8x8idx
5279     /* read coded block flag */
5280     if( is_dc || cat != 5 ) {
5281         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* Block entirely zero: record that in the nnz cache and bail out early. */
5284                 h->non_zero_count_cache[scan8[16+n]] = 0;
5286                 h->non_zero_count_cache[scan8[n]] = 0;
5289 #ifdef CABAC_ON_STACK
5290             h->cabac.range     = cc.range     ;
5291             h->cabac.low       = cc.low       ;
5292             h->cabac.bytestream= cc.bytestream;
5298     significant_coeff_ctx_base = h->cabac_state
5299         + significant_coeff_flag_offset[MB_FIELD][cat];
5300     last_coeff_ctx_base = h->cabac_state
5301         + last_coeff_flag_offset[MB_FIELD][cat];
5302     abs_level_m1_ctx_base = h->cabac_state
5303         + coeff_abs_level_m1_offset[cat];
/* Significance map: one significant_coeff_flag per position, plus a
 * last_coeff_flag that terminates the scan early. */
5305     if( !is_dc && cat == 5 ) {
5306 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5307         for(last= 0; last < coefs; last++) { \
5308             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5309             if( get_cabac( CC, sig_ctx )) { \
5310                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5311                 index[coeff_count++] = last; \
5312                 if( get_cabac( CC, last_ctx ) ) { \
5318         if( last == max_coeff -1 ) {\
5319             index[coeff_count++] = last;\
5321         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths for the significance scan when available. */
5322 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5323         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5325         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5327         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5329         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5332     assert(coeff_count > 0);
/* Record the non-zero coefficient count in cbp_table / nnz cache so the
 * deblocking filter and neighbour prediction can use it. */
5336             h->cbp_table[h->mb_xy] |= 0x100;
5338             h->cbp_table[h->mb_xy] |= 0x40 << n;
5341             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5343             h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5345             assert( cat == 1 || cat == 2 );
5346             h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Levels are decoded in reverse scan order; node_ctx tracks the
 * abslevel1/abslevelgt1 state machine described above. */
5351         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5353             int j= scantable[index[--coeff_count]];
5355             if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign bit only; DC blocks skip dequantization. */
5356                 node_ctx = coeff_abs_level_transition[0][node_ctx];
5358                     block[j] = get_cabac_bypass_sign( CC, -1);
5360                     block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5364                 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5365                 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* Unary prefix up to 14, then Exp-Golomb-style bypass suffix for >= 15. */
5367                 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5371                 if( coeff_abs >= 15 ) {
5373                     while( get_cabac_bypass( CC ) ) {
5379                         coeff_abs += coeff_abs + get_cabac_bypass( CC );
5385                     block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5387                     block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5390         } while( coeff_count );
/* Write the (possibly advanced) CABAC state back to the context. */
5391 #ifdef CABAC_ON_STACK
5392             h->cabac.range     = cc.range     ;
5393             h->cabac.low       = cc.low       ;
5394             h->cabac.bytestream= cc.bytestream;
/* Non-CONFIG_SMALL builds: two specializations of the residual decoder so
 * the is_dc branches are resolved at compile time (speed over size). */
5399 #ifndef CONFIG_SMALL
/* DC-block specialization (cat 0 / cat 3, qmul unused). */
5400 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5401     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* AC/luma-block specialization (all other categories). */
5404 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5405     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher: CONFIG_SMALL builds call the generic internal decoder with a
 * runtime is_dc flag; otherwise route to the compile-time specializations
 * (categories 0 and 3 are the luma/chroma DC blocks). */
5409 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5411     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5413     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5414     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The simple frame-coded case is mb_xy - stride / mb_xy - 1; the MBAFF
 * branch below adjusts for field/frame macroblock pairs.
 * NOTE(review): partial extract — the surrounding MBAFF condition and some
 * branches are not visible here. */
5418 static inline void compute_mb_neighbors(H264Context *h)
5420     MpegEncContext * const s = &h->s;
5421     const int mb_xy = h->mb_xy;
5422     h->top_mb_xy     = mb_xy - s->mb_stride;
5423     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: addresses are computed per macroblock *pair*; whether a neighbour
 * pair is field- or frame-coded changes which of its two MBs is adjacent. */
5425         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5426         const int top_pair_xy      = pair_xy     - s->mb_stride;
5427         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5428         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5429         const int curr_mb_frame_flag = !MB_FIELD;
5430         const int bottom = (s->mb_y & 1);
5432                 ? !curr_mb_frame_flag // bottom macroblock
5433                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5435             h->top_mb_xy -= s->mb_stride;
5437         if (left_mb_frame_flag != curr_mb_frame_flag) {
5438             h->left_mb_xy[0] = pair_xy - 1;
/* Field pictures: rows are interleaved, so the vertical neighbour is two
 * macroblock rows up in frame coordinates. */
5440     } else if (FIELD_PICTURE) {
5441         h->top_mb_xy -= s->mb_stride;
5447  * decodes a macroblock
5448  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CABAC macroblock decode: skip flags, mb_type, intra prediction modes
 * or inter motion (refs + MVDs), cbp, dqp and all residual blocks.
 * NOTE(review): partial extract — many original lines are missing, so the
 * visible statements do not form complete control flow. */
5450 static int decode_mb_cabac(H264Context *h) {
5451     MpegEncContext * const s = &h->s;
5453     int mb_type, partition_count, cbp = 0;
5454     int dct8x8_allowed= h->pps.transform_8x8_mode;
5456     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5458     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5460     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip flag handling (P/B slices only) --- */
5461     if( h->slice_type_nos != FF_I_TYPE ) {
5463         /* a skipped mb needs the aff flag from the following mb */
5464         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5465             predict_field_decoding_flag(h);
5466         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5467             skip = h->next_mb_skipped;
5469             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5470         /* read skip flags */
5472             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5473                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5474                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5475                 if(h->next_mb_skipped)
5476                     predict_field_decoding_flag(h);
5478                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB CABAC-related state and return early (the
 * actual decode_mb_skip call is in an elided line). */
5483             h->cbp_table[mb_xy] = 0;
5484             h->chroma_pred_mode_table[mb_xy] = 0;
5485             h->last_qscale_diff = 0;
5492         if( (s->mb_y&1) == 0 )
5494                 decode_cabac_field_decoding_flag(h);
5497     h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5499     compute_mb_neighbors(h);
5500     if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5501         av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Translate the slice-relative mb_type index through the per-slice-type
 * tables into the internal MB_TYPE_* flags + partition count. */
5505     if( h->slice_type_nos == FF_B_TYPE ) {
5507             partition_count= b_mb_type_info[mb_type].partition_count;
5508             mb_type=         b_mb_type_info[mb_type].type;
5511             goto decode_intra_mb;
5513     } else if( h->slice_type_nos == FF_P_TYPE ) {
5515             partition_count= p_mb_type_info[mb_type].partition_count;
5516             mb_type=         p_mb_type_info[mb_type].type;
5519             goto decode_intra_mb;
5522        if(h->slice_type == FF_SI_TYPE && mb_type)
5524         assert(h->slice_type_nos == FF_I_TYPE);
5526         partition_count = 0;
5527         cbp= i_mb_type_info[mb_type].cbp;
5528         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5529         mb_type= i_mb_type_info[mb_type].type;
5532         mb_type |= MB_TYPE_INTERLACED;
5534     h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow; realign the CABAC decoder around them --- */
5536     if(IS_INTRA_PCM(mb_type)) {
5539         // We assume these blocks are very rare so we do not optimize it.
5540         // FIXME The two following lines get the bitstream position in the cabac
5541         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5542         ptr= h->cabac.bytestream;
5543         if(h->cabac.low&0x1) ptr--;
5545             if(h->cabac.low&0x1FF) ptr--;
5548         // The pixels are stored in the same order as levels in h->mb array.
5549         memcpy(h->mb, ptr, 256); ptr+=256;
5551             memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart CABAC after the raw PCM bytes. */
5554         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5556         // All blocks are present
5557         h->cbp_table[mb_xy] = 0x1ef;
5558         h->chroma_pred_mode_table[mb_xy] = 0;
5559         // In deblocking, the quantizer is 0
5560         s->current_picture.qscale_table[mb_xy]= 0;
5561         // All coeffs are present
5562         memset(h->non_zero_count[mb_xy], 16, 16);
5563         s->current_picture.mb_type[mb_xy]= mb_type;
5564         h->last_qscale_diff = 0;
/* MBAFF pairs reference fields: temporarily double the ref counts. */
5569         h->ref_count[0] <<= 1;
5570         h->ref_count[1] <<= 1;
5573     fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5575     if( IS_INTRA( mb_type ) ) {
5577         if( IS_INTRA4x4( mb_type ) ) {
5578             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5579                 mb_type |= MB_TYPE_8x8DCT;
5580                 for( i = 0; i < 16; i+=4 ) {
5581                     int pred = pred_intra_mode( h, i );
5582                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5583                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5586                 for( i = 0; i < 16; i++ ) {
5587                     int pred = pred_intra_mode( h, i );
5588                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5590                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5593             write_back_intra_pred_mode(h);
5594             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5596             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5597             if( h->intra16x16_pred_mode < 0 ) return -1;
5600         h->chroma_pred_mode_table[mb_xy] =
5601         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5603         pred_mode= check_intra_pred_mode( h, pred_mode );
5604         if( pred_mode < 0 ) return -1;
5605         h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partitioned inter macroblock --- */
5607     } else if( partition_count == 4 ) {
5608         int i, j, sub_partition_count[4], list, ref[2][4];
5610         if( h->slice_type_nos == FF_B_TYPE ) {
5611             for( i = 0; i < 4; i++ ) {
5612                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5613                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5614                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5616             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5617                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5618                 pred_direct_motion(h, &mb_type);
5619                 h->ref_cache[0][scan8[4]] =
5620                 h->ref_cache[1][scan8[4]] =
5621                 h->ref_cache[0][scan8[12]] =
5622                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5623                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5624                     for( i = 0; i < 4; i++ )
5625                         if( IS_DIRECT(h->sub_mb_type[i]) )
5626                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5630             for( i = 0; i < 4; i++ ) {
5631                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5632                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5633                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 partition (skipping direct ones). */
5637         for( list = 0; list < h->list_count; list++ ) {
5638                 for( i = 0; i < 4; i++ ) {
5639                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5640                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5641                         if( h->ref_count[list] > 1 )
5642                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5648                                                         h->ref_cache[list][ scan8[4*i]+1 ]=
5649                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5654             dct8x8_allowed = get_dct8x8_allowed(h);
/* MVDs for every sub-partition; replicate mv/mvd into the cache cells the
 * sub-partition covers so later prediction sees a uniform 4x4 grid. */
5656         for(list=0; list<h->list_count; list++){
5658                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5659                 if(IS_DIRECT(h->sub_mb_type[i])){
5660                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5664                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5665                     const int sub_mb_type= h->sub_mb_type[i];
5666                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5667                     for(j=0; j<sub_partition_count[i]; j++){
5670                         const int index= 4*i + block_width*j;
5671                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5672                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5673                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5675                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5676                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5677                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5679                         if(IS_SUB_8X8(sub_mb_type)){
5681                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5683                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5686                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5688                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5689                         }else if(IS_SUB_8X4(sub_mb_type)){
5690                             mv_cache[ 1 ][0]= mx;
5691                             mv_cache[ 1 ][1]= my;
5693                             mvd_cache[ 1 ][0]= mx - mpx;
5694                             mvd_cache[ 1 ][1]= my - mpy;
5695                         }else if(IS_SUB_4X8(sub_mb_type)){
5696                             mv_cache[ 8 ][0]= mx;
5697                             mv_cache[ 8 ][1]= my;
5699                             mvd_cache[ 8 ][0]= mx - mpx;
5700                             mvd_cache[ 8 ][1]= my - mpy;
5702                         mv_cache[ 0 ][0]= mx;
5703                         mv_cache[ 0 ][1]= my;
5705                         mvd_cache[ 0 ][0]= mx - mpx;
5706                         mvd_cache[ 0 ][1]= my - mpy;
5709                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5710                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5711                     p[0] = p[1] = p[8] = p[9] = 0;
5712                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct 16x16 --- */
5716     } else if( IS_DIRECT(mb_type) ) {
5717         pred_direct_motion(h, &mb_type);
5718         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5719         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5720         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5722         int list, mx, my, i, mpx, mpy;
5723         if(IS_16X16(mb_type)){
5724             for(list=0; list<h->list_count; list++){
5725                 if(IS_DIR(mb_type, 0, list)){
5726                     const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5727                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5729                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5731             for(list=0; list<h->list_count; list++){
5732                 if(IS_DIR(mb_type, 0, list)){
5733                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5735                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5736                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5737                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5739                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5740                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5742                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5745         else if(IS_16X8(mb_type)){
5746             for(list=0; list<h->list_count; list++){
5748                         if(IS_DIR(mb_type, i, list)){
5749                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5750                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5752                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5755             for(list=0; list<h->list_count; list++){
5757                     if(IS_DIR(mb_type, i, list)){
5758                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5759                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5760                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5761                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5763                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5764                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5766                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5767                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5772             assert(IS_8X16(mb_type));
5773             for(list=0; list<h->list_count; list++){
5775                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5776                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5777                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5779                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5782             for(list=0; list<h->list_count; list++){
5784                     if(IS_DIR(mb_type, i, list)){
5785                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5786                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5787                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5789                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5790                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5791                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5793                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5794                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5801     if( IS_INTER( mb_type ) ) {
5802         h->chroma_pred_mode_table[mb_xy] = 0;
5803         write_back_motion( h, mb_type );
/* --- coded block pattern --- */
5806     if( !IS_INTRA16x16( mb_type ) ) {
5807         cbp  = decode_cabac_mb_cbp_luma( h );
5809             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5812     h->cbp_table[mb_xy] = h->cbp = cbp;
5814     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5815         if( decode_cabac_mb_transform_size( h ) )
5816             mb_type |= MB_TYPE_8x8DCT;
5818     s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual data: dqp, then DC/AC/luma/chroma blocks --- */
5820     if( cbp || IS_INTRA16x16( mb_type ) ) {
5821         const uint8_t *scan, *scan8x8, *dc_scan;
5822         const uint32_t *qmul;
/* Pick frame or field scan order; *_q0 variants are for qscale==0. */
5825         if(IS_INTERLACED(mb_type)){
5826             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5827             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5828             dc_scan= luma_dc_field_scan;
5830             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5831             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5832             dc_scan= luma_dc_zigzag_scan;
5835         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5836         if( dqp == INT_MIN ){
5837             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec. */
5841         if(((unsigned)s->qscale) > 51){
5842             if(s->qscale<0) s->qscale+= 52;
5843             else            s->qscale-= 52;
5845         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5846         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5848         if( IS_INTRA16x16( mb_type ) ) {
5850             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5851             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5854                 qmul = h->dequant4_coeff[0][s->qscale];
5855                 for( i = 0; i < 16; i++ ) {
5856                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5857                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5860                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5864             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5865                 if( cbp & (1<<i8x8) ) {
5866                     if( IS_8x8DCT(mb_type) ) {
5867                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5868                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5870                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5871                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5872                             const int index = 4*i8x8 + i4x4;
5873                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5875                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5876 //STOP_TIMER("decode_residual")
5880                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5881                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3) then chroma AC (cat 4) per plane. */
5888             for( c = 0; c < 2; c++ ) {
5889                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5890                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5896             for( c = 0; c < 2; c++ ) {
5897                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5898                 for( i = 0; i < 4; i++ ) {
5899                     const int index = 16 + 4 * c + i;
5900                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5901                     decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5905             uint8_t * const nnz= &h->non_zero_count_cache[0];
5906             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5907             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear the whole nnz cache and reset dqp. */
5910         uint8_t * const nnz= &h->non_zero_count_cache[0];
5911         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5912         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5913         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5914         h->last_qscale_diff = 0;
5917     s->current_picture.qscale_table[mb_xy]= s->qscale;
5918     write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling from above. */
5921         h->ref_count[0] >>= 1;
5922         h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels). bS<4 uses the DSP helper with
 * per-4-pixel tc0 clipping; bS==4 (intra edge) uses the strong in-loop
 * filter written out below.
 * NOTE(review): partial extract — some control-flow lines are elided. */
5929 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5931     const int index_a = qp + h->slice_alpha_c0_offset;
5932     const int alpha = (alpha_table+52)[index_a];
5933     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 disables filtering for that 4-pixel group. */
5938             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5939         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5941         /* 16px edge length, because bS=4 is triggered by being at
5942          * the edge of an intra MB, so all 4 bS are the same */
5943         for( d = 0; d < 16; d++ ) {
5944             const int p0 = pix[-1];
5945             const int p1 = pix[-2];
5946             const int p2 = pix[-3];
5948             const int q0 = pix[0];
5949             const int q1 = pix[1];
5950             const int q2 = pix[2];
5952             if( FFABS( p0 - q0 ) < alpha &&
5953                 FFABS( p1 - p0 ) < beta &&
5954                 FFABS( q1 - q0 ) < beta ) {
/* Strong filter (spec 8.7.2.4): 3-tap/5-tap smoothing on each side. */
5956                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5957                     if( FFABS( p2 - p0 ) < beta)
5959                         const int p3 = pix[-4];
5961                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5962                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5963                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5966                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5968                     if( FFABS( q2 - q0 ) < beta)
5970                         const int q3 = pix[3];
5972                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5973                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5974                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5977                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak variant when the edge gradient is too large for the strong filter. */
5981                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5982                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5984                 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: bS<4 uses tc-clipped filtering, bS==4
 * uses the intra (strong) chroma filter; both via DSP function pointers. */
5990 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5992     const int index_a = qp + h->slice_alpha_c0_offset;
5993     const int alpha = (alpha_table+52)[index_a];
5994     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma uses tc0+1 (spec); tc[i]==0 disables that group. */
5999             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6000         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6002         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock a vertical luma edge between MBAFF macroblock pairs: each of the
 * 16 rows can have its own bS (8 entries) and qp (2 entries, selected per
 * row), so the edge is filtered scalar, row by row.
 * NOTE(review): partial extract — some lines are elided. */
6006 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6008     for( i = 0; i < 16; i++, pix += stride) {
6014         int bS_index = (i >> 1);
6017             bS_index |= (i & 1);
6020         if( bS[bS_index] == 0 ) {
/* Row-dependent qp: field MBs split top/bottom, frame MBs alternate. */
6024         qp_index = MB_FIELD ? (i >> 3) : (i&1);
6025         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6026         alpha = (alpha_table+52)[index_a];
6027         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): clipped delta on p0/q0, optional p1/q1 fixup. */
6029         if( bS[bS_index] < 4 ) {
6030             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6031             const int p0 = pix[-1];
6032             const int p1 = pix[-2];
6033             const int p2 = pix[-3];
6034             const int q0 = pix[0];
6035             const int q1 = pix[1];
6036             const int q2 = pix[2];
6038             if( FFABS( p0 - q0 ) < alpha &&
6039                 FFABS( p1 - p0 ) < beta &&
6040                 FFABS( q1 - q0 ) < beta ) {
6044                 if( FFABS( p2 - p0 ) < beta ) {
6045                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6048                 if( FFABS( q2 - q0 ) < beta ) {
6049                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6053                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6054                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6055                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6056                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4, intra edge) — same math as filter_mb_edgev. */
6059             const int p0 = pix[-1];
6060             const int p1 = pix[-2];
6061             const int p2 = pix[-3];
6063             const int q0 = pix[0];
6064             const int q1 = pix[1];
6065             const int q2 = pix[2];
6067             if( FFABS( p0 - q0 ) < alpha &&
6068                 FFABS( p1 - p0 ) < beta &&
6069                 FFABS( q1 - q0 ) < beta ) {
6071                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6072                     if( FFABS( p2 - p0 ) < beta)
6074                         const int p3 = pix[-4];
6076                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6077                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6078                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6081                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6083                     if( FFABS( q2 - q0 ) < beta)
6085                         const int q3 = pix[3];
6087                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6088                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6089                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6092                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6096                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6097                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6099                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: 8 rows, per-row bS/qp,
 * normal (tc-clipped) vs strong filter selected by bS.
 * NOTE(review): partial extract — some lines are elided. */
6104 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6106     for( i = 0; i < 8; i++, pix += stride) {
6114         if( bS[bS_index] == 0 ) {
6118         qp_index = MB_FIELD ? (i >> 2) : (i&1);
6119         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6120         alpha = (alpha_table+52)[index_a];
6121         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6123         if( bS[bS_index] < 4 ) {
/* Chroma normal filter: tc = tc0+1, only p0/q0 are modified. */
6124             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6125             const int p0 = pix[-1];
6126             const int p1 = pix[-2];
6127             const int q0 = pix[0];
6128             const int q1 = pix[1];
6130             if( FFABS( p0 - q0 ) < alpha &&
6131                 FFABS( p1 - p0 ) < beta &&
6132                 FFABS( q1 - q0 ) < beta ) {
6133                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6135                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6136                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6137                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Chroma strong filter (bS == 4). */
6140             const int p0 = pix[-1];
6141             const int p1 = pix[-2];
6142             const int q0 = pix[0];
6143             const int q1 = pix[1];
6145             if( FFABS( p0 - q0 ) < alpha &&
6146                 FFABS( p1 - p0 ) < beta &&
6147                 FFABS( q1 - q0 ) < beta ) {
6149                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6150                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6151                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: same algorithm as filter_mb_edgev but
 * with pixel offsets expressed in strides (pix_next) instead of columns.
 * NOTE(review): partial extract — some control-flow lines are elided. */
6157 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6159     const int index_a = qp + h->slice_alpha_c0_offset;
6160     const int alpha = (alpha_table+52)[index_a];
6161     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6162     const int pix_next  = stride;
/* bS < 4: tc-clipped DSP filter; tc[i] = -1 disables a 4-pixel group. */
6167             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6168         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6170         /* 16px edge length, see filter_mb_edgev */
6171         for( d = 0; d < 16; d++ ) {
6172             const int p0 = pix[-1*pix_next];
6173             const int p1 = pix[-2*pix_next];
6174             const int p2 = pix[-3*pix_next];
6175             const int q0 = pix[0];
6176             const int q1 = pix[1*pix_next];
6177             const int q2 = pix[2*pix_next];
6179             if( FFABS( p0 - q0 ) < alpha &&
6180                 FFABS( p1 - p0 ) < beta &&
6181                 FFABS( q1 - q0 ) < beta ) {
6183                 const int p3 = pix[-4*pix_next];
6184                 const int q3 = pix[ 3*pix_next];
/* Strong filter, gated on the gradient condition as in the spec. */
6186                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6187                     if( FFABS( p2 - p0 ) < beta) {
6189                         pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6190                         pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6191                         pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6194                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6196                     if( FFABS( q2 - q0 ) < beta) {
6198                         pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6199                         pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6200                         pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6203                         pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6207                     pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6208                     pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6210                 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: tc-clipped DSP filter for bS<4,
 * intra (strong) chroma filter for bS==4. */
6217 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6219     const int index_a = qp + h->slice_alpha_c0_offset;
6220     const int alpha = (alpha_table+52)[index_a];
6221     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma uses tc0+1; tc[i]==0 disables that group. */
6226             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6227         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6229         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast path of the per-macroblock H.264 in-loop deblocking filter.
 * Computes boundary strengths with the DSP helper and applies the edge
 * filters directly; falls back to the generic filter_mb() whenever a
 * condition it cannot handle is met (first MB row/column, per-MB chroma
 * QP differences, slice-boundary deblocking mode 2, no ASM helper).
 * NOTE(review): this excerpt is line-sampled; several original lines
 * (closing braces, else branches) are not visible here. */
6233 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6234 MpegEncContext * const s = &h->s;
/* In a bottom-field picture the first row with a top neighbour is row 1. */
6235 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6237 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Unsupported configurations: delegate to the full (slow) filter. */
6241 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6243 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6244 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6245 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6248 assert(!FRAME_MBAFF);
6250 mb_type = s->current_picture.mb_type[mb_xy];
/* The filter uses the average QP across each edge: qp0 averages with the
 * left neighbour, qp1 with the top neighbour (likewise qpc0/qpc1 for chroma). */
6251 qp = s->current_picture.qscale_table[mb_xy];
6252 qp0 = s->current_picture.qscale_table[mb_xy-1];
6253 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6254 qpc = get_chroma_qp( h, 0, qp );
6255 qpc0 = get_chroma_qp( h, 0, qp0 );
6256 qpc1 = get_chroma_qp( h, 0, qp1 );
6257 qp0 = (qp + qp0 + 1) >> 1;
6258 qp1 = (qp + qp1 + 1) >> 1;
6259 qpc0 = (qpc + qpc0 + 1) >> 1;
6260 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold the filter provably changes no pixels, so the
 * entire macroblock can be skipped. */
6261 qp_thresh = 15 - h->slice_alpha_c0_offset;
6262 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6263 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra macroblocks get fixed maximal strengths: 4 on macroblock edges
 * (3 for horizontal MB edges in field pictures), 3 on internal edges. */
6266 if( IS_INTRA(mb_type) ) {
6267 int16_t bS4[4] = {4,4,4,4};
6268 int16_t bS3[4] = {3,3,3,3};
6269 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every second internal luma edge is filtered. */
6270 if( IS_8x8DCT(mb_type) ) {
6271 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6272 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6273 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6274 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6276 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6277 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6278 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6279 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6280 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6281 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6282 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6283 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6285 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6286 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6287 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6288 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6289 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6290 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6291 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6292 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter macroblock: strengths depend on residuals/MVs; each row of four
 * int16_t strengths is also addressable as one aligned uint64_t. */
6295 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6296 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6298 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6300 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 mark internal edges whose MV comparison can be skipped
 * because the partitioning guarantees identical MVs across them. */
6302 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6303 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6304 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6305 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6307 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6308 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6309 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6310 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Edges against intra-coded neighbours are forced to maximum strength. */
6312 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6313 bSv[0][0] = 0x0004000400040004ULL;
6314 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6315 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): luma filter on one edge plus chroma on even
 * edges (chroma is half resolution); dir 0 = vertical edges. */
6317 #define FILTER(hv,dir,edge)\
6318 if(bSv[dir][edge]) {\
6319 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6321 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6322 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6328 } else if( IS_8x8DCT(mb_type) ) {
/* Generic per-macroblock H.264 deblocking filter (spec section 8.7).
 * Handles every configuration the fast path cannot: MBAFF pairs, field
 * pictures, CAVLC+8x8 transform NNZ fixups, slice-boundary rules.
 * Computes boundary strengths bS per 4-pixel edge segment, then applies
 * the luma/chroma edge filters with the averaged edge QP.
 * NOTE(review): this excerpt is line-sampled; many original lines
 * (declarations, closing braces, else branches) are not visible here. */
6347 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6348 MpegEncContext * const s = &h->s;
6349 const int mb_xy= mb_x + mb_y*s->mb_stride;
6350 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced blocks compare vertical MV components at half the usual limit. */
6351 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6352 int first_vertical_edge_done = 0;
6355 //for sufficiently low qp, filtering wouldn't do anything
6356 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6358 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6359 int qp = s->current_picture.qscale_table[mb_xy];
6361 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6362 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6367 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6368 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6369 int top_type, left_type[2];
6370 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6371 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6372 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the NNZ cache from the coded-block-pattern bits for 8x8-DCT
 * neighbours, since their per-4x4 NNZ values are not what bS needs. */
6374 if(IS_8x8DCT(top_type)){
6375 h->non_zero_count_cache[4+8*0]=
6376 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6377 h->non_zero_count_cache[6+8*0]=
6378 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6380 if(IS_8x8DCT(left_type[0])){
6381 h->non_zero_count_cache[3+8*1]=
6382 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6384 if(IS_8x8DCT(left_type[1])){
6385 h->non_zero_count_cache[3+8*3]=
6386 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6389 if(IS_8x8DCT(mb_type)){
6390 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6391 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6393 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6394 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6396 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6397 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6399 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6400 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6405 // left mb is in picture
6406 && h->slice_table[mb_xy-1] != 255
6407 // and current and left pair do not have the same interlaced type
6408 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6409 // and left mb is in the same slice if deblocking_filter == 2
6410 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6411 /* First vertical edge is different in MBAFF frames
6412 * There are 8 different bS to compute and 2 different Qp
6414 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6415 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6420 int mb_qp, mbn0_qp, mbn1_qp;
6422 first_vertical_edge_done = 1;
6424 if( IS_INTRA(mb_type) )
6425 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6427 for( i = 0; i < 8; i++ ) {
/* Field MB: 4 segments per neighbour field; frame MB: alternate fields. */
6428 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6430 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6432 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6433 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6434 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two QP sets: one per neighbouring field MB (luma qp, chroma bqp/rqp). */
6441 mb_qp = s->current_picture.qscale_table[mb_xy];
6442 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6443 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6444 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6445 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6446 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6447 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6448 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6449 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6450 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6451 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6452 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6453 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6456 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6457 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6458 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6459 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6460 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6462 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6463 for( dir = 0; dir < 2; dir++ )
6466 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6467 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6468 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6469 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* slice_table == 255 marks "no neighbour": skip the MB-boundary edge. */
6470 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6472 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6473 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6474 // how often to recheck mv-based bS when iterating between edges
6475 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6476 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6477 // how often to recheck mv-based bS when iterating along each edge
6478 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6480 if (first_vertical_edge_done) {
6482 first_vertical_edge_done = 0;
6485 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6488 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6489 && !IS_INTERLACED(mb_type)
6490 && IS_INTERLACED(mbm_type)
6492 // This is a special case in the norm where the filtering must
6493 // be done twice (one each of the field) even if we are in a
6494 // frame macroblock.
6496 static const int nnz_idx[4] = {4,5,6,3};
6497 unsigned int tmp_linesize = 2 * linesize;
6498 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6499 int mbn_xy = mb_xy - 2 * s->mb_stride;
6504 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6505 if( IS_INTRA(mb_type) ||
6506 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6507 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6509 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6510 for( i = 0; i < 4; i++ ) {
6511 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6512 mbn_nnz[nnz_idx[i]] != 0 )
6518 // Do not use s->qscale as luma quantizer because it has not the same
6519 // value in IPCM macroblocks.
6520 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6521 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6522 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6523 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6524 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6525 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6526 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6527 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main edge loop: edge 0 is the MB boundary (neighbour mbm), edges 1..3
 * are internal; with 8x8 DCT odd internal edges are skipped. */
6534 for( edge = start; edge < edges; edge++ ) {
6535 /* mbn_xy: neighbor macroblock */
6536 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6537 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6538 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
6542 if( (edge&1) && IS_8x8DCT(mb_type) )
6545 if( IS_INTRA(mb_type) ||
6546 IS_INTRA(mbn_type) ) {
6549 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6550 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6559 bS[0] = bS[1] = bS[2] = bS[3] = value;
6564 if( edge & mask_edge ) {
6565 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6568 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6569 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut: when the partitioning lets one reference/MV
 * comparison decide the entire edge, compute bS once for all 4 segments. */
6572 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6573 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6574 int bn_idx= b_idx - (dir ? 8:1);
6577 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6578 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6579 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6580 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list before setting bS=1. */
6583 if(h->slice_type_nos == FF_B_TYPE && v){
6585 for( l = 0; !v && l < 2; l++ ) {
6587 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6588 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6589 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6593 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: compute bS per 4-pixel segment along the edge. */
6599 for( i = 0; i < 4; i++ ) {
6600 int x = dir == 0 ? edge : i;
6601 int y = dir == 0 ? i : edge;
6602 int b_idx= 8 + 4 + x + 8*y;
6603 int bn_idx= b_idx - (dir ? 8:1);
6605 if( h->non_zero_count_cache[b_idx] != 0 ||
6606 h->non_zero_count_cache[bn_idx] != 0 ) {
6612 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6613 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6614 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6615 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6621 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6623 for( l = 0; l < 2; l++ ) {
6625 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6626 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6627 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6636 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6641 // Do not use s->qscale as luma quantizer because it has not the same
6642 // value in IPCM macroblocks.
6643 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6644 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6645 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6646 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Apply the filters; chroma edges exist only at even luma edges (4:2:0). */
6648 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6649 if( (edge&1) == 0 ) {
6650 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6651 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6652 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6653 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6656 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6657 if( (edge&1) == 0 ) {
6658 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6659 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6660 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6661 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode one slice: the CABAC or CAVLC macroblock loop, plus the
 * (historically dead) data-partitioned decode_mb() path at the end.
 * Reports decoded/errored regions to the error concealment via
 * ff_er_add_slice(); returns 0 on a cleanly terminated slice, -1 on error.
 * FIX(review): line 6826 contained extraction-garbled tokens
 * "s->?gb" / "s->gb?.size_in_bits"; the stray '?' characters were removed
 * to match the identical checks on the sibling lines 6814/6827.
 * NOTE(review): this excerpt is line-sampled; several original lines
 * (loop headers, braces, returns) are not visible here. */
6668 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6669 MpegEncContext * const s = &h->s;
/* Partitioned frames conceal only AC errors; otherwise all error kinds. */
6670 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6674 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header. */
6678 align_get_bits( &s->gb );
6681 ff_init_cabac_states( &h->cabac);
6682 ff_init_cabac_decoder( &h->cabac,
6683 s->gb.buffer + get_bits_count(&s->gb)/8,
6684 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6685 /* calculate pre-state */
/* Initialize all 460 CABAC context states from the init tables,
 * scaled by the slice QP (spec 9.3.1.1). */
6686 for( i= 0; i < 460; i++ ) {
6688 if( h->slice_type_nos == FF_I_TYPE )
6689 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6691 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6694 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6696 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop: decode, reconstruct, check end-of-slice flag. */
6701 int ret = decode_mb_cabac(h);
6703 //STOP_TIMER("decode_mb_cabac")
6705 if(ret>=0) hl_decode_mb(h);
6707 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6710 if(ret>=0) ret = decode_mb_cabac(h);
6712 if(ret>=0) hl_decode_mb(h);
6715 eos = get_cabac_terminate( &h->cabac );
/* Bytestream overrun beyond 2 bytes means corrupt input. */
6717 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6718 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6723 if( ++s->mb_x >= s->mb_width ) {
6725 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6727 if(FIELD_OR_MBAFF_PICTURE) {
6732 if( eos || s->mb_y >= s->mb_height ) {
6733 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6734 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop. */
6741 int ret = decode_mb_cavlc(h);
6743 if(ret>=0) hl_decode_mb(h);
6745 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6747 ret = decode_mb_cavlc(h);
6749 if(ret>=0) hl_decode_mb(h);
6754 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6755 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6760 if(++s->mb_x >= s->mb_width){
6762 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6764 if(FIELD_OR_MBAFF_PICTURE) {
6767 if(s->mb_y >= s->mb_height){
6768 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly-consumed bitstream = clean slice end; otherwise mark error. */
6770 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6771 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6775 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6783 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6784 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6785 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Data-partitioned decode_mb() path — presumably disabled upstream
 * (note the stale "s->gb" without '&'); kept verbatim. TODO confirm. */
6798 for(;s->mb_y < s->mb_height; s->mb_y++){
6799 for(;s->mb_x < s->mb_width; s->mb_x++){
6800 int ret= decode_mb(h);
6805 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6811 if(++s->mb_x >= s->mb_width){
6813 if(++s->mb_y >= s->mb_height){
6814 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6815 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6819 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6826 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6827 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6832 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6839 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6842 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of 'size' bytes.
 * The first 16 bytes are the UUID; the rest is scanned for an x264
 * version banner so h->x264_build can drive bug workarounds.
 * Returns 0 (visible in full source; trailing lines sampled out here). */
6845 static int decode_unregistered_user_data(H264Context *h, int size){
6846 MpegEncContext * const s = &h->s;
6847 uint8_t user_data[16+256];
/* Read at most sizeof(user_data)-1 bytes, leaving room for a terminator. */
6853 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6854 user_data[i]= get_bits(&s->gb, 8);
/* Skip the 16-byte UUID and match the x264 banner to get its build number. */
6858 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6859 if(e==1 && build>=0)
6860 h->x264_build= build;
6862 if(s->avctx->debug & FF_DEBUG_BUGS)
6863 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any payload bytes beyond the local buffer. */
6866 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: a sequence of (type, size, payload) messages.
 * Type and size are each coded as runs of 0xFF bytes plus a final byte
 * (spec 7.3.2.3.1). Only unregistered user data is interpreted; all
 * other payload types are skipped. */
6871 static int decode_sei(H264Context *h){
6872 MpegEncContext * const s = &h->s;
/* Need at least 16 bits for another (type, size) pair. */
6874 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6879 type+= show_bits(&s->gb, 8);
6880 }while(get_bits(&s->gb, 8) == 255);
6884 size+= show_bits(&s->gb, 8);
6885 }while(get_bits(&s->gb, 8) == 255);
6889 if(decode_unregistered_user_data(h, size) < 0)
/* Unhandled SEI payload: skip 'size' bytes. */
6893 skip_bits(&s->gb, 8*size);
6896 //FIXME check bits here
6897 align_get_bits(&s->gb);
/* Parse (and discard) the VUI HRD parameters (spec E.1.2); nothing is
 * stored — the fields are only consumed to keep the bitstream position
 * correct for the following syntax elements. */
6903 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6904 MpegEncContext * const s = &h->s;
6906 cpb_count = get_ue_golomb(&s->gb) + 1;
6907 get_bits(&s->gb, 4); /* bit_rate_scale */
6908 get_bits(&s->gb, 4); /* cpb_size_scale */
/* One (rate, size, cbr) triple per coded picture buffer. */
6909 for(i=0; i<cpb_count; i++){
6910 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6911 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6912 get_bits1(&s->gb); /* cbr_flag */
6914 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6915 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6916 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6917 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the SPS VUI parameters (spec Annex E). Stores sample aspect
 * ratio, timing info and the bitstream restriction (num_reorder_frames)
 * into 'sps'; all other fields are read and discarded. Return value:
 * non-negative on success (error paths sampled out of this excerpt). */
6920 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6921 MpegEncContext * const s = &h->s;
6922 int aspect_ratio_info_present_flag;
6923 unsigned int aspect_ratio_idc;
6924 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6926 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6928 if( aspect_ratio_info_present_flag ) {
6929 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit num/den pair; all other
 * idc values index the predefined pixel_aspect table. */
6930 if( aspect_ratio_idc == EXTENDED_SAR ) {
6931 sps->sar.num= get_bits(&s->gb, 16);
6932 sps->sar.den= get_bits(&s->gb, 16);
6933 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6934 sps->sar= pixel_aspect[aspect_ratio_idc];
6936 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6943 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6945 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6946 get_bits1(&s->gb); /* overscan_appropriate_flag */
6949 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6950 get_bits(&s->gb, 3); /* video_format */
6951 get_bits1(&s->gb); /* video_full_range_flag */
6952 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6953 get_bits(&s->gb, 8); /* colour_primaries */
6954 get_bits(&s->gb, 8); /* transfer_characteristics */
6955 get_bits(&s->gb, 8); /* matrix_coefficients */
6959 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6960 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6961 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6964 sps->timing_info_present_flag = get_bits1(&s->gb);
6965 if(sps->timing_info_present_flag){
6966 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6967 sps->time_scale = get_bits_long(&s->gb, 32);
6968 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD blocks are parsed only for their side effect on the bit position. */
6971 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6972 if(nal_hrd_parameters_present_flag)
6973 decode_hrd_parameters(h, sps);
6974 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6975 if(vcl_hrd_parameters_present_flag)
6976 decode_hrd_parameters(h, sps);
6977 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6978 get_bits1(&s->gb); /* low_delay_hrd_flag */
6979 get_bits1(&s->gb); /* pic_struct_present_flag */
6981 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6982 if(sps->bitstream_restriction_flag){
6983 unsigned int num_reorder_frames;
6984 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6985 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6986 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6987 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6988 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6989 num_reorder_frames= get_ue_golomb(&s->gb);
6990 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* H.264 level limits cap the DPB at 16 frames; reject larger values. */
6992 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6993 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6997 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list of 'size' (16 or 64) entries (spec 7.3.2.1.1).
 * If the list is absent, copy 'fallback_list'; if the first delta makes
 * the first coefficient 0, use the JVT default list; otherwise decode
 * delta-coded values in zigzag order, repeating the last value once a
 * zero delta run begins. */
7003 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7004 const uint8_t *jvt_list, const uint8_t *fallback_list){
7005 MpegEncContext * const s = &h->s;
/* 'last' starts at 8 per the spec's nextScale/lastScale initialization. */
7006 int i, last = 8, next = 8;
7007 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7008 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7009 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7011 for(i=0;i<size;i++){
7013 next = (last + get_se_golomb(&s->gb)) & 0xff;
7014 if(!i && !next){ /* matrix not written, we use the preset one */
7015 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the remaining coefficients". */
7018 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rule (spec Table 7-2): the first list of each kind falls back
 * to the SPS lists (PPS case) or the defaults; subsequent lists fall
 * back to the previously decoded list of the same prediction type. */
7022 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7023 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7024 MpegEncContext * const s = &h->s;
/* PPS parsing with SPS matrices present: inherit from the SPS. */
7025 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7026 const uint8_t *fallback[4] = {
7027 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7028 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7029 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7030 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7032 if(get_bits1(&s->gb)){
7033 sps->scaling_matrix_present |= is_sps;
7034 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7035 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7036 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7037 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7038 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7039 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only when the 8x8 transform can be used. */
7040 if(is_sps || pps->transform_8x8_mode){
7041 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7042 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* No matrices in the bitstream: copy the SPS matrices wholesale. */
7044 } else if(fallback_sps) {
7045 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7046 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7051  * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates 'id' against 'max', zero-allocates a structure of 'size'
 * bytes into vec[id] if not already present, and returns the slot.
 * NOTE(review): the return-type line and parts of the body are sampled
 * out of this excerpt. */
7054 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7055 const size_t size, const char *name)
7058 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7063 vec[id] = av_mallocz(size);
7065 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL unit (spec 7.3.2.1) into
 * h->sps_buffers[sps_id]. Validates POC type, reference frame count,
 * picture dimensions and cropping; optionally parses VUI parameters.
 * NOTE(review): this excerpt is line-sampled; error returns and some
 * closing braces are not visible here. */
7070 static inline int decode_seq_parameter_set(H264Context *h){
7071 MpegEncContext * const s = &h->s;
7072 int profile_idc, level_idc;
7073 unsigned int sps_id, tmp, mb_width, mb_height;
7077 profile_idc= get_bits(&s->gb, 8);
7078 get_bits1(&s->gb); //constraint_set0_flag
7079 get_bits1(&s->gb); //constraint_set1_flag
7080 get_bits1(&s->gb); //constraint_set2_flag
7081 get_bits1(&s->gb); //constraint_set3_flag
7082 get_bits(&s->gb, 4); // reserved
7083 level_idc= get_bits(&s->gb, 8);
7084 sps_id= get_ue_golomb(&s->gb);
7086 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7090 sps->profile_idc= profile_idc;
7091 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and SPS scaling matrices. */
7093 if(sps->profile_idc >= 100){ //high profile
7094 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7095 if(sps->chroma_format_idc == 3)
7096 get_bits1(&s->gb); //residual_color_transform_flag
7097 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7098 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7099 sps->transform_bypass = get_bits1(&s->gb);
7100 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles: no matrices, 4:2:0 implied. */
7102 sps->scaling_matrix_present = 0;
7103 sps->chroma_format_idc= 1;
7106 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7107 sps->poc_type= get_ue_golomb(&s->gb);
7109 if(sps->poc_type == 0){ //FIXME #define
7110 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7111 } else if(sps->poc_type == 1){//FIXME #define
7112 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7113 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7114 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7115 tmp= get_ue_golomb(&s->gb);
/* Bound the cycle length by the fixed offset_for_ref_frame array size. */
7117 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7118 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7121 sps->poc_cycle_length= tmp;
7123 for(i=0; i<sps->poc_cycle_length; i++)
7124 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7125 }else if(sps->poc_type != 2){
7126 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7130 tmp= get_ue_golomb(&s->gb);
7131 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7132 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7135 sps->ref_frame_count= tmp;
7136 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7137 mb_width= get_ue_golomb(&s->gb) + 1;
7138 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb_{width,height} against integer overflow before the check. */
7139 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7140 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7141 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7144 sps->mb_width = mb_width;
7145 sps->mb_height= mb_height;
7147 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7148 if(!sps->frame_mbs_only_flag)
7149 sps->mb_aff= get_bits1(&s->gb);
7153 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7155 #ifndef ALLOW_INTERLACE
7157 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7159 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7160 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7162 sps->crop= get_bits1(&s->gb);
7164 sps->crop_left = get_ue_golomb(&s->gb);
7165 sps->crop_right = get_ue_golomb(&s->gb);
7166 sps->crop_top = get_ue_golomb(&s->gb);
7167 sps->crop_bottom= get_ue_golomb(&s->gb);
7168 if(sps->crop_left || sps->crop_top){
7169 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7171 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7172 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7178 sps->crop_bottom= 0;
7181 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7182 if( sps->vui_parameters_present_flag )
7183 decode_vui_parameters(h, sps);
7185 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7186 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7187 sps_id, sps->profile_idc, sps->level_idc,
7189 sps->ref_frame_count,
7190 sps->mb_width, sps->mb_height,
7191 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7192 sps->direct_8x8_inference_flag ? "8B8" : "",
7193 sps->crop_left, sps->crop_right,
7194 sps->crop_top, sps->crop_bottom,
7195 sps->vui_parameters_present_flag ? "VUI" : "",
7196 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Precompute the luma-QP -> chroma-QP lookup table for chroma plane 't'
 * (0=Cb, 1=Cr), applying the PPS chroma_qp_index_offset 'index' and
 * clipping the offset QP to the legal [0,51] range (spec Table 8-15).
 * NOTE(review): the "static void" return-type line and closing brace are
 * sampled out of this excerpt. */
7202 build_qp_table(PPS *pps, int t, int index)
7206 for(i = 0; i < 52; i++)
7207 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/*
 * Decode a picture parameter set (PPS) RBSP from s->gb.
 * @param bit_length payload size in bits; used to detect the optional
 *        trailing fields (8x8 transform flag, scaling matrices,
 *        second_chroma_qp_index_offset).
 * NOTE(review): several lines of this function (error returns, closing
 * braces, the final return) are not visible in this chunk; comments
 * below describe only the visible code.
 */
7210 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7211 MpegEncContext * const s = &h->s;
7212 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7215 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* the SPS referenced by this PPS must already have been decoded */
7219 tmp= get_ue_golomb(&s->gb);
7220 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7221 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7226 pps->cabac= get_bits1(&s->gb);
7227 pps->pic_order_present= get_bits1(&s->gb);
7228 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7229 if(pps->slice_group_count > 1 ){
/* FMO (flexible macroblock ordering): the map type is parsed but the
 * feature is rejected; the lines below are syntax-table excerpts
 * quoted from the H.264 specification */
7230 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7231 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7232 switch(pps->mb_slice_group_map_type){
7235 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7236 | run_length[ i ] |1 |ue(v) |
7241 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7243 | top_left_mb[ i ] |1 |ue(v) |
7244 | bottom_right_mb[ i ] |1 |ue(v) |
7252 | slice_group_change_direction_flag |1 |u(1) |
7253 | slice_group_change_rate_minus1 |1 |ue(v) |
7258 | slice_group_id_cnt_minus1 |1 |ue(v) |
7259 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7261 | slice_group_id[ i ] |1 |u(v) |
/* default (per-slice-header overridable) reference counts; clamp to a
 * sane value on overflow instead of failing */
7266 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7267 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7268 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7269 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7270 pps->ref_count[0]= pps->ref_count[1]= 1;
7274 pps->weighted_pred= get_bits1(&s->gb);
7275 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7276 pps->init_qp= get_se_golomb(&s->gb) + 26;
7277 pps->init_qs= get_se_golomb(&s->gb) + 26;
7278 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7279 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7280 pps->constrained_intra_pred= get_bits1(&s->gb);
7281 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7283 pps->transform_8x8_mode= 0;
7284 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7285 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7286 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing fields are present iff bits remain in the RBSP */
7288 if(get_bits_count(&s->gb) < bit_length){
7289 pps->transform_8x8_mode= get_bits1(&s->gb);
7290 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7291 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7293 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* precompute the luma->chroma QP mapping for both offsets */
7296 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7297 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7298 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7299 h->pps.chroma_qp_diff= 1;
7301 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7302 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7303 pps_id, pps->sps_id,
7304 pps->cabac ? "CABAC" : "CAVLC",
7305 pps->slice_group_count,
7306 pps->ref_count[0], pps->ref_count[1],
7307 pps->weighted_pred ? "weighted" : "",
7308 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7309 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7310 pps->constrained_intra_pred ? "CONSTR" : "",
7311 pps->redundant_pic_cnt_present ? "REDU" : "",
7312 pps->transform_8x8_mode ? "8x8DCT" : ""
7320 * Call decode_slice() for each context.
7322 * @param h h264 master context
7323 * @param context_count number of contexts to execute
/*
 * Run decode_slice() over the queued slice contexts: directly for a
 * single context, via avctx->execute() for multiple, then pull the
 * relevant state from the last slice context back into the master.
 * NOTE(review): some lines (declarations, braces of the if/else) are
 * not visible in this chunk.
 */
7325 static void execute_decode_slices(H264Context *h, int context_count){
7326 MpegEncContext * const s = &h->s;
7327 AVCodecContext * const avctx= s->avctx;
7331 if(context_count == 1) {
7332 decode_slice(avctx, h);
/* reset per-thread error state before parallel execution */
7334 for(i = 1; i < context_count; i++) {
7335 hx = h->thread_context[i];
7336 hx->s.error_resilience = avctx->error_resilience;
7337 hx->s.error_count = 0;
7340 avctx->execute(avctx, (void *)decode_slice,
7341 (void **)h->thread_context, NULL, context_count);
7343 /* pull back stuff from slices to master context */
7344 hx = h->thread_context[context_count - 1];
7345 s->mb_x = hx->s.mb_x;
7346 s->mb_y = hx->s.mb_y;
7347 s->dropable = hx->s.dropable;
7348 s->picture_structure = hx->s.picture_structure;
/* accumulate the error counts of all worker contexts into the master */
7349 for(i = 1; i < context_count; i++)
7350 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Split the input buffer into NAL units (AVC length-prefixed or
 * Annex-B start-code delimited), unescape each one, and dispatch it by
 * nal_unit_type; slices are queued into per-thread contexts and run in
 * batches of h->max_contexts via execute_decode_slices().
 * Returns the number of bytes consumed.
 * NOTE(review): several lines of this function (loop header, error
 * paths, closing braces, final return) are not visible in this chunk;
 * comments describe only the visible code.
 */
7355 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7356 MpegEncContext * const s = &h->s;
7357 AVCodecContext * const avctx= s->avctx;
7359 H264Context *hx; ///< thread context
7360 int context_count = 0;
7362 h->max_contexts = avctx->thread_count;
7365 for(i=0; i<50; i++){
7366 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7369 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7370 h->current_slice = 0;
7371 if (!s->first_field)
7372 s->current_picture_ptr= NULL;
7384 if(buf_index >= buf_size) break;
/* AVC: read the big-endian NAL length prefix */
7386 for(i = 0; i < h->nal_length_size; i++)
7387 nalsize = (nalsize << 8) | buf[buf_index++];
7388 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7393 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7398 // start code prefix search
7399 for(; buf_index + 3 < buf_size; buf_index++){
7400 // This should always succeed in the first iteration.
7401 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7405 if(buf_index+3 >= buf_size) break;
7410 hx = h->thread_context[context_count];
7412 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7413 if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes; test dst_length FIRST so that a
 * zero-length NAL cannot read ptr[-1] (the original order evaluated
 * ptr[dst_length-1] before the length guard) */
7416 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7418 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7420 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7421 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7424 if (h->is_avc && (nalsize != consumed)){
7425 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7429 buf_index += consumed;
/* skip non-reference NALs when hurrying / discarding */
7431 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7432 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7437 switch(hx->nal_unit_type){
7439 if (h->nal_unit_type != NAL_IDR_SLICE) {
7440 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7443 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7445 init_get_bits(&hx->s.gb, ptr, bit_length);
7447 hx->inter_gb_ptr= &hx->s.gb;
7448 hx->s.data_partitioning = 0;
7450 if((err = decode_slice_header(hx, h)))
7453 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7454 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7455 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7456 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7457 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7458 && avctx->skip_frame < AVDISCARD_ALL)
/* data-partitioned slices: partition A carries the header, B/C carry
 * intra / inter residual data in separate bit readers */
7462 init_get_bits(&hx->s.gb, ptr, bit_length);
7464 hx->inter_gb_ptr= NULL;
7465 hx->s.data_partitioning = 1;
7467 err = decode_slice_header(hx, h);
7470 init_get_bits(&hx->intra_gb, ptr, bit_length);
7471 hx->intra_gb_ptr= &hx->intra_gb;
7474 init_get_bits(&hx->inter_gb, ptr, bit_length);
7475 hx->inter_gb_ptr= &hx->inter_gb;
7477 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7478 && s->context_initialized
7480 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7481 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7482 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7483 && avctx->skip_frame < AVDISCARD_ALL)
7487 init_get_bits(&s->gb, ptr, bit_length);
7491 init_get_bits(&s->gb, ptr, bit_length);
7492 decode_seq_parameter_set(h);
7494 if(s->flags& CODEC_FLAG_LOW_DELAY)
7497 if(avctx->has_b_frames < 2)
7498 avctx->has_b_frames= !s->low_delay;
7501 init_get_bits(&s->gb, ptr, bit_length);
7503 decode_picture_parameter_set(h, bit_length);
7507 case NAL_END_SEQUENCE:
7508 case NAL_END_STREAM:
7509 case NAL_FILLER_DATA:
7511 case NAL_AUXILIARY_SLICE:
7514 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush a full batch of queued slice contexts */
7517 if(context_count == h->max_contexts) {
7518 execute_decode_slices(h, context_count);
7523 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7525 /* Slice could not be decoded in parallel mode, copy down
7526 * NAL unit stuff to context 0 and restart. Note that
7527 * rbsp_buffer is not transferred, but since we no longer
7528 * run in parallel mode this should not be an issue. */
7529 h->nal_unit_type = hx->nal_unit_type;
7530 h->nal_ref_idc = hx->nal_ref_idc;
/* run any slices still queued at end of buffer */
7536 execute_decode_slices(h, context_count);
7541 * returns the number of bytes consumed for building the current frame
/* Map the parser position 'pos' to a consumed-bytes return value:
 * never report 0 (would stall the caller's loop) and round up to the
 * whole buffer when within 10 bytes of its end.
 * NOTE(review): the final return statement is not visible in this chunk. */
7543 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7544 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7545 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Top-level decode callback: handles end-of-stream flushing
 * (buf_size == 0), one-time avcC extradata parsing, NAL decoding, and
 * B-frame reordering / delayed-picture output.
 * NOTE(review): a large number of lines in this function are not
 * visible in this chunk (declarations, braces, error paths); comments
 * below describe only the visible code.
 */
7550 static int decode_frame(AVCodecContext *avctx,
7551 void *data, int *data_size,
7552 const uint8_t *buf, int buf_size)
7554 H264Context *h = avctx->priv_data;
7555 MpegEncContext *s = &h->s;
7556 AVFrame *pict = data;
7559 s->flags= avctx->flags;
7560 s->flags2= avctx->flags2;
7562 /* end of stream, output what is still in the buffers */
7563 if (buf_size == 0) {
7567 //FIXME factorize this with the output code below
/* pick the delayed picture with the smallest poc for output */
7568 out = h->delayed_pic[0];
7570 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7571 if(h->delayed_pic[i]->poc < out->poc){
7572 out = h->delayed_pic[i];
7576 for(i=out_idx; h->delayed_pic[i]; i++)
7577 h->delayed_pic[i] = h->delayed_pic[i+1];
7580 *data_size = sizeof(AVFrame);
7581 *pict= *(AVFrame*)out;
/* first call on an AVC (MP4-style) stream: parse the avcC extradata */
7587 if(h->is_avc && !h->got_avcC) {
7588 int i, cnt, nalsize;
7589 unsigned char *p = avctx->extradata;
7590 if(avctx->extradata_size < 7) {
7591 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7595 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7598 /* sps and pps in the avcC always have length coded with 2 bytes,
7599 so put a fake nal_length_size = 2 while parsing them */
7600 h->nal_length_size = 2;
7601 // Decode sps from avcC
7602 cnt = *(p+5) & 0x1f; // Number of sps
7604 for (i = 0; i < cnt; i++) {
7605 nalsize = AV_RB16(p) + 2;
7606 if(decode_nal_units(h, p, nalsize) < 0) {
7607 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7612 // Decode pps from avcC
7613 cnt = *(p++); // Number of pps
7614 for (i = 0; i < cnt; i++) {
7615 nalsize = AV_RB16(p) + 2;
7616 if(decode_nal_units(h, p, nalsize) != nalsize) {
7617 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7622 // Now store right nal length size, that will be use to parse all other nals
7623 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7624 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in extradata: parse it once */
7628 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7629 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7633 buf_index=decode_nal_units(h, buf, buf_size);
7637 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7638 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7639 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture complete (or non-chunked mode): finish and maybe output */
7643 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7644 Picture *out = s->current_picture_ptr;
7645 Picture *cur = s->current_picture_ptr;
7646 int i, pics, cross_idr, out_of_order, out_idx;
7650 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7651 s->current_picture_ptr->pict_type= s->pict_type;
7654 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* carry POC state over to the next picture */
7655 h->prev_poc_msb= h->poc_msb;
7656 h->prev_poc_lsb= h->poc_lsb;
7658 h->prev_frame_num_offset= h->frame_num_offset;
7659 h->prev_frame_num= h->frame_num;
7662 * FIXME: Error handling code does not seem to support interlaced
7663 * when slices span multiple rows
7664 * The ff_er_add_slice calls don't work right for bottom
7665 * fields; they cause massive erroneous error concealing
7666 * Error marking covers both fields (top and bottom).
7667 * This causes a mismatched s->error_count
7668 * and a bad error table. Further, the error count goes to
7669 * INT_MAX when called for bottom field, because mb_y is
7670 * past end by one (callers fault) and resync_mb_y != 0
7671 * causes problems for the first MB line, too.
7678 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7679 /* Wait for second field. */
7683 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7684 /* Derive top_field_first from field pocs. */
7685 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7687 //FIXME do something with unavailable reference frames
7689 /* Sort B-frames into display order */
7691 if(h->sps.bitstream_restriction_flag
7692 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7693 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7697 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7698 && !h->sps.bitstream_restriction_flag){
7699 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7704 while(h->delayed_pic[pics]) pics++;
7706 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* queue the current picture; keep it referenced until output */
7708 h->delayed_pic[pics++] = cur;
7709 if(cur->reference == 0)
7710 cur->reference = DELAYED_PIC_REF;
7712 out = h->delayed_pic[0];
7714 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7715 if(h->delayed_pic[i]->poc < out->poc){
7716 out = h->delayed_pic[i];
7719 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7721 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* grow has_b_frames if the stream turns out to need more reordering */
7723 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7725 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7727 ((!cross_idr && out->poc > h->outputed_poc + 2)
7728 || cur->pict_type == FF_B_TYPE)))
7731 s->avctx->has_b_frames++;
7734 if(out_of_order || pics > s->avctx->has_b_frames){
7735 out->reference &= ~DELAYED_PIC_REF;
7736 for(i=out_idx; h->delayed_pic[i]; i++)
7737 h->delayed_pic[i] = h->delayed_pic[i+1];
7739 if(!out_of_order && pics > s->avctx->has_b_frames){
7740 *data_size = sizeof(AVFrame);
7742 h->outputed_poc = out->poc;
7743 *pict= *(AVFrame*)out;
7745 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7750 assert(pict->data[0] || !*data_size);
7751 ff_print_debug_info(s, pict);
7752 //printf("out %d\n", (int)pict->data[0]);
7755 /* Return the Picture timestamp as the frame number */
7756 /* we subtract 1 because it is added on utils.c */
7757 avctx->frame_number = s->picture_number - 1;
7759 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour availability for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left; a
 * neighbour is available only if it lies inside the picture and
 * belongs to the same slice (same slice_num in h->slice_table).
 * NOTE(review): the branch taken when there is no row above (indices
 * 0..2 cleared) is not visible in this chunk. */
7762 static inline void fill_mb_avail(H264Context *h){
7763 MpegEncContext * const s = &h->s;
7764 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7767 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7768 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7769 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7775 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7776 h->mb_avail[4]= 1; //FIXME move out
7777 h->mb_avail[5]= 0; //FIXME move out
/* Self-test code: round-trips unsigned/signed Exp-Golomb coding, the
 * 4x4 (I)DCT, the quantizer and the NAL escape/unescape layer.
 * NOTE(review): the enclosing function header is not visible in this
 * chunk (presumably a main() under a TEST build guard -- confirm), and
 * many intermediate lines are missing. */
7785 #define SIZE (COUNT*40)
7791 // int int_temp[10000];
7793 AVCodecContext avctx;
7795 dsputil_init(&dsp, &avctx);
/* write COUNT values with set_ue_golomb, then read them back and
 * compare */
7797 init_put_bits(&pb, temp, SIZE);
7798 printf("testing unsigned exp golomb\n");
7799 for(i=0; i<COUNT; i++){
7801 set_ue_golomb(&pb, i);
7802 STOP_TIMER("set_ue_golomb");
7804 flush_put_bits(&pb);
7806 init_get_bits(&gb, temp, 8*SIZE);
7807 for(i=0; i<COUNT; i++){
7810 s= show_bits(&gb, 24);
7813 j= get_ue_golomb(&gb);
7815 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7818 STOP_TIMER("get_ue_golomb");
/* same round-trip for the signed variant, centred on zero */
7822 init_put_bits(&pb, temp, SIZE);
7823 printf("testing signed exp golomb\n");
7824 for(i=0; i<COUNT; i++){
7826 set_se_golomb(&pb, i - COUNT/2);
7827 STOP_TIMER("set_se_golomb");
7829 flush_put_bits(&pb);
7831 init_get_bits(&gb, temp, 8*SIZE);
7832 for(i=0; i<COUNT; i++){
7835 s= show_bits(&gb, 24);
7838 j= get_se_golomb(&gb);
7839 if(j != i - COUNT/2){
7840 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7843 STOP_TIMER("get_se_golomb");
/* forward DCT on random data, crude dequant, then IDCT back; report
 * the average and maximum reconstruction error */
7847 printf("testing 4x4 (I)DCT\n");
7850 uint8_t src[16], ref[16];
7851 uint64_t error= 0, max_error=0;
7853 for(i=0; i<COUNT; i++){
7855 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7856 for(j=0; j<16; j++){
7857 ref[j]= random()%255;
7858 src[j]= random()%255;
7861 h264_diff_dct_c(block, src, ref, 4);
7864 for(j=0; j<16; j++){
7865 // printf("%d ", block[j]);
7866 block[j]= block[j]*4;
7867 if(j&1) block[j]= (block[j]*4 + 2)/5;
7868 if(j&4) block[j]= (block[j]*4 + 2)/5;
7872 s->dsp.h264_idct_add(ref, block, 4);
7873 /* for(j=0; j<16; j++){
7874 printf("%d ", ref[j]);
7878 for(j=0; j<16; j++){
7879 int diff= FFABS(src[j] - ref[j]);
7882 max_error= FFMAX(max_error, diff);
7885 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7886 printf("testing quantizer\n");
7887 for(qp=0; qp<52; qp++){
7889 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer: escape a random bitstream (with forced zero runs), then
 * unescape it and verify length, consumed bytes and content */
7892 printf("Testing NAL layer\n");
7894 uint8_t bitstream[COUNT];
7895 uint8_t nal[COUNT*2];
7897 memset(&h, 0, sizeof(H264Context));
7899 for(i=0; i<COUNT; i++){
7907 for(j=0; j<COUNT; j++){
7908 bitstream[j]= (random() % 255) + 1;
7911 for(j=0; j<zeros; j++){
7912 int pos= random() % COUNT;
7913 while(bitstream[pos] == 0){
7922 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7924 printf("encoding failed\n");
7928 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7932 if(out_length != COUNT){
7933 printf("incorrect length %d %d\n", out_length, COUNT);
7937 if(consumed != nal_length){
7938 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7942 if(memcmp(bitstream, out, COUNT)){
7943 printf("mismatch\n");
7949 printf("Testing RBSP\n");
/* Codec close callback: free the per-context RBSP unescape buffers and
 * the decoder's dynamic tables.
 * NOTE(review): the MPV close call / return statement are not visible
 * in this chunk. */
7957 static av_cold int decode_end(AVCodecContext *avctx)
7959 H264Context *h = avctx->priv_data;
7960 MpegEncContext *s = &h->s;
7962 av_freep(&h->rbsp_buffer[0]);
7963 av_freep(&h->rbsp_buffer[1]);
7964 free_tables(h); //FIXME cleanup init stuff perhaps
7967 // memset(h, 0, sizeof(H264Context));
/* Public decoder registration. Only a few initializer fields are
 * visible in this chunk (priv_data size, capabilities, long_name);
 * the name/type/id/callback fields are missing from this view. */
7973 AVCodec h264_decoder = {
7977 sizeof(H264Context),
7982 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7984 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),