2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Statically allocated CAVLC VLC tables plus their sizes.
 * coeff_token uses four separate VLCs (one per nC range per the H.264 spec);
 * the *_size constants give the number of entries reserved for each table. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations: SVQ3 IDCT helpers and the in-loop deblocking filter.
 * Their definitions are presumably later in this file (not visible in this chunk). */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Pack two 16-bit values into one 32-bit word.  The operand order is swapped
 * on big-endian targets so the in-memory byte layout is the same on all
 * platforms (callers store the result via uint32_t writes).
 * NOTE(review): the #else/#endif/closing-brace lines of this function are
 * elided in this view. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* ff_rem6[q] == q % 6 for 0 <= q < 52 (the H.264 QP range); a lookup table
 * avoids a runtime modulo in dequant scaling. */
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* ff_div6[q] == q / 6 for 0 <= q < 52; likewise avoids a runtime division. */
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Left-neighbour 4x4 block index remappings, one row per MBAFF
 * frame/field pairing case (initializer rows elided in this view). */
97 static const int left_block_options[4][8]={
/* Fill the per-macroblock neighbour caches — intra4x4 prediction modes,
 * non-zero-count, sample-availability masks, motion vectors, reference
 * indices, mvd, direct-mode flags and cbp — from the neighbouring
 * macroblocks of mb_xy, including the MBAFF frame/field neighbour
 * remapping.  When for_deblock is set, a reduced variant for the loop
 * filter is used (the early-out below skips work when the top MB belongs
 * to the same slice).
 * NOTE(review): many source lines of this function are elided in this
 * view; the leading numbers are the original file's line numbers. */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: neighbour MB indices depend on whether each MB pair (current,
 * top, top-left, top-right, left) is frame- or field-coded. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Neighbour MB types; a neighbour counts only if it has been decoded
 * (slice_table < 255 here; the == slice_num variant is used below). */
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
187 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
189 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
190 for(list=0; list<h->list_count; list++){
191 if(USES_LIST(mb_type,list)){
192 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
193 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
194 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
195 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
201 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
202 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
204 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
205 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
207 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
208 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
213 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
214 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
215 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
216 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
217 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: sample-availability bitmasks, one bit per 4x4 block
 * edge, narrowed when a needed neighbour is missing or (with constrained
 * intra pred) not intra-coded. */
220 if(IS_INTRA(mb_type)){
221 h->topleft_samples_available=
222 h->top_samples_available=
223 h->left_samples_available= 0xFFFF;
224 h->topright_samples_available= 0xEEEA;
226 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
227 h->topleft_samples_available= 0xB3FF;
228 h->top_samples_available= 0x33FF;
229 h->topright_samples_available= 0x26EA;
232 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available&= 0xDF5F;
234 h->left_samples_available&= 0x5F5F;
238 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
239 h->topleft_samples_available&= 0x7FFF;
241 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
242 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction-mode cache from the top and left neighbours. */
244 if(IS_INTRA4x4(mb_type)){
245 if(IS_INTRA4x4(top_type)){
246 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
247 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
248 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
249 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
257 h->intra4x4_pred_mode_cache[4+8*0]=
258 h->intra4x4_pred_mode_cache[5+8*0]=
259 h->intra4x4_pred_mode_cache[6+8*0]=
260 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 if(IS_INTRA4x4(left_type[i])){
264 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
265 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
273 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
274 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache; missing neighbours get 0 (CABAC inter) or 64. */
289 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
303 h->non_zero_count_cache[4+8*0]=
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
311 h->non_zero_count_cache[1+8*3]=
312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
316 for (i=0; i<2; i++) {
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern caches (for CABAC context derivation). */
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter prediction: load neighbour MVs and reference indices into
 * mv_cache/ref_cache; unavailable neighbours get 0 MVs and
 * LIST_NOT_USED/PART_NOT_AVAILABLE markers. */
356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
358 for(list=0; list<h->list_count; list++){
359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
367 h->mv_cache_clean[list]= 0;
369 if(USES_LIST(top_type, list)){
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
405 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
408 if(USES_LIST(topleft_type, list)){
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
418 if(USES_LIST(topright_type, list)){
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
434 h->ref_cache[list][scan8[4 ]] =
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
443 /* XXX beurk, Load mvd */
444 if(USES_LIST(top_type, list)){
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
456 if(USES_LIST(left_type[0], list)){
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
464 if(USES_LIST(left_type[1], list)){
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags per 8x8 block from the neighbours. */
478 if(h->slice_type_nos == FF_B_TYPE){
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
498 if(IS_DIRECT(left_type[1]))
499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field neighbour MV rescaling: MAP_F2F adjusts cached
 * ref indices and vertical MV components when a neighbour was coded in
 * the other (frame/field) mode.  The macro is defined twice below, one
 * variant per direction. */
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
520 #define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
529 #define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copy this MB's intra4x4 prediction modes from the decode-side cache
 * (8-wide layout addressed x+8*y) back into the per-picture
 * intra4x4_pred_mode array, in the packed 7-entry neighbour order read
 * back by fill_caches. */
546 static inline void write_back_intra_pred_mode(H264Context *h){
547 const int mb_xy= h->mb_xy;
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Check the cached intra4x4 prediction modes against top/left sample
 * availability.  The lookup tables substitute the directional-DC variants
 * (LEFT_DC_PRED / TOP_DC_PRED / DC_128_PRED) where possible; a -1 entry
 * means the mode cannot be satisfied, which triggers the av_log error
 * (error-return lines elided in this view).  Returns 0 on success,
 * presumably -1 on error — confirm against the full source. */
561 static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
567 if(!(h->top_samples_available&0x8000)){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
579 if(!(h->left_samples_available&0x8000)){
581 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
583 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
586 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 } //FIXME cleanup like next
595 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Validate a whole-MB intra prediction mode (the error message mentions
 * chroma) against top/left sample availability, substituting the
 * directional-DC variants via the lookup tables; -1 entries are invalid.
 * Out-of-range modes are rejected up front.  Returns the (possibly
 * substituted) mode, presumably -1 on error — return lines are elided
 * in this view. */
597 static inline int check_intra_pred_mode(H264Context *h, int mode){
598 MpegEncContext * const s = &h->s;
599 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
600 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
603 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
607 if(!(h->top_samples_available&0x8000)){
610 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 if(!(h->left_samples_available&0x8000)){
618 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 * gets the predicted intra4x4 prediction mode.
/* Predicted intra4x4 mode for block n: the minimum of the left and top
 * neighbours' cached modes, falling back to DC_PRED when either neighbour
 * is unavailable (negative cache entry) — as specified by H.264. */
629 static inline int pred_intra_mode(H264Context *h, int n){
630 const int index8= scan8[n];
631 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
632 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
633 const int min= FFMIN(left, top);
635 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
637 if(min<0) return DC_PRED;
/* Copy this MB's non-zero coefficient counts from the decode-side cache
 * back into the per-picture non_zero_count array (packed neighbour
 * order); additionally packs one bit per luma 4x4 block into a uint16 at
 * offset 14 for the deblocking filter. */
641 static inline void write_back_non_zero_count(H264Context *h){
642 const int mb_xy= h->mb_xy;
644 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
645 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
646 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
647 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
648 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
649 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
650 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
652 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
653 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
654 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
656 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
657 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
658 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
661 // store all luma nnzs, for deblocking
664 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
665 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
/* Predicted number of non-zero coefficients for block n, derived from the
 * cached left and top neighbour counts.  NOTE(review): the lines combining
 * left/top into i are elided in this view; the i<64 rounding-average and
 * the i&31 in the trace suggest the usual (left+top+1)/2 derivation with
 * a 64 "unavailable" sentinel — confirm against the full source. */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetch the "diagonal" (top-right, C) neighbour MV for block i of width
 * part_width: points *C at the cached MV and returns its reference index,
 * falling back to the top-left neighbour when the top-right is
 * unavailable.  In MBAFF pictures, neighbours coded in the other
 * frame/field mode cannot be resolved from the cache, so SET_DIAG_MV
 * reads the MV directly from the picture and rescales the vertical
 * component (field<->frame) and reference index. */
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF (or unhandled) path: use the cached top-right MV when
 * available, otherwise fall back to the top-left neighbour. */
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
/* Median motion vector prediction (H.264 8.4.1.3) for block n of width
 * part_width in the given list with reference 'ref': the component-wise
 * median of the left (A), top (B) and diagonal (C) neighbour MVs.  When
 * exactly one neighbour uses the same reference its MV is taken directly;
 * the all-unavailable-except-left case uses A (branch bodies partially
 * elided in this view). */
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
/* Directional MV prediction for 16x8 partitions: the top partition uses
 * the top neighbour's MV and the bottom partition uses the left
 * neighbour's MV when that neighbour has the same reference; otherwise
 * fall back to the generic median prediction (early-return lines elided
 * in this view). */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// fallback: generic median prediction over a 16x16-wide partition
831 pred_motion(h, n, 4, list, ref, mx, my);
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
/* Directional MV prediction for 8x16 partitions: the left partition uses
 * the left neighbour's MV and the right partition the diagonal (top-right)
 * neighbour's MV when that neighbour has the same reference; otherwise
 * fall back to the generic median prediction (early-return lines elided
 * in this view). */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
// fallback: generic median prediction over an 8-wide partition
868 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks (H.264 8.4.1.1): the MV is zero
 * when the top or left neighbour is unavailable, or has reference 0 with
 * a zero MV (zero-assignment lines elided in this view); otherwise the
 * regular median prediction with list 0, ref 0 is used. */
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
879 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Compute dist_scale_factor[] for temporal direct mode (H.264 8.4.1.2.3):
 * for each list-0 reference, scale by the POC distance ratio
 * tb/td (via tx = (16384 + |td|/2) / td), clipped to [-1024, 1023].
 * Long-term references and td==0 use the neutral factor 256.
 * For MBAFF/field use, the per-field table is filled by duplication. */
890 static inline void direct_dist_scale_factor(H264Context * const h){
891 MpegEncContext * const s = &h->s;
892 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
893 const int poc1 = h->ref_list[1][0].poc;
895 for(i=0; i<h->ref_count[0]; i++){
896 int poc0 = h->ref_list[0][i].poc;
897 int td = av_clip(poc1 - poc0, -128, 127);
898 if(td == 0 || h->ref_list[0][i].long_ref){
899 h->dist_scale_factor[i] = 256;
901 int tb = av_clip(poc - poc0, -128, 127);
902 int tx = (16384 + (FFABS(td) >> 1)) / td;
903 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
907 for(i=0; i<h->ref_count[0]; i++){
908 h->dist_scale_factor_field[2*i] =
909 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Record the current picture's per-list reference counts and POC-based
 * reference identifiers (4*frame_num + reference-field bits), then build
 * map_col_to_list0[][]: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the matching entry in the current
 * list 0 — used by temporal direct mode.  Unmatched references map to 0.
 * A doubled per-field variant of the map is also filled for field use.
 * NOTE(review): several control-flow lines are elided in this view. */
913 static inline void direct_ref_list_init(H264Context * const h){
914 MpegEncContext * const s = &h->s;
915 Picture * const ref1 = &h->ref_list[1][0];
916 Picture * const cur = s->current_picture_ptr;
918 int sidx= s->picture_structure&1;
919 int ref1sidx= ref1->reference&1;
920 for(list=0; list<2; list++){
921 cur->ref_count[sidx][list] = h->ref_count[list];
922 for(j=0; j<h->ref_count[list]; j++)
923 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
925 if(s->picture_structure == PICT_FRAME){
926 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
927 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
929 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
931 for(list=0; list<2; list++){
932 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
933 int poc = ref1->ref_poc[ref1sidx][list][i];
934 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
935 poc= (poc&~3) + s->picture_structure;
936 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
937 for(j=0; j<h->ref_count[list]; j++)
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 h->map_col_to_list0[list][i] = j;
945 for(list=0; list<2; list++){
946 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
947 j = h->map_col_to_list0[list][i];
948 h->map_col_to_list0_field[list][2*i] = 2*j;
949 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
955 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
956 MpegEncContext * const s = &h->s;
957 int b8_stride = h->b8_stride;
958 int b4_stride = h->b_stride;
959 int mb_xy = h->mb_xy;
961 const int16_t (*l1mv0)[2], (*l1mv1)[2];
962 const int8_t *l1ref0, *l1ref1;
963 const int is_b8x8 = IS_8X8(*mb_type);
964 unsigned int sub_mb_type;
967 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
969 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
970 if(h->ref_list[1][0].reference == PICT_FRAME){ // AFL/AFR/FR/FL -> AFL
971 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL
972 int cur_poc = s->current_picture_ptr->poc;
973 int *col_poc = h->ref_list[1]->field_poc;
974 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
975 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
978 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
979 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
980 mb_xy += s->mb_stride*fieldoff;
983 }else{ // AFL/AFR/FR/FL -> AFR/FR
984 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
985 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
986 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
987 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
990 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
991 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
992 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
994 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
995 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
997 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
998 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1000 }else{ // AFR/FR -> AFR/FR
1003 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1004 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1005 /* FIXME save sub mb types from previous frames (or derive from MVs)
1006 * so we know exactly what block size to use */
1007 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1008 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1009 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1010 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1011 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1013 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1014 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1019 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1020 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1021 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1022 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1025 l1ref0 += h->b8_stride;
1026 l1ref1 += h->b8_stride;
1027 l1mv0 += 2*b4_stride;
1028 l1mv1 += 2*b4_stride;
1032 if(h->direct_spatial_mv_pred){
1037 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1039 /* ref = min(neighbors) */
1040 for(list=0; list<2; list++){
1041 int refa = h->ref_cache[list][scan8[0] - 1];
1042 int refb = h->ref_cache[list][scan8[0] - 8];
1043 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1044 if(refc == PART_NOT_AVAILABLE)
1045 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1046 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1051 if(ref[0] < 0 && ref[1] < 0){
1052 ref[0] = ref[1] = 0;
1053 mv[0][0] = mv[0][1] =
1054 mv[1][0] = mv[1][1] = 0;
1056 for(list=0; list<2; list++){
1058 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1060 mv[list][0] = mv[list][1] = 0;
1066 *mb_type &= ~MB_TYPE_L1;
1067 sub_mb_type &= ~MB_TYPE_L1;
1068 }else if(ref[0] < 0){
1070 *mb_type &= ~MB_TYPE_L0;
1071 sub_mb_type &= ~MB_TYPE_L0;
1074 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1075 for(i8=0; i8<4; i8++){
1078 int xy8 = x8+y8*b8_stride;
1079 int xy4 = 3*x8+y8*b4_stride;
1082 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1084 h->sub_mb_type[i8] = sub_mb_type;
1086 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1087 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1088 if(!IS_INTRA(mb_type_col[y8])
1089 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1090 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1092 a= pack16to32(mv[0][0],mv[0][1]);
1094 b= pack16to32(mv[1][0],mv[1][1]);
1096 a= pack16to32(mv[0][0],mv[0][1]);
1097 b= pack16to32(mv[1][0],mv[1][1]);
1099 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1100 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1102 }else if(IS_16X16(*mb_type)){
1105 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1106 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1107 if(!IS_INTRA(mb_type_col[0])
1108 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1109 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1110 && (h->x264_build>33 || !h->x264_build)))){
1112 a= pack16to32(mv[0][0],mv[0][1]);
1114 b= pack16to32(mv[1][0],mv[1][1]);
1116 a= pack16to32(mv[0][0],mv[0][1]);
1117 b= pack16to32(mv[1][0],mv[1][1]);
1119 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1120 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1122 for(i8=0; i8<4; i8++){
1123 const int x8 = i8&1;
1124 const int y8 = i8>>1;
1126 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1128 h->sub_mb_type[i8] = sub_mb_type;
1130 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1131 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1132 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1133 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1137 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1138 && (h->x264_build>33 || !h->x264_build)))){
1139 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1140 if(IS_SUB_8X8(sub_mb_type)){
1141 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1142 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1144 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1149 for(i4=0; i4<4; i4++){
1150 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1151 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1153 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1155 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1161 }else{ /* direct temporal mv pred */
1162 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1163 const int *dist_scale_factor = h->dist_scale_factor;
1165 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1166 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1167 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1168 dist_scale_factor = h->dist_scale_factor_field;
1170 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1171 /* FIXME assumes direct_8x8_inference == 1 */
1172 int y_shift = 2*!IS_INTERLACED(*mb_type);
1173 int ref_shift= FRAME_MBAFF ? y_shift : 1;
1175 for(i8=0; i8<4; i8++){
1176 const int x8 = i8&1;
1177 const int y8 = i8>>1;
1179 const int16_t (*l1mv)[2]= l1mv0;
1181 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1183 h->sub_mb_type[i8] = sub_mb_type;
1185 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1186 if(IS_INTRA(mb_type_col[y8])){
1187 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1188 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1189 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1193 ref0 = l1ref0[x8 + y8*b8_stride];
1195 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1197 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1200 scale = dist_scale_factor[ref0];
1201 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1204 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1205 int my_col = (mv_col[1]<<y_shift)/2;
1206 int mx = (scale * mv_col[0] + 128) >> 8;
1207 int my = (scale * my_col + 128) >> 8;
1208 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1209 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1215 /* one-to-one mv scaling */
1217 if(IS_16X16(*mb_type)){
1220 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1221 if(IS_INTRA(mb_type_col[0])){
1224 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1225 : map_col_to_list0[1][l1ref1[0]];
1226 const int scale = dist_scale_factor[ref0];
1227 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1229 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1230 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1232 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1233 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1235 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1236 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1237 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1239 for(i8=0; i8<4; i8++){
1240 const int x8 = i8&1;
1241 const int y8 = i8>>1;
1243 const int16_t (*l1mv)[2]= l1mv0;
1245 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1247 h->sub_mb_type[i8] = sub_mb_type;
1248 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1249 if(IS_INTRA(mb_type_col[0])){
1250 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1251 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1252 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1256 ref0 = l1ref0[x8 + y8*h->b8_stride];
1258 ref0 = map_col_to_list0[0][ref0];
1260 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1263 scale = dist_scale_factor[ref0];
1265 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1266 if(IS_SUB_8X8(sub_mb_type)){
1267 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1268 int mx = (scale * mv_col[0] + 128) >> 8;
1269 int my = (scale * mv_col[1] + 128) >> 8;
1270 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1271 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1273 for(i4=0; i4<4; i4++){
1274 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1275 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1276 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1277 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1278 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1279 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1286 static inline void write_back_motion(H264Context *h, int mb_type){
1287 MpegEncContext * const s = &h->s;
1288 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1289 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1292 if(!USES_LIST(mb_type, 0))
1293 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1295 for(list=0; list<h->list_count; list++){
1297 if(!USES_LIST(mb_type, list))
1301 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1302 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1304 if( h->pps.cabac ) {
1305 if(IS_SKIP(mb_type))
1306 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1309 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1310 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1315 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1316 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1317 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1318 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1319 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1323 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1324 if(IS_8X8(mb_type)){
1325 uint8_t *direct_table = &h->direct_table[b8_xy];
1326 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1327 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1328 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1334 * Decodes a network abstraction layer unit.
1335 * @param consumed is the number of bytes used as input
1336 * @param length is the length of the array
1337 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1338 * @returns decoded bytes, might be src+1 if no escapes
1340 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1345 // src[0]&0x80; //forbidden bit
1346 h->nal_ref_idc= src[0]>>5;
1347 h->nal_unit_type= src[0]&0x1F;
1351 for(i=0; i<length; i++)
1352 printf("%2X ", src[i]);
1354 for(i=0; i+1<length; i+=2){
1355 if(src[i]) continue;
1356 if(i>0 && src[i-1]==0) i--;
1357 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1359 /* startcode, so we must be past the end */
1366 if(i>=length-1){ //no escaped 0
1367 *dst_length= length;
1368 *consumed= length+1; //+1 for the header
1372 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1373 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1374 dst= h->rbsp_buffer[bufidx];
1380 //printf("decoding esc\n");
1383 //remove escapes (very rare 1:2^22)
1384 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1385 if(src[si+2]==3){ //escape
1390 }else //next start code
1394 dst[di++]= src[si++];
1398 *consumed= si + 1;//+1 for the header
1399 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1404 * identifies the exact end of the bitstream
1405 * @return the length of the trailing, or 0 if damaged
1407 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1411 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1421 * IDCT transforms the 16 dc values and dequantizes them.
1422 * @param qp quantization parameter
1424 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1427 int temp[16]; //FIXME check if this is a good idea
1428 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1429 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1431 //memset(block, 64, 2*256);
1434 const int offset= y_offset[i];
1435 const int z0= block[offset+stride*0] + block[offset+stride*4];
1436 const int z1= block[offset+stride*0] - block[offset+stride*4];
1437 const int z2= block[offset+stride*1] - block[offset+stride*5];
1438 const int z3= block[offset+stride*1] + block[offset+stride*5];
1447 const int offset= x_offset[i];
1448 const int z0= temp[4*0+i] + temp[4*2+i];
1449 const int z1= temp[4*0+i] - temp[4*2+i];
1450 const int z2= temp[4*1+i] - temp[4*3+i];
1451 const int z3= temp[4*1+i] + temp[4*3+i];
1453 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1454 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1455 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1456 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1462 * DCT transforms the 16 dc values.
1463 * @param qp quantization parameter ??? FIXME
1465 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1466 // const int qmul= dequant_coeff[qp][0];
1468 int temp[16]; //FIXME check if this is a good idea
1469 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1470 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1473 const int offset= y_offset[i];
1474 const int z0= block[offset+stride*0] + block[offset+stride*4];
1475 const int z1= block[offset+stride*0] - block[offset+stride*4];
1476 const int z2= block[offset+stride*1] - block[offset+stride*5];
1477 const int z3= block[offset+stride*1] + block[offset+stride*5];
1486 const int offset= x_offset[i];
1487 const int z0= temp[4*0+i] + temp[4*2+i];
1488 const int z1= temp[4*0+i] - temp[4*2+i];
1489 const int z2= temp[4*1+i] - temp[4*3+i];
1490 const int z3= temp[4*1+i] + temp[4*3+i];
1492 block[stride*0 +offset]= (z0 + z3)>>1;
1493 block[stride*2 +offset]= (z1 + z2)>>1;
1494 block[stride*8 +offset]= (z1 - z2)>>1;
1495 block[stride*10+offset]= (z0 - z3)>>1;
1503 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1504 const int stride= 16*2;
1505 const int xStride= 16;
1508 a= block[stride*0 + xStride*0];
1509 b= block[stride*0 + xStride*1];
1510 c= block[stride*1 + xStride*0];
1511 d= block[stride*1 + xStride*1];
1518 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1519 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1520 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1521 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1525 static void chroma_dc_dct_c(DCTELEM *block){
1526 const int stride= 16*2;
1527 const int xStride= 16;
1530 a= block[stride*0 + xStride*0];
1531 b= block[stride*0 + xStride*1];
1532 c= block[stride*1 + xStride*0];
1533 d= block[stride*1 + xStride*1];
1540 block[stride*0 + xStride*0]= (a+c);
1541 block[stride*0 + xStride*1]= (e+b);
1542 block[stride*1 + xStride*0]= (a-c);
1543 block[stride*1 + xStride*1]= (e-b);
1548 * gets the chroma qp.
1550 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1551 return h->pps.chroma_qp_table[t][qscale];
1554 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1555 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1556 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1558 const int * const quant_table= quant_coeff[qscale];
1559 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1560 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1561 const unsigned int threshold2= (threshold1<<1);
1567 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1568 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1569 const unsigned int dc_threshold2= (dc_threshold1<<1);
1571 int level= block[0]*quant_coeff[qscale+18][0];
1572 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1574 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1577 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1580 // last_non_zero = i;
1585 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1586 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1587 const unsigned int dc_threshold2= (dc_threshold1<<1);
1589 int level= block[0]*quant_table[0];
1590 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1592 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1595 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1598 // last_non_zero = i;
1611 const int j= scantable[i];
1612 int level= block[j]*quant_table[j];
1614 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1615 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1616 if(((unsigned)(level+threshold1))>threshold2){
1618 level= (bias + level)>>QUANT_SHIFT;
1621 level= (bias - level)>>QUANT_SHIFT;
1630 return last_non_zero;
1633 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1634 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1635 int src_x_offset, int src_y_offset,
1636 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1637 MpegEncContext * const s = &h->s;
1638 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1639 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1640 const int luma_xy= (mx&3) + ((my&3)<<2);
1641 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1642 uint8_t * src_cb, * src_cr;
1643 int extra_width= h->emu_edge_width;
1644 int extra_height= h->emu_edge_height;
1646 const int full_mx= mx>>2;
1647 const int full_my= my>>2;
1648 const int pic_width = 16*s->mb_width;
1649 const int pic_height = 16*s->mb_height >> MB_FIELD;
1651 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1654 if(mx&7) extra_width -= 3;
1655 if(my&7) extra_height -= 3;
1657 if( full_mx < 0-extra_width
1658 || full_my < 0-extra_height
1659 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1660 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1661 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1662 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1666 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1668 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1671 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1674 // chroma offset when predicting from a field of opposite parity
1675 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1676 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1678 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1679 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1682 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1683 src_cb= s->edge_emu_buffer;
1685 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1688 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1689 src_cr= s->edge_emu_buffer;
1691 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1694 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1699 int list0, int list1){
1700 MpegEncContext * const s = &h->s;
1701 qpel_mc_func *qpix_op= qpix_put;
1702 h264_chroma_mc_func chroma_op= chroma_put;
1704 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1705 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1706 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1707 x_offset += 8*s->mb_x;
1708 y_offset += 8*(s->mb_y >> MB_FIELD);
1711 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1712 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1713 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1714 qpix_op, chroma_op);
1717 chroma_op= chroma_avg;
1721 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1722 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1723 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1724 qpix_op, chroma_op);
1728 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1729 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1730 int x_offset, int y_offset,
1731 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1732 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1733 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1734 int list0, int list1){
1735 MpegEncContext * const s = &h->s;
1737 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1738 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1739 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1740 x_offset += 8*s->mb_x;
1741 y_offset += 8*(s->mb_y >> MB_FIELD);
1744 /* don't optimize for luma-only case, since B-frames usually
1745 * use implicit weights => chroma too. */
1746 uint8_t *tmp_cb = s->obmc_scratchpad;
1747 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1748 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1749 int refn0 = h->ref_cache[0][ scan8[n] ];
1750 int refn1 = h->ref_cache[1][ scan8[n] ];
1752 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1753 dest_y, dest_cb, dest_cr,
1754 x_offset, y_offset, qpix_put, chroma_put);
1755 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1756 tmp_y, tmp_cb, tmp_cr,
1757 x_offset, y_offset, qpix_put, chroma_put);
1759 if(h->use_weight == 2){
1760 int weight0 = h->implicit_weight[refn0][refn1];
1761 int weight1 = 64 - weight0;
1762 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1763 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1764 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1766 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1767 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1768 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1769 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1770 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1771 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1772 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1773 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1774 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1777 int list = list1 ? 1 : 0;
1778 int refn = h->ref_cache[list][ scan8[n] ];
1779 Picture *ref= &h->ref_list[list][refn];
1780 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1781 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1782 qpix_put, chroma_put);
1784 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1785 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1786 if(h->use_weight_chroma){
1787 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1788 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1789 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1790 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1795 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1796 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1797 int x_offset, int y_offset,
1798 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1799 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1800 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1801 int list0, int list1){
1802 if((h->use_weight==2 && list0 && list1
1803 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1804 || h->use_weight==1)
1805 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1806 x_offset, y_offset, qpix_put, chroma_put,
1807 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1809 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1810 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1813 static inline void prefetch_motion(H264Context *h, int list){
1814 /* fetch pixels for estimated mv 4 macroblocks ahead
1815 * optimized for 64byte cache lines */
1816 MpegEncContext * const s = &h->s;
1817 const int refn = h->ref_cache[list][scan8[0]];
1819 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1820 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1821 uint8_t **src= h->ref_list[list][refn].data;
1822 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1823 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1824 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1825 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1829 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1830 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1831 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1832 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1833 MpegEncContext * const s = &h->s;
1834 const int mb_xy= h->mb_xy;
1835 const int mb_type= s->current_picture.mb_type[mb_xy];
1837 assert(IS_INTER(mb_type));
1839 prefetch_motion(h, 0);
1841 if(IS_16X16(mb_type)){
1842 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1843 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1844 &weight_op[0], &weight_avg[0],
1845 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1846 }else if(IS_16X8(mb_type)){
1847 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1848 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1849 &weight_op[1], &weight_avg[1],
1850 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1851 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1852 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1853 &weight_op[1], &weight_avg[1],
1854 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1855 }else if(IS_8X16(mb_type)){
1856 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1857 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1858 &weight_op[2], &weight_avg[2],
1859 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1860 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1861 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1862 &weight_op[2], &weight_avg[2],
1863 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1867 assert(IS_8X8(mb_type));
1870 const int sub_mb_type= h->sub_mb_type[i];
1872 int x_offset= (i&1)<<2;
1873 int y_offset= (i&2)<<1;
1875 if(IS_SUB_8X8(sub_mb_type)){
1876 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1877 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1878 &weight_op[3], &weight_avg[3],
1879 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1880 }else if(IS_SUB_8X4(sub_mb_type)){
1881 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1882 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1883 &weight_op[4], &weight_avg[4],
1884 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1885 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1886 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1887 &weight_op[4], &weight_avg[4],
1888 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1889 }else if(IS_SUB_4X8(sub_mb_type)){
1890 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1891 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1892 &weight_op[5], &weight_avg[5],
1893 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1894 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1895 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1896 &weight_op[5], &weight_avg[5],
1897 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1900 assert(IS_SUB_4X4(sub_mb_type));
1902 int sub_x_offset= x_offset + 2*(j&1);
1903 int sub_y_offset= y_offset + (j&2);
1904 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1905 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1906 &weight_op[6], &weight_avg[6],
1907 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1913 prefetch_motion(h, 1);
1916 static av_cold void decode_init_vlc(void){
1917 static int done = 0;
1924 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1925 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1926 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1927 &chroma_dc_coeff_token_len [0], 1, 1,
1928 &chroma_dc_coeff_token_bits[0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1933 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1934 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1935 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1936 &coeff_token_len [i][0], 1, 1,
1937 &coeff_token_bits[i][0], 1, 1,
1938 INIT_VLC_USE_NEW_STATIC);
1939 offset += coeff_token_vlc_tables_size[i];
1942 * This is a one time safety check to make sure that
1943 * the packed static coeff_token_vlc table sizes
1944 * were initialized correctly.
1946 assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
1949 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1950 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1951 init_vlc(&chroma_dc_total_zeros_vlc[i],
1952 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1953 &chroma_dc_total_zeros_len [i][0], 1, 1,
1954 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1957 for(i=0; i<15; i++){
1958 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1959 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1960 init_vlc(&total_zeros_vlc[i],
1961 TOTAL_ZEROS_VLC_BITS, 16,
1962 &total_zeros_len [i][0], 1, 1,
1963 &total_zeros_bits[i][0], 1, 1,
1964 INIT_VLC_USE_NEW_STATIC);
1968 run_vlc[i].table = run_vlc_tables[i];
1969 run_vlc[i].table_allocated = run_vlc_tables_size;
1970 init_vlc(&run_vlc[i],
1972 &run_len [i][0], 1, 1,
1973 &run_bits[i][0], 1, 1,
1974 INIT_VLC_USE_NEW_STATIC);
1976 run7_vlc.table = run7_vlc_table,
1977 run7_vlc.table_allocated = run7_vlc_table_size;
1978 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1979 &run_len [6][0], 1, 1,
1980 &run_bits[6][0], 1, 1,
1981 INIT_VLC_USE_NEW_STATIC);
1985 static void free_tables(H264Context *h){
1988 av_freep(&h->intra4x4_pred_mode);
1989 av_freep(&h->chroma_pred_mode_table);
1990 av_freep(&h->cbp_table);
1991 av_freep(&h->mvd_table[0]);
1992 av_freep(&h->mvd_table[1]);
1993 av_freep(&h->direct_table);
1994 av_freep(&h->non_zero_count);
1995 av_freep(&h->slice_table_base);
1996 h->slice_table= NULL;
1998 av_freep(&h->mb2b_xy);
1999 av_freep(&h->mb2b8_xy);
2001 for(i = 0; i < MAX_SPS_COUNT; i++)
2002 av_freep(h->sps_buffers + i);
2004 for(i = 0; i < MAX_PPS_COUNT; i++)
2005 av_freep(h->pps_buffers + i);
2007 for(i = 0; i < h->s.avctx->thread_count; i++) {
2008 hx = h->thread_context[i];
2010 av_freep(&hx->top_borders[1]);
2011 av_freep(&hx->top_borders[0]);
2012 av_freep(&hx->s.obmc_scratchpad);
2016 static void init_dequant8_coeff_table(H264Context *h){
2018 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2019 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2020 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2022 for(i=0; i<2; i++ ){
2023 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2024 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2028 for(q=0; q<52; q++){
2029 int shift = ff_div6[q];
2030 int idx = ff_rem6[q];
2032 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2033 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2034 h->pps.scaling_matrix8[i][x]) << shift;
2039 static void init_dequant4_coeff_table(H264Context *h){
2041 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2042 for(i=0; i<6; i++ ){
2043 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2045 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2046 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2053 for(q=0; q<52; q++){
2054 int shift = ff_div6[q] + 2;
2055 int idx = ff_rem6[q];
2057 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2058 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2059 h->pps.scaling_matrix4[i][x]) << shift;
2064 static void init_dequant_tables(H264Context *h){
2066 init_dequant4_coeff_table(h);
2067 if(h->pps.transform_8x8_mode)
2068 init_dequant8_coeff_table(h);
2069 if(h->sps.transform_bypass){
2072 h->dequant4_coeff[i][0][x] = 1<<6;
2073 if(h->pps.transform_8x8_mode)
2076 h->dequant8_coeff[i][0][x] = 1<<6;
2083 * needs width/height
2085 static int alloc_tables(H264Context *h){
2086 MpegEncContext * const s = &h->s;
2087 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2090 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2092 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2093 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2094 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2096 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2097 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2098 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2099 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2101 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2102 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2104 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2105 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2106 for(y=0; y<s->mb_height; y++){
2107 for(x=0; x<s->mb_width; x++){
2108 const int mb_xy= x + y*s->mb_stride;
2109 const int b_xy = 4*x + 4*y*h->b_stride;
2110 const int b8_xy= 2*x + 2*y*h->b8_stride;
2112 h->mb2b_xy [mb_xy]= b_xy;
2113 h->mb2b8_xy[mb_xy]= b8_xy;
2117 s->obmc_scratchpad = NULL;
2119 if(!h->dequant4_coeff[0])
2120 init_dequant_tables(h);
2129 * Mimic alloc_tables(), but for every context thread.
2131 static void clone_tables(H264Context *dst, H264Context *src){
2132 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2133 dst->non_zero_count = src->non_zero_count;
2134 dst->slice_table = src->slice_table;
2135 dst->cbp_table = src->cbp_table;
2136 dst->mb2b_xy = src->mb2b_xy;
2137 dst->mb2b8_xy = src->mb2b8_xy;
2138 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2139 dst->mvd_table[0] = src->mvd_table[0];
2140 dst->mvd_table[1] = src->mvd_table[1];
2141 dst->direct_table = src->direct_table;
2143 dst->s.obmc_scratchpad = NULL;
2144 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2149 * Allocate buffers which are not shared amongst multiple threads.
2151 static int context_init(H264Context *h){
2152 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2153 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2157 return -1; // free_tables will clean up for us
2160 static av_cold void common_init(H264Context *h){
2161 MpegEncContext * const s = &h->s;
2163 s->width = s->avctx->width;
2164 s->height = s->avctx->height;
2165 s->codec_id= s->avctx->codec->id;
2167 ff_h264_pred_init(&h->hpc, s->codec_id);
2169 h->dequant_coeff_pps= -1;
2170 s->unrestricted_mv=1;
2171 s->decode=1; //FIXME
2173 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2174 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2177 static av_cold int decode_init(AVCodecContext *avctx){
2178 H264Context *h= avctx->priv_data;
2179 MpegEncContext * const s = &h->s;
2181 MPV_decode_defaults(s);
2186 s->out_format = FMT_H264;
2187 s->workaround_bugs= avctx->workaround_bugs;
2190 // s->decode_mb= ff_h263_decode_mb;
2191 s->quarter_sample = 1;
2194 if(avctx->codec_id == CODEC_ID_SVQ3)
2195 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2197 avctx->pix_fmt= PIX_FMT_YUV420P;
2201 if(avctx->extradata_size > 0 && avctx->extradata &&
2202 *(char *)avctx->extradata == 1){
2209 h->thread_context[0] = h;
2210 h->outputed_poc = INT_MIN;
2214 static int frame_start(H264Context *h){
2215 MpegEncContext * const s = &h->s;
2218 if(MPV_frame_start(s, s->avctx) < 0)
2220 ff_er_frame_start(s);
2222 * MPV_frame_start uses pict_type to derive key_frame.
2223 * This is incorrect for H.264; IDR markings must be used.
2224 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2225 * See decode_nal_units().
2227 s->current_picture_ptr->key_frame= 0;
2229 assert(s->linesize && s->uvlinesize);
2231 for(i=0; i<16; i++){
2232 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2233 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2236 h->block_offset[16+i]=
2237 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+16+i]=
2239 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2242 /* can't be in alloc_tables because linesize isn't known there.
2243 * FIXME: redo bipred weight to not require extra buffer? */
2244 for(i = 0; i < s->avctx->thread_count; i++)
2245 if(!h->thread_context[i]->s.obmc_scratchpad)
2246 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2248 /* some macroblocks will be accessed before they're available */
2249 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2250 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2252 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2254 // We mark the current picture as non-reference after allocating it, so
2255 // that if we break out due to an error it can be released automatically
2256 // in the next MPV_frame_start().
2257 // SVQ3 as well as most other codecs have only last/next/current and thus
2258 // get released even with set reference, besides SVQ3 and others do not
2259 // mark frames as reference later "naturally".
2260 if(s->codec_id != CODEC_ID_SVQ3)
2261 s->current_picture_ptr->reference= 0;
2263 s->current_picture_ptr->field_poc[0]=
2264 s->current_picture_ptr->field_poc[1]= INT_MAX;
2265 assert(s->current_picture_ptr->long_ref==0);
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271 MpegEncContext * const s = &h->s;
2275 src_cb -= uvlinesize;
2276 src_cr -= uvlinesize;
2278 // There are two lines saved, the line above the the top macroblock of a pair,
2279 // and the line above the bottom macroblock
2280 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2281 for(i=1; i<17; i++){
2282 h->left_border[i]= src_y[15+i* linesize];
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2288 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2289 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2290 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2292 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2293 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2300 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2301 MpegEncContext * const s = &h->s;
2308 if(h->deblocking_filter == 2) {
2310 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2311 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313 deblock_left = (s->mb_x > 0);
2314 deblock_top = (s->mb_y > 0);
2317 src_y -= linesize + 1;
2318 src_cb -= uvlinesize + 1;
2319 src_cr -= uvlinesize + 1;
2321 #define XCHG(a,b,t,xchg)\
2328 for(i = !deblock_top; i<17; i++){
2329 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2334 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2335 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2336 if(s->mb_x+1 < s->mb_width){
2337 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2341 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2343 for(i = !deblock_top; i<9; i++){
2344 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2345 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2350 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2355 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2356 MpegEncContext * const s = &h->s;
2359 src_y -= 2 * linesize;
2360 src_cb -= 2 * uvlinesize;
2361 src_cr -= 2 * uvlinesize;
2363 // There are two lines saved, the line above the the top macroblock of a pair,
2364 // and the line above the bottom macroblock
2365 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2366 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2367 for(i=2; i<34; i++){
2368 h->left_border[i]= src_y[15+i* linesize];
2371 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2372 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2373 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2374 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2376 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2377 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2378 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2379 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2380 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2381 for(i=2; i<18; i++){
2382 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2383 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2385 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2386 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2387 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2388 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2392 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2393 MpegEncContext * const s = &h->s;
2396 int deblock_left = (s->mb_x > 0);
2397 int deblock_top = (s->mb_y > 1);
2399 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2401 src_y -= 2 * linesize + 1;
2402 src_cb -= 2 * uvlinesize + 1;
2403 src_cr -= 2 * uvlinesize + 1;
2405 #define XCHG(a,b,t,xchg)\
2412 for(i = (!deblock_top)<<1; i<34; i++){
2413 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2418 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2419 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2420 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2421 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2422 if(s->mb_x+1 < s->mb_width){
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2428 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2430 for(i = (!deblock_top) << 1; i<18; i++){
2431 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2432 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2437 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2439 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
2444 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2445 MpegEncContext * const s = &h->s;
2446 const int mb_x= s->mb_x;
2447 const int mb_y= s->mb_y;
2448 const int mb_xy= h->mb_xy;
2449 const int mb_type= s->current_picture.mb_type[mb_xy];
2450 uint8_t *dest_y, *dest_cb, *dest_cr;
2451 int linesize, uvlinesize /*dct_offset*/;
2453 int *block_offset = &h->block_offset[0];
2454 const unsigned int bottom = mb_y & 1;
2455 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2456 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2457 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2459 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2460 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2461 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2463 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2464 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2466 if (!simple && MB_FIELD) {
2467 linesize = h->mb_linesize = s->linesize * 2;
2468 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2469 block_offset = &h->block_offset[24];
2470 if(mb_y&1){ //FIXME move out of this function?
2471 dest_y -= s->linesize*15;
2472 dest_cb-= s->uvlinesize*7;
2473 dest_cr-= s->uvlinesize*7;
2477 for(list=0; list<h->list_count; list++){
2478 if(!USES_LIST(mb_type, list))
2480 if(IS_16X16(mb_type)){
2481 int8_t *ref = &h->ref_cache[list][scan8[0]];
2482 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2484 for(i=0; i<16; i+=4){
2485 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2486 int ref = h->ref_cache[list][scan8[i]];
2488 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2494 linesize = h->mb_linesize = s->linesize;
2495 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2496 // dct_offset = s->linesize * 16;
2499 if(transform_bypass){
2501 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2502 }else if(IS_8x8DCT(mb_type)){
2503 idct_dc_add = s->dsp.h264_idct8_dc_add;
2504 idct_add = s->dsp.h264_idct8_add;
2506 idct_dc_add = s->dsp.h264_idct_dc_add;
2507 idct_add = s->dsp.h264_idct_add;
2510 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2511 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2512 int mbt_y = mb_y&~1;
2513 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2514 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2515 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2516 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2519 if (!simple && IS_INTRA_PCM(mb_type)) {
2520 for (i=0; i<16; i++) {
2521 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2523 for (i=0; i<8; i++) {
2524 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2525 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2528 if(IS_INTRA(mb_type)){
2529 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2530 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2532 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2533 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2534 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2537 if(IS_INTRA4x4(mb_type)){
2538 if(simple || !s->encoding){
2539 if(IS_8x8DCT(mb_type)){
2540 for(i=0; i<16; i+=4){
2541 uint8_t * const ptr= dest_y + block_offset[i];
2542 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2543 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2544 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2545 (h->topright_samples_available<<i)&0x4000, linesize);
2547 if(nnz == 1 && h->mb[i*16])
2548 idct_dc_add(ptr, h->mb + i*16, linesize);
2550 idct_add(ptr, h->mb + i*16, linesize);
2554 for(i=0; i<16; i++){
2555 uint8_t * const ptr= dest_y + block_offset[i];
2557 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2560 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2561 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2562 assert(mb_y || linesize <= block_offset[i]);
2563 if(!topright_avail){
2564 tr= ptr[3 - linesize]*0x01010101;
2565 topright= (uint8_t*) &tr;
2567 topright= ptr + 4 - linesize;
2571 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2572 nnz = h->non_zero_count_cache[ scan8[i] ];
2575 if(nnz == 1 && h->mb[i*16])
2576 idct_dc_add(ptr, h->mb + i*16, linesize);
2578 idct_add(ptr, h->mb + i*16, linesize);
2580 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2585 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2587 if(!transform_bypass)
2588 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2590 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2592 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2593 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2595 hl_motion(h, dest_y, dest_cb, dest_cr,
2596 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2597 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2598 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2602 if(!IS_INTRA4x4(mb_type)){
2604 if(IS_INTRA16x16(mb_type)){
2605 for(i=0; i<16; i++){
2606 if(h->non_zero_count_cache[ scan8[i] ])
2607 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2608 else if(h->mb[i*16])
2609 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2612 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2613 for(i=0; i<16; i+=di){
2614 int nnz = h->non_zero_count_cache[ scan8[i] ];
2616 if(nnz==1 && h->mb[i*16])
2617 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2619 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2624 for(i=0; i<16; i++){
2625 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2626 uint8_t * const ptr= dest_y + block_offset[i];
2627 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2633 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2634 uint8_t *dest[2] = {dest_cb, dest_cr};
2635 if(transform_bypass){
2636 idct_add = idct_dc_add = s->dsp.add_pixels4;
2638 idct_add = s->dsp.h264_idct_add;
2639 idct_dc_add = s->dsp.h264_idct_dc_add;
2640 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2641 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2644 for(i=16; i<16+8; i++){
2645 if(h->non_zero_count_cache[ scan8[i] ])
2646 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2647 else if(h->mb[i*16])
2648 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2651 for(i=16; i<16+8; i++){
2652 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2653 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2654 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2660 if(h->deblocking_filter) {
2661 if (!simple && FRAME_MBAFF) {
2662 //FIXME try deblocking one mb at a time?
2663 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2664 const int mb_y = s->mb_y - 1;
2665 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2666 const int mb_xy= mb_x + mb_y*s->mb_stride;
2667 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2668 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2669 if (!bottom) return;
2670 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2671 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2672 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2674 if(IS_INTRA(mb_type_top | mb_type_bottom))
2675 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2677 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2680 s->mb_y--; h->mb_xy -= s->mb_stride;
2681 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2682 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2683 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2684 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2685 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2687 s->mb_y++; h->mb_xy += s->mb_stride;
2688 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2689 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2690 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2691 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2692 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2694 tprintf(h->s.avctx, "call filter_mb\n");
2695 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2696 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2697 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2698 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2699 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2705 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2707 static void hl_decode_mb_simple(H264Context *h){
2708 hl_decode_mb_internal(h, 1);
2712 * Process a macroblock; this handles edge cases, such as interlacing.
2714 static void av_noinline hl_decode_mb_complex(H264Context *h){
2715 hl_decode_mb_internal(h, 0);
2718 static void hl_decode_mb(H264Context *h){
2719 MpegEncContext * const s = &h->s;
2720 const int mb_xy= h->mb_xy;
2721 const int mb_type= s->current_picture.mb_type[mb_xy];
2722 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2723 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2725 if(ENABLE_H264_ENCODER && !s->decode)
2729 hl_decode_mb_complex(h);
2730 else hl_decode_mb_simple(h);
2733 static void pic_as_field(Picture *pic, const int parity){
2735 for (i = 0; i < 4; ++i) {
2736 if (parity == PICT_BOTTOM_FIELD)
2737 pic->data[i] += pic->linesize[i];
2738 pic->reference = parity;
2739 pic->linesize[i] *= 2;
2741 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2744 static int split_field_copy(Picture *dest, Picture *src,
2745 int parity, int id_add){
2746 int match = !!(src->reference & parity);
2750 if(parity != PICT_FRAME){
2751 pic_as_field(dest, parity);
2753 dest->pic_id += id_add;
2760 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2764 while(i[0]<len || i[1]<len){
2765 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2767 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2770 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2771 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2774 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2775 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2782 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2787 best_poc= dir ? INT_MIN : INT_MAX;
2789 for(i=0; i<len; i++){
2790 const int poc= src[i]->poc;
2791 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2793 sorted[out_i]= src[i];
2796 if(best_poc == (dir ? INT_MIN : INT_MAX))
2798 limit= sorted[out_i++]->poc - dir;
2804 * fills the default_ref_list.
2806 static int fill_default_ref_list(H264Context *h){
2807 MpegEncContext * const s = &h->s;
2810 if(h->slice_type_nos==FF_B_TYPE){
2811 Picture *sorted[32];
2816 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2818 cur_poc= s->current_picture_ptr->poc;
2820 for(list= 0; list<2; list++){
2821 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2822 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2824 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2825 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2828 if(len < h->ref_count[list])
2829 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2833 if(lens[0] == lens[1] && lens[1] > 1){
2834 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2836 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2839 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2840 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2842 if(len < h->ref_count[0])
2843 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2846 for (i=0; i<h->ref_count[0]; i++) {
2847 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2849 if(h->slice_type_nos==FF_B_TYPE){
2850 for (i=0; i<h->ref_count[1]; i++) {
2851 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2858 static void print_short_term(H264Context *h);
2859 static void print_long_term(H264Context *h);
2862 * Extract structure information about the picture described by pic_num in
2863 * the current decoding context (frame or field). Note that pic_num is
2864 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2865 * @param pic_num picture number for which to extract structure information
2866 * @param structure one of PICT_XXX describing structure of picture
2868 * @return frame number (short term) or long term index of picture
2869 * described by pic_num
2871 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2872 MpegEncContext * const s = &h->s;
2874 *structure = s->picture_structure;
2877 /* opposite field */
2878 *structure ^= PICT_FRAME;
// Parses ref_pic_list_reordering() syntax from the slice header and reorders
// h->ref_list accordingly, starting from the previously built default lists.
// Returns 0 on success, negative on bitstream error (error-return statements
// are among the lines elided from this excerpt).
// NOTE(review): several closing braces, `break`s and error `return -1`s are
// not visible here; code left byte-identical.
2885 static int decode_ref_pic_list_reordering(H264Context *h){
2886 MpegEncContext * const s = &h->s;
2887 int list, index, pic_structure;
2889 print_short_term(h);
2892 for(list=0; list<h->list_count; list++){
// Start from the default reference list; reordering permutes it in place.
2893 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2895 if(get_bits1(&s->gb)){ // ref_pic_list_reordering_flag
2896 int pred= h->curr_pic_num;
2898 for(index=0; ; index++){
2899 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2900 unsigned int pic_id;
2902 Picture *ref = NULL;
// idc==3 terminates the reordering loop for this list.
2904 if(reordering_of_pic_nums_idc==3)
2907 if(index >= h->ref_count[list]){
2908 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2912 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term reordering via abs_diff_pic_num; idc 2: long-term.
2913 if(reordering_of_pic_nums_idc<2){
2914 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2917 if(abs_diff_pic_num > h->max_pic_num){
2918 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num (power of two).
2922 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2923 else pred+= abs_diff_pic_num;
2924 pred &= h->max_pic_num - 1;
2926 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search the short-term list newest-to-oldest for a matching field/frame.
2928 for(i= h->short_ref_count-1; i>=0; i--){
2929 ref = h->short_ref[i];
2930 assert(ref->reference);
2931 assert(!ref->long_ref);
2933 ref->frame_num == frame_num &&
2934 (ref->reference & pic_structure)
2942 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2944 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2947 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2950 ref = h->long_ref[long_idx];
2951 assert(!(ref && !ref->reference));
2952 if(ref && (ref->reference & pic_structure)){
2953 ref->pic_id= pic_id;
2954 assert(ref->long_ref);
2962 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2963 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Find the picture's current slot (if any), shift intervening entries down,
// then insert the reordered reference at `index`.
2965 for(i=index; i+1<h->ref_count[list]; i++){
2966 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2969 for(; i > index; i--){
2970 h->ref_list[list][i]= h->ref_list[list][i-1];
2972 h->ref_list[list][index]= *ref;
2974 pic_as_field(&h->ref_list[list][index], pic_structure);
2978 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Fill any still-empty slots to avoid dereferencing missing references later.
2984 for(list=0; list<h->list_count; list++){
2985 for(index= 0; index < h->ref_count[list]; index++){
2986 if(!h->ref_list[list][index].data[0]){
2987 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2988 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
// Temporal direct mode needs the distance scale factors precomputed.
2993 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2994 direct_dist_scale_factor(h);
2995 direct_ref_list_init(h);
// For MBAFF frames: derives per-field reference entries (stored at offset
// 16+2*i in ref_list) from each frame reference, and mirrors the prediction
// weights for those field entries.
// NOTE(review): inner loop headers and some closing braces are elided in this
// excerpt; code left byte-identical.
2999 static void fill_mbaff_ref_list(H264Context *h){
3001 for(list=0; list<2; list++){ //FIXME try list_count
3002 for(i=0; i<h->ref_count[list]; i++){
3003 Picture *frame = &h->ref_list[list][i];
3004 Picture *field = &h->ref_list[list][16+2*i];
// Field pictures have double the line stride of the frame they come from.
3007 field[0].linesize[j] <<= 1;
3008 field[0].reference = PICT_TOP_FIELD;
// Bottom field: same as top but data pointers advanced by one frame line.
3009 field[1] = field[0];
3011 field[1].data[j] += frame->linesize[j];
3012 field[1].reference = PICT_BOTTOM_FIELD;
// Both derived fields inherit the frame's explicit weights/offsets.
3014 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3015 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3017 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3018 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Implicit weights are indexed [l1][l0]; duplicate along both axes.
3022 for(j=0; j<h->ref_count[1]; j++){
3023 for(i=0; i<h->ref_count[0]; i++)
3024 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3025 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3026 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses pred_weight_table() from the slice header: explicit luma/chroma
// weights and offsets per reference, for list 0 and (for B slices) list 1.
// Sets h->use_weight / h->use_weight_chroma when any non-default weight is
// seen.
// NOTE(review): some lines (e.g. the statements setting use_weight, chroma
// default-assignment loop headers, final return) are elided in this excerpt;
// code left byte-identical.
3030 static int pred_weight_table(H264Context *h){
3031 MpegEncContext * const s = &h->s;
3033 int luma_def, chroma_def;
3036 h->use_weight_chroma= 0;
3037 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3038 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 in fixed point, i.e. 1 << log2_weight_denom.
3039 luma_def = 1<<h->luma_log2_weight_denom;
3040 chroma_def = 1<<h->chroma_log2_weight_denom;
3042 for(list=0; list<2; list++){
3043 for(i=0; i<h->ref_count[list]; i++){
3044 int luma_weight_flag, chroma_weight_flag;
3046 luma_weight_flag= get_bits1(&s->gb);
3047 if(luma_weight_flag){
3048 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3049 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only a weight differing from the implicit default enables weighting.
3050 if( h->luma_weight[list][i] != luma_def
3051 || h->luma_offset[list][i] != 0)
3054 h->luma_weight[list][i]= luma_def;
3055 h->luma_offset[list][i]= 0;
3059 chroma_weight_flag= get_bits1(&s->gb);
3060 if(chroma_weight_flag){
3063 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3064 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3065 if( h->chroma_weight[list][i][j] != chroma_def
3066 || h->chroma_offset[list][i][j] != 0)
3067 h->use_weight_chroma= 1;
3072 h->chroma_weight[list][i][j]= chroma_def;
3073 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a second (list 1) weight table.
3078 if(h->slice_type_nos != FF_B_TYPE) break;
3080 h->use_weight= h->use_weight || h->use_weight_chroma;
// Computes implicit bi-prediction weights (weighted_bipred_idc == 2) from
// POC distances, per H.264 8.4.2.3.2: weight = 64 - dist_scale_factor,
// clamped to the default 32/32 split outside [-64, 128] or when td == 0.
// NOTE(review): the td==0 branch and early-return path for the symmetric
// single-ref case appear elided in this excerpt; code left byte-identical.
3084 static void implicit_weight_table(H264Context *h){
3085 MpegEncContext * const s = &h->s;
3087 int cur_poc = s->current_picture_ptr->poc;
// Special case: one ref each side, equidistant — default weights apply.
3089 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3090 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3092 h->use_weight_chroma= 0;
// use_weight==2 marks implicit (as opposed to explicit==1) weighting.
3097 h->use_weight_chroma= 2;
3098 h->luma_log2_weight_denom= 5;
3099 h->chroma_log2_weight_denom= 5;
3101 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3102 int poc0 = h->ref_list[0][ref0].poc;
3103 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3104 int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipping and the tx formula follow the spec's fixed-point division.
3105 int td = av_clip(poc1 - poc0, -128, 127);
3107 int tb = av_clip(cur_poc - poc0, -128, 127);
3108 int tx = (16384 + (FFABS(td) >> 1)) / td;
3109 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3110 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3111 h->implicit_weight[ref0][ref1] = 32;
3113 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3115 h->implicit_weight[ref0][ref1] = 32;
// NOTE(review): the return statements and part of the delayed-pic scan are
// elided in this excerpt; code left byte-identical.
3121 * Mark a picture as no longer needed for reference. The refmask
3122 * argument allows unreferencing of individual fields or the whole frame.
3123 * If the picture becomes entirely unreferenced, but is being held for
3124 * display purposes, it is marked as such.
3125 * @param refmask mask of fields to unreference; the mask is bitwise
3126 * anded with the reference marking of pic
3127 * @return non-zero if pic becomes entirely unreferenced (except possibly
3128 * for display purposes) zero if one of the fields remains in
3131 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// If any field bit survives the mask, the picture is still a reference.
3133 if (pic->reference &= refmask) {
// Fully unreferenced: if it is still queued for output, keep it alive with
// the special DELAYED_PIC_REF marking instead of dropping it.
3136 for(i = 0; h->delayed_pic[i]; i++)
3137 if(pic == h->delayed_pic[i]){
3138 pic->reference=DELAYED_PIC_REF;
// IDR handling: drop every short-term and long-term reference and reset the
// frame-number prediction state, as required after an IDR slice.
3146 * instantaneous decoder refresh.
3148 static void idr(H264Context *h){
// Clear all 16 possible long-term slots.
3151 for(i=0; i<16; i++){
3152 remove_long(h, i, 0);
3154 assert(h->long_ref_count==0);
3156 for(i=0; i<h->short_ref_count; i++){
3157 unreference_pic(h, h->short_ref[i], 0);
3158 h->short_ref[i]= NULL;
3160 h->short_ref_count=0;
3161 h->prev_frame_num= 0;
3162 h->prev_frame_num_offset= 0;
3167 /* forget old pics after a seek */
3168 static void flush_dpb(AVCodecContext *avctx){
3169 H264Context *h= avctx->priv_data;
// Drop every delayed-output picture; their reference marking is cleared so
// the underlying frame buffers can be reused.
3171 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3172 if(h->delayed_pic[i])
3173 h->delayed_pic[i]->reference= 0;
3174 h->delayed_pic[i]= NULL;
// INT_MIN: no picture has been output yet after the flush.
3176 h->outputed_poc= INT_MIN;
3178 if(h->s.current_picture_ptr)
3179 h->s.current_picture_ptr->reference= 0;
3180 h->s.first_field= 0;
// Let the generic MPEG layer flush its own state too.
3181 ff_mpeg_flush(avctx);
// NOTE(review): the match-return (`*idx = i; return pic;`) and the trailing
// `return NULL` are elided in this excerpt; code left byte-identical.
3185 * Find a Picture in the short term reference list by frame number.
3186 * @param frame_num frame number to search for
3187 * @param idx the index into h->short_ref where returned picture is found
3188 * undefined if no picture found.
3189 * @return pointer to the found picture, or NULL if no pic with the provided
3190 * frame number is found
3192 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3193 MpegEncContext * const s = &h->s;
// Linear scan; short_ref is small (at most 16 entries).
3196 for(i=0; i<h->short_ref_count; i++){
3197 Picture *pic= h->short_ref[i];
3198 if(s->avctx->debug&FF_DEBUG_MMCO)
3199 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3200 if(pic->frame_num == frame_num) {
3209 * Remove a picture from the short term reference list by its index in
3210 * that list. This does no checking on the provided index; it is assumed
3211 * to be valid. Other list entries are shifted down.
3212 * @param i index into h->short_ref of picture to remove.
3214 static void remove_short_at_index(H264Context *h, int i){
3215 assert(i >= 0 && i < h->short_ref_count);
3216 h->short_ref[i]= NULL;
// Compact the list only if entries remain after the decrement.
3217 if (--h->short_ref_count)
3218 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
// Finds a short-term reference by frame_num and unreferences the fields
// selected by ref_mask; the entry is removed from the list only when the
// picture becomes entirely unreferenced.
// NOTE(review): the `return pic;` at the end is elided in this excerpt.
3223 * @return the removed picture or NULL if an error occurs
3225 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3226 MpegEncContext * const s = &h->s;
3230 if(s->avctx->debug&FF_DEBUG_MMCO)
3231 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3233 pic = find_short(h, frame_num, &i);
3235 if(unreference_pic(h, pic, ref_mask))
3236 remove_short_at_index(h, i);
// NOTE(review): the `return pic;` at the end is elided in this excerpt.
3243 * Remove a picture from the long term reference list by its index in
3245 * @return the removed picture or NULL if an error occurs
3247 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3250 pic= h->long_ref[i];
// Only clear the slot when the picture became entirely unreferenced.
3252 if(unreference_pic(h, pic, ref_mask)){
3253 assert(h->long_ref[i]->long_ref == 1);
3254 h->long_ref[i]->long_ref= 0;
3255 h->long_ref[i]= NULL;
3256 h->long_ref_count--;
// Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO is set.
3264 * print short term list
3266 static void print_short_term(H264Context *h) {
3268 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3269 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3270 for(i=0; i<h->short_ref_count; i++){
3271 Picture *pic= h->short_ref[i];
3272 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
// Debug helper: dumps the (sparse, 16-slot) long-term reference list when
// FF_DEBUG_MMCO is set. The NULL-slot guard is elided in this excerpt.
3278 * print long term list
3280 static void print_long_term(H264Context *h) {
3282 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3283 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3284 for(i = 0; i < 16; i++){
3285 Picture *pic= h->long_ref[i];
3287 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
// Applies the decoded MMCO operations (or, with mmco_count==0, the sliding
// window process) to the short-term and long-term reference lists, then
// handles second-field marking and recovers from reference-count overflows
// in corrupt streams.
// NOTE(review): many case labels (e.g. MMCO_LONG, MMCO_RESET), `break`s and
// closing braces are elided from this excerpt; code left byte-identical.
3294 * Executes the reference picture marking (memory management control operations).
3296 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3297 MpegEncContext * const s = &h->s;
3299 int current_ref_assigned=0;
3302 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3303 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3305 for(i=0; i<mmco_count; i++){
3306 int structure, frame_num;
3307 if(s->avctx->debug&FF_DEBUG_MMCO)
3308 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Resolve the short-term target up front for the two short-pic opcodes.
3310 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3311 || mmco[i].opcode == MMCO_SHORT2LONG){
3312 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3313 pic = find_short(h, frame_num, &j);
// A missing short ref is only OK if a SHORT2LONG already placed the same
// frame at the target long index (duplicate MMCO).
3315 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3316 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3317 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3322 switch(mmco[i].opcode){
3323 case MMCO_SHORT2UNUSED:
3324 if(s->avctx->debug&FF_DEBUG_MMCO)
3325 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME: unreference only the addressed field.
3326 remove_short(h, frame_num, structure ^ PICT_FRAME);
3328 case MMCO_SHORT2LONG:
// Evict any different picture occupying the target long-term slot first.
3329 if (h->long_ref[mmco[i].long_arg] != pic)
3330 remove_long(h, mmco[i].long_arg, 0);
3332 remove_short_at_index(h, j);
3333 h->long_ref[ mmco[i].long_arg ]= pic;
3334 if (h->long_ref[ mmco[i].long_arg ]){
3335 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3336 h->long_ref_count++;
3339 case MMCO_LONG2UNUSED:
3340 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3341 pic = h->long_ref[j];
3343 remove_long(h, j, structure ^ PICT_FRAME);
3344 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3345 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3348 // Comment below left from previous code as it is an interesting note.
3349 /* First field in pair is in short term list or
3350 * at a different long term index.
3351 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3352 * Report the problem and keep the pair where it is,
3353 * and mark this field valid.
// (MMCO_LONG, presumably: assign the current picture to a long-term slot.)
3356 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3357 remove_long(h, mmco[i].long_arg, 0);
3359 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3360 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3361 h->long_ref_count++;
3364 s->current_picture_ptr->reference |= s->picture_structure;
3365 current_ref_assigned=1;
3367 case MMCO_SET_MAX_LONG:
3368 assert(mmco[i].long_arg <= 16);
3369 // just remove the long term which index is greater than new max
3370 for(j = mmco[i].long_arg; j<16; j++){
3371 remove_long(h, j, 0);
// (MMCO_RESET, presumably: empty both lists and restart POC/frame_num.)
3375 while(h->short_ref_count){
3376 remove_short(h, h->short_ref[0]->frame_num, 0);
3378 for(j = 0; j < 16; j++) {
3379 remove_long(h, j, 0);
3381 s->current_picture_ptr->poc=
3382 s->current_picture_ptr->field_poc[0]=
3383 s->current_picture_ptr->field_poc[1]=
3387 s->current_picture_ptr->frame_num= 0;
3393 if (!current_ref_assigned) {
3394 /* Second field of complementary field pair; the first field of
3395 * which is already referenced. If short referenced, it
3396 * should be first entry in short_ref. If not, it must exist
3397 * in long_ref; trying to put it on the short list here is an
3398 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3400 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3401 /* Just mark the second field valid */
3402 s->current_picture_ptr->reference = PICT_FRAME;
3403 } else if (s->current_picture_ptr->long_ref) {
3404 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3405 "assignment for second field "
3406 "in complementary field pair "
3407 "(first field is long term)\n");
// Otherwise insert the current picture at the head of the short-term list.
3409 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3411 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3414 if(h->short_ref_count)
3415 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3417 h->short_ref[0]= s->current_picture_ptr;
3418 h->short_ref_count++;
3419 s->current_picture_ptr->reference |= s->picture_structure;
3423 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3425 /* We have too many reference frames, probably due to corrupted
3426 * stream. Need to discard one frame. Prevents overrun of the
3427 * short_ref and long_ref buffers.
3429 av_log(h->s.avctx, AV_LOG_ERROR,
3430 "number of reference frames exceeds max (probably "
3431 "corrupt input), discarding one\n");
// Prefer discarding a long-term ref only when no short-term refs exist.
3433 if (h->long_ref_count && !h->short_ref_count) {
3434 for (i = 0; i < 16; ++i)
3439 remove_long(h, i, 0);
// Otherwise drop the oldest short-term reference (last list entry).
3441 pic = h->short_ref[h->short_ref_count - 1];
3442 remove_short(h, pic->frame_num, 0);
3446 print_short_term(h);
// Parses dec_ref_pic_marking() from the slice header into h->mmco[]. For IDR
// slices this reads no_output_of_prior_pics / long_term_reference flags; for
// non-IDR slices it reads either explicit MMCO operations or applies the
// sliding-window default.
// NOTE(review): error `return -1`s, the mmco_index bookkeeping and the final
// return are elided in this excerpt; code left byte-identical.
3451 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3452 MpegEncContext * const s = &h->s;
3456 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag, mapped onto broken_link (-1 when clear).
3457 s->broken_link= get_bits1(gb) -1;
3459 h->mmco[0].opcode= MMCO_LONG;
3460 h->mmco[0].long_arg= 0;
3464 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3465 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3466 MMCOOpcode opcode= get_ue_golomb(gb);
3468 h->mmco[i].opcode= opcode;
3469 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 → absolute wrapped pic number.
3470 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3471 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3472 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3476 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3477 unsigned int long_arg= get_ue_golomb(gb);
// Long index must fit 16 frame slots (32 when addressing fields).
3478 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3479 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3482 h->mmco[i].long_arg= long_arg;
3485 if(opcode > (unsigned)MMCO_LONG){
3486 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3489 if(opcode == MMCO_END)
// Sliding window: synthesize a SHORT2UNUSED for the oldest short ref once
// the reference buffer is full (not for an unpaired first field).
3494 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3496 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3497 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3498 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3499 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Field coding: address both fields of the frame being slid out.
3501 if (FIELD_PICTURE) {
3502 h->mmco[0].short_pic_num *= 2;
3503 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3504 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
// Derives the picture order count (POC) for the current picture according to
// the SPS poc_type (0: lsb/msb wraparound, 1: expected-delta cycles,
// 2: derived from frame_num), then stores per-field and frame POC.
// NOTE(review): several lines (field_poc[0] assignment for type 0, the
// poc_type==2 tail, final return) are elided in this excerpt; code left
// byte-identical.
3514 static int init_poc(H264Context *h){
3515 MpegEncContext * const s = &h->s;
3516 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3518 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture → bump the offset.
3520 h->frame_num_offset= h->prev_frame_num_offset;
3521 if(h->frame_num < h->prev_frame_num)
3522 h->frame_num_offset += max_frame_num;
3524 if(h->sps.poc_type==0){
3525 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Standard poc_msb wraparound detection (H.264 8.2.1.1).
3527 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3528 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3529 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3530 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3532 h->poc_msb = h->prev_poc_msb;
3533 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3535 field_poc[1] = h->poc_msb + h->poc_lsb;
3536 if(s->picture_structure == PICT_FRAME)
3537 field_poc[1] += h->delta_poc_bottom;
3538 }else if(h->sps.poc_type==1){
3539 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3542 if(h->sps.poc_cycle_length != 0)
3543 abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures are counted one earlier in the cycle.
3547 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3550 expected_delta_per_poc_cycle = 0;
3551 for(i=0; i < h->sps.poc_cycle_length; i++)
3552 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3554 if(abs_frame_num > 0){
3555 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3556 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3558 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3559 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3560 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3564 if(h->nal_ref_idc == 0)
3565 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3567 field_poc[0] = expectedpoc + h->delta_poc[0];
3568 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3570 if(s->picture_structure == PICT_FRAME)
3571 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC follows directly from decoding order.
3573 int poc= 2*(h->frame_num_offset + h->frame_num);
// Store only the field(s) actually present in this picture.
3582 if(s->picture_structure != PICT_BOTTOM_FIELD)
3583 s->current_picture_ptr->field_poc[0]= field_poc[0];
3584 if(s->picture_structure != PICT_TOP_FIELD)
3585 s->current_picture_ptr->field_poc[1]= field_poc[1];
3586 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
// Builds the zigzag/field scan tables, permuting them when the DSP IDCT uses
// a transposed coefficient layout (detected by comparing against the plain C
// IDCT implementations), and selects the qp==0 (transform-bypass) variants.
3593 * initialize scan tables
3595 static void init_scan_tables(H264Context *h){
3596 MpegEncContext * const s = &h->s;
// C IDCT uses the canonical layout — copy the tables verbatim.
3598 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3599 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3600 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
// Otherwise transpose each 4x4 position: swap row and column nibbles.
3602 for(i=0; i<16; i++){
3603 #define T(x) (x>>2) | ((x<<2) & 0xF)
3604 h->zigzag_scan[i] = T(zigzag_scan[i]);
3605 h-> field_scan[i] = T( field_scan[i]);
3609 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3610 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3611 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3612 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3613 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
// 8x8 transpose: swap the 3-bit row and column indices.
3615 for(i=0; i<64; i++){
3616 #define T(x) (x>>3) | ((x&7)<<3)
3617 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3618 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3619 h->field_scan8x8[i] = T(field_scan8x8[i]);
3620 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// Transform bypass must always use the untransposed (spec-order) tables.
3624 if(h->sps.transform_bypass){ //FIXME same ugly
3625 h->zigzag_scan_q0 = zigzag_scan;
3626 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3627 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3628 h->field_scan_q0 = field_scan;
3629 h->field_scan8x8_q0 = field_scan8x8;
3630 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3632 h->zigzag_scan_q0 = h->zigzag_scan;
3633 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3634 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3635 h->field_scan_q0 = h->field_scan;
3636 h->field_scan8x8_q0 = h->field_scan8x8;
3637 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
// Copies the per-frame decoding state the slice threads need (current picture
// pointers, POC/frame_num prediction state, reference lists, dequant tables)
// from the master context into a worker context. Shallow copies only — the
// Picture data buffers themselves are shared.
3642 * Replicates H264 "master" context to thread contexts.
3644 static void clone_slice(H264Context *dst, H264Context *src)
3646 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3647 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3648 dst->s.current_picture = src->s.current_picture;
3649 dst->s.linesize = src->s.linesize;
3650 dst->s.uvlinesize = src->s.uvlinesize;
3651 dst->s.first_field = src->s.first_field;
3653 dst->prev_poc_msb = src->prev_poc_msb;
3654 dst->prev_poc_lsb = src->prev_poc_lsb;
3655 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3656 dst->prev_frame_num = src->prev_frame_num;
3657 dst->short_ref_count = src->short_ref_count;
3659 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3660 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3661 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3662 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3664 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3665 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
// NOTE(review): this excerpt of decode_slice_header() is heavily elided —
// error `return -1`s, many closing braces/else branches, and several
// statements are missing. Code left byte-identical; comments added only
// where the visible lines ground them.
3669 * decodes a slice header.
3670 * This will also call MPV_common_init() and frame_start() as needed.
3672 * @param h h264context
3673 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3675 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3677 static int decode_slice_header(H264Context *h, H264Context *h0){
3678 MpegEncContext * const s = &h->s;
3679 MpegEncContext * const s0 = &h0->s;
3680 unsigned int first_mb_in_slice;
3681 unsigned int pps_id;
3682 int num_ref_idx_active_override_flag;
3683 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3684 unsigned int slice_type, tmp, i, j;
3685 int default_ref_list_done = 0;
3686 int last_pic_structure;
// Non-reference pictures may be dropped without harming later decoding.
3688 s->dropable= h->nal_ref_idc == 0;
// FAST flag: cheaper 2-tap qpel interpolation for non-reference pictures.
3690 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3691 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3692 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3694 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3695 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3698 first_mb_in_slice= get_ue_golomb(&s->gb);
// first_mb_in_slice==0 in chunked mode marks the start of a new picture.
3700 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3701 h0->current_slice = 0;
3702 if (!s0->first_field)
3703 s->current_picture_ptr= NULL;
3706 slice_type= get_ue_golomb(&s->gb);
3708 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type values 5..9 mean "fixed for the whole picture" (value - 5).
3713 h->slice_type_fixed=1;
3715 h->slice_type_fixed=0;
3717 slice_type= slice_type_map[ slice_type ];
// The default ref list only needs rebuilding when the slice type changes.
3718 if (slice_type == FF_I_TYPE
3719 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3720 default_ref_list_done = 1;
3722 h->slice_type= slice_type;
3723 h->slice_type_nos= slice_type & 3;
3725 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3726 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3727 av_log(h->s.avctx, AV_LOG_ERROR,
3728 "B picture before any references, skipping\n");
3732 pps_id= get_ue_golomb(&s->gb);
3733 if(pps_id>=MAX_PPS_COUNT){
3734 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3737 if(!h0->pps_buffers[pps_id]) {
3738 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3741 h->pps= *h0->pps_buffers[pps_id];
3743 if(!h0->sps_buffers[h->pps.sps_id]) {
3744 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3747 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS; rebuild on PPS change (master only).
3749 if(h == h0 && h->dequant_coeff_pps != pps_id){
3750 h->dequant_coeff_pps = pps_id;
3751 init_dequant_tables(h);
3754 s->mb_width= h->sps.mb_width;
3755 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3757 h->b_stride= s->mb_width*4;
3758 h->b8_stride= s->mb_width*2;
// Apply SPS cropping; bottom crop doubles for field-coded content.
3760 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3761 if(h->sps.frame_mbs_only_flag)
3762 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3764 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3766 if (s->context_initialized
3767 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3769 return -1; // width / height changed during parallelized decoding
3773 if (!s->context_initialized) {
3775 return -1; // we cant (re-)initialize context during parallel decoding
3776 if (MPV_common_init(s) < 0)
3780 init_scan_tables(h);
// Set up one H264Context per worker thread, sharing the MpegEncContext
// thread contexts allocated by MPV_common_init().
3783 for(i = 1; i < s->avctx->thread_count; i++) {
3785 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3786 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3787 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3790 init_scan_tables(c);
3794 for(i = 0; i < s->avctx->thread_count; i++)
3795 if(context_init(h->thread_context[i]) < 0)
3798 s->avctx->width = s->width;
3799 s->avctx->height = s->height;
3800 s->avctx->sample_aspect_ratio= h->sps.sar;
3801 if(!s->avctx->sample_aspect_ratio.den)
3802 s->avctx->sample_aspect_ratio.den = 1;
3804 if(h->sps.timing_info_present_flag){
3805 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Old x264 builds (< 44) wrote half the correct time_scale.
3806 if(h->x264_build > 0 && h->x264_build < 44)
3807 s->avctx->time_base.den *= 2;
3808 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3809 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3813 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3816 h->mb_aff_frame = 0;
3817 last_pic_structure = s0->picture_structure;
3818 if(h->sps.frame_mbs_only_flag){
3819 s->picture_structure= PICT_FRAME;
3821 if(get_bits1(&s->gb)) { //field_pic_flag
3822 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3824 s->picture_structure= PICT_FRAME;
3825 h->mb_aff_frame = h->sps.mb_aff;
3828 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3830 if(h0->current_slice == 0){
// Conceal gaps in frame_num by synthesizing the missing reference frames.
3831 while(h->frame_num != h->prev_frame_num &&
3832 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3833 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3835 h->prev_frame_num++;
3836 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3837 s->current_picture_ptr->frame_num= h->prev_frame_num;
3838 execute_ref_pic_marking(h, NULL, 0);
3841 /* See if we have a decoded first field looking for a pair... */
3842 if (s0->first_field) {
3843 assert(s0->current_picture_ptr);
3844 assert(s0->current_picture_ptr->data[0]);
3845 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3847 /* figure out if we have a complementary field pair */
3848 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3850 * Previous field is unmatched. Don't display it, but let it
3851 * remain for reference if marked as such.
3853 s0->current_picture_ptr = NULL;
3854 s0->first_field = FIELD_PICTURE;
3857 if (h->nal_ref_idc &&
3858 s0->current_picture_ptr->reference &&
3859 s0->current_picture_ptr->frame_num != h->frame_num) {
3861 * This and previous field were reference, but had
3862 * different frame_nums. Consider this field first in
3863 * pair. Throw away previous field except for reference
3866 s0->first_field = 1;
3867 s0->current_picture_ptr = NULL;
3870 /* Second field in complementary pair */
3871 s0->first_field = 0;
3876 /* Frame or first field in a potentially complementary pair */
3877 assert(!s0->current_picture_ptr);
3878 s0->first_field = FIELD_PICTURE;
3881 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3882 s0->first_field = 0;
3889 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3891 assert(s->mb_num == s->mb_width * s->mb_height);
3892 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3893 first_mb_in_slice >= s->mb_num){
3894 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3897 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3898 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
// Bottom fields start on odd macroblock rows.
3899 if (s->picture_structure == PICT_BOTTOM_FIELD)
3900 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3901 assert(s->mb_y < s->mb_height);
// Field pictures use doubled pic numbers (one extra bit).
3903 if(s->picture_structure==PICT_FRAME){
3904 h->curr_pic_num= h->frame_num;
3905 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3907 h->curr_pic_num= 2*h->frame_num + 1;
3908 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3911 if(h->nal_unit_type == NAL_IDR_SLICE){
3912 get_ue_golomb(&s->gb); /* idr_pic_id */
3915 if(h->sps.poc_type==0){
3916 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3918 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3919 h->delta_poc_bottom= get_se_golomb(&s->gb);
3923 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3924 h->delta_poc[0]= get_se_golomb(&s->gb);
3926 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3927 h->delta_poc[1]= get_se_golomb(&s->gb);
3932 if(h->pps.redundant_pic_cnt_present){
3933 h->redundant_pic_count= get_ue_golomb(&s->gb);
3936 //set defaults, might be overridden a few lines later
3937 h->ref_count[0]= h->pps.ref_count[0];
3938 h->ref_count[1]= h->pps.ref_count[1];
3940 if(h->slice_type_nos != FF_I_TYPE){
3941 if(h->slice_type_nos == FF_B_TYPE){
3942 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3944 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3946 if(num_ref_idx_active_override_flag){
3947 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3948 if(h->slice_type_nos==FF_B_TYPE)
3949 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned trick: catches both 0 and >32 in one comparison.
3951 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3952 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3953 h->ref_count[0]= h->ref_count[1]= 1;
3957 if(h->slice_type_nos == FF_B_TYPE)
3964 if(!default_ref_list_done){
3965 fill_default_ref_list(h);
3968 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// Explicit weights for weighted P / weighted-bipred-1 B; implicit for
// weighted_bipred_idc == 2.
3971 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3972 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3973 pred_weight_table(h);
3974 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3975 implicit_weight_table(h);
// Ref pic marking always operates on the master context.
3980 decode_ref_pic_marking(h0, &s->gb);
3983 fill_mbaff_ref_list(h);
3985 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3986 tmp = get_ue_golomb(&s->gb);
3988 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3991 h->cabac_init_idc= tmp;
3994 h->last_qscale_diff = 0;
3995 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3997 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4001 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4002 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4003 //FIXME qscale / qp ... stuff
4004 if(h->slice_type == FF_SP_TYPE){
4005 get_bits1(&s->gb); /* sp_for_switch_flag */
4007 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4008 get_se_golomb(&s->gb); /* slice_qs_delta */
4011 h->deblocking_filter = 1;
4012 h->slice_alpha_c0_offset = 0;
4013 h->slice_beta_offset = 0;
4014 if( h->pps.deblocking_filter_parameters_present ) {
4015 tmp= get_ue_golomb(&s->gb);
4017 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// Bitstream uses 0=on/1=off for idc<2; internal convention is inverted.
4020 h->deblocking_filter= tmp;
4021 if(h->deblocking_filter < 2)
4022 h->deblocking_filter^= 1; // 1<->0
4024 if( h->deblocking_filter ) {
4025 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4026 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4030 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4031 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4032 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4033 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4034 h->deblocking_filter= 0;
// Cross-slice deblocking (type 1) cannot run in parallel contexts.
4036 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4037 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4038 /* Cheat slightly for speed:
4039 Do not bother to deblock across slices. */
4040 h->deblocking_filter = 2;
4042 h0->max_contexts = 1;
4043 if(!h0->single_decode_warning) {
4044 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4045 h0->single_decode_warning = 1;
4048 return 1; // deblocking switched inside frame
// NOTE(review): the `?` bit-count below is from an unfinished/disabled code
// path in the original source (slice group support was not implemented).
4053 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4054 slice_group_change_cycle= get_bits(&s->gb, ?);
4057 h0->last_slice_type = slice_type;
4058 h->slice_num = ++h0->current_slice;
// ref2frm maps a ref index to 4*frame_num + reference-field bits, used by
// the deblocking filter to compare references across slices.
4061 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4065 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4066 +(h->ref_list[j][i].reference&3);
4069 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4070 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4072 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4073 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4075 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4077 av_get_pict_type_char(h->slice_type),
4078 pps_id, h->frame_num,
4079 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4080 h->ref_count[0], h->ref_count[1],
4082 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4084 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4085 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the unary "level_prefix" element used by CAVLC level decoding:
 * loads the bit cache and locates the leading 1 bit via av_log2(), then
 * skips those bits from the bitstream reader.
 *
 * NOTE(review): this capture has the file's original line numbers fused
 * into each line and several lines elided (local declarations, trace
 * guards and the return statement are missing); tokens are preserved
 * byte-identically below. Presumably the function returns log-1 — confirm
 * against the full file.
 */
4095 static inline int get_level_prefix(GetBitContext *gb){
4099 OPEN_READER(re, gb);
4100 UPDATE_CACHE(re, gb);
4101 buf=GET_CACHE(re, gb);
// log = 32 - floor(log2(buf)): number of bits up to and including the
// leading 1 in the 32-bit cache.
4103 log= 32 - av_log2(buf);
// Trace output (presumably compiled only under a TRACE #ifdef elided here).
4105 print_bin(buf>>(32-log), log);
4106 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix (the run of zeros plus the terminating 1 bit).
4109 LAST_SKIP_BITS(re, gb, log);
4110 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current
 * macroblock's sub-partitions: a sub_mb_type disqualifies it when it is
 * not SUB_8X8, or when it is DIRECT while direct_8x8_inference is off.
 *
 * NOTE(review): the surrounding loop header and the return statement are
 * elided in this capture; tokens below are byte-identical to the original.
 */
4115 static inline int get_dct8x8_allowed(H264Context *h){
4118 if(!IS_SUB_8X8(h->sub_mb_type[i])
4119 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4126 * decodes a residual block.
4127 * @param n block index
4128 * @param scantable scantable
4129 * @param max_coeff number of coefficients in the block
4130 * @return <0 if an error occurred
/**
 * Decodes one CAVLC residual block (H.264 spec section 9.2):
 * coeff_token -> trailing one signs -> remaining levels (with adaptive
 * suffix length) -> total_zeros -> run_before, then writes the levels into
 * `block` in scan order, dequantizing with `qmul` when non-NULL.
 *
 * NOTE(review): this capture has original line numbers fused into the code
 * and many lines elided (else branches, closing braces, returns); tokens
 * are preserved byte-identically with only comments added.
 */
4132 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4133 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of the 4 coeff_token VLCs.
4134 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4136 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4138 //FIXME put trailing_onex into the context
// --- coeff_token: chroma DC uses its own VLC; luma DC and AC blocks pick
// a VLC based on the predicted non-zero count of neighbours. ---
4140 if(n == CHROMA_DC_BLOCK_INDEX){
4141 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4142 total_coeff= coeff_token>>2;
4144 if(n == LUMA_DC_BLOCK_INDEX){
4145 total_coeff= pred_non_zero_count(h, 0);
4146 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4147 total_coeff= coeff_token>>2;
4149 total_coeff= pred_non_zero_count(h, n);
4150 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4151 total_coeff= coeff_token>>2;
4152 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4156 //FIXME set last_non_zero?
4160 if(total_coeff > (unsigned)max_coeff) {
4161 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// --- trailing ones: low 2 bits of coeff_token; each is +/-1 with 1 sign bit. ---
4165 trailing_ones= coeff_token&3;
4166 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4167 assert(total_coeff<=16);
4169 for(i=0; i<trailing_ones; i++){
4170 level[i]= 1 - 2*get_bits1(gb);
// --- first non-trailing-one level: suffix_length starts at 0 or 1. ---
4174 int level_code, mask;
4175 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4176 int prefix= get_level_prefix(gb);
4178 //first coefficient has suffix_length equal to 0 or 1
4179 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4181 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4183 level_code= (prefix<<suffix_length); //part
4184 }else if(prefix==14){
4186 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4188 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape coding, suffix carries prefix-3 bits.
4190 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4191 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4193 level_code += (1<<(prefix-3))-4096;
// When fewer than 3 trailing ones exist, levels |1| are impossible here,
// so the code space is shifted by 2.
4196 if(trailing_ones < 3) level_code += 2;
// Branchless sign decode: even level_code -> positive, odd -> negative.
4201 mask= -(level_code&1);
4202 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4205 //remaining coefficients have suffix_length > 0
4206 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is bumped (elided increment presumed).
4207 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4208 prefix = get_level_prefix(gb);
4210 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4212 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4214 level_code += (1<<(prefix-3))-4096;
4216 mask= -(level_code&1);
4217 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4218 if(level_code > suffix_limit[suffix_length])
// --- total_zeros: skipped when the block is already full. ---
4223 if(total_coeff == max_coeff)
4226 if(n == CHROMA_DC_BLOCK_INDEX)
4227 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4229 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- write-back, from the last coefficient backwards through run_before.
// First variant (presumably qmul==NULL): raw levels. ---
4232 coeff_num = zeros_left + total_coeff - 1;
4233 j = scantable[coeff_num];
4235 block[j] = level[0];
4236 for(i=1;i<total_coeff;i++) {
4239 else if(zeros_left < 7){
4240 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4242 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4244 zeros_left -= run_before;
4245 coeff_num -= 1 + run_before;
4246 j= scantable[ coeff_num ];
// Second variant: dequantize with qmul, rounding by +32 then >>6.
4251 block[j] = (level[0] * qmul[j] + 32)>>6;
4252 for(i=1;i<total_coeff;i++) {
4255 else if(zeros_left < 7){
4256 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4258 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4260 zeros_left -= run_before;
4261 coeff_num -= 1 + run_before;
4262 j= scantable[ coeff_num ];
4264 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the bitstream was corrupt.
4269 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field-decoding flag for a skipped macroblock from the
 * left neighbour (if in the same slice), else from the top neighbour.
 *
 * NOTE(review): the final fallback operand of the conditional chain and the
 * closing brace are elided in this capture; tokens preserved byte-identically.
 */
4276 static void predict_field_decoding_flag(H264Context *h){
4277 MpegEncContext * const s = &h->s;
4278 const int mb_xy= h->mb_xy;
4279 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4280 ? s->current_picture.mb_type[mb_xy-1]
4281 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4282 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// Field decoding is inherited when the chosen neighbour is interlaced.
4284 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4288 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears coefficient counts, builds
 * the skip mb_type, derives motion (direct prediction for B, P-skip motion
 * prediction for P), and writes the result back to the picture arrays.
 *
 * NOTE(review): this capture elides several lines (mb_type initialization,
 * else branches, closing braces); tokens preserved byte-identically.
 */
4290 static void decode_mb_skip(H264Context *h){
4291 MpegEncContext * const s = &h->s;
4292 const int mb_xy= h->mb_xy;
// No residual in a skipped MB: zero the per-MB and cached non-zero counts.
4295 memset(h->non_zero_count[mb_xy], 0, 16);
4296 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4299 mb_type|= MB_TYPE_INTERLACED;
4301 if( h->slice_type_nos == FF_B_TYPE )
4303 // just for fill_caches. pred_direct_motion will set the real mb_type
4304 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4306 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4307 pred_direct_motion(h, &mb_type);
4308 mb_type|= MB_TYPE_SKIP;
// P-skip path: 16x16 with predicted motion and reference index 0.
4313 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4315 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4316 pred_pskip_motion(h, &mx, &my);
4317 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4318 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4321 write_back_motion(h, mb_type);
4322 s->current_picture.mb_type[mb_xy]= mb_type;
4323 s->current_picture.qscale_table[mb_xy]= s->qscale;
4324 h->slice_table[ mb_xy ]= h->slice_num;
// Remember the skip so the next MB's dquant context resets (used by CABAC).
4325 h->prev_mb_skipped= 1;
4329 * decodes a macroblock
4330 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock from a CAVLC-coded slice: skip-run handling,
 * mb_type, intra prediction modes or inter motion/reference data, coded
 * block pattern, dquant, and all residual blocks via decode_residual().
 *
 * NOTE(review): this capture has the file's original line numbers fused
 * into the code and a large number of lines elided (declarations, else
 * branches, returns, closing braces); tokens are preserved byte-identically
 * with only comments added. All descriptions below are hedged accordingly.
 */
4332 static int decode_mb_cavlc(H264Context *h){
4333 MpegEncContext * const s = &h->s;
4335 int partition_count;
4336 unsigned int mb_type, cbp;
4337 int dct8x8_allowed= h->pps.transform_8x8_mode;
4339 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4341 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4343 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4344 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip run: mb_skip_run is read once and decremented per MB. ---
4346 if(h->slice_type_nos != FF_I_TYPE){
4347 if(s->mb_skip_run==-1)
4348 s->mb_skip_run= get_ue_golomb(&s->gb);
4350 if (s->mb_skip_run--) {
4351 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4352 if(s->mb_skip_run==0)
4353 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4355 predict_field_decoding_flag(h);
4362 if( (s->mb_y&1) == 0 )
4363 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4366 h->prev_mb_skipped= 0;
// --- mb_type: ue(v), then mapped through the per-slice-type info tables.
// Values past the inter range fall through to the intra tables. ---
4368 mb_type= get_ue_golomb(&s->gb);
4369 if(h->slice_type_nos == FF_B_TYPE){
4371 partition_count= b_mb_type_info[mb_type].partition_count;
4372 mb_type= b_mb_type_info[mb_type].type;
4375 goto decode_intra_mb;
4377 }else if(h->slice_type_nos == FF_P_TYPE){
4379 partition_count= p_mb_type_info[mb_type].partition_count;
4380 mb_type= p_mb_type_info[mb_type].type;
4383 goto decode_intra_mb;
4386 assert(h->slice_type_nos == FF_I_TYPE);
4387 if(h->slice_type == FF_SI_TYPE && mb_type)
4391 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4395 cbp= i_mb_type_info[mb_type].cbp;
4396 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4397 mb_type= i_mb_type_info[mb_type].type;
4401 mb_type |= MB_TYPE_INTERLACED;
4403 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw byte-aligned samples copied straight into h->mb. ---
4405 if(IS_INTRA_PCM(mb_type)){
4408 // We assume these blocks are very rare so we do not optimize it.
4409 align_get_bits(&s->gb);
4411 // The pixels are stored in the same order as levels in h->mb array.
4412 for(x=0; x < (CHROMA ? 384 : 256); x++){
4413 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4416 // In deblocking, the quantizer is 0
4417 s->current_picture.qscale_table[mb_xy]= 0;
4418 // All coeffs are present
4419 memset(h->non_zero_count[mb_xy], 16, 16);
4421 s->current_picture.mb_type[mb_xy]= mb_type;
// Presumably MBAFF field mode: ref counts are doubled while decoding,
// then halved again at the end of this function.
4426 h->ref_count[0] <<= 1;
4427 h->ref_count[1] <<= 1;
4430 fill_caches(h, mb_type, 0);
// --- intra: 4x4 prediction modes (1 bit "use predicted" or 3-bit
// remainder), or validated 16x16 mode, plus chroma prediction mode. ---
4433 if(IS_INTRA(mb_type)){
4435 // init_top_left_availability(h);
4436 if(IS_INTRA4x4(mb_type)){
4439 if(dct8x8_allowed && get_bits1(&s->gb)){
4440 mb_type |= MB_TYPE_8x8DCT;
4444 // fill_intra4x4_pred_table(h);
4445 for(i=0; i<16; i+=di){
4446 int mode= pred_intra_mode(h, i);
4448 if(!get_bits1(&s->gb)){
4449 const int rem_mode= get_bits(&s->gb, 3);
// rem_mode skips over the predicted mode, giving 8 alternatives.
4450 mode = rem_mode + (rem_mode >= mode);
4454 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4456 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4458 write_back_intra_pred_mode(h);
4459 if( check_intra4x4_pred_mode(h) < 0)
4462 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4463 if(h->intra16x16_pred_mode < 0)
4467 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4470 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: sub_mb_types, then refs and MVs per sub-partition. ---
4472 }else if(partition_count==4){
4473 int i, j, sub_partition_count[4], list, ref[2][4];
4475 if(h->slice_type_nos == FF_B_TYPE){
4477 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4478 if(h->sub_mb_type[i] >=13){
4479 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4482 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4483 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4485 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4486 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4487 pred_direct_motion(h, &mb_type);
// Mark direct sub-blocks unavailable so they are not used as predictors.
4488 h->ref_cache[0][scan8[4]] =
4489 h->ref_cache[1][scan8[4]] =
4490 h->ref_cache[0][scan8[12]] =
4491 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4494 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4496 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4497 if(h->sub_mb_type[i] >=4){
4498 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4501 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4502 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and 8x8 partition (te(v) bounded by ref_count).
4506 for(list=0; list<h->list_count; list++){
4507 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4509 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4510 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4511 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4513 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4525 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per sub-partition, spread into mv_cache per block shape.
4527 for(list=0; list<h->list_count; list++){
4529 if(IS_DIRECT(h->sub_mb_type[i])) {
4530 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4533 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4534 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4536 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4537 const int sub_mb_type= h->sub_mb_type[i];
4538 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4539 for(j=0; j<sub_partition_count[i]; j++){
4541 const int index= 4*i + block_width*j;
4542 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4543 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4544 mx += get_se_golomb(&s->gb);
4545 my += get_se_golomb(&s->gb);
4546 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4548 if(IS_SUB_8X8(sub_mb_type)){
4550 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4552 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4553 }else if(IS_SUB_8X4(sub_mb_type)){
4554 mv_cache[ 1 ][0]= mx;
4555 mv_cache[ 1 ][1]= my;
4556 }else if(IS_SUB_4X8(sub_mb_type)){
4557 mv_cache[ 8 ][0]= mx;
4558 mv_cache[ 8 ][1]= my;
4560 mv_cache[ 0 ][0]= mx;
4561 mv_cache[ 0 ][1]= my;
4564 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- direct 16x16 or whole-MB inter (16x16 / 16x8 / 8x16). ---
4570 }else if(IS_DIRECT(mb_type)){
4571 pred_direct_motion(h, &mb_type);
4572 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4574 int list, mx, my, i;
4575 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4576 if(IS_16X16(mb_type)){
4577 for(list=0; list<h->list_count; list++){
4579 if(IS_DIR(mb_type, 0, list)){
4580 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4581 if(val >= h->ref_count[list]){
4582 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4586 val= LIST_NOT_USED&0xFF;
4587 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4589 for(list=0; list<h->list_count; list++){
4591 if(IS_DIR(mb_type, 0, list)){
4592 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4593 mx += get_se_golomb(&s->gb);
4594 my += get_se_golomb(&s->gb);
4595 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4597 val= pack16to32(mx,my);
4600 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4603 else if(IS_16X8(mb_type)){
4604 for(list=0; list<h->list_count; list++){
4607 if(IS_DIR(mb_type, i, list)){
4608 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4609 if(val >= h->ref_count[list]){
4610 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4614 val= LIST_NOT_USED&0xFF;
4615 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4618 for(list=0; list<h->list_count; list++){
4621 if(IS_DIR(mb_type, i, list)){
4622 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4623 mx += get_se_golomb(&s->gb);
4624 my += get_se_golomb(&s->gb);
4625 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4627 val= pack16to32(mx,my);
4630 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4634 assert(IS_8X16(mb_type));
4635 for(list=0; list<h->list_count; list++){
4638 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4639 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4640 if(val >= h->ref_count[list]){
4641 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4645 val= LIST_NOT_USED&0xFF;
4646 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4649 for(list=0; list<h->list_count; list++){
4652 if(IS_DIR(mb_type, i, list)){
4653 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4654 mx += get_se_golomb(&s->gb);
4655 my += get_se_golomb(&s->gb);
4656 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4658 val= pack16to32(mx,my);
4661 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4667 if(IS_INTER(mb_type))
4668 write_back_motion(h, mb_type);
// --- coded block pattern (only coded explicitly outside intra16x16). ---
4670 if(!IS_INTRA16x16(mb_type)){
4671 cbp= get_ue_golomb(&s->gb);
4673 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4678 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4679 else cbp= golomb_to_inter_cbp [cbp];
// Gray (chroma-less) variants — presumably a monochrome build path.
4681 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4682 else cbp= golomb_to_inter_cbp_gray[cbp];
4687 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4688 if(get_bits1(&s->gb)){
4689 mb_type |= MB_TYPE_8x8DCT;
4690 h->cbp_table[mb_xy]= cbp;
4693 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: dquant, scan-table selection, then per-block CAVLC. ---
4695 if(cbp || IS_INTRA16x16(mb_type)){
4696 int i8x8, i4x4, chroma_idx;
4698 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4699 const uint8_t *scan, *scan8x8, *dc_scan;
4701 // fill_non_zero_count_cache(h);
4703 if(IS_INTERLACED(mb_type)){
4704 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4705 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4706 dc_scan= luma_dc_field_scan;
4708 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4709 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4710 dc_scan= luma_dc_zigzag_scan;
4713 dquant= get_se_golomb(&s->gb);
4715 if( dquant > 25 || dquant < -26 ){
4716 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the H.264 QP update rule.
4720 s->qscale += dquant;
4721 if(((unsigned)s->qscale) > 51){
4722 if(s->qscale<0) s->qscale+= 52;
4723 else s->qscale-= 52;
4726 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4727 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4728 if(IS_INTRA16x16(mb_type)){
4729 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4730 return -1; //FIXME continue if partitioned and other return -1 too
4733 assert((cbp&15) == 0 || (cbp&15) == 15);
4736 for(i8x8=0; i8x8<4; i8x8++){
4737 for(i4x4=0; i4x4<4; i4x4++){
4738 const int index= i4x4 + 4*i8x8;
// Intra16x16 AC blocks: 15 coeffs, DC handled separately above.
4739 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4745 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4748 for(i8x8=0; i8x8<4; i8x8++){
4749 if(cbp & (1<<i8x8)){
4750 if(IS_8x8DCT(mb_type)){
4751 DCTELEM *buf = &h->mb[64*i8x8];
4753 for(i4x4=0; i4x4<4; i4x4++){
4754 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4755 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4758 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4759 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4761 for(i4x4=0; i4x4<4; i4x4++){
4762 const int index= i4x4 + 4*i8x8;
4764 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4770 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4771 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2, no dequant table) then chroma AC blocks.
4777 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4778 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4784 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4785 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4786 for(i4x4=0; i4x4<4; i4x4++){
4787 const int index= 16 + 4*chroma_idx + i4x4;
4788 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4794 uint8_t * const nnz= &h->non_zero_count_cache[0];
4795 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4796 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4799 uint8_t * const nnz= &h->non_zero_count_cache[0];
4800 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4801 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4802 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4804 s->current_picture.qscale_table[mb_xy]= s->qscale;
4805 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling done earlier in this function.
4808 h->ref_count[0] >>= 1;
4809 h->ref_count[1] >>= 1;
/**
 * CABAC-decodes the MBAFF field-decoding flag. The context (0..2) counts
 * how many of the left/top neighbour MB pairs (same slice) are interlaced.
 *
 * NOTE(review): the ctx increments inside the two if bodies are elided in
 * this capture; tokens preserved byte-identically.
 */
4815 static int decode_cabac_field_decoding_flag(H264Context *h) {
4816 MpegEncContext * const s = &h->s;
4817 const int mb_x = s->mb_x;
// Address of the top MB of the current MB pair.
4818 const int mb_y = s->mb_y & ~1;
4819 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4820 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4822 unsigned int ctx = 0;
4824 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4827 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4831 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * CABAC-decodes an intra mb_type: 0 = I4x4, 25 = I_PCM, otherwise an
 * I16x16 index built from cbp_luma, cbp_chroma and the prediction mode.
 * ctx_base selects the state offset; intra_slice adjusts context indices
 * for use inside I slices versus inter slices.
 *
 * NOTE(review): some branch/brace lines are elided in this capture.
 */
4834 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4835 uint8_t *state= &h->cabac_state[ctx_base];
4839 MpegEncContext * const s = &h->s;
4840 const int mba_xy = h->left_mb_xy[0];
4841 const int mbb_xy = h->top_mb_xy;
// ctx counts non-I4x4 neighbours in the same slice (increments elided).
4843 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4845 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4847 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4848 return 0; /* I4x4 */
4851 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4852 return 0; /* I4x4 */
4855 if( get_cabac_terminate( &h->cabac ) )
4856 return 25; /* PCM */
4858 mb_type = 1; /* I16x16 */
4859 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4860 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4861 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4862 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4863 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * CABAC-decodes the mb_type for the current slice type: delegates to
 * decode_cabac_intra_mb_type() for I slices (and for the intra escape in
 * P/B slices); decodes the P/B binarization trees inline otherwise.
 *
 * NOTE(review): some brace/return lines are elided in this capture.
 */
4867 static int decode_cabac_mb_type( H264Context *h ) {
4868 MpegEncContext * const s = &h->s;
4870 if( h->slice_type_nos == FF_I_TYPE ) {
4871 return decode_cabac_intra_mb_type(h, 3, 1);
4872 } else if( h->slice_type_nos == FF_P_TYPE ) {
4873 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4875 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4876 /* P_L0_D16x16, P_8x8 */
4877 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4879 /* P_L0_D8x16, P_L0_D16x8 */
4880 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra mb_type in a P slice, offset past the 5 P types.
4883 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4885 } else if( h->slice_type_nos == FF_B_TYPE ) {
4886 const int mba_xy = h->left_mb_xy[0];
4887 const int mbb_xy = h->top_mb_xy;
// ctx counts same-slice neighbours that are not B_Direct (increments elided).
4891 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4893 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4896 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4897 return 0; /* B_Direct_16x16 */
4899 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4900 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4903 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4904 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4905 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4906 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4908 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4909 else if( bits == 13 ) {
4910 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4911 } else if( bits == 14 )
4912 return 11; /* B_L1_L0_8x16 */
4913 else if( bits == 15 )
4914 return 22; /* B_8x8 */
4916 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4917 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4919 /* TODO SI/SP frames? */
/**
 * CABAC-decodes the mb_skip_flag. The context (base 11, +13 for B slices
 * per the +2 visible at the end — elided line presumed) counts same-slice
 * non-skipped left/top neighbours; in MBAFF the neighbour addresses are
 * adjusted for field/frame pairing.
 *
 * NOTE(review): several lines (declarations, ctx increments, braces) are
 * elided in this capture; tokens preserved byte-identically.
 */
4924 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4925 MpegEncContext * const s = &h->s;
4929 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4930 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Pick the left neighbour row matching the current field parity.
4933 && h->slice_table[mba_xy] == h->slice_num
4934 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4935 mba_xy += s->mb_stride;
4937 mbb_xy = mb_xy - s->mb_stride;
4939 && h->slice_table[mbb_xy] == h->slice_num
4940 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4941 mbb_xy -= s->mb_stride;
4943 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4945 int mb_xy = h->mb_xy;
4947 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4950 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4952 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4955 if( h->slice_type_nos == FF_B_TYPE )
4957 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC-decodes a 4x4 intra prediction mode: one bit says "use the
 * predicted mode", otherwise a 3-bit remainder is read and shifted past
 * the predicted mode.
 *
 * NOTE(review): the returns and closing braces are elided in this capture.
 */
4960 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4963 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4966 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4967 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4968 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode so the remainder covers the other 8 modes.
4970 if( mode >= pred_mode )
/**
 * CABAC-decodes the chroma prediction mode (0..3) as a truncated unary
 * code; the first bin's context counts same-slice neighbours with a
 * non-zero chroma mode.
 *
 * NOTE(review): ctx increments and some returns are elided in this capture.
 */
4976 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4977 const int mba_xy = h->left_mb_xy[0];
4978 const int mbb_xy = h->top_mb_xy;
4982 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4983 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4986 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4989 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4992 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4994 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC-decodes the 4-bit luma coded block pattern. Each bit's context is
 * derived from already-decoded bits of this MB and from the left/top
 * neighbours' cbp (-1, i.e. all-ones, when the neighbour is outside the
 * slice, making the !(...) tests yield 0).
 *
 * NOTE(review): the return statement and closing brace are elided in this
 * capture; tokens preserved byte-identically.
 */
5000 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5001 int cbp_b, cbp_a, ctx, cbp = 0;
5003 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5004 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5006 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5007 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5008 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5009 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5010 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5011 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5012 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5013 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * CABAC-decodes the chroma coded block pattern (0 = none, 1 = DC only,
 * 2 = DC+AC) using the neighbours' chroma cbp bits as context.
 *
 * NOTE(review): local declarations and an early return are elided in this
 * capture; tokens preserved byte-identically.
 */
5016 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// Chroma cbp of the neighbours lives in bits 4-5 of their cbp.
5020 cbp_a = (h->left_cbp>>4)&0x03;
5021 cbp_b = (h-> top_cbp>>4)&0x03;
5024 if( cbp_a > 0 ) ctx++;
5025 if( cbp_b > 0 ) ctx += 2;
5026 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin uses contexts 4..7 (offset by 4, built from "== 2" tests).
5030 if( cbp_a == 2 ) ctx++;
5031 if( cbp_b == 2 ) ctx += 2;
5032 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC-decodes mb_qp_delta as a unary code, then maps even/odd `val` to
 * a signed delta; the first bin's context depends on whether the previous
 * MB had a non-zero delta.
 *
 * NOTE(review): declarations, the positive-return branch and loop body
 * updates are elided in this capture.
 */
5034 static int decode_cabac_mb_dqp( H264Context *h) {
5038 if( h->last_qscale_diff != 0 )
5041 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5047 if(val > 102) //prevent infinite loop
// Odd values map to negative deltas.
5054 return -(val + 1)/2;
/**
 * CABAC-decodes a P-slice sub_mb_type (3-bin tree over states 21..23).
 *
 * NOTE(review): the return statements for each branch are elided in this
 * capture; tokens preserved byte-identically.
 */
5056 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5057 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5059 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5061 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * CABAC-decodes a B-slice sub_mb_type (tree over states 36..39):
 * 0 = B_Direct_8x8, 1/2 = B_L0/L1_8x8, 11/12 = B_L1/Bi_4x4, remaining
 * values built bit-by-bit from state 39.
 *
 * NOTE(review): the `type` initialization and final return are elided in
 * this capture; tokens preserved byte-identically.
 */
5065 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5067 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5068 return 0; /* B_Direct_8x8 */
5069 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5070 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5072 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5073 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5074 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5077 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5078 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * CABAC-decodes the transform_size_8x8_flag; the context is the count of
 * neighbouring MBs already using the 8x8 transform
 * (h->neighbor_transform_size).
 */
5082 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5083 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * CABAC-decodes a reference index as a unary code (states 54+ctx), with
 * the initial context derived from the left/top cached reference indices;
 * in B slices direct-predicted neighbours are treated as ref 0.
 *
 * NOTE(review): `ref`/`ctx` declarations, the refa/refb zero-tests and the
 * final return are elided in this capture.
 */
5086 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5087 int refa = h->ref_cache[list][scan8[n] - 1];
5088 int refb = h->ref_cache[list][scan8[n] - 8];
5092 if( h->slice_type_nos == FF_B_TYPE) {
5093 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5095 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5104 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity cap to stop a corrupt stream from looping forever.
5110 if(ref >= 32 /*h->ref_list[list]*/){
5111 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5112 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * CABAC-decodes one motion vector difference component (l=0 horizontal at
 * context base 40, l=1 vertical at 47). Context selection uses the summed
 * absolute MVDs of the left/top neighbours; magnitudes >= 9 switch to
 * exp-Golomb-style bypass bits, and the sign is a bypass bin.
 *
 * NOTE(review): several lines (ctx selection, mvd accumulation, overflow
 * clamp value) are elided in this capture.
 */
5118 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5119 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5120 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5121 int ctxbase = (l == 0) ? 40 : 47;
5126 else if( amvd > 32 )
5131 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary part, at most 9 context-coded bins.
5136 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb suffix in bypass mode for large magnitudes.
5144 while( get_cabac_bypass( &h->cabac ) ) {
5148 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5153 if( get_cabac_bypass( &h->cabac ) )
5157 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag CABAC context for block category `cat` and
 * block index `idx`: fetches the left/top neighbours' non-zero status
 * (from the cbp word for DC categories, from the non_zero_count cache for
 * AC/luma) and combines them into ctx + 4*cat.
 *
 * NOTE(review): the branch structure (category dispatch, ctx combination
 * from nza/nzb) is partially elided in this capture.
 */
5160 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma DC: bit 8 of the packed cbp carries the neighbour's DC flag.
5166 nza = h->left_cbp&0x100;
5167 nzb = h-> top_cbp&0x100;
// Chroma DC: per-component flags live at bits 6+idx of the cbp word.
5169 nza = (h->left_cbp>>(6+idx))&0x01;
5170 nzb = (h-> top_cbp>>(6+idx))&0x01;
// Chroma AC: neighbours via the scan8-indexed non-zero-count cache.
5174 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5175 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5177 assert(cat == 1 || cat == 2);
// Luma AC / luma 4x4.
5178 nza = h->non_zero_count_cache[scan8[idx] - 1];
5179 nzb = h->non_zero_count_cache[scan8[idx] - 8];
// Each block category owns 4 consecutive contexts.
5189 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used when
 * decoding last_significant_coeff_flag for 8x8 blocks (positions are
 * grouped into coarse bands, matching the H.264 CABAC tables). */
5192 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5193 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5194 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5195 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5196 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block (DC or AC, 4x4 or 8x8) with CABAC:
 * coded_block_flag -> significance map -> coefficient levels/signs, with
 * optional dequantization via qmul.  'is_dc' is a compile-time flag so the
 * _dc/_nondc specializations fold the branches away.
 * NOTE(review): many interior lines (loop headers, #else branches, closing
 * braces) are elided in this view; statement order here is critical to the
 * CABAC engine state, so the code is left untouched. */
5199 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context-base tables indexed by [MB_FIELD][cat] per the H.264 spec */
5200     static const int significant_coeff_flag_offset[2][6] = {
5201       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5202       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5204     static const int last_coeff_flag_offset[2][6] = {
5205       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5206       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5208     static const int coeff_abs_level_m1_offset[6] = {
5209         227+0, 227+10, 227+20, 227+30, 227+39, 426
5211     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5212       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5213         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5214         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5215        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5216       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5217         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5218         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5219         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5221     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5222      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5223      * map node ctx => cabac ctx for level=1 */
5224     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5225     /* map node ctx => cabac ctx for level>1 */
5226     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5227     static const uint8_t coeff_abs_level_transition[2][8] = {
5228     /* update node ctx after decoding a level=1 */
5229         { 1, 2, 3, 3, 4, 5, 6, 7 },
5230     /* update node ctx after decoding a level>1 */
5231         { 4, 4, 4, 4, 5, 6, 7, 7 }
5237     int coeff_count = 0;
5240     uint8_t *significant_coeff_ctx_base;
5241     uint8_t *last_coeff_ctx_base;
5242     uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC engine state into a stack-local context so the compiler can
 * keep it in registers through the hot loops; flushed back before returning. */
5245 #define CABAC_ON_STACK
5247 #ifdef CABAC_ON_STACK
5250     cc.range     = h->cabac.range;
5251     cc.low       = h->cabac.low;
5252     cc.bytestream= h->cabac.bytestream;
5254 #define CC &h->cabac
5258     /* cat: 0-> DC 16x16  n = 0
5259      *      1-> AC 16x16  n = luma4x4idx
5260      *      2-> Luma4x4   n = luma4x4idx
5261      *      3-> DC Chroma n = iCbCr
5262      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5263      *      5-> Luma8x8   n = 4 * luma8x8idx
5266     /* read coded block flag */
5267     if( is_dc || cat != 5 ) {
5268         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* block not coded: clear its nnz cache entry and bail out early */
5271                 h->non_zero_count_cache[scan8[16+n]] = 0;
5273                 h->non_zero_count_cache[scan8[n]] = 0;
5276 #ifdef CABAC_ON_STACK
5277             h->cabac.range     = cc.range     ;
5278             h->cabac.low       = cc.low       ;
5279             h->cabac.bytestream= cc.bytestream;
5285     significant_coeff_ctx_base = h->cabac_state
5286         + significant_coeff_flag_offset[MB_FIELD][cat];
5287     last_coeff_ctx_base = h->cabac_state
5288         + last_coeff_flag_offset[MB_FIELD][cat];
5289     abs_level_m1_ctx_base = h->cabac_state
5290         + coeff_abs_level_m1_offset[cat];
5292     if( !is_dc && cat == 5 ) {
/* significance-map decode: records scan positions of non-zero coeffs in
 * index[]; last_significant_coeff_flag terminates the scan early */
5293 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5294         for(last= 0; last < coefs; last++) { \
5295             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5296             if( get_cabac( CC, sig_ctx )) { \
5297                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5298                 index[coeff_count++] = last; \
5299                 if( get_cabac( CC, last_ctx ) ) { \
5305         if( last == max_coeff -1 ) {\
5306             index[coeff_count++] = last;\
5308         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths for the significance decode, when available */
5309 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5310         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5312         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5314         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5316         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5319     assert(coeff_count > 0);
/* record which blocks were coded, for deblocking / neighbour contexts */
5323             h->cbp_table[h->mb_xy] |= 0x100;
5325             h->cbp_table[h->mb_xy] |= 0x40 << n;
5328             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5330             h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5332             assert( cat == 1 || cat == 2 );
5333             h->non_zero_count_cache[scan8[n]] = coeff_count;
/* level decode, from the last significant coefficient backwards */
5338         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5340         int j= scantable[index[--coeff_count]];
5342         if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign only; DC blocks skip dequantization (qmul unused) */
5343             node_ctx = coeff_abs_level_transition[0][node_ctx];
5345                 block[j] = get_cabac_bypass_sign( CC, -1);
5347                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5351             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5352             node_ctx = coeff_abs_level_transition[1][node_ctx];
/* unary prefix up to 14, then Exp-Golomb suffix in bypass mode */
5354             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5358             if( coeff_abs >= 15 ) {
5360                 while( get_cabac_bypass( CC ) ) {
5366                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5372                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5374                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5377     } while( coeff_count );
/* flush the stack-local CABAC state back into the context */
5378 #ifdef CABAC_ON_STACK
5379             h->cabac.range     = cc.range     ;
5380             h->cabac.low       = cc.low       ;
5381             h->cabac.bytestream= cc.bytestream;
/* Non-CONFIG_SMALL builds: two out-of-line specializations of the residual
 * decoder, so the is_dc branches constant-fold away in each copy. */
5386 #ifndef CONFIG_SMALL
/* DC residual (cat 0 or 3): no dequant table applied */
5387 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5388     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* AC / luma residual: coefficients dequantized via qmul */
5391 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5392     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher: CONFIG_SMALL builds call the internal decoder directly (one
 * copy, is_dc passed at runtime); otherwise route to the dc/nondc
 * specializations.  cat 0 (luma DC) and cat 3 (chroma DC) are the DC cases.
 * NOTE(review): the #ifdef/#else lines separating the two call sites are
 * elided in this view. */
5396 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5398     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5400     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5401     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute h->top_mb_xy / h->left_mb_xy[0] for the current macroblock.
 * Defaults are the frame-coded neighbours (mb_xy - stride / mb_xy - 1);
 * MBAFF then adjusts them when the current and neighbour macroblock pairs
 * differ in frame/field coding.  NOTE(review): the enclosing MBAFF 'if' and
 * some branch lines are elided in this view. */
5405 static inline void compute_mb_neighbors(H264Context *h)
5407     MpegEncContext * const s = &h->s;
5408     const int mb_xy = h->mb_xy;
5409     h->top_mb_xy     = mb_xy - s->mb_stride;
5410     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: addresses are computed on the macroblock *pair* grid */
5412         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5413         const int top_pair_xy      = pair_xy     - s->mb_stride;
5414         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5415         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5416         const int curr_mb_frame_flag = !MB_FIELD;
5417         const int bottom = (s->mb_y & 1);
5419             ? !curr_mb_frame_flag // bottom macroblock
5420             : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5422             h->top_mb_xy -= s->mb_stride;
/* left neighbour is the pair's top MB when frame/field coding differs */
5424         if (left_mb_frame_flag != curr_mb_frame_flag) {
5425             h->left_mb_xy[0] = pair_xy - 1;
5427     } else if (FIELD_PICTURE) {
/* field pictures: rows of the same parity are 2*stride apart */
5428         h->top_mb_xy -= s->mb_stride;
/* (doc comment continues the elided /** opener) */
5434  * decodes a macroblock
5435  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CABAC macroblock decode: skip flags, mb_type, intra prediction modes
 * or inter refs+MVs, CBP, dqp, and all residual blocks.  NOTE(review): this
 * function is heavily elided in this view (many braces / else branches
 * missing); code left byte-identical, comments only. */
5437 static int decode_mb_cabac(H264Context *h) {
5438     MpegEncContext * const s = &h->s;
5440     int mb_type, partition_count, cbp = 0;
5441     int dct8x8_allowed= h->pps.transform_8x8_mode;
5443     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5445     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5447     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* ---- skip flag (P/B slices only) ---- */
5448     if( h->slice_type_nos != FF_I_TYPE ) {
5450         /* a skipped mb needs the aff flag from the following mb */
5451         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5452             predict_field_decoding_flag(h);
5453         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5454             skip = h->next_mb_skipped;
5456             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5457         /* read skip flags */
5459             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5460                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5461                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5462                 if(h->next_mb_skipped)
5463                     predict_field_decoding_flag(h);
5465                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB state and return via the elided decode_mb_skip path */
5470             h->cbp_table[mb_xy] = 0;
5471             h->chroma_pred_mode_table[mb_xy] = 0;
5472             h->last_qscale_diff = 0;
5479         if( (s->mb_y&1) == 0 )
5481                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5484     h->prev_mb_skipped = 0;
/* ---- mb_type ---- */
5486     compute_mb_neighbors(h);
5487     if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5488         av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5492     if( h->slice_type_nos == FF_B_TYPE ) {
5494             partition_count= b_mb_type_info[mb_type].partition_count;
5495             mb_type=         b_mb_type_info[mb_type].type;
5498             goto decode_intra_mb;
5500     } else if( h->slice_type_nos == FF_P_TYPE ) {
5502             partition_count= p_mb_type_info[mb_type].partition_count;
5503             mb_type=         p_mb_type_info[mb_type].type;
5506             goto decode_intra_mb;
5509        if(h->slice_type == FF_SI_TYPE && mb_type)
5511        assert(h->slice_type_nos == FF_I_TYPE);
5513         partition_count = 0;
5514         cbp= i_mb_type_info[mb_type].cbp;
5515         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5516         mb_type= i_mb_type_info[mb_type].type;
5519         mb_type |= MB_TYPE_INTERLACED;
5521     h->slice_table[ mb_xy ]= h->slice_num;
/* ---- IPCM: raw samples follow, bypassing the CABAC engine ---- */
5523     if(IS_INTRA_PCM(mb_type)) {
5526         // We assume these blocks are very rare so we do not optimize it.
5527         // FIXME The two following lines get the bitstream position in the cabac
5528         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5529         ptr= h->cabac.bytestream;
5530         if(h->cabac.low&0x1) ptr--;
5532             if(h->cabac.low&0x1FF) ptr--;
5535         // The pixels are stored in the same order as levels in h->mb array.
5536         memcpy(h->mb, ptr, 256); ptr+=256;
5538             memcpy(h->mb+128, ptr, 128); ptr+=128;
/* re-initialize CABAC after the raw bytes */
5541         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5543         // All blocks are present
5544         h->cbp_table[mb_xy] = 0x1ef;
5545         h->chroma_pred_mode_table[mb_xy] = 0;
5546         // In deblocking, the quantizer is 0
5547         s->current_picture.qscale_table[mb_xy]= 0;
5548         // All coeffs are present
5549         memset(h->non_zero_count[mb_xy], 16, 16);
5550         s->current_picture.mb_type[mb_xy]= mb_type;
5551         h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field MB pair (undone below) */
5556         h->ref_count[0] <<= 1;
5557         h->ref_count[1] <<= 1;
5560     fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
5562     if( IS_INTRA( mb_type ) ) {
5564         if( IS_INTRA4x4( mb_type ) ) {
5565             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5566                 mb_type |= MB_TYPE_8x8DCT;
5567                 for( i = 0; i < 16; i+=4 ) {
5568                     int pred = pred_intra_mode( h, i );
5569                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5570                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5573                 for( i = 0; i < 16; i++ ) {
5574                     int pred = pred_intra_mode( h, i );
5575                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5577                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5580             write_back_intra_pred_mode(h);
5581             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5583             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5584             if( h->intra16x16_pred_mode < 0 ) return -1;
5587         h->chroma_pred_mode_table[mb_xy] =
5588         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5590         pred_mode= check_intra_pred_mode( h, pred_mode );
5591         if( pred_mode < 0 ) return -1;
5592         h->chroma_pred_mode= pred_mode;
/* ---- 8x8 sub-partitions: sub_mb_type, refs, then per-partition MVs ---- */
5594     } else if( partition_count == 4 ) {
5595         int i, j, sub_partition_count[4], list, ref[2][4];
5597         if( h->slice_type_nos == FF_B_TYPE ) {
5598             for( i = 0; i < 4; i++ ) {
5599                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5600                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5601                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5603             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5604                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5605                 pred_direct_motion(h, &mb_type);
5606                 h->ref_cache[0][scan8[4]] =
5607                 h->ref_cache[1][scan8[4]] =
5608                 h->ref_cache[0][scan8[12]] =
5609                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5610                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5611                     for( i = 0; i < 4; i++ )
5612                         if( IS_DIRECT(h->sub_mb_type[i]) )
5613                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5617             for( i = 0; i < 4; i++ ) {
5618                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5619                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5620                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each 8x8 (only coded when more than one ref) */
5624         for( list = 0; list < h->list_count; list++ ) {
5625                 for( i = 0; i < 4; i++ ) {
5626                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5627                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5628                         if( h->ref_count[list] > 1 )
5629                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5635                                                     h->ref_cache[list][ scan8[4*i]+1 ]=
5636                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5641         dct8x8_allowed = get_dct8x8_allowed(h);
5643         for(list=0; list<h->list_count; list++){
5645                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5646                 if(IS_DIRECT(h->sub_mb_type[i])){
5647                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5651                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5652                     const int sub_mb_type= h->sub_mb_type[i];
5653                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5654                     for(j=0; j<sub_partition_count[i]; j++){
5657                         const int index= 4*i + block_width*j;
5658                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5659                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
/* predicted MV + decoded MVD -> final MV */
5660                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5662                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5663                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5664                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV/MVD into all cache cells covered by the sub-partition */
5666                         if(IS_SUB_8X8(sub_mb_type)){
5668                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5670                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5673                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5675                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5676                         }else if(IS_SUB_8X4(sub_mb_type)){
5677                             mv_cache[ 1 ][0]= mx;
5678                             mv_cache[ 1 ][1]= my;
5680                             mvd_cache[ 1 ][0]= mx - mpx;
5681                             mvd_cache[ 1 ][1]= my - mpy;
5682                         }else if(IS_SUB_4X8(sub_mb_type)){
5683                             mv_cache[ 8 ][0]= mx;
5684                             mv_cache[ 8 ][1]= my;
5686                             mvd_cache[ 8 ][0]= mx - mpx;
5687                             mvd_cache[ 8 ][1]= my - mpy;
5689                         mv_cache[ 0 ][0]= mx;
5690                         mv_cache[ 0 ][1]= my;
5692                         mvd_cache[ 0 ][0]= mx - mpx;
5693                         mvd_cache[ 0 ][1]= my - mpy;
/* unused partition for this list: zero the 2x2 MV/MVD cache block */
5696                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5697                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5698                     p[0] = p[1] = p[8] = p[9] = 0;
5699                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- B_DIRECT 16x16 ---- */
5703     } else if( IS_DIRECT(mb_type) ) {
5704         pred_direct_motion(h, &mb_type);
5705         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5706         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5707         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
5709         int list, mx, my, i, mpx, mpy;
5710         if(IS_16X16(mb_type)){
5711             for(list=0; list<h->list_count; list++){
5712                 if(IS_DIR(mb_type, 0, list)){
5713                     const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5714                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5716                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5718             for(list=0; list<h->list_count; list++){
5719                 if(IS_DIR(mb_type, 0, list)){
5720                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5722                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5723                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5724                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5726                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5727                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5729                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5732         else if(IS_16X8(mb_type)){
5733             for(list=0; list<h->list_count; list++){
5735                         if(IS_DIR(mb_type, i, list)){
5736                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5737                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5739                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5742             for(list=0; list<h->list_count; list++){
5744                     if(IS_DIR(mb_type, i, list)){
5745                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5746                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5747                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5748                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5750                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5751                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5753                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5754                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5759             assert(IS_8X16(mb_type));
5760             for(list=0; list<h->list_count; list++){
5762                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5763                             const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5764                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5766                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5769             for(list=0; list<h->list_count; list++){
5771                     if(IS_DIR(mb_type, i, list)){
5772                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5773                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5774                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5776                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5777                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5778                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5780                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5781                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5788    if( IS_INTER( mb_type ) ) {
5789         h->chroma_pred_mode_table[mb_xy] = 0;
5790         write_back_motion( h, mb_type );
/* ---- coded block pattern ---- */
5793     if( !IS_INTRA16x16( mb_type ) ) {
5794         cbp  = decode_cabac_mb_cbp_luma( h );
5796             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5799     h->cbp_table[mb_xy] = h->cbp = cbp;
5801     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5802         if( decode_cabac_mb_transform_size( h ) )
5803             mb_type |= MB_TYPE_8x8DCT;
5805     s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residuals (only if something is coded) ---- */
5807     if( cbp || IS_INTRA16x16( mb_type ) ) {
5808         const uint8_t *scan, *scan8x8, *dc_scan;
5809         const uint32_t *qmul;
/* scan tables differ for field MBs; *_q0 variants used when qscale==0 */
5812         if(IS_INTERLACED(mb_type)){
5813             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5814             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5815             dc_scan= luma_dc_field_scan;
5817             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5818             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5819             dc_scan= luma_dc_zigzag_scan;
/* delta-QP, then wrap qscale into 0..51 and derive the chroma QPs */
5822         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5823         if( dqp == INT_MIN ){
5824             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5828         if(((unsigned)s->qscale) > 51){
5829             if(s->qscale<0) s->qscale+= 52;
5830             else            s->qscale-= 52;
5832         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5833         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5835         if( IS_INTRA16x16( mb_type ) ) {
5837             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5838             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5841                 qmul = h->dequant4_coeff[0][s->qscale];
5842                 for( i = 0; i < 16; i++ ) {
5843                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5844                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5847                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5851             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5852                 if( cbp & (1<<i8x8) ) {
5853                     if( IS_8x8DCT(mb_type) ) {
5854                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5855                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5857                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5858                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5859                             const int index = 4*i8x8 + i4x4;
5860                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5862                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5863 //STOP_TIMER("decode_residual")
5867                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5868                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC residuals */
5875             for( c = 0; c < 2; c++ ) {
5876                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5877                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5883             for( c = 0; c < 2; c++ ) {
5884                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5885                 for( i = 0; i < 4; i++ ) {
5886                     const int index = 16 + 4 * c + i;
5887                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5888                     decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5892             uint8_t * const nnz= &h->non_zero_count_cache[0];
5893             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5894             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* nothing coded: clear the whole nnz cache and reset the qp delta */
5897         uint8_t * const nnz= &h->non_zero_count_cache[0];
5898         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5899         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5900         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5901         h->last_qscale_diff = 0;
5904     s->current_picture.qscale_table[mb_xy]= s->qscale;
5905     write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from above */
5908         h->ref_count[0] >>= 1;
5909         h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall).  bS<4 uses the DSP
 * clipped filter; bS==4 (intra edge) runs the strong filter in C.
 * NOTE(review): the bS<4/bS==4 branch lines and loop closings are elided in
 * this view. */
5916 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5918     const int index_a = qp + h->slice_alpha_c0_offset;
5919     const int alpha = (alpha_table+52)[index_a];
5920     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* normal filtering: per-4-line tc0 clipping values, -1 = skip */
5925             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5926         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5928         /* 16px edge length, because bS=4 is triggered by being at
5929          * the edge of an intra MB, so all 4 bS are the same */
5930         for( d = 0; d < 16; d++ ) {
5931             const int p0 = pix[-1];
5932             const int p1 = pix[-2];
5933             const int p2 = pix[-3];
5935             const int q0 = pix[0];
5936             const int q1 = pix[1];
5937             const int q2 = pix[2];
5939             if( FFABS( p0 - q0 ) < alpha &&
5940                 FFABS( p1 - p0 ) < beta &&
5941                 FFABS( q1 - q0 ) < beta ) {
/* strong (bS=4) filter: 3-tap/5-tap smoothing per spec eq. 8-485.. */
5943                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5944                     if( FFABS( p2 - p0 ) < beta)
5946                         const int p3 = pix[-4];
5948                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5949                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5950                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5953                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5955                     if( FFABS( q2 - q0 ) < beta)
5957                         const int q3 = pix[3];
5959                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5960                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5961                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5964                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback when the strong-filter condition fails */
5968                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5969                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5971                 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: bS<4 uses the clipped chroma filter
 * (tc0+1 per spec), bS==4 the intra chroma filter. */
5977 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5979     const int index_a = qp + h->slice_alpha_c0_offset;
5980     const int alpha = (alpha_table+52)[index_a];
5981     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5986             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5987         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5989         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Vertical luma deblocking for an MBAFF edge: per-line bS/qp selection
 * (qp[2] carries the two field QPs), then the normal (bS<4) or strong
 * (bS==4) filter per pixel row.  NOTE(review): several branch/closing lines
 * are elided in this view. */
5993 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5995     for( i = 0; i < 16; i++, pix += stride) {
6001         int bS_index = (i >> 1);
6004             bS_index |= (i & 1);
6007         if( bS[bS_index] == 0 ) {
/* pick the qp of the field this row belongs to */
6011         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6012         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6013         alpha = (alpha_table+52)[index_a];
6014         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6016         if( bS[bS_index] < 4 ) {
6017             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6018             const int p0 = pix[-1];
6019             const int p1 = pix[-2];
6020             const int p2 = pix[-3];
6021             const int q0 = pix[0];
6022             const int q1 = pix[1];
6023             const int q2 = pix[2];
6025             if( FFABS( p0 - q0 ) < alpha &&
6026                 FFABS( p1 - p0 ) < beta &&
6027                 FFABS( q1 - q0 ) < beta ) {
/* optionally adjust p1/q1 (widens the clip range tc) */
6031                 if( FFABS( p2 - p0 ) < beta ) {
6032                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6035                 if( FFABS( q2 - q0 ) < beta ) {
6036                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6040                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6041                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6042                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6043             tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter, mirrors filter_mb_edgev */
6046             const int p0 = pix[-1];
6047             const int p1 = pix[-2];
6048             const int p2 = pix[-3];
6050             const int q0 = pix[0];
6051             const int q1 = pix[1];
6052             const int q2 = pix[2];
6054             if( FFABS( p0 - q0 ) < alpha &&
6055                 FFABS( p1 - p0 ) < beta &&
6056                 FFABS( q1 - q0 ) < beta ) {
6058                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6059                     if( FFABS( p2 - p0 ) < beta)
6061                         const int p3 = pix[-4];
6063                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6064                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6065                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6068                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6070                     if( FFABS( q2 - q0 ) < beta)
6072                         const int q3 = pix[3];
6074                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6075                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6076                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6079                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6083                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6084                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6086                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Vertical chroma deblocking for an MBAFF edge (8 rows): per-row bS/qp
 * selection, then the clipped (bS<4) or intra (bS==4) chroma filter.
 * NOTE(review): the bS_index computation lines are elided in this view. */
6091 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6093     for( i = 0; i < 8; i++, pix += stride) {
6101         if( bS[bS_index] == 0 ) {
6105         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6106         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6107         alpha = (alpha_table+52)[index_a];
6108         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6110         if( bS[bS_index] < 4 ) {
/* chroma uses tc0+1 per the spec */
6111             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6112             const int p0 = pix[-1];
6113             const int p1 = pix[-2];
6114             const int q0 = pix[0];
6115             const int q1 = pix[1];
6117             if( FFABS( p0 - q0 ) < alpha &&
6118                 FFABS( p1 - p0 ) < beta &&
6119                 FFABS( q1 - q0 ) < beta ) {
6120                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6122                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6123                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6124             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: unclipped intra chroma filter */
6127             const int p0 = pix[-1];
6128             const int p1 = pix[-2];
6129             const int q0 = pix[0];
6130             const int q1 = pix[1];
6132             if( FFABS( p0 - q0 ) < alpha &&
6133                 FFABS( p1 - p0 ) < beta &&
6134                 FFABS( q1 - q0 ) < beta ) {
6136                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6137                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6138             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge; same structure as filter_mb_edgev but
 * addressing rows via pix_next (= stride) instead of adjacent columns.
 * NOTE(review): branch/closing lines are elided in this view. */
6144 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6146     const int index_a = qp + h->slice_alpha_c0_offset;
6147     const int alpha = (alpha_table+52)[index_a];
6148     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6149     const int pix_next  = stride;
/* normal filtering via the DSP routine */
6154             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6155         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6157         /* 16px edge length, see filter_mb_edgev */
6158         for( d = 0; d < 16; d++ ) {
6159             const int p0 = pix[-1*pix_next];
6160             const int p1 = pix[-2*pix_next];
6161             const int p2 = pix[-3*pix_next];
6162             const int q0 = pix[0];
6163             const int q1 = pix[1*pix_next];
6164             const int q2 = pix[2*pix_next];
6166             if( FFABS( p0 - q0 ) < alpha &&
6167                 FFABS( p1 - p0 ) < beta &&
6168                 FFABS( q1 - q0 ) < beta ) {
6170                 const int p3 = pix[-4*pix_next];
6171                 const int q3 = pix[ 3*pix_next];
/* strong (bS=4) filter */
6173                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6174                     if( FFABS( p2 - p0 ) < beta) {
6176                         pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6177                         pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6178                         pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6181                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6183                     if( FFABS( q2 - q0 ) < beta) {
6185                         pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6186                         pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6187                         pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6190                         pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback */
6194                     pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6195                     pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6197                 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: clipped filter for bS<4 (tc0+1),
 * intra filter for bS==4 — the horizontal twin of filter_mb_edgecv. */
6204 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6206     const int index_a = qp + h->slice_alpha_c0_offset;
6207     const int alpha = (alpha_table+52)[index_a];
6208     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6213             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6214         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6216         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast path of the in-loop deblocking filter for one macroblock.
 * Preconditions (checked below): not the first MB column/row, a DSP
 * bS-computation routine is available, no per-MB chroma-qp difference,
 * and no cross-slice filtering restriction; otherwise it falls back to
 * the generic filter_mb().
 * NOTE(review): this is a sampled listing — some original lines are
 * elided between the numbered lines. */
6220 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6221 MpegEncContext * const s = &h->s;
/* first row that has a top neighbour: row 1 for a bottom field, else row 0 */
6222 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6224 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Cases the fast path cannot handle -> delegate to the full filter. */
6228 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6230 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6231 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6232 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6235 assert(!FRAME_MBAFF);
/* Average current/left/top luma and chroma QPs for the shared edges. */
6237 mb_type = s->current_picture.mb_type[mb_xy];
6238 qp = s->current_picture.qscale_table[mb_xy];
6239 qp0 = s->current_picture.qscale_table[mb_xy-1];
6240 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6241 qpc = get_chroma_qp( h, 0, qp );
6242 qpc0 = get_chroma_qp( h, 0, qp0 );
6243 qpc1 = get_chroma_qp( h, 0, qp1 );
6244 qp0 = (qp + qp0 + 1) >> 1;
6245 qp1 = (qp + qp1 + 1) >> 1;
6246 qpc0 = (qpc + qpc0 + 1) >> 1;
6247 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this QP threshold the filter is a no-op (conservative estimate). */
6248 qp_thresh = 15 - h->slice_alpha_c0_offset;
6249 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6250 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MBs: boundary strengths are fixed (4 on MB edges, 3 inside;
 * field pictures use 3 on the horizontal MB edge). */
6253 if( IS_INTRA(mb_type) ) {
6254 int16_t bS4[4] = {4,4,4,4};
6255 int16_t bS3[4] = {3,3,3,3};
6256 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every second internal edge is filtered. */
6257 if( IS_8x8DCT(mb_type) ) {
6258 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6259 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6260 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6261 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6263 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6264 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6265 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6266 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6267 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6268 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6269 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6270 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: only edges 0 and 2 exist at half resolution. */
6272 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6273 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6274 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6275 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6276 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6277 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6278 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6279 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MBs: boundary strengths computed per 4-sample edge segment;
 * bSv aliases bS so a whole edge (4 x int16) is set with one u64 store. */
6282 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6283 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6285 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6287 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the DSP routine which edges cannot get an mv-based bS. */
6289 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6290 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6291 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6292 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6294 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6295 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6296 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6297 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Edges against intra neighbours are forced to maximum strength. */
6299 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6300 bSv[0][0] = 0x0004000400040004ULL;
6301 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6302 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Apply one luma + two chroma edge filters if any segment has bS != 0. */
6304 #define FILTER(hv,dir,edge)\
6305 if(bSv[dir][edge]) {\
6306 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6308 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6309 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6315 } else if( IS_8x8DCT(mb_type) ) {
/* Generic in-loop deblocking filter for one macroblock, handling all the
 * cases the fast path cannot: MBAFF, field pictures, slice boundaries,
 * per-MB chroma-qp differences and CAVLC-8x8 NNZ fixups.
 * NOTE(review): sampled listing — some original lines are elided. */
6334 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6335 MpegEncContext * const s = &h->s;
6336 const int mb_xy= mb_x + mb_y*s->mb_stride;
6337 const int mb_type = s->current_picture.mb_type[mb_xy];
/* mv difference threshold halves (4 -> 2) for interlaced MBs (field mvs). */
6338 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6339 int first_vertical_edge_done = 0;
6342 //for sufficiently low qp, filtering wouldn't do anything
6343 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6345 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6346 int qp = s->current_picture.qscale_table[mb_xy];
6348 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6349 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6354 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6355 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6356 int top_type, left_type[2];
6357 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6358 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6359 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the nnz cache rows from the coded-block-pattern bits so the
 * loop filter sees per-8x8-block "has coefficients" flags. */
6361 if(IS_8x8DCT(top_type)){
6362 h->non_zero_count_cache[4+8*0]=
6363 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6364 h->non_zero_count_cache[6+8*0]=
6365 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6367 if(IS_8x8DCT(left_type[0])){
6368 h->non_zero_count_cache[3+8*1]=
6369 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6371 if(IS_8x8DCT(left_type[1])){
6372 h->non_zero_count_cache[3+8*3]=
6373 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6376 if(IS_8x8DCT(mb_type)){
6377 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6378 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6380 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6381 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6383 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6384 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6386 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6387 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
/* MBAFF special case: left edge against a pair of opposite-parity MBs. */
6392 // left mb is in picture
6393 && h->slice_table[mb_xy-1] != 255
6394 // and current and left pair do not have the same interlaced type
6395 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6396 // and left mb is in the same slice if deblocking_filter == 2
6397 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6398 /* First vertical edge is different in MBAFF frames
6399 * There are 8 different bS to compute and 2 different Qp
6401 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6402 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6407 int mb_qp, mbn0_qp, mbn1_qp;
6409 first_vertical_edge_done = 1;
6411 if( IS_INTRA(mb_type) )
6412 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6414 for( i = 0; i < 8; i++ ) {
6415 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6417 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6419 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6420 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6421 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two QP averages per plane: one against each left-pair neighbour. */
6428 mb_qp = s->current_picture.qscale_table[mb_xy];
6429 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6430 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6431 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6432 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6433 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6434 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6435 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6436 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6437 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6438 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6439 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6440 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6443 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6444 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6445 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6446 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6447 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6449 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6450 for( dir = 0; dir < 2; dir++ )
6453 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6454 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps slice-local reference indices to global frame numbers so
 * references can be compared across slices. */
6455 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6456 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
6457 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6459 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6460 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6461 // how often to recheck mv-based bS when iterating between edges
6462 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6463 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6464 // how often to recheck mv-based bS when iterating along each edge
6465 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6467 if (first_vertical_edge_done) {
6469 first_vertical_edge_done = 0;
6472 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6475 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6476 && !IS_INTERLACED(mb_type)
6477 && IS_INTERLACED(mbm_type)
6479 // This is a special case in the norm where the filtering must
6480 // be done twice (one each of the field) even if we are in a
6481 // frame macroblock.
6483 static const int nnz_idx[4] = {4,5,6,3};
6484 unsigned int tmp_linesize = 2 * linesize;
6485 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6486 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter the top edge once per field of the interlaced neighbour pair. */
6491 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6492 if( IS_INTRA(mb_type) ||
6493 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6494 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6496 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6497 for( i = 0; i < 4; i++ ) {
6498 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6499 mbn_nnz[nnz_idx[i]] != 0 )
6505 // Do not use s->qscale as luma quantizer because it has not the same
6506 // value in IPCM macroblocks.
6507 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6508 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6509 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6510 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6511 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6512 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6513 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6514 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop: edge 0 is the MB boundary, edges 1..3 are internal. */
6521 for( edge = start; edge < edges; edge++ ) {
6522 /* mbn_xy: neighbor macroblock */
6523 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6524 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6525 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* With an 8x8 transform odd internal edges are not filtered. */
6529 if( (edge&1) && IS_8x8DCT(mb_type) )
6532 if( IS_INTRA(mb_type) ||
6533 IS_INTRA(mbn_type) ) {
6536 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6537 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6546 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter/inter edge: bS from nnz, then from ref/mv differences. */
6551 if( edge & mask_edge ) {
6552 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6555 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6556 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6559 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6560 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6561 int bn_idx= b_idx - (dir ? 8:1);
6564 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6565 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6566 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6567 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list (ln = !l). */
6570 if(h->slice_type_nos == FF_B_TYPE && v){
6572 for( l = 0; !v && l < 2; l++ ) {
6574 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6575 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6576 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6580 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-4x4 segment: nnz forces bS=2, else ref/mv differences give bS=1. */
6586 for( i = 0; i < 4; i++ ) {
6587 int x = dir == 0 ? edge : i;
6588 int y = dir == 0 ? i : edge;
6589 int b_idx= 8 + 4 + x + 8*y;
6590 int bn_idx= b_idx - (dir ? 8:1);
6592 if( h->non_zero_count_cache[b_idx] != 0 ||
6593 h->non_zero_count_cache[bn_idx] != 0 ) {
6599 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6600 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6601 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6602 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6608 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6610 for( l = 0; l < 2; l++ ) {
6612 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6613 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6614 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6623 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6628 // Do not use s->qscale as luma quantizer because it has not the same
6629 // value in IPCM macroblocks.
6630 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6631 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6632 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6633 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Apply the filters; chroma only on even edges (half resolution). */
6635 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6636 if( (edge&1) == 0 ) {
6637 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6638 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6639 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6640 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6643 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6644 if( (edge&1) == 0 ) {
6645 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6646 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6647 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6648 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice (CABAC or CAVLC path), reporting
 * decoded/errored MB ranges to the error concealment via ff_er_add_slice().
 * Returns 0 on a clean slice end, -1 on error.
 * Fix: line 6813 contained garbled characters ("s->?gb", "s->gb?.")
 * making it syntactically invalid; restored to match the sibling
 * conditions on lines 6801/6814 in the same branch.
 * NOTE(review): sampled listing — some original lines are elided. */
6655 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6656 MpegEncContext * const s = &h->s;
6657 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6661 if( h->pps.cabac ) {
/* CABAC: byte-align, then hand the remaining bytes to the CABAC decoder. */
6665 align_get_bits( &s->gb );
6668 ff_init_cabac_states( &h->cabac);
6669 ff_init_cabac_decoder( &h->cabac,
6670 s->gb.buffer + get_bits_count(&s->gb)/8,
6671 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6672 /* calculate pre-state */
6673 for( i= 0; i < 460; i++ ) {
6675 if( h->slice_type_nos == FF_I_TYPE )
6676 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6678 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6681 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6683 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6688 int ret = decode_mb_cabac(h);
6690 //STOP_TIMER("decode_mb_cabac")
6692 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well. */
6694 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6697 if(ret>=0) ret = decode_mb_cabac(h);
6699 if(ret>=0) hl_decode_mb(h);
6702 eos = get_cabac_terminate( &h->cabac );
/* Allow up to 2 bytes of overread before declaring the stream damaged. */
6704 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6705 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6710 if( ++s->mb_x >= s->mb_width ) {
6712 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6714 if(FIELD_OR_MBAFF_PICTURE) {
6719 if( eos || s->mb_y >= s->mb_height ) {
6720 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6728 int ret = decode_mb_cavlc(h);
6730 if(ret>=0) hl_decode_mb(h);
6732 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6734 ret = decode_mb_cavlc(h);
6736 if(ret>=0) hl_decode_mb(h);
6741 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6742 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6747 if(++s->mb_x >= s->mb_width){
6749 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6751 if(FIELD_OR_MBAFF_PICTURE) {
6754 if(s->mb_y >= s->mb_height){
6755 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Clean end only if the bit reader consumed exactly the slice data. */
6757 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6758 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6762 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6769 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6770 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6771 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6772 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6776 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Legacy/disabled partitioned-frame branch (note: passes s->gb by value). */
6785 for(;s->mb_y < s->mb_height; s->mb_y++){
6786 for(;s->mb_x < s->mb_width; s->mb_x++){
6787 int ret= decode_mb(h);
6792 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6793 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6798 if(++s->mb_x >= s->mb_width){
6800 if(++s->mb_y >= s->mb_height){
6801 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6802 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6813 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6814 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6815 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6819 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6826 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6829 return -1; //not reached
/* Parse an SEI "unregistered user data" payload; used to detect the x264
 * encoder build number (h->x264_build) for bug workarounds.
 * @param size payload size in bytes; bytes beyond the local buffer are skipped.
 * NOTE(review): sampled listing — some original lines are elided. */
6832 static int decode_unregistered_user_data(H264Context *h, int size){
6833 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text. */
6834 uint8_t user_data[16+256];
6840 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6841 user_data[i]= get_bits(&s->gb, 8);
/* The x264 version string starts after the 16-byte UUID. */
6845 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6846 if(e==1 && build>=0)
6847 h->x264_build= build;
6849 if(s->avctx->debug & FF_DEBUG_BUGS)
6850 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any remaining payload bytes not copied above. */
6853 skip_bits(&s->gb, 8);
/* Parse SEI NAL unit payloads. Type and size use the H.264 scheme where
 * 0xFF bytes accumulate 255 each until a terminating byte < 255.
 * NOTE(review): sampled listing — some original lines are elided. */
6858 static int decode_sei(H264Context *h){
6859 MpegEncContext * const s = &h->s;
6861 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6866 type+= show_bits(&s->gb, 8);
6867 }while(get_bits(&s->gb, 8) == 255);
6871 size+= show_bits(&s->gb, 8);
6872 }while(get_bits(&s->gb, 8) == 255);
6876 if(decode_unregistered_user_data(h, size) < 0)
/* Unknown payload types are skipped wholesale. */
6880 skip_bits(&s->gb, 8*size);
6883 //FIXME check bits here
6884 align_get_bits(&s->gb);
/* Parse (and discard) HRD parameters from the VUI; nothing is stored,
 * the fields are only consumed to keep the bit reader in sync. */
6890 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6891 MpegEncContext * const s = &h->s;
6893 cpb_count = get_ue_golomb(&s->gb) + 1;
6894 get_bits(&s->gb, 4); /* bit_rate_scale */
6895 get_bits(&s->gb, 4); /* cpb_size_scale */
6896 for(i=0; i<cpb_count; i++){
6897 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6898 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6899 get_bits1(&s->gb); /* cbr_flag */
6901 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6902 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6903 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6904 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (Video Usability Information) appended to an SPS.
 * Stores sample aspect ratio, timing info and bitstream restrictions in
 * *sps; everything else is consumed and discarded.
 * NOTE(review): sampled listing — some original lines are elided. */
6907 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6908 MpegEncContext * const s = &h->s;
6909 int aspect_ratio_info_present_flag;
6910 unsigned int aspect_ratio_idc;
6911 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6913 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6915 if( aspect_ratio_info_present_flag ) {
6916 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit numerator/denominator; else a table lookup. */
6917 if( aspect_ratio_idc == EXTENDED_SAR ) {
6918 sps->sar.num= get_bits(&s->gb, 16);
6919 sps->sar.den= get_bits(&s->gb, 16);
6920 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6921 sps->sar= pixel_aspect[aspect_ratio_idc];
6923 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6930 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6932 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6933 get_bits1(&s->gb); /* overscan_appropriate_flag */
6936 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6937 get_bits(&s->gb, 3); /* video_format */
6938 get_bits1(&s->gb); /* video_full_range_flag */
6939 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6940 get_bits(&s->gb, 8); /* colour_primaries */
6941 get_bits(&s->gb, 8); /* transfer_characteristics */
6942 get_bits(&s->gb, 8); /* matrix_coefficients */
6946 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6947 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6948 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6951 sps->timing_info_present_flag = get_bits1(&s->gb);
6952 if(sps->timing_info_present_flag){
6953 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6954 sps->time_scale = get_bits_long(&s->gb, 32);
6955 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed only to stay bit-aligned; values discarded. */
6958 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6959 if(nal_hrd_parameters_present_flag)
6960 decode_hrd_parameters(h, sps);
6961 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6962 if(vcl_hrd_parameters_present_flag)
6963 decode_hrd_parameters(h, sps);
6964 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6965 get_bits1(&s->gb); /* low_delay_hrd_flag */
6966 get_bits1(&s->gb); /* pic_struct_present_flag */
6968 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6969 if(sps->bitstream_restriction_flag){
6970 unsigned int num_reorder_frames;
6971 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6972 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6973 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6974 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6975 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6976 num_reorder_frames= get_ue_golomb(&s->gb);
6977 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* 16 is the DPB hard limit; larger values indicate a broken stream. */
6979 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6980 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6984 sps->num_reorder_frames= num_reorder_frames;
/* Parse one quantization scaling list (16 or 64 entries, zigzag order).
 * If the list is absent, copy fallback_list; if the first delta yields 0,
 * use the JVT default list; otherwise decode delta-coded values where a
 * zero delta repeats the previous value. */
6990 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6991 const uint8_t *jvt_list, const uint8_t *fallback_list){
6992 MpegEncContext * const s = &h->s;
6993 int i, last = 8, next = 8;
6994 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6995 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6996 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6998 for(i=0;i<size;i++){
7000 next = (last + get_se_golomb(&s->gb)) & 0xff;
7001 if(!i && !next){ /* matrix not written, we use the preset one */
7002 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value" for the rest of the list. */
7005 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per the spec: a PPS falls back to the SPS matrices when
 * the SPS carried any (fallback_sps), otherwise to the flat/JVT defaults;
 * within a set, each chroma list falls back to the previous list. */
7009 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7010 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7011 MpegEncContext * const s = &h->s;
7012 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7013 const uint8_t *fallback[4] = {
7014 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7015 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7016 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7017 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7019 if(get_bits1(&s->gb)){
7020 sps->scaling_matrix_present |= is_sps;
7021 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7022 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7023 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7024 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7025 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7026 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables the 8x8 DCT. */
7027 if(is_sps || pps->transform_8x8_mode){
7028 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7029 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7031 } else if(fallback_sps) {
/* No PPS-level matrices: inherit the complete SPS set. */
7032 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7033 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7038 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates 'id' against 'max' and lazily allocates a zeroed entry of
 * 'size' bytes in vec[id]; 'name' is used only for error messages. */
7041 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7042 const size_t size, const char *name)
7045 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7050 vec[id] = av_mallocz(size);
7052 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a Sequence Parameter Set NAL into h->sps_buffers[sps_id].
 * Returns 0 on success, negative on error (per visible error paths).
 * Fix: the cropping sanity check used h->sps.frame_mbs_only_flag — the
 * PREVIOUSLY active SPS — instead of the SPS currently being parsed;
 * changed to sps->frame_mbs_only_flag (matches the later upstream fix).
 * NOTE(review): sampled listing — some original lines are elided. */
7057 static inline int decode_seq_parameter_set(H264Context *h){
7058 MpegEncContext * const s = &h->s;
7059 int profile_idc, level_idc;
7060 unsigned int sps_id, tmp, mb_width, mb_height;
7064 profile_idc= get_bits(&s->gb, 8);
7065 get_bits1(&s->gb); //constraint_set0_flag
7066 get_bits1(&s->gb); //constraint_set1_flag
7067 get_bits1(&s->gb); //constraint_set2_flag
7068 get_bits1(&s->gb); //constraint_set3_flag
7069 get_bits(&s->gb, 4); // reserved
7070 level_idc= get_bits(&s->gb, 8);
7071 sps_id= get_ue_golomb(&s->gb);
7073 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7077 sps->profile_idc= profile_idc;
7078 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices. */
7080 if(sps->profile_idc >= 100){ //high profile
7081 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7082 if(sps->chroma_format_idc == 3)
7083 get_bits1(&s->gb); //residual_color_transform_flag
7084 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7085 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7086 sps->transform_bypass = get_bits1(&s->gb);
7087 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7089 sps->scaling_matrix_present = 0;
7090 sps->chroma_format_idc= 1;
7093 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7094 sps->poc_type= get_ue_golomb(&s->gb);
7096 if(sps->poc_type == 0){ //FIXME #define
7097 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7098 } else if(sps->poc_type == 1){//FIXME #define
7099 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7100 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7101 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7102 tmp= get_ue_golomb(&s->gb);
7104 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7105 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7108 sps->poc_cycle_length= tmp;
7110 for(i=0; i<sps->poc_cycle_length; i++)
7111 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7112 }else if(sps->poc_type != 2){
7113 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7117 tmp= get_ue_golomb(&s->gb);
7118 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7119 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7122 sps->ref_frame_count= tmp;
7123 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7124 mb_width= get_ue_golomb(&s->gb) + 1;
7125 mb_height= get_ue_golomb(&s->gb) + 1;
7126 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7127 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7128 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7131 sps->mb_width = mb_width;
7132 sps->mb_height= mb_height;
7134 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7135 if(!sps->frame_mbs_only_flag)
7136 sps->mb_aff= get_bits1(&s->gb);
7140 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7142 #ifndef ALLOW_INTERLACE
7144 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7146 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7147 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7149 sps->crop= get_bits1(&s->gb);
7151 sps->crop_left = get_ue_golomb(&s->gb);
7152 sps->crop_right = get_ue_golomb(&s->gb);
7153 sps->crop_top = get_ue_golomb(&s->gb);
7154 sps->crop_bottom= get_ue_golomb(&s->gb);
7155 if(sps->crop_left || sps->crop_top){
7156 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* Use the SPS being parsed, not h->sps (the previously active one). */
7158 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7159 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7165 sps->crop_bottom= 0;
7168 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7169 if( sps->vui_parameters_present_flag )
7170 decode_vui_parameters(h, sps);
7172 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7173 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7174 sps_id, sps->profile_idc, sps->level_idc,
7176 sps->ref_frame_count,
7177 sps->mb_width, sps->mb_height,
7178 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7179 sps->direct_8x8_inference_flag ? "8B8" : "",
7180 sps->crop_left, sps->crop_right,
7181 sps->crop_top, sps->crop_bottom,
7182 sps->vui_parameters_present_flag ? "VUI" : "",
7183 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Precompute the luma-QP -> chroma-QP mapping for chroma table 't'
 * ('index' is the chroma_qp_index_offset), clipping into [0,51]. */
7190 build_qp_table(PPS *pps, int t, int index)
7193 for(i = 0; i < 52; i++)
7194 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a Picture Parameter Set NAL into h->pps_buffers[pps_id].
 * @param bit_length total PPS size in bits, used to detect the optional
 *        trailing fields (8x8 mode, matrices, 2nd chroma qp offset).
 * NOTE(review): sampled listing — some original lines are elided. */
7197 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7198 MpegEncContext * const s = &h->s;
7199 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7202 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7206 tmp= get_ue_golomb(&s->gb);
7207 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7208 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7213 pps->cabac= get_bits1(&s->gb);
7214 pps->pic_order_present= get_bits1(&s->gb);
7215 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7216 if(pps->slice_group_count > 1 ){
7217 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7218 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* FMO map-type syntax kept below as a reference table (unimplemented). */
7219 switch(pps->mb_slice_group_map_type){
7222 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7223 | run_length[ i ] |1 |ue(v) |
7228 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7230 | top_left_mb[ i ] |1 |ue(v) |
7231 | bottom_right_mb[ i ] |1 |ue(v) |
7239 | slice_group_change_direction_flag |1 |u(1) |
7240 | slice_group_change_rate_minus1 |1 |ue(v) |
7245 | slice_group_id_cnt_minus1 |1 |ue(v) |
7246 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7248 | slice_group_id[ i ] |1 |u(v) |
7253 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7254 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7255 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7256 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7257 pps->ref_count[0]= pps->ref_count[1]= 1;
7261 pps->weighted_pred= get_bits1(&s->gb);
7262 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7263 pps->init_qp= get_se_golomb(&s->gb) + 26;
7264 pps->init_qs= get_se_golomb(&s->gb) + 26;
7265 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7266 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7267 pps->constrained_intra_pred= get_bits1(&s->gb);
7268 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7270 pps->transform_8x8_mode= 0;
7271 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7272 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7273 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing fields: present iff bits remain before bit_length. */
7275 if(get_bits_count(&s->gb) < bit_length){
7276 pps->transform_8x8_mode= get_bits1(&s->gb);
7277 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7278 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7280 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7283 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7284 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7285 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
/* NOTE(review): writes the ACTIVE pps (h->pps), not the pps being parsed
 * — looks suspicious (cf. sps handling above); verify against callers. */
7286 h->pps.chroma_qp_diff= 1;
7288 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7289 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7290 pps_id, pps->sps_id,
7291 pps->cabac ? "CABAC" : "CAVLC",
7292 pps->slice_group_count,
7293 pps->ref_count[0], pps->ref_count[1],
7294 pps->weighted_pred ? "weighted" : "",
7295 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7296 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7297 pps->constrained_intra_pred ? "CONSTR" : "",
7298 pps->redundant_pic_cnt_present ? "REDU" : "",
7299 pps->transform_8x8_mode ? "8x8DCT" : ""
7307  * Call decode_slice() for each context.
7309  * @param h h264 master context
7310  * @param context_count number of contexts to execute
7312 static void execute_decode_slices(H264Context *h, int context_count){
7313 MpegEncContext * const s = &h->s;
7314 AVCodecContext * const avctx= s->avctx;
/* fast path: a single queued slice is decoded directly on the master context */
7318 if(context_count == 1) {
7319 decode_slice(avctx, h);
/* otherwise prime each slave context, then run them via avctx->execute() */
7321 for(i = 1; i < context_count; i++) {
7322 hx = h->thread_context[i];
7323 hx->s.error_resilience = avctx->error_resilience;
7324 hx->s.error_count = 0;
7327 avctx->execute(avctx, (void *)decode_slice,
7328 (void **)h->thread_context, NULL, context_count);
7330 /* pull back stuff from slices to master context */
7331 hx = h->thread_context[context_count - 1];
7332 s->mb_x = hx->s.mb_x;
7333 s->mb_y = hx->s.mb_y;
7334 s->dropable = hx->s.dropable;
7335 s->picture_structure = hx->s.picture_structure;
/* accumulate per-slice error counts into the master context */
7336 for(i = 1; i < context_count; i++)
7337 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split buf into NAL units and decode each of them.
 * Supports both length-prefixed AVC streams (h->is_avc, prefix size
 * h->nal_length_size) and Annex-B streams (00 00 01 start codes).  Slice
 * NALs are queued into thread contexts and flushed through
 * execute_decode_slices() once h->max_contexts slices are pending.
 * Returns the number of bytes consumed. */
7342 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7343 MpegEncContext * const s = &h->s;
7344 AVCodecContext * const avctx= s->avctx;
7346 H264Context *hx; ///< thread context
7347 int context_count = 0;
7349 h->max_contexts = avctx->thread_count;
7352 for(i=0; i<50; i++){
7353 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* unless decoding in chunks, a new call starts a new picture */
7356 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7357 h->current_slice = 0;
7358 if (!s->first_field)
7359 s->current_picture_ptr= NULL;
7371 if(buf_index >= buf_size) break;
/* AVC mode: read the big-endian NAL length prefix and bounds-check it */
7373 for(i = 0; i < h->nal_length_size; i++)
7374 nalsize = (nalsize << 8) | buf[buf_index++];
7375 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7380 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7385 // start code prefix search
7386 for(; buf_index + 3 < buf_size; buf_index++){
7387 // This should always succeed in the first iteration.
7388 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7392 if(buf_index+3 >= buf_size) break;
7397 hx = h->thread_context[context_count];
/* unescape the RBSP (strip emulation-prevention bytes) into ptr */
7399 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7400 if (ptr==NULL || dst_length < 0){
/* drop trailing zero bytes, then compute the exact RBSP bit length */
7403 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7405 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7407 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7408 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7411 if (h->is_avc && (nalsize != consumed)){
7412 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7416 buf_index += consumed;
/* skip non-reference NALs when frame skipping is requested */
7418 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7419 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
/* dispatch on NAL unit type */
7424 switch(hx->nal_unit_type){
7426 if (h->nal_unit_type != NAL_IDR_SLICE) {
7427 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7430 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7432 init_get_bits(&hx->s.gb, ptr, bit_length);
7434 hx->inter_gb_ptr= &hx->s.gb;
7435 hx->s.data_partitioning = 0;
7437 if((err = decode_slice_header(hx, h)))
7440 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* decode the slice only if it survives all skip/hurry-up filters */
7441 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7442 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7443 && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7444 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7445 && avctx->skip_frame < AVDISCARD_ALL)
/* data-partitioned slices: partition A carries the header... */
7449 init_get_bits(&hx->s.gb, ptr, bit_length);
7451 hx->inter_gb_ptr= NULL;
7452 hx->s.data_partitioning = 1;
7454 err = decode_slice_header(hx, h);
/* ...partition B holds intra residual, partition C inter residual */
7457 init_get_bits(&hx->intra_gb, ptr, bit_length);
7458 hx->intra_gb_ptr= &hx->intra_gb;
7461 init_get_bits(&hx->inter_gb, ptr, bit_length);
7462 hx->inter_gb_ptr= &hx->inter_gb;
7464 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7465 && s->context_initialized
7467 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7468 && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7469 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7470 && avctx->skip_frame < AVDISCARD_ALL)
7474 init_get_bits(&s->gb, ptr, bit_length);
7478 init_get_bits(&s->gb, ptr, bit_length);
7479 decode_seq_parameter_set(h);
7481 if(s->flags& CODEC_FLAG_LOW_DELAY)
7484 if(avctx->has_b_frames < 2)
7485 avctx->has_b_frames= !s->low_delay;
7488 init_get_bits(&s->gb, ptr, bit_length);
7490 decode_picture_parameter_set(h, bit_length);
7494 case NAL_END_SEQUENCE:
7495 case NAL_END_STREAM:
7496 case NAL_FILLER_DATA:
7498 case NAL_AUXILIARY_SLICE:
7501 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush queued slices once all thread contexts are filled */
7504 if(context_count == h->max_contexts) {
7505 execute_decode_slices(h, context_count);
7510 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7512 /* Slice could not be decoded in parallel mode, copy down
7513 * NAL unit stuff to context 0 and restart. Note that
7514 * rbsp_buffer is not transferred, but since we no longer
7515 * run in parallel mode this should not be an issue. */
7516 h->nal_unit_type = hx->nal_unit_type;
7517 h->nal_ref_idc   = hx->nal_ref_idc;
/* decode any slices still queued after the NAL loop */
7523 execute_decode_slices(h, context_count);
7528  * returns the number of bytes consumed for building the current frame
/* Positions within 10 bytes of the end are rounded up to the full buffer so
 * trailing padding is not reported back to the caller as unconsumed. */
7530 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7531 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7532 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Decoder entry point: decode one access unit from buf and, once the
 * B-frame reorder buffer allows it, return a displayable frame in *pict
 * (*data_size set to sizeof(AVFrame) when a picture is output). */
7537 static int decode_frame(AVCodecContext *avctx,
7538 void *data, int *data_size,
7539 const uint8_t *buf, int buf_size)
7541 H264Context *h = avctx->priv_data;
7542 MpegEncContext *s = &h->s;
7543 AVFrame *pict = data;
7546 s->flags= avctx->flags;
7547 s->flags2= avctx->flags2;
7549 /* end of stream, output what is still in the buffers */
7550 if (buf_size == 0) {
7554 //FIXME factorize this with the output code below
/* pick the delayed picture with the smallest POC and shift the rest down */
7555 out = h->delayed_pic[0];
7557 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7558 if(h->delayed_pic[i]->poc < out->poc){
7559 out = h->delayed_pic[i];
7563 for(i=out_idx; h->delayed_pic[i]; i++)
7564 h->delayed_pic[i] = h->delayed_pic[i+1];
7567 *data_size = sizeof(AVFrame);
7568 *pict= *(AVFrame*)out;
/* first call on an AVC stream: parse SPS/PPS out of the avcC extradata */
7574 if(h->is_avc && !h->got_avcC) {
7575 int i, cnt, nalsize;
7576 unsigned char *p = avctx->extradata;
7577 if(avctx->extradata_size < 7) {
7578 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7582 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7585 /* sps and pps in the avcC always have length coded with 2 bytes,
7586 so put a fake nal_length_size = 2 while parsing them */
7587 h->nal_length_size = 2;
7588 // Decode sps from avcC
7589 cnt = *(p+5) & 0x1f; // Number of sps
7591 for (i = 0; i < cnt; i++) {
7592 nalsize = AV_RB16(p) + 2;
7593 if(decode_nal_units(h, p, nalsize) < 0) {
7594 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7599 // Decode pps from avcC
7600 cnt = *(p++); // Number of pps
7601 for (i = 0; i < cnt; i++) {
7602 nalsize = AV_RB16(p) + 2;
7603 if(decode_nal_units(h, p, nalsize) != nalsize) {
7604 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7609 // Now store right nal length size, that will be use to parse all other nals
7610 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7611 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata; parse it once */
7615 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7616 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7620 buf_index=decode_nal_units(h, buf, buf_size);
7624 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7625 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7626 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* a picture was completed: run reference marking, then reorder for output */
7630 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7631 Picture *out = s->current_picture_ptr;
7632 Picture *cur = s->current_picture_ptr;
7633 int i, pics, cross_idr, out_of_order, out_idx;
7637 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7638 s->current_picture_ptr->pict_type= s->pict_type;
7641 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* carry POC/frame_num state over to the next picture */
7642 h->prev_poc_msb= h->poc_msb;
7643 h->prev_poc_lsb= h->poc_lsb;
7645 h->prev_frame_num_offset= h->frame_num_offset;
7646 h->prev_frame_num= h->frame_num;
7649      * FIXME: Error handling code does not seem to support interlaced
7650      * when slices span multiple rows
7651      * The ff_er_add_slice calls don't work right for bottom
7652      * fields; they cause massive erroneous error concealing
7653      * Error marking covers both fields (top and bottom).
7654      * This causes a mismatched s->error_count
7655      * and a bad error table. Further, the error count goes to
7656      * INT_MAX when called for bottom field, because mb_y is
7657      * past end by one (callers fault) and resync_mb_y != 0
7658      * causes problems for the first MB line, too.
7665 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7666 /* Wait for second field. */
7670 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7671 /* Derive top_field_first from field pocs. */
7672 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7674 //FIXME do something with unavailable reference frames
7676 /* Sort B-frames into display order */
7678 if(h->sps.bitstream_restriction_flag
7679 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7680 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* without bitstream restrictions, strict compliance forces maximum delay */
7684 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7685 && !h->sps.bitstream_restriction_flag){
7686 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* append the current picture to the delayed-output queue */
7691 while(h->delayed_pic[pics]) pics++;
7693 assert(pics <= MAX_DELAYED_PIC_COUNT);
7695 h->delayed_pic[pics++] = cur;
7696 if(cur->reference == 0)
7697 cur->reference = DELAYED_PIC_REF;
/* candidate for output: the queued picture with the smallest POC */
7699 out = h->delayed_pic[0];
7701 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7702 if(h->delayed_pic[i]->poc < out->poc){
7703 out = h->delayed_pic[i];
7706 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7708 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* grow the reorder delay when POCs prove the current delay is too small */
7710 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7712 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7714 ((!cross_idr && out->poc > h->outputed_poc + 2)
7715 || cur->pict_type == FF_B_TYPE)))
7718 s->avctx->has_b_frames++;
/* pop 'out' from the queue once the delay is satisfied (or on disorder) */
7721 if(out_of_order || pics > s->avctx->has_b_frames){
7722 out->reference &= ~DELAYED_PIC_REF;
7723 for(i=out_idx; h->delayed_pic[i]; i++)
7724 h->delayed_pic[i] = h->delayed_pic[i+1];
7726 if(!out_of_order && pics > s->avctx->has_b_frames){
7727 *data_size = sizeof(AVFrame);
7729 h->outputed_poc = out->poc;
7730 *pict= *(AVFrame*)out;
7732 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7737 assert(pict->data[0] || !*data_size);
7738 ff_print_debug_info(s, pict);
7739 //printf("out %d\n", (int)pict->data[0]);
7742 /* Return the Picture timestamp as the frame number */
7743 /* we subtract 1 because it is added on utils.c */
7744 avctx->frame_number = s->picture_number - 1;
7746 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour availability for the current macroblock:
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left.  A neighbour counts as
 * available only if it belongs to the same slice (slice_table match). */
7749 static inline void fill_mb_avail(H264Context *h){
7750 MpegEncContext * const s = &h->s;
7751 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7754 h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7755 h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7756 h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7762 h->mb_avail[3]= s->mb_x                  && h->slice_table[mb_xy - 1] == h->slice_num;
7763 h->mb_avail[4]= 1; //FIXME move out
7764 h->mb_avail[5]= 0; //FIXME move out
/* Standalone self-test code: exercises the exp-Golomb read/write helpers,
 * the 4x4 (I)DCT round trip, the quantizer, and the NAL escape/unescape
 * layer with random data, printing mismatches and timing information. */
7772 #define SIZE (COUNT*40)
7778 //    int int_temp[10000];
7780     AVCodecContext avctx;
7782     dsputil_init(&dsp, &avctx);
/* write COUNT unsigned exp-Golomb codes, then read them back and compare */
7784     init_put_bits(&pb, temp, SIZE);
7785     printf("testing unsigned exp golomb\n");
7786     for(i=0; i<COUNT; i++){
7788         set_ue_golomb(&pb, i);
7789         STOP_TIMER("set_ue_golomb");
7791     flush_put_bits(&pb);
7793     init_get_bits(&gb, temp, 8*SIZE);
7794     for(i=0; i<COUNT; i++){
7797         s= show_bits(&gb, 24);
7800         j= get_ue_golomb(&gb);
7802             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7805         STOP_TIMER("get_ue_golomb");
/* same round trip for signed exp-Golomb codes, centered around zero */
7809     init_put_bits(&pb, temp, SIZE);
7810     printf("testing signed exp golomb\n");
7811     for(i=0; i<COUNT; i++){
7813         set_se_golomb(&pb, i - COUNT/2);
7814         STOP_TIMER("set_se_golomb");
7816     flush_put_bits(&pb);
7818     init_get_bits(&gb, temp, 8*SIZE);
7819     for(i=0; i<COUNT; i++){
7822         s= show_bits(&gb, 24);
7825         j= get_se_golomb(&gb);
7826         if(j != i - COUNT/2){
7827             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7830         STOP_TIMER("get_se_golomb");
/* forward DCT + IDCT on random blocks; accumulate reconstruction error */
7834     printf("testing 4x4 (I)DCT\n");
7837         uint8_t src[16], ref[16];
7838         uint64_t error= 0, max_error=0;
7840         for(i=0; i<COUNT; i++){
7842 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7844             for(j=0; j<16; j++){
7844                 ref[j]= random()%255;
7845                 src[j]= random()%255;
7848             h264_diff_dct_c(block, src, ref, 4);
7851             for(j=0; j<16; j++){
7852 //                printf("%d ", block[j]);
7853                 block[j]= block[j]*4;
7854                 if(j&1) block[j]= (block[j]*4 + 2)/5;
7855                 if(j&4) block[j]= (block[j]*4 + 2)/5;
7859             s->dsp.h264_idct_add(ref, block, 4);
7860 /*            for(j=0; j<16; j++){
7861                 printf("%d ", ref[j]);
7865             for(j=0; j<16; j++){
7866                 int diff= FFABS(src[j] - ref[j]);
7869                 max_error= FFMAX(max_error, diff);
7872         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7873         printf("testing quantizer\n");
7874         for(qp=0; qp<52; qp++){
7876                 src1_block[i]= src2_block[i]= random()%255;
/* random bitstream through encode_nal()/decode_nal(): must round-trip */
7879         printf("Testing NAL layer\n");
7881         uint8_t bitstream[COUNT];
7882         uint8_t nal[COUNT*2];
7884         memset(&h, 0, sizeof(H264Context));
7886         for(i=0; i<COUNT; i++){
7894             for(j=0; j<COUNT; j++){
7895                 bitstream[j]= (random() % 255) + 1;
7898             for(j=0; j<zeros; j++){
7899                 int pos= random() % COUNT;
7900                 while(bitstream[pos] == 0){
7909             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7911                 printf("encoding failed\n");
7915             out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7919             if(out_length != COUNT){
7920                 printf("incorrect length %d %d\n", out_length, COUNT);
7924             if(consumed != nal_length){
7925                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7929             if(memcmp(bitstream, out, COUNT)){
7930                 printf("mismatch\n");
7936     printf("Testing RBSP\n");
/* Codec close callback: free per-context RBSP buffers and lookup tables. */
7944 static av_cold int decode_end(AVCodecContext *avctx)
7946     H264Context *h = avctx->priv_data;
7947     MpegEncContext *s = &h->s;
7949     av_freep(&h->rbsp_buffer[0]);
7950     av_freep(&h->rbsp_buffer[1]);
7951     free_tables(h); //FIXME cleanup init stuff perhaps
7954 //    memset(h, 0, sizeof(H264Context));
/* Exported codec descriptor registering this H.264 decoder with libavcodec. */
7960 AVCodec h264_decoder = {
7964     sizeof(H264Context),
7969     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7971     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),