2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
/* Shadow two MpegEncContext field names whose values are not meaningful for
 * H.264: any accidental use now fails to compile instead of silently reading
 * a bogus value. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Indices of the luma-DC and chroma-DC coefficient blocks in the block scan
 * order (indices 0..15 are the 16 4x4 luma blocks). */
#define LUMA_DC_BLOCK_INDEX 25
#define CHROMA_DC_BLOCK_INDEX 26

/* Number of bits resolved per step for each CAVLC VLC table lookup. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6

#define MAX_SPS_COUNT 32   /* size of the SPS id space (sps_buffer entries) */
#define MAX_PPS_COUNT 256  /* size of the PPS id space (pps_buffer entries) */

#define MAX_MMCO_COUNT 66  /* upper bound on MMCO operations per slice header */

/* Compiling in interlaced support reduces the speed
 * of progressive decoding by about 2%. */
#define ALLOW_INTERLACE

#ifdef ALLOW_INTERLACE
#define MB_MBAFF h->mb_mbaff
#define MB_FIELD h->mb_field_decoding_flag
#define FRAME_MBAFF h->mb_aff_frame
/* NOTE(review): the matching #else branch and #endif of this conditional are
 * elided in this excerpt; IS_INTERLACED below belongs to the non-interlaced
 * branch where the MBAFF macros collapse to constants — confirm against the
 * full file. */
#define IS_INTERLACED(mb_type) 0
/**
 * Sequence parameter set
 * Holds the parsed syntax elements of one seq_parameter_set_rbsp().
 * NOTE(review): the "typedef struct SPS{" header and closing line are elided
 * in this excerpt; the fields below are SPS members.
 */
int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
int poc_type;                      ///< pic_order_cnt_type
int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
int delta_pic_order_always_zero_flag;
int offset_for_non_ref_pic;        ///< used for POC computation
int offset_for_top_to_bottom_field; ///< used for POC computation
int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
int ref_frame_count;               ///< num_ref_frames
int gaps_in_frame_num_allowed_flag;
int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
int frame_mbs_only_flag;
int mb_aff;                        ///< mb_adaptive_frame_field_flag
int direct_8x8_inference_flag;
int crop;                          ///< frame_cropping_flag
int crop_left;                     ///< frame_cropping_rect_left_offset
int crop_right;                    ///< frame_cropping_rect_right_offset
int crop_top;                      ///< frame_cropping_rect_top_offset
int crop_bottom;                   ///< frame_cropping_rect_bottom_offset
int vui_parameters_present_flag;
int timing_info_present_flag;      ///< VUI: timing info follows if set
uint32_t num_units_in_tick;
int fixed_frame_rate_flag;
short offset_for_ref_frame[256];   //FIXME dynamic allocation?
int bitstream_restriction_flag;
int num_reorder_frames;
int scaling_matrix_present;        ///< nonzero if the SPS carries scaling lists
uint8_t scaling_matrix4[6][16];    ///< 6 lists of 4x4 quant scaling values
uint8_t scaling_matrix8[2][64];    ///< 2 lists of 8x8 quant scaling values
/**
 * Picture parameter set
 * Holds the parsed syntax elements of one pic_parameter_set_rbsp().
 * NOTE(review): the "typedef struct PPS{" header and closing line are elided
 * in this excerpt; the fields below are PPS members.
 */
int cabac;                  ///< entropy_coding_mode_flag
int pic_order_present;      ///< pic_order_present_flag
int slice_group_count;      ///< num_slice_groups_minus1 + 1
int mb_slice_group_map_type;
int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
int weighted_pred;          ///< weighted_pred_flag
int weighted_bipred_idc;
int init_qp;                ///< pic_init_qp_minus26 + 26
int init_qs;                ///< pic_init_qs_minus26 + 26
int chroma_qp_index_offset;
int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
int constrained_intra_pred;     ///< constrained_intra_pred_flag
int redundant_pic_cnt_present;  ///< redundant_pic_cnt_present_flag
int transform_8x8_mode;         ///< transform_8x8_mode_flag
uint8_t scaling_matrix4[6][16]; ///< 6 lists of 4x4 quant scaling values
uint8_t scaling_matrix8[2][64]; ///< 2 lists of 8x8 quant scaling values
/**
 * Memory management control operation opcode.
 */
typedef enum MMCOOpcode{
/* NOTE(review): the enumerator list and closing "}MMCOOpcode;" are elided in
 * this excerpt. */

/**
 * Memory management control operation.
 */
/* Main per-decoder/per-encoder state for H.264.
 * NOTE(review): a number of members are elided in this excerpt (including the
 * closing "}H264Context;"); only the visible fields are annotated below. */
typedef struct H264Context{
    uint8_t *rbsp_buffer;           ///< scratch buffer for the unescaped RBSP
    unsigned int rbsp_buffer_size;

    /**
     * Used to parse AVC variant of h264
     */
    int is_avc;          ///< this flag is != 0 if codec is avc1
    int got_avcC;        ///< flag used to parse avcC data only once
    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)

    int chroma_pred_mode;
    int intra16x16_pred_mode;

    // 5x8 cache grid: row 0 / column 0 hold the neighbouring MB's values
    int8_t intra4x4_pred_mode_cache[5*8];
    int8_t (*intra4x4_pred_mode)[8];
    // intra prediction function pointer tables, indexed by prediction mode
    void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
    void (*pred8x8 [4+3])(uint8_t *src, int stride);
    void (*pred16x16[4+3])(uint8_t *src, int stride);
    // bitmasks of which neighbouring samples are available for intra pred
    unsigned int topleft_samples_available;
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;
    uint8_t (*top_borders[2])[16+2*8];
    uint8_t left_border[2*(17+2*9)];

    /**
     * non zero coeff count cache.
     * is 64 if not available.
     */
    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
    uint8_t (*non_zero_count)[16];

    /**
     * Motion vector cache.
     */
    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2

    /**
     * is 1 if the specific list MV&references are set to 0,0,-2.
     */
    int mv_cache_clean[2];

    /**
     * number of neighbors (top and/or left) that used 8x8 dct
     */
    int neighbor_transform_size;

    /**
     * block_offset[ 0..23] for frame macroblocks
     * block_offset[24..47] for field macroblocks
     */
    int block_offset[2*(16+8)];

    uint32_t *mb2b_xy;  //FIXME are these 4 a good idea?
    int b_stride;       //FIXME use s->b4_stride

    int mb_linesize;    ///< may be equal to s->linesize or s->linesize*2, for mbaff

    int unknown_svq3_flag;
    int next_slice_index;

    SPS sps_buffer[MAX_SPS_COUNT];
    SPS sps; ///< current sps

    PPS pps_buffer[MAX_PPS_COUNT];
    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?

    // dequant tables: one per QP (0..51) and list; the _coeff pointers index
    // into the buffers (or alias each other when matrices are shared)
    uint32_t dequant4_buffer[6][52][16];
    uint32_t dequant8_buffer[2][52][64];
    uint32_t (*dequant4_coeff[6])[16];
    uint32_t (*dequant8_coeff[2])[64];
    int dequant_coeff_pps; ///< reinit tables when pps changes

    uint8_t *slice_table_base;
    uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
    int slice_type_fixed;

    //interlacing specific flags
    int mb_field_decoding_flag;
    int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag

    // POC (picture order count) state
    int delta_poc_bottom;
    int prev_poc_msb;          ///< poc_msb of the last reference pic for POC type 0
    int prev_poc_lsb;          ///< poc_lsb of the last reference pic for POC type 0
    int frame_num_offset;      ///< for POC type 2
    int prev_frame_num_offset; ///< for POC type 2
    int prev_frame_num;        ///< frame_num of the last pic for POC type 1/2

    /* frame_num for frames or 2*frame_num for field pics.
     * (NOTE(review): the member this documents is elided in this excerpt) */
    /* max_frame_num or 2*max_frame_num for field pics.
     * (NOTE(review): the member this documents is elided in this excerpt) */

    //Weighted pred stuff
    int use_weight_chroma;
    int luma_log2_weight_denom;
    int chroma_log2_weight_denom;
    int luma_weight[2][48];
    int luma_offset[2][48];
    int chroma_weight[2][48][2];
    int chroma_offset[2][48][2];
    int implicit_weight[48][48];

    // deblocking filter parameters from the slice header
    int deblocking_filter;     ///< disable_deblocking_filter_idc with 1<->0
    int slice_alpha_c0_offset;
    int slice_beta_offset;

    int redundant_pic_count;

    // B-frame direct-mode prediction state
    int direct_spatial_mv_pred;
    int dist_scale_factor[16];
    int dist_scale_factor_field[32];
    int map_col_to_list0[2][16];
    int map_col_to_list0_field[2][32];

    /**
     * num_ref_idx_l0/1_active_minus1 + 1
     */
    int ref_count[2];          ///< counts frames or fields, depending on current mb mode
    Picture *short_ref[32];
    Picture *long_ref[32];
    Picture default_ref_list[2][32];
    Picture ref_list[2][48];   ///< 0..15: frame refs, 16..47: mbaff field refs
    Picture *delayed_pic[16];  //FIXME size?
    Picture *delayed_output_pic;

    /**
     * memory management control operations buffer.
     */
    MMCO mmco[MAX_MMCO_COUNT];

    int long_ref_count;  ///< number of actual long term references
    int short_ref_count; ///< number of actual short term references

    GetBitContext intra_gb;
    GetBitContext inter_gb;
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;

    DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);  ///< residual coefficients for one MB

    // CABAC state
    uint8_t cabac_state[460];

    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */

    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
    uint8_t *chroma_pred_mode_table;
    int last_qscale_diff;
    int16_t (*mvd_table[2])[2];  ///< motion vector differences, kept for CABAC contexts
    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
    uint8_t *direct_table;
    uint8_t direct_cache[5*8];

    // scan-order tables; the _q0 pointers are the unpermuted (QP-independent)
    // versions used when no scaling matrices reorder the scans
    uint8_t zigzag_scan[16];
    uint8_t zigzag_scan8x8[64];
    uint8_t zigzag_scan8x8_cavlc[64];
    uint8_t field_scan[16];
    uint8_t field_scan8x8[64];
    uint8_t field_scan8x8_cavlc[64];
    const uint8_t *zigzag_scan_q0;
    const uint8_t *zigzag_scan8x8_q0;
    const uint8_t *zigzag_scan8x8_cavlc_q0;
    const uint8_t *field_scan_q0;
    const uint8_t *field_scan8x8_q0;
    const uint8_t *field_scan8x8_cavlc_q0;
/* CAVLC VLC tables, shared by all decoder instances and built once at init */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];

/* forward declarations */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Pack two 16-bit values into one 32-bit word so that the result has the
 * same in-memory layout as two consecutive int16_t (a first, then b),
 * regardless of host endianness. */
static always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
/* NOTE(review): the #else / #endif and closing brace are elided in this
 * excerpt; the line below is the little-endian branch. */
    return (a&0xFFFF) + (b<<16);
/**
 * Fill a w x h rectangle inside one of the per-MB caches with a constant.
 * @param h height of the rectangle, should be a constant
 * @param w width of the rectangle, should be a constant
 * @param size the size of val (1 or 4), should be a constant
 */
static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);

    // destination must be aligned and the stride a multiple of the row width,
    // so each row can be written with a single (or few) wide stores
    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    /* NOTE(review): the if(w==2)/else if(w==4)/... branch headers and the
     * per-height (h==1/2/4) guards are elided in this excerpt; each group of
     * stores below is one width variant, with val replicated to fill the
     * store width when size==1. */
    // w==2, size<=1: replicate the byte into a 16-bit store per row
    const uint16_t v= size==4 ? val : val*0x0101;
    *(uint16_t*)(p + 0*stride)= v;
    *(uint16_t*)(p + 1*stride)= v;
    *(uint16_t*)(p + 2*stride)=
    *(uint16_t*)(p + 3*stride)= v;
    // w==4: one 32-bit store per row
    const uint32_t v= size==4 ? val : val*0x01010101;
    *(uint32_t*)(p + 0*stride)= v;
    *(uint32_t*)(p + 1*stride)= v;
    *(uint32_t*)(p + 2*stride)=
    *(uint32_t*)(p + 3*stride)= v;
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
    // w==8 on 64-bit hosts: one 64-bit store per row
    const uint64_t v= val*0x0100000001ULL;
    *(uint64_t*)(p + 0*stride)= v;
    *(uint64_t*)(p + 1*stride)= v;
    *(uint64_t*)(p + 2*stride)=
    *(uint64_t*)(p + 3*stride)= v;
    // w==16 on 64-bit hosts: two 64-bit stores per row
    const uint64_t v= val*0x0100000001ULL;
    *(uint64_t*)(p + 0+0*stride)=
    *(uint64_t*)(p + 8+0*stride)=
    *(uint64_t*)(p + 0+1*stride)=
    *(uint64_t*)(p + 8+1*stride)= v;
    *(uint64_t*)(p + 0+2*stride)=
    *(uint64_t*)(p + 8+2*stride)=
    *(uint64_t*)(p + 0+3*stride)=
    *(uint64_t*)(p + 8+3*stride)= v;
    // w==8 on 32-bit hosts: two 32-bit stores per row
    *(uint32_t*)(p + 0+0*stride)=
    *(uint32_t*)(p + 4+0*stride)= val;
    *(uint32_t*)(p + 0+1*stride)=
    *(uint32_t*)(p + 4+1*stride)= val;
    *(uint32_t*)(p + 0+2*stride)=
    *(uint32_t*)(p + 4+2*stride)=
    *(uint32_t*)(p + 0+3*stride)=
    *(uint32_t*)(p + 4+3*stride)= val;
    // w==16 on 32-bit hosts: four 32-bit stores per row
    *(uint32_t*)(p + 0+0*stride)=
    *(uint32_t*)(p + 4+0*stride)=
    *(uint32_t*)(p + 8+0*stride)=
    *(uint32_t*)(p +12+0*stride)=
    *(uint32_t*)(p + 0+1*stride)=
    *(uint32_t*)(p + 4+1*stride)=
    *(uint32_t*)(p + 8+1*stride)=
    *(uint32_t*)(p +12+1*stride)= val;
    *(uint32_t*)(p + 0+2*stride)=
    *(uint32_t*)(p + 4+2*stride)=
    *(uint32_t*)(p + 8+2*stride)=
    *(uint32_t*)(p +12+2*stride)=
    *(uint32_t*)(p + 0+3*stride)=
    *(uint32_t*)(p + 4+3*stride)=
    *(uint32_t*)(p + 8+3*stride)=
    *(uint32_t*)(p +12+3*stride)= val;
/**
 * Fill the per-macroblock neighbour caches (intra prediction modes, non-zero
 * coefficient counts, motion vectors, reference indices, mvd and direct-mode
 * flags) from the already-decoded neighbouring macroblocks.
 * NOTE(review): large portions of this function are elided in this excerpt;
 * several if/else bodies, loop headers and closing braces are not visible,
 * and the section comments below mark where structure is missing.
 * @param mb_type type of the current macroblock
 * @param for_deblock nonzero when caching only what the deblocking filter needs
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];

    //FIXME deblocking could skip the intra and nnz parts.
    // fast path for deblocking: single slice (or same slice as the MB above)
    // and no MBAFF means the simple neighbour derivation below suffices
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)

    // The neighbour derivation below follows the (intricate) MBAFF
    // neighbour-availability rules of the H.264 specification.
    // default (progressive / non-MBAFF) neighbour positions:
    top_xy     = mb_xy - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

    /* MBAFF neighbour correction (enclosing if(FRAME_MBAFF){ elided):
     * neighbours are derived from the MB *pair* containing the current MB */
    const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy      = pair_xy     - s->mb_stride;
    const int topleft_pair_xy  = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
    /* NOTE(review): the "if (bottom" condition headers of the three ternaries
     * below are elided in this excerpt; each decides whether the neighbour xy
     * must be moved up one MB row because of field/frame pairing. */
    ? !curr_mb_frame_flag // bottom macroblock
    : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
    top_xy -= s->mb_stride;
    ? !curr_mb_frame_flag // bottom macroblock
    : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
    topleft_xy -= s->mb_stride;
    ? !curr_mb_frame_flag // bottom macroblock
    : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
    topright_xy -= s->mb_stride;
    // left neighbour: when pairing differs, both left entries come from the
    // left MB pair (split across the two fields as needed)
    if (left_mb_frame_flag != curr_mb_frame_flag) {
    left_xy[1] = left_xy[0] = pair_xy - 1;
    if (curr_mb_frame_flag) {
    left_xy[1] += s->mb_stride;

    // publish neighbour positions for use by other functions
    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

    /* deblocking-path neighbour types: slice_table value 255 marks an
     * unallocated/unavailable MB (deblocking may cross slice boundaries) */
    top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(FRAME_MBAFF && !IS_INTRA(mb_type)){
    // reload this MB's own nnz/mv data, packed at index 14 by
    // write_back_non_zero_count(), for the deblocking filter
    int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
    h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
    for(list=0; list<1+(h->slice_type==B_TYPE); list++){
    if(USES_LIST(mb_type,list)){
    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
    // list not used by this MB: clear its mv/ref caches
    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);

    /* decoding-path neighbour types: a neighbour only counts if it belongs
     * to the current slice (prediction must not cross slice boundaries) */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
    // start from "all samples available", then clear bits per missing or
    // (with constrained_intra_pred) inter-coded neighbour
    h->topleft_samples_available=
    h->top_samples_available=
    h->left_samples_available= 0xFFFF;
    h->topright_samples_available= 0xEEEA;

    if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
    h->topleft_samples_available= 0xB3FF;
    h->top_samples_available= 0x33FF;
    h->topright_samples_available= 0x26EA;
    if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
    h->topleft_samples_available&= 0xDF5F;
    h->left_samples_available&= 0x5F5F;
    if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
    h->topleft_samples_available&= 0x7FFF;

    if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
    h->topright_samples_available&= 0xFBFF;

    if(IS_INTRA4x4(mb_type)){
    // top row of the intra4x4 mode cache comes from the MB above
    if(IS_INTRA4x4(top_type)){
    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
    if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
    h->intra4x4_pred_mode_cache[4+8*0]=
    h->intra4x4_pred_mode_cache[5+8*0]=
    h->intra4x4_pred_mode_cache[6+8*0]=
    h->intra4x4_pred_mode_cache[7+8*0]= pred;
    // left column comes from the (up to two, with MBAFF) left MBs
    if(IS_INTRA4x4(left_type[i])){
    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    // non-zero-count cache: top row (luma then the two chroma rows)
    h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
    h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
    h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    // top MB unavailable: CABAC inter predicts nnz as 0, everything else as 64
    h->non_zero_count_cache[4+8*0]=
    h->non_zero_count_cache[5+8*0]=
    h->non_zero_count_cache[6+8*0]=
    h->non_zero_count_cache[7+8*0]=
    h->non_zero_count_cache[1+8*0]=
    h->non_zero_count_cache[2+8*0]=
    h->non_zero_count_cache[1+8*3]=
    h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    // non-zero-count cache: left column from up to two left MBs
    for (i=0; i<2; i++) {
    h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
    h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
    h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
    h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
    h->non_zero_count_cache[3+8*1 + 2*8*i]=
    h->non_zero_count_cache[3+8*2 + 2*8*i]=
    h->non_zero_count_cache[0+8*1 +   8*i]=
    h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    // neighbour coded-block-pattern values (used by CABAC cbp contexts)
    h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
    h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
    for(list=0; list<1+(h->slice_type==B_TYPE); list++){
    if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
    /*if(!h->mv_cache_clean[list]){
    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
    h->mv_cache_clean[list]= 1;
    h->mv_cache_clean[list]= 0;

    // motion vector / reference cache: row above the current MB
    if(USES_LIST(top_type, list)){
    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
    const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
    *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
    *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
    *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
    *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
    h->ref_cache[list][scan8[0] + 0 - 1*8]=
    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
    h->ref_cache[list][scan8[0] + 2 - 1*8]=
    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
    *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
    *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
    *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
    *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
    *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

    //FIXME unify cleanup or sth
    // left column, upper half (left MB 0)
    if(USES_LIST(left_type[0], list)){
    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
    const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
    h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
    h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
    h->ref_cache[list][scan8[0] - 1 + 0*8]=
    h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

    // left column, lower half (left MB 1, distinct only with MBAFF)
    if(USES_LIST(left_type[1], list)){
    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
    const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
    h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
    h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
    h->ref_cache[list][scan8[0] - 1 + 2*8]=
    h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
    assert((!left_type[0]) == (!left_type[1]));

    // topleft/topright only matter for spatial prediction, not deblocking
    if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)

    if(USES_LIST(topleft_type, list)){
    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
    const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

    if(USES_LIST(topright_type, list)){
    const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
    const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
    *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
    h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
    *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
    h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

    if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

    // mark the unused padding cells of the cache grid as unavailable
    h->ref_cache[list][scan8[5 ]+1] =
    h->ref_cache[list][scan8[7 ]+1] =
    h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
    h->ref_cache[list][scan8[4 ]] =
    h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
    *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
    *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
    *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
    *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

    /* load the mvd (motion vector difference) caches used by CABAC contexts */
    if(USES_LIST(top_type, list)){
    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
    if(USES_LIST(left_type[0], list)){
    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
    if(USES_LIST(left_type[1], list)){
    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
    *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
    *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
    *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
    *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

    // direct-mode flag cache (B slices only)
    if(h->slice_type == B_TYPE){
    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

    if(IS_DIRECT(top_type)){
    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
    }else if(IS_8X8(top_type)){
    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

    if(IS_DIRECT(left_type[0]))
    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
    else if(IS_8X8(left_type[0]))
    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
    h->direct_cache[scan8[0] - 1 + 0*8]= 0;

    if(IS_DIRECT(left_type[1]))
    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
    else if(IS_8X8(left_type[1]))
    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

    /* MBAFF frame<->field remapping: apply MAP_F2F to every neighbour cache
     * cell (the "#define MAP_MVS" header of this macro is elided in this
     * excerpt, as is the #undef between the two MAP_F2F definitions below). */
    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

    // current MB is a field MB, neighbour is frame: halve vertical mv,
    // double the reference index (two fields per frame)
#define MAP_F2F(idx, mb_type)\
    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] <<= 1;\
        h->mv_cache[list][idx][1] /= 2;\
        h->mvd_cache[list][idx][1] /= 2;\
#define MAP_F2F(idx, mb_type)\
    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] >>= 1;\
        h->mv_cache[list][idx][1] <<= 1;\
        h->mvd_cache[list][idx][1] <<= 1;\

    // count of top/left neighbours that used the 8x8 transform (CABAC ctx)
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copy the bottom-row and right-column intra4x4 prediction modes from the
 * cache grid back to the per-MB array, where later macroblocks read them as
 * their top/left neighbour modes.
 * NOTE(review): the last store and closing brace are elided in this excerpt. */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * @return presumably 0 on success, negative on an invalid mode — the return
 *         statements are elided in this excerpt; confirm against the full file.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    // remap tables: index is the coded mode; -1 keeps the mode, a non-negative
    // entry substitutes a mode usable without the missing neighbour
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    if(!(h->top_samples_available&0x8000)){
        /* NOTE(review): the loop header over the top row is elided here */
        int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    if(!(h->left_samples_available&0x8000)){
        /* NOTE(review): the loop header over the left column is elided here */
        int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
        av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;

} //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * Same idea as check_intra4x4_pred_mode() but for the whole-MB (16x16/chroma)
 * prediction mode passed in as @p mode.
 * NOTE(review): the remapping statements and return are elided in this excerpt.
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    // remap tables: -1 keeps the mode, other entries substitute a DC variant
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode < 0 || mode > 6) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->left_samples_available&0x8000)){
        av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1057  * gets the predicted intra4x4 prediction mode.
1059 static inline int pred_intra_mode(H264Context *h, int n){
1060     const int index8= scan8[n];
/* neighbors in the 8-wide prediction cache: -1 = left, -8 = above */
1061     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1062     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1063     const int min= FFMIN(left, top);
1065     tprintf("mode:%d %d min:%d\n", left ,top, min);
/* a negative cached mode marks an unavailable neighbor -> predict DC;
 * NOTE(review): the non-DC return (presumably `return min;`) is elided. */
1067     if(min<0) return DC_PRED;
/* Copies the per-block non-zero coefficient counts from the decode-time
 * cache (8-wide layout addressed via scan8) back into the per-MB
 * non_zero_count[] array, so the next row/MB can read them as neighbors. */
1071 static inline void write_back_non_zero_count(H264Context *h){
1072     MpegEncContext * const s = &h->s;
1073     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: bottom row and right column of the 4x4-block cache */
1075     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1076     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1077     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1078     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1079     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1080     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1081     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma U plane entries */
1083     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1084     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1085     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* chroma V plane entries */
1087     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1088     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1089     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1092     // store all luma nnzs, for deblocking
/* NOTE(review): the loop header and the declaration/initialization of `v`
 * are elided from this extract; v collects one bit per 4x4 luma block. */
1095             v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1096         *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1101  * gets the predicted number of non zero coefficients.
1102  * @param n block index
1104 static inline int pred_non_zero_count(H264Context *h, int n){
1105     const int index8= scan8[n];
/* left and top neighbors in the 8-wide nnz cache */
1106     const int left= h->non_zero_count_cache[index8 - 1];
1107     const int top = h->non_zero_count_cache[index8 - 8];
/* NOTE(review): the computation of `i` from left/top (and the handling of
 * unavailable neighbors) is elided from this extract. */
1110     if(i<64) i= (i+1)>>1;
1112     tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the "C" (top-right diagonal) MV predictor
 * for block i and stores a pointer to its motion vector in *C.  Falls back
 * to the top-left neighbor when the top-right block is unavailable.  The
 * MB_FIELD branch handles MBAFF frame/field vector scaling in place. */
1117 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1118     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1120     /* there is no consistent mapping of mvs to neighboring locations that will
1121      * make mbaff happy, so we can't move all this logic to fill_caches */
/* NOTE(review): the enclosing MB_FIELD/FRAME_MBAFF conditional lines are
 * elided from this extract (1119, 1122, 1125, 1128-1129 missing). */
1123         MpegEncContext *s = &h->s;
1124         const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch slot scan8[0]-2 is used to hold a rescaled MV for the caller */
1126         *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1127         *C = h->mv_cache[list][scan8[0]-2];
1130            && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1131             int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1132             if(IS_INTERLACED(mb_types[topright_xy])){
/* Fetch an MV from a neighbor coded in the opposite frame/field mode,
 * scaling the vertical component (MV_OP) and ref index (REF_OP) to match. */
1133 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1134                 const int x4 = X4, y4 = Y4;\
1135                 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1136                 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1137                     return LIST_NOT_USED;\
1138                 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1139                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1140                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1141                 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* field MB reading a frame-coded neighbor: double y-MV, halve ref */
1143                 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1146         if(topright_ref == PART_NOT_AVAILABLE
1147            && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1148            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1150                && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1151                 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1154                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1155                && i >= scan8[0]+8){
1156                 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1157                 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF (or no rescale needed) path: use top-right if present,
 * otherwise fall back to the top-left neighbor */
1163     if(topright_ref != PART_NOT_AVAILABLE){
1164         *C= h->mv_cache[list][ i - 8 + part_width ];
1165         return topright_ref;
1167         tprintf("topright MV not available\n");
1169         *C= h->mv_cache[list][ i - 8 - 1 ];
1170         return h->ref_cache[list][ i - 8 - 1 ];
1175  * gets the predicted MV.
1176  * @param n the block index
1177  * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1178  * @param mx the x component of the predicted motion vector
1179  * @param my the y component of the predicted motion vector
/* Standard H.264 median MV prediction from neighbors A (left), B (top)
 * and C (top-right diagonal, via fetch_diagonal_mv). */
1181 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1182     const int index8= scan8[n];
1183     const int top_ref= h->ref_cache[list][ index8 - 8 ];
1184     const int left_ref= h->ref_cache[list][ index8 - 1 ];
1185     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1186     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1188     int diagonal_ref, match_count;
1190     assert(part_width==1 || part_width==2 || part_width==4);
/* NOTE(review): special-case early returns for 16x8/8x16 partitions
 * (lines 1191-1199) are elided from this extract. */
1200     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbors use the same reference picture */
1201     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1202     tprintf("pred_motion match_count=%d\n", match_count);
1203     if(match_count > 1){ //most common
1204         *mx= mid_pred(A[0], B[0], C[0]);
1205         *my= mid_pred(A[1], B[1], C[1]);
/* exactly one neighbor matches: take its MV directly */
1206     }else if(match_count==1){
1210         }else if(top_ref==ref){
/* no match: use A if it is the only available neighbor, else the median */
1218         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1222             *mx= mid_pred(A[0], B[0], C[0]);
1223             *my= mid_pred(A[1], B[1], C[1]);
1227     tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1231  * gets the directionally predicted 16x8 MV.
1232  * @param n the block index
1233  * @param mx the x component of the predicted motion vector
1234  * @param my the y component of the predicted motion vector
/* For 16x8 partitions the spec allows a direct directional predictor:
 * top neighbor for the upper partition, left neighbor for the lower one,
 * when it uses the same reference; otherwise fall back to pred_motion. */
1236 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* upper 16x8 partition: try the block above the MB */
1238         const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1239         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1241         tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* lower 16x8 partition: try the block left of row scan8[8] */
1249         const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1250         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1252         tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1254         if(left_ref == ref){
1262     pred_motion(h, n, 4, list, ref, mx, my);
1266  * gets the directionally predicted 8x16 MV.
1267  * @param n the block index
1268  * @param mx the x component of the predicted motion vector
1269  * @param my the y component of the predicted motion vector
/* For 8x16 partitions: left neighbor for the left partition, top-right
 * diagonal for the right partition, when the reference matches;
 * otherwise fall back to the median predictor pred_motion. */
1271 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: try the block left of the MB */
1273         const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1274         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
1276         tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1278         if(left_ref == ref){
/* right 8x16 partition: use the diagonal (C) predictor of block 4 */
1287         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1289         tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1291         if(diagonal_ref == ref){
1299     pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks: the MV is (0,0) when either
 * neighbor is unavailable or is a zero-MV ref-0 block, otherwise the
 * normal median prediction with ref 0 is used. */
1302 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1303     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1304     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1306     tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1308     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
/* both MV components packed in one 32-bit word; ==0 tests mvx==mvy==0 */
1309        || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1310        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* NOTE(review): the zero-assignment and return between these lines are
 * elided from this extract. */
1316     pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factors (one per list-0
 * reference) from POC distances, per the H.264 temporal direct mode:
 * tb/td ratio in Q8, clipped to [-1024, 1023]. */
1321 static inline void direct_dist_scale_factor(H264Context * const h){
1322     const int poc = h->s.current_picture_ptr->poc;
1323     const int poc1 = h->ref_list[1][0].poc;
1325     for(i=0; i<h->ref_count[0]; i++){
1326         int poc0 = h->ref_list[0][i].poc;
1327         int td = clip(poc1 - poc0, -128, 127);
1328         if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs (or long-term refs, see FIXME): neutral scale 256 (== 1.0 in Q8) */
1329             h->dist_scale_factor[i] = 256;
1331             int tb = clip(poc - poc0, -128, 127);
1332             int tx = (16384 + (FFABS(td) >> 1)) / td;
1333             h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* duplicate each entry for the field-pair table used in MBAFF;
 * NOTE(review): the enclosing FRAME_MBAFF condition is elided here */
1337         for(i=0; i<h->ref_count[0]; i++){
1338             h->dist_scale_factor_field[2*i] =
1339             h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Snapshots the current ref lists' POCs into the picture (for future
 * colocated lookups) and builds map_col_to_list0[], which maps each
 * reference of the colocated picture (ref_list[1][0]) to the index of the
 * same-POC picture in our list 0 — needed by temporal direct prediction. */
1343 static inline void direct_ref_list_init(H264Context * const h){
1344     MpegEncContext * const s = &h->s;
1345     Picture * const ref1 = &h->ref_list[1][0];
1346     Picture * const cur = s->current_picture_ptr;
1348     if(cur->pict_type == I_TYPE)
1349         cur->ref_count[0] = 0;
1350     if(cur->pict_type != B_TYPE)
1351         cur->ref_count[1] = 0;
1352     for(list=0; list<2; list++){
1353         cur->ref_count[list] = h->ref_count[list];
1354         for(j=0; j<h->ref_count[list]; j++)
1355             cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* only temporal direct B-frames need the colocated-to-list0 mapping */
1357     if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1359     for(list=0; list<2; list++){
1360         for(i=0; i<ref1->ref_count[list]; i++){
1361             const int poc = ref1->ref_poc[list][i];
1362             h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1363             for(j=0; j<h->ref_count[list]; j++)
1364                 if(h->ref_list[list][j].poc == poc){
1365                     h->map_col_to_list0[list][i] = j;
/* field variant of the map for MBAFF: each frame index j expands to the
 * two field indices 2j / 2j+1;
 * NOTE(review): the enclosing FRAME_MBAFF condition is elided here */
1371         for(list=0; list<2; list++){
1372             for(i=0; i<ref1->ref_count[list]; i++){
1373                 j = h->map_col_to_list0[list][i];
1374                 h->map_col_to_list0_field[list][2*i] = 2*j;
1375                 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* B-direct macroblock motion derivation (spatial and temporal variants).
 * Fills mv_cache/ref_cache for both lists from the colocated MB of
 * ref_list[1][0], and refines *mb_type / sub_mb_type accordingly.
 * NOTE(review): this extract elides many lines throughout (loop headers,
 * else-branches, closing braces); comments below describe only what the
 * visible lines establish. */
1381 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1382     MpegEncContext * const s = &h->s;
1383     const int mb_xy =   s->mb_x +   s->mb_y*s->mb_stride;
1384     const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1385     const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1386     const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* colocated picture's MVs and ref indices for both its lists */
1387     const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1388     const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1389     const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1390     const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1391     const int is_b8x8 = IS_8X8(*mb_type);
1395 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the direct sub-partition size from the colocated MB type */
1396     if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1397         /* FIXME save sub mb types from previous frames (or derive from MVs)
1398            * so we know exactly what block size to use */
1399         sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1400         *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
1401     }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1402         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1403         *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1405         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1406         *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
1409         *mb_type |= MB_TYPE_DIRECT2;
1411         *mb_type |= MB_TYPE_INTERLACED;
1413     tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1415     if(h->direct_spatial_mv_pred){
1420         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1422         /* ref = min(neighbors) */
1423         for(list=0; list<2; list++){
1424             int refa = h->ref_cache[list][scan8[0] - 1];
1425             int refb = h->ref_cache[list][scan8[0] - 8];
1426             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* if top-right is unavailable, substitute top-left (standard fallback) */
1428                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1430             if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1432             if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid reference in either list: direct-zero prediction */
1438         if(ref[0] < 0 && ref[1] < 0){
1439             ref[0] = ref[1] = 0;
1440             mv[0][0] = mv[0][1] =
1441             mv[1][0] = mv[1][1] = 0;
1443             for(list=0; list<2; list++){
1445                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1447                     mv[list][0] = mv[list][1] = 0;
/* strip the list flag for a one-directional prediction */
1452             *mb_type &= ~MB_TYPE_P0L1;
1453             sub_mb_type &= ~MB_TYPE_P0L1;
1454         }else if(ref[0] < 0){
1455             *mb_type &= ~MB_TYPE_P0L0;
1456             sub_mb_type &= ~MB_TYPE_P0L0;
/* 16x16 direct: fill whole MB caches, zeroing MVs where the colocated
 * block is a near-stationary ref-0 block (the <=1 tests) */
1459         if(IS_16X16(*mb_type)){
1460             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1461             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1462             if(!IS_INTRA(mb_type_col)
1463                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1464                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <=33 had a different rounding here; 0 means unknown encoder */
1465                        && (h->x264_build>33 || !h->x264_build)))){
1467                     fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1469                     fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1471                     fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1473                     fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1475                 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1476                 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* 8x8 direct: same logic per 8x8 sub-block */
1479             for(i8=0; i8<4; i8++){
1480                 const int x8 = i8&1;
1481                 const int y8 = i8>>1;
/* in a B_8x8 MB only the sub-blocks coded as direct are filled here */
1483                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1485                 h->sub_mb_type[i8] = sub_mb_type;
1487                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1488                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1489                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1490                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1493                 if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
1494                                                || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1495                                                    && (h->x264_build>33 || !h->x264_build)))){
1496                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1497                     if(IS_SUB_8X8(sub_mb_type)){
1498                         const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1499                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1501                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1503                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1506                     for(i4=0; i4<4; i4++){
1507                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1508                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1510                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1512                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1518     }else{ /* direct temporal mv pred */
1519         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1520         const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the field-expanded maps and scale factors */
1523         if(IS_INTERLACED(*mb_type)){
1524             map_col_to_list0[0] = h->map_col_to_list0_field[0];
1525             map_col_to_list0[1] = h->map_col_to_list0_field[1];
1526             dist_scale_factor = h->dist_scale_factor_field;
/* colocated MB coded in the other frame/field mode: re-derive positions */
1528         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1529             /* FIXME assumes direct_8x8_inference == 1 */
1530             const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1531             int mb_types_col[2];
1534             *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1535                      | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1536                      | (*mb_type & MB_TYPE_INTERLACED);
1537             sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1539             if(IS_INTERLACED(*mb_type)){
1540                 /* frame to field scaling */
1541                 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1542                 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rebase the colocated pointers to the top of the frame MB pair */
1544                     l1ref0 -= 2*h->b8_stride;
1545                     l1ref1 -= 2*h->b8_stride;
1546                     l1mv0  -=  4*h->b_stride;
1547                     l1mv1  -=  4*h->b_stride;
1551                 if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1552                    && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1554                     *mb_type   |= MB_TYPE_16x8;
1556                     *mb_type   |= MB_TYPE_8x8;
1558                 /* field to frame scaling */
1559                 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1560                    * but in MBAFF, top and bottom POC are equal */
1561                 int dy = (s->mb_y&1) ? 1 : 2;
1563                 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1564                 l1ref0 += dy*h->b8_stride;
1565                 l1ref1 += dy*h->b8_stride;
1566                 l1mv0  += 2*dy*h->b_stride;
1567                 l1mv1  += 2*dy*h->b_stride;
1570                 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1572                     *mb_type   |= MB_TYPE_16x16;
1574                     *mb_type   |= MB_TYPE_8x8;
/* per-8x8 scaling of colocated MVs across the frame/field mismatch */
1577             for(i8=0; i8<4; i8++){
1578                 const int x8 = i8&1;
1579                 const int y8 = i8>>1;
1581                 const int16_t (*l1mv)[2]= l1mv0;
1583                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1585                 h->sub_mb_type[i8] = sub_mb_type;
/* temporal direct always predicts from list-1 ref 0 */
1587                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1588                 if(IS_INTRA(mb_types_col[y8])){
1589                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1590                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1591                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1595                 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1597                     ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1599                     ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1602                 scale = dist_scale_factor[ref0];
1603                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1606                     const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* vertical MV rescaled between frame and field coordinate systems */
1607                     int my_col = (mv_col[1]<<y_shift)/2;
1608                     int mx = (scale * mv_col[0] + 128) >> 8;
1609                     int my = (scale * my_col + 128) >> 8;
1610                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1611                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1618         /* one-to-one mv scaling */
/* same frame/field mode as the colocated MB: plain scaled copy */
1620         if(IS_16X16(*mb_type)){
1621             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1622             if(IS_INTRA(mb_type_col)){
1623                 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1624                 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1625                 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1627                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1628                                                 : map_col_to_list0[1][l1ref1[0]];
1629                 const int scale = dist_scale_factor[ref0];
1630                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* list-0 MV = scaled colocated MV; list-1 MV = their difference */
1632                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1633                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1634                 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1635                 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1636                 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1639             for(i8=0; i8<4; i8++){
1640                 const int x8 = i8&1;
1641                 const int y8 = i8>>1;
1643                 const int16_t (*l1mv)[2]= l1mv0;
1645                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1647                 h->sub_mb_type[i8] = sub_mb_type;
1648                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1649                 if(IS_INTRA(mb_type_col)){
1650                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1651                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1652                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1656                 ref0 = l1ref0[x8 + y8*h->b8_stride];
1658                     ref0 = map_col_to_list0[0][ref0];
1660                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1663                 scale = dist_scale_factor[ref0];
1665                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1666                 if(IS_SUB_8X8(sub_mb_type)){
1667                     const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1668                     int mx = (scale * mv_col[0] + 128) >> 8;
1669                     int my = (scale * mv_col[1] + 128) >> 8;
1670                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1671                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1673                     for(i4=0; i4<4; i4++){
1674                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1675                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1676                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1677                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1678                         *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1679                             pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Writes the per-MB MV/ref caches (and CABAC mvd / direct flags) back into
 * the frame-wide arrays of the current picture, so later MBs and frames
 * can read them as neighbors / colocated data. */
1686 static inline void write_back_motion(H264Context *h, int mb_type){
1687     MpegEncContext * const s = &h->s;
1688     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1689     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list-0 as unused so direct-mode readers of this MB see it */
1692     if(!USES_LIST(mb_type, 0))
1693         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1695     for(list=0; list<2; list++){
1697         if(!USES_LIST(mb_type, list))
/* copy MV rows two-at-a-time (4 MVs = 2 x 64-bit stores per row);
 * NOTE(review): the surrounding for(y...) loop header is elided here */
1701             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1702             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1704         if( h->pps.cabac ) {
/* skipped MBs have no mvd; zero the table instead of copying the cache */
1705             if(IS_SKIP(mb_type))
1706                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1709                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1710                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one ref index per 8x8 block */
1715             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1716             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1717             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1718             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1719             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices track per-8x8 direct flags for context modelling;
 * NOTE(review): the entry for sub-block 0 is elided from this extract */
1723     if(h->slice_type == B_TYPE && h->pps.cabac){
1724         if(IS_8X8(mb_type)){
1725             uint8_t *direct_table = &h->direct_table[b8_xy];
1726             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1727             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1728             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1734  * Decodes a network abstraction layer unit.
1735  * @param consumed is the number of bytes used as input
1736  * @param length is the length of the array
1737  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1738  * @returns decoded bytes, might be src+1 if no escapes
/* Strips the NAL header byte and removes 00 00 03 emulation-prevention
 * escapes, returning either src+1 directly (fast path, no escapes) or a
 * reallocated rbsp_buffer with the unescaped payload. */
1740 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1744 //    src[0]&0x80;              //forbidden bit
1745     h->nal_ref_idc= src[0]>>5;
1746     h->nal_unit_type= src[0]&0x1F;
/* debug dump of the raw NAL bytes;
 * NOTE(review): the enclosing #ifdef/condition is elided from this extract */
1750     for(i=0; i<length; i++)
1751         printf("%2X ", src[i]);
/* scan for the first 00 00 (01|02|03) pattern; stepping by 2 is safe
 * because a 3-byte pattern always has a zero at an even or odd offset */
1753     for(i=0; i+1<length; i+=2){
1754         if(src[i]) continue;
1755         if(i>0 && src[i-1]==0) i--;
1756         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1758                 /* startcode, so we must be past the end */
/* fast path: no escape sequence present, return payload in place */
1765     if(i>=length-1){ //no escaped 0
1766         *dst_length= length;
1767         *consumed= length+1; //+1 for the header
1771     h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1772     dst= h->rbsp_buffer;
/* NOTE(review): av_fast_realloc failure is not visibly checked here —
 * confirm against the full source before relying on this path */
1774 //printf("decoding esc\n");
1777         //remove escapes (very rare 1:2^22)
1778         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1779             if(src[si+2]==3){ //escape
1784             }else //next start code
1788         dst[di++]= src[si++];
1792     *consumed= si + 1;//+1 for the header
1793 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1798  * identifies the exact end of the bitstream
1799  * @return the length of the trailing, or 0 if damaged
/* NOTE(review): the body (reading the last byte and counting the
 * rbsp_stop_one_bit) is almost entirely elided from this extract. */
1801 static int decode_rbsp_trailing(uint8_t *src){
1805     tprintf("rbsp trailing %X\n", v);
1815  * idct tranforms the 16 dc values and dequantize them.
1816  * @param qp quantization parameter
/* 4x4 Hadamard inverse transform of the luma DC coefficients (which are
 * stored at stride-spaced positions inside the 16x16 block array),
 * followed by dequantization with qmul (Q8 rounding). */
1818 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1821     int temp[16]; //FIXME check if this is a good idea
/* offsets of the 4 DC columns / rows within the strided block layout */
1822     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1823     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1825 //memset(block, 64, 2*256);
/* horizontal pass -> temp[];
 * NOTE(review): the for(i...) loop header and the stores into temp[]
 * are elided from this extract */
1828         const int offset= y_offset[i];
1829         const int z0= block[offset+stride*0] + block[offset+stride*4];
1830         const int z1= block[offset+stride*0] - block[offset+stride*4];
1831         const int z2= block[offset+stride*1] - block[offset+stride*5];
1832         const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass + dequant back into block[] */
1841         const int offset= x_offset[i];
1842         const int z0= temp[4*0+i] + temp[4*2+i];
1843         const int z1= temp[4*0+i] - temp[4*2+i];
1844         const int z2= temp[4*1+i] - temp[4*3+i];
1845         const int z3= temp[4*1+i] + temp[4*3+i];
1847         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1848         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1849         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1850         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1856  * dct tranforms the 16 dc values.
1857  * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the luma DC values (encoder side);
 * mirror of h264_luma_dc_dequant_idct_c but with a final >>1 instead of
 * dequantization. */
1859 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1860 //    const int qmul= dequant_coeff[qp][0];
1862     int temp[16]; //FIXME check if this is a good idea
1863     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1864     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass -> temp[];
 * NOTE(review): loop headers and temp[] stores are elided here */
1867         const int offset= y_offset[i];
1868         const int z0= block[offset+stride*0] + block[offset+stride*4];
1869         const int z1= block[offset+stride*0] - block[offset+stride*4];
1870         const int z2= block[offset+stride*1] - block[offset+stride*5];
1871         const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass with >>1 normalization back into block[] */
1880         const int offset= x_offset[i];
1881         const int z0= temp[4*0+i] + temp[4*2+i];
1882         const int z1= temp[4*0+i] - temp[4*2+i];
1883         const int z2= temp[4*1+i] - temp[4*3+i];
1884         const int z3= temp[4*1+i] + temp[4*3+i];
1886         block[stride*0 +offset]= (z0 + z3)>>1;
1887         block[stride*2 +offset]= (z1 + z2)>>1;
1888         block[stride*8 +offset]= (z1 - z2)>>1;
1889         block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, stored at (stride, xStride)-spaced positions.
 * NOTE(review): the declarations of a..e and the computation of e
 * (presumably a-b / a+b butterflies) are elided from this extract. */
1897 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1898     const int stride= 16*2;
1899     const int xStride= 16;
1902     a= block[stride*0 + xStride*0];
1903     b= block[stride*0 + xStride*1];
1904     c= block[stride*1 + xStride*0];
1905     d= block[stride*1 + xStride*1];
/* butterfly outputs, dequantized with a Q7 shift */
1912     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1913     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1914     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1915     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC values (encoder side);
 * same layout as chroma_dc_dequant_idct_c but without dequantization.
 * NOTE(review): declarations of a..e and the computation of e are elided. */
1919 static void chroma_dc_dct_c(DCTELEM *block){
1920     const int stride= 16*2;
1921     const int xStride= 16;
1924     a= block[stride*0 + xStride*0];
1925     b= block[stride*0 + xStride*1];
1926     c= block[stride*1 + xStride*0];
1927     d= block[stride*1 + xStride*1];
1934     block[stride*0 + xStride*0]= (a+c);
1935     block[stride*0 + xStride*1]= (e+b);
1936     block[stride*1 + xStride*0]= (a-c);
1937     block[stride*1 + xStride*1]= (e-b);
1942  * gets the chroma qp.
/* Maps luma qscale (plus the PPS chroma offset) to the chroma QP via the
 * standard chroma_qp lookup table, clipping the index to [0, 51]. */
1944 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1946     return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Computes the 4x4 H.264 integer transform of the difference between two
 * 4x4 pixel blocks (src1 - src2), writing coefficients into block[].
 * Row pass first, then column pass; the 1/2/1 weights are the standard
 * H.264 core transform. */
1951 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1953     //FIXME try int temp instead of block
/* row pass;
 * NOTE(review): the for(i...) loop header is elided from this extract */
1956         const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1957         const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1958         const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1959         const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1960         const int z0= d0 + d3;
1961         const int z3= d0 - d3;
1962         const int z1= d1 + d2;
1963         const int z2= d1 - d2;
1965         block[0 + 4*i]=   z0 +   z1;
1966         block[1 + 4*i]= 2*z3 +   z2;
1967         block[2 + 4*i]=   z0 -   z1;
1968         block[3 + 4*i]=   z3 - 2*z2;
/* column pass */
1972         const int z0= block[0*4 + i] + block[3*4 + i];
1973         const int z3= block[0*4 + i] - block[3*4 + i];
1974         const int z1= block[1*4 + i] + block[2*4 + i];
1975         const int z2= block[1*4 + i] - block[2*4 + i];
1977         block[0*4 + i]=   z0 +   z1;
1978         block[1*4 + i]= 2*z3 +   z2;
1979         block[2*4 + i]=   z0 -   z1;
1980         block[3*4 + i]=   z3 - 2*z2;
1985 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1986 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Quantizes a 4x4 coefficient block in scantable order; intra blocks use
 * a larger dead-zone bias (1/3 vs 1/6).  The threshold1/threshold2 trick
 * classifies |level| with a single unsigned compare.  Returns the index
 * of the last non-zero coefficient. */
1987 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1989     const int * const quant_table= quant_coeff[qscale];
1990     const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1991     const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1992     const unsigned int threshold2= (threshold1<<1);
/* separate-DC path: the DC coefficient uses its own quantizer/shift;
 * NOTE(review): several enclosing conditionals and the level stores
 * between these lines are elided from this extract */
1998             const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1999             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2000             const unsigned int dc_threshold2= (dc_threshold1<<1);
/* qscale+18 selects the luma-DC variant of the quant table */
2002             int level= block[0]*quant_coeff[qscale+18][0];
2003             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2005                     level= (dc_bias + level)>>(QUANT_SHIFT-2);
2008                     level= (dc_bias - level)>>(QUANT_SHIFT-2);
2011 //                    last_non_zero = i;
/* chroma-DC variant: shift is one larger instead of two smaller */
2016             const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2017             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2018             const unsigned int dc_threshold2= (dc_threshold1<<1);
2020             int level= block[0]*quant_table[0];
2021             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2023                     level= (dc_bias + level)>>(QUANT_SHIFT+1);
2026                     level= (dc_bias - level)>>(QUANT_SHIFT+1);
2029 //                    last_non_zero = i;
/* AC coefficients in scan order */
2042         const int j= scantable[i];
2043         int level= block[j]*quant_table[j];
2045 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
2046 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
2047         if(((unsigned)(level+threshold1))>threshold2){
2049                 level= (bias + level)>>QUANT_SHIFT;
2052                 level= (bias - level)>>QUANT_SHIFT;
2061     return last_non_zero;
/* 4x4 vertical intra prediction: replicate the 4 pixels above the block
 * into every row (one 32-bit load, four 32-bit stores). */
2064 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2065     const uint32_t a= ((uint32_t*)(src-stride))[0];
2066     ((uint32_t*)(src+0*stride))[0]= a;
2067     ((uint32_t*)(src+1*stride))[0]= a;
2068     ((uint32_t*)(src+2*stride))[0]= a;
2069     ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal intra prediction: splat each left-edge pixel across its
 * row (x * 0x01010101 replicates a byte into all 4 lanes). */
2072 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2073     ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2074     ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2075     ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2076     ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC intra prediction: average of the 4 top and 4 left neighbor
 * pixels (rounded), replicated over the whole block. */
2079 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2080     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2081                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2083     ((uint32_t*)(src+0*stride))[0]=
2084     ((uint32_t*)(src+1*stride))[0]=
2085     ((uint32_t*)(src+2*stride))[0]=
2086     ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the left edge (top unavailable). */
2089 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
2090     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
2092     ((uint32_t*)(src+0*stride))[0]=
2093     ((uint32_t*)(src+1*stride))[0]=
2094     ((uint32_t*)(src+2*stride))[0]=
2095     ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the top edge (left unavailable). */
2098 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
2099     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
2101     ((uint32_t*)(src+0*stride))[0]=
2102     ((uint32_t*)(src+1*stride))[0]=
2103     ((uint32_t*)(src+2*stride))[0]=
2104     ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction with no neighbors available: constant mid-gray 128. */
2107 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
2108     ((uint32_t*)(src+0*stride))[0]=
2109     ((uint32_t*)(src+1*stride))[0]=
2110     ((uint32_t*)(src+2*stride))[0]=
2111     ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros for the directional 4x4 predictors below: load the
 * neighbor pixels into local consts t0..t7 (top / top-right) and
 * l0..l3 (left).  Expanded inside each predictor's body. */
2115 #define LOAD_TOP_RIGHT_EDGE\
2116     const int t4= topright[0];\
2117     const int t5= topright[1];\
2118     const int t6= topright[2];\
2119     const int t7= topright[3];\
2121 #define LOAD_LEFT_EDGE\
2122     const int l0= src[-1+0*stride];\
2123     const int l1= src[-1+1*stride];\
2124     const int l2= src[-1+2*stride];\
2125     const int l3= src[-1+3*stride];\
2127 #define LOAD_TOP_EDGE\
2128     const int t0= src[ 0-1*stride];\
2129     const int t1= src[ 1-1*stride];\
2130     const int t2= src[ 2-1*stride];\
2131     const int t3= src[ 3-1*stride];\
/* Diagonal down-right 4x4 prediction (H.264 mode 4): pixels are filled
 * from the top-left corner along the down-right diagonal using 3-tap
 * [1 2 1] smoothing of the lt/top/left edge samples.
 * NOTE(review): this excerpt omits the LOAD_TOP_EDGE/LOAD_LEFT_EDGE
 * invocations and the duplicated-store lines that share each value
 * along a diagonal — do not compile as-is. */
2133 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2134 const int lt= src[-1-1*stride];
2138 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2140 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2143 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2147 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2150 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2152 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2153 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* Diagonal down-left prediction (mode 3): fills diagonals running
 * down-left from smoothed top and top-right samples (t0..t7). */
2156 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2161 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2163 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2166 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2170 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2173 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2175 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2176 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* Vertical-right prediction (mode 5): half-pel columns from top samples,
 * odd rows use the [1 2 1] filtered values; l3 is loaded but unused. */
2179 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2180 const int lt= src[-1-1*stride];
2183 const __attribute__((unused)) int unu= l3;
2186 src[1+2*stride]=(lt + t0 + 1)>>1;
2188 src[2+2*stride]=(t0 + t1 + 1)>>1;
2190 src[3+2*stride]=(t1 + t2 + 1)>>1;
2191 src[3+0*stride]=(t2 + t3 + 1)>>1;
2193 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2195 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2197 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2198 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2199 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2200 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* Vertical-left prediction (mode 7): averages and [1 2 1] filters of the
 * top and top-right samples; t7 is loaded but unused. */
2203 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2206 const __attribute__((unused)) int unu= t7;
2208 src[0+0*stride]=(t0 + t1 + 1)>>1;
2210 src[0+2*stride]=(t1 + t2 + 1)>>1;
2212 src[1+2*stride]=(t2 + t3 + 1)>>1;
2214 src[2+2*stride]=(t3 + t4+ 1)>>1;
2215 src[3+2*stride]=(t4 + t5+ 1)>>1;
2216 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2218 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2220 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2222 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2223 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* Horizontal-up prediction (mode 8): interpolates upward along the left
 * column; (l2 + 2*l3 + l3 + 2) is the spec's l2 + 3*l3 + 2 form.
 * Remaining bottom-right pixels (missing from this excerpt) are l3. */
2226 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2229 src[0+0*stride]=(l0 + l1 + 1)>>1;
2230 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2232 src[0+1*stride]=(l1 + l2 + 1)>>1;
2234 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2236 src[0+2*stride]=(l2 + l3 + 1)>>1;
2238 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* Horizontal-down prediction (mode 6): mixes half-pel left-column
 * averages with [1 2 1] filtered corner/top samples; t3 unused. */
2247 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2248 const int lt= src[-1-1*stride];
2251 const __attribute__((unused)) int unu= t3;
2254 src[2+1*stride]=(lt + l0 + 1)>>1;
2256 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2257 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2258 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2260 src[2+2*stride]=(l0 + l1 + 1)>>1;
2262 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2264 src[2+3*stride]=(l1 + l2+ 1)>>1;
2266 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2267 src[0+3*stride]=(l2 + l3 + 1)>>1;
2268 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
void ff_pred16x16_vertical_c(uint8_t *src, int stride){
    /* Vertical 16x16 prediction: replicate the row of 16 pixels directly
     * above the block into every row of the block. */
    const uint8_t *above = src - stride;
    int x, y;

    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride] = above[x];
}
2286 void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
2289 for(i=0; i<16; i++){
2290 ((uint32_t*)(src+i*stride))[0]=
2291 ((uint32_t*)(src+i*stride))[1]=
2292 ((uint32_t*)(src+i*stride))[2]=
2293 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
void ff_pred16x16_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction: fill with the rounded average of the 16
     * left-column and 16 top-row neighbour samples. */
    int sum = 16; /* rounding term for the >>5 below */
    int i, x, y;

    for(i=0; i<16; i++)
        sum += src[-1 + i*stride] + src[i - stride];

    {
        const uint8_t dc = sum >> 5;
        for(y=0; y<16; y++)
            for(x=0; x<16; x++)
                src[x + y*stride] = dc;
    }
}
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction from the left column only (top unavailable). */
    int sum = 8; /* rounding term for the >>4 below */
    int i, x, y;

    for(i=0; i<16; i++)
        sum += src[-1 + i*stride];

    {
        const uint8_t dc = sum >> 4;
        for(y=0; y<16; y++)
            for(x=0; x<16; x++)
                src[x + y*stride] = dc;
    }
}
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction from the top row only (left unavailable). */
    int sum = 8; /* rounding term for the >>4 below */
    int i, x, y;

    for(i=0; i<16; i++)
        sum += src[i - stride];

    {
        const uint8_t dc = sum >> 4;
        for(y=0; y<16; y++)
            for(x=0; x<16; x++)
                src[x + y*stride] = dc;
    }
}
void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction with no neighbours: fill with mid-range 128. */
    int x, y;

    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride] = 128;
}
/* Plane (gradient) prediction for a 16x16 block.  H and V are weighted
 * sums of border-sample differences estimating the horizontal/vertical
 * gradient; each pixel is clipped via the cm crop table.  The svq3 flag
 * selects SVQ3's altered scaling (divides, swapped H/V) instead of the
 * H.264 (5*x+32)>>6 form.
 * NOTE(review): this excerpt is missing several lines of the inner fill
 * loop (b initialisation, b/src increments, closing braces). */
2362 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2365 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2366 const uint8_t * const src0 = src+7-stride;
2367 const uint8_t *src1 = src+8*stride-1;
2368 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2369 int H = src0[1] - src0[-1];
2370 int V = src1[0] - src2[ 0];
2371 for(k=2; k<=8; ++k) {
2372 src1 += stride; src2 -= stride;
2373 H += k*(src0[k] - src0[-k]);
2374 V += k*(src1[0] - src2[ 0]);
2377 H = ( 5*(H/4) ) / 16;
2378 V = ( 5*(V/4) ) / 16;
2380 /* required for 100% accuracy */
2381 i = H; H = V; V = i;
2383 H = ( 5*H+32 ) >> 6;
2384 V = ( 5*V+32 ) >> 6;
2387 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2388 for(j=16; j>0; --j) {
2391 for(i=-16; i<0; i+=4) {
2392 src[16+i] = cm[ (b ) >> 5 ];
2393 src[17+i] = cm[ (b+ H) >> 5 ];
2394 src[18+i] = cm[ (b+2*H) >> 5 ];
2395 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Public H.264 entry point: plane prediction without SVQ3 quirks. */
2402 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2403 pred16x16_plane_compat_c(src, stride, 0);
void ff_pred8x8_vertical_c(uint8_t *src, int stride){
    /* Vertical 8x8 prediction: replicate the 8 pixels directly above
     * the block into every row. */
    const uint8_t *above = src - stride;
    int x, y;

    for(y=0; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride] = above[x];
}
void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
    /* Horizontal 8x8 prediction: replicate each row's left neighbour
     * across the row. */
    int x, y;

    for(y=0; y<8; y++){
        const uint8_t left = src[-1 + y*stride];
        for(x=0; x<8; x++)
            src[x + y*stride] = left;
    }
}
void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
    /* DC 8x8 prediction with no neighbours: fill with mid-range 128. */
    int x, y;

    for(y=0; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride] = 128;
}
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    /* Left-only DC for an 8x8 chroma block: the top 4 rows are filled
     * with the DC of left samples 0..3, the bottom 4 rows with the DC
     * of left samples 4..7. */
    int top_sum = 0, bottom_sum = 0;
    int i, x, y;

    for(i=0; i<4; i++){
        top_sum    += src[-1 +  i   *stride];
        bottom_sum += src[-1 + (i+4)*stride];
    }

    {
        const uint8_t dc_top    = (top_sum    + 2) >> 2;
        const uint8_t dc_bottom = (bottom_sum + 2) >> 2;
        for(y=0; y<4; y++)
            for(x=0; x<8; x++){
                src[x +  y   *stride] = dc_top;
                src[x + (y+4)*stride] = dc_bottom;
            }
    }
}
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    /* Top-only DC for an 8x8 chroma block: the left 4 columns are filled
     * with the DC of top samples 0..3, the right 4 columns with the DC
     * of top samples 4..7; same values for all 8 rows. */
    int left_sum = 0, right_sum = 0;
    int i, x, y;

    for(i=0; i<4; i++){
        left_sum  += src[  i - stride];
        right_sum += src[4+i - stride];
    }

    {
        const uint8_t dc_left  = (left_sum  + 2) >> 2;
        const uint8_t dc_right = (right_sum + 2) >> 2;
        for(y=0; y<8; y++)
            for(x=0; x<4; x++){
                src[x   + y*stride] = dc_left;
                src[4+x + y*stride] = dc_right;
            }
    }
}
void ff_pred8x8_dc_c(uint8_t *src, int stride){
    /* Full DC for an 8x8 chroma block, computed per 4x4 quadrant:
     *   top-left     = avg of left rows 0..3 and top cols 0..3 (8 samples)
     *   top-right    = avg of top cols 4..7
     *   bottom-left  = avg of left rows 4..7
     *   bottom-right = avg of top cols 4..7 and left rows 4..7 (8 samples) */
    int sum_tl = 0, sum_tr = 0, sum_bl = 0;
    int i, x, y;
    uint8_t dc[4];

    for(i=0; i<4; i++){
        sum_tl += src[-1 + i*stride] + src[i - stride];
        sum_tr += src[4+i - stride];
        sum_bl += src[-1 + (i+4)*stride];
    }
    dc[0] = (sum_tl + 4) >> 3;
    dc[1] = (sum_tr + 2) >> 2;
    dc[2] = (sum_bl + 2) >> 2;
    dc[3] = (sum_tr + sum_bl + 4) >> 3;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++){
            src[x   +  y   *stride] = dc[0];
            src[4+x +  y   *stride] = dc[1];
            src[x   + (y+4)*stride] = dc[2];
            src[4+x + (y+4)*stride] = dc[3];
        }
}
/* Plane (gradient) prediction for an 8x8 chroma block: H and V are
 * weighted border-difference sums scaled by (17*x+16)>>5; pixels are
 * clipped through the cm crop table.
 * NOTE(review): this excerpt is missing the b initialisation and the
 * b/src increment lines of the fill loop. */
2505 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2508 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2509 const uint8_t * const src0 = src+3-stride;
2510 const uint8_t *src1 = src+4*stride-1;
2511 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2512 int H = src0[1] - src0[-1];
2513 int V = src1[0] - src2[ 0];
2514 for(k=2; k<=4; ++k) {
2515 src1 += stride; src2 -= stride;
2516 H += k*(src0[k] - src0[-k]);
2517 V += k*(src1[0] - src2[ 0]);
2519 H = ( 17*H+16 ) >> 5;
2520 V = ( 17*V+16 ) >> 5;
2522 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2523 for(j=8; j>0; --j) {
2526 src[0] = cm[ (b ) >> 5 ];
2527 src[1] = cm[ (b+ H) >> 5 ];
2528 src[2] = cm[ (b+2*H) >> 5 ];
2529 src[3] = cm[ (b+3*H) >> 5 ];
2530 src[4] = cm[ (b+4*H) >> 5 ];
2531 src[5] = cm[ (b+5*H) >> 5 ];
2532 src[6] = cm[ (b+6*H) >> 5 ];
2533 src[7] = cm[ (b+7*H) >> 5 ];
/* Helpers for the 8x8 luma (pred8x8l) predictors.  Unlike the 4x4 case,
 * the edge samples here are pre-filtered with a [1 2 1] kernel as the
 * H.264 spec requires for Intra_8x8; has_topleft/has_topright select the
 * fallback sample when a neighbour is unavailable. */
2538 #define SRC(x,y) src[(x)+(y)*stride]
2540 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2541 #define PREDICT_8x8_LOAD_LEFT \
2542 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2543 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2544 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2545 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* Filtered top row t0..t7. */
2548 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2549 #define PREDICT_8x8_LOAD_TOP \
2550 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2551 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2552 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2553 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2554 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* Filtered top-right row t8..t15; replicated from SRC(7,-1) when the
 * top-right neighbour is unavailable. */
2557 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2558 #define PREDICT_8x8_LOAD_TOPRIGHT \
2559 int t8, t9, t10, t11, t12, t13, t14, t15; \
2560 if(has_topright) { \
2561 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2562 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2563 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
2565 #define PREDICT_8x8_LOAD_TOPLEFT \
2566 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill all 8 rows with a replicated 32-bit DC word v. */
2568 #define PREDICT_8x8_DC(v) \
2570 for( y = 0; y < 8; y++ ) { \
2571 ((uint32_t*)src)[0] = \
2572 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with 128. */
2576 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2578 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC from the filtered left column only. */
2580 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2582 PREDICT_8x8_LOAD_LEFT;
2583 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC from the filtered top row only. */
2586 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2588 PREDICT_8x8_LOAD_TOP;
2589 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC from both filtered edges (16 samples). */
2592 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2594 PREDICT_8x8_LOAD_LEFT;
2595 PREDICT_8x8_LOAD_TOP;
2596 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2597 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with filtered left
 * sample l<y>, written as two replicated 32-bit words. */
2600 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2602 PREDICT_8x8_LOAD_LEFT;
2603 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2604 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2605 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: the filtered top row is packed into the
 * first row, then copied into the remaining 7 rows as one 64-bit word.
 * NOTE(review): this excerpt omits the lines that pack t0..t7 into the
 * first row before the copy loop. */
2608 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2611 PREDICT_8x8_LOAD_TOP;
2620 for( y = 1; y < 8; y++ )
2621 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left: each anti-diagonal shares one [1 2 1]
 * filtered value taken from the top / top-right samples t0..t15. */
2623 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2625 PREDICT_8x8_LOAD_TOP;
2626 PREDICT_8x8_LOAD_TOPRIGHT;
2627 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2628 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2629 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2630 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2631 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2632 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2633 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2634 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2635 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2636 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2637 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2638 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2639 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2640 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2641 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right: each down-right diagonal shares one
 * [1 2 1] filtered value drawn from left column, corner (lt), then top
 * row, sweeping from bottom-left to top-right. */
2643 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2645 PREDICT_8x8_LOAD_TOP;
2646 PREDICT_8x8_LOAD_LEFT;
2647 PREDICT_8x8_LOAD_TOPLEFT;
2648 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2649 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2650 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2651 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2652 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2653 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2654 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2655 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2656 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2657 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2658 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2659 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2660 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2661 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2662 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right: even diagonals are half-pel averages, odd
 * diagonals [1 2 1] filters, shifting one column right every two rows. */
2665 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2667 PREDICT_8x8_LOAD_TOP;
2668 PREDICT_8x8_LOAD_LEFT;
2669 PREDICT_8x8_LOAD_TOPLEFT;
2670 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2671 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2672 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2673 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2674 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2675 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2676 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2677 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2678 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2679 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2680 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2681 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2682 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2683 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2684 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2685 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2686 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2687 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2688 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2689 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2690 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2691 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down: alternates half-pel averages and [1 2 1]
 * filters of left/corner/top samples, shifting two columns per row. */
2693 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2695 PREDICT_8x8_LOAD_TOP;
2696 PREDICT_8x8_LOAD_LEFT;
2697 PREDICT_8x8_LOAD_TOPLEFT;
2698 SRC(0,7)= (l6 + l7 + 1) >> 1;
2699 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2700 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2701 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2702 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2703 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2704 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2705 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2706 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2707 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2708 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2709 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2710 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2711 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2712 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2713 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2714 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2715 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2716 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2717 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2718 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2719 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left: alternating half-pel and [1 2 1] rows from
 * the filtered top and top-right samples t0..t12. */
2721 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2723 PREDICT_8x8_LOAD_TOP;
2724 PREDICT_8x8_LOAD_TOPRIGHT;
2725 SRC(0,0)= (t0 + t1 + 1) >> 1;
2726 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2727 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2728 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2729 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2730 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2731 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2732 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2733 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2734 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2735 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2736 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2737 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2738 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2739 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2740 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2741 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2742 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2743 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2744 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2745 SRC(7,6)= (t10 + t11 + 1) >> 1;
2746 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up: interpolates upward along the filtered left
 * column; pixels past the last sample are the replicated l7. */
2748 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2750 PREDICT_8x8_LOAD_LEFT;
2751 SRC(0,0)= (l0 + l1 + 1) >> 1;
2752 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2753 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2754 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2755 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2756 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2757 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2758 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2759 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2760 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2761 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2762 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2763 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2764 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2765 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2766 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2767 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2768 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* Scrub the predictor-local helper macros. */
2770 #undef PREDICT_8x8_LOAD_LEFT
2771 #undef PREDICT_8x8_LOAD_TOP
2772 #undef PREDICT_8x8_LOAD_TOPLEFT
2773 #undef PREDICT_8x8_LOAD_TOPRIGHT
2774 #undef PREDICT_8x8_DC
/* Motion compensation for one prediction direction (list) of one
 * partition: computes quarter-pel luma and eighth-pel chroma source
 * positions from the cached MV, falls back to ff_emulated_edge_mc when
 * the reference area crosses the picture border, then applies the given
 * qpel/chroma MC functions.
 * NOTE(review): excerpt is missing the emu flag setup and several
 * braces; my/mx are in 1/4-pel for luma, 1/8-pel for chroma. */
2780 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2781 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2782 int src_x_offset, int src_y_offset,
2783 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2784 MpegEncContext * const s = &h->s;
2785 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2786 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2787 const int luma_xy= (mx&3) + ((my&3)<<2);
2788 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2789 uint8_t * src_cb, * src_cr;
2790 int extra_width= h->emu_edge_width;
2791 int extra_height= h->emu_edge_height;
2793 const int full_mx= mx>>2;
2794 const int full_my= my>>2;
2795 const int pic_width = 16*s->mb_width;
2796 const int pic_height = 16*s->mb_height >> MB_MBAFF;
/* sub-pel positions need 3 extra border samples for the 6-tap filter */
2801 if(mx&7) extra_width -= 3;
2802 if(my&7) extra_height -= 3;
2804 if( full_mx < 0-extra_width
2805 || full_my < 0-extra_height
2806 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2807 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2808 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2809 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2813 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2815 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2818 if(s->flags&CODEC_FLAG_GRAY) return;
2821 // chroma offset when predicting from a field of opposite parity
2822 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2823 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2825 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2826 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2829 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2830 src_cb= s->edge_emu_buffer;
2832 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2835 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2836 src_cr= s->edge_emu_buffer;
2838 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted MC for one partition: runs mc_dir_part with the *put*
 * functions for list 0, then (for bidirectional blocks) switches to the
 * *avg* functions for list 1 so the two predictions are averaged. */
2841 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2842 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2843 int x_offset, int y_offset,
2844 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2845 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2846 int list0, int list1){
2847 MpegEncContext * const s = &h->s;
2848 qpel_mc_func *qpix_op= qpix_put;
2849 h264_chroma_mc_func chroma_op= chroma_put;
/* convert the partition offset into macroblock-relative dest pointers,
 * then into absolute picture coordinates for the source fetch */
2851 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2852 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2853 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2854 x_offset += 8*s->mb_x;
2855 y_offset += 8*(s->mb_y >> MB_MBAFF);
2858 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2859 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2860 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2861 qpix_op, chroma_op);
/* second direction averages into the first prediction */
2864 chroma_op= chroma_avg;
2868 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2869 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2870 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2871 qpix_op, chroma_op);
/* Weighted MC for one partition.  Bidirectional case: both predictions
 * are fetched (list 1 into the obmc scratchpad) and combined with the
 * biweight functions — implicit weights (use_weight==2) use a fixed
 * denominator of 5/64-split, explicit ones use the slice weight tables.
 * Unidirectional case: single prediction, then in-place weight_op. */
2875 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2876 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2877 int x_offset, int y_offset,
2878 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2879 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2880 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2881 int list0, int list1){
2882 MpegEncContext * const s = &h->s;
2884 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2885 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2886 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2887 x_offset += 8*s->mb_x;
2888 y_offset += 8*(s->mb_y >> MB_MBAFF);
2891 /* don't optimize for luma-only case, since B-frames usually
2892 * use implicit weights => chroma too. */
2893 uint8_t *tmp_cb = s->obmc_scratchpad;
2894 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2895 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2896 int refn0 = h->ref_cache[0][ scan8[n] ];
2897 int refn1 = h->ref_cache[1][ scan8[n] ];
2899 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2900 dest_y, dest_cb, dest_cr,
2901 x_offset, y_offset, qpix_put, chroma_put);
2902 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2903 tmp_y, tmp_cb, tmp_cr,
2904 x_offset, y_offset, qpix_put, chroma_put);
2906 if(h->use_weight == 2){
2907 int weight0 = h->implicit_weight[refn0][refn1];
2908 int weight1 = 64 - weight0;
2909 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2910 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2911 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2913 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2914 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2915 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2916 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2917 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2918 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2919 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2920 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2921 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* unidirectional: predict from whichever list is active, then weight */
2924 int list = list1 ? 1 : 0;
2925 int refn = h->ref_cache[list][ scan8[n] ];
2926 Picture *ref= &h->ref_list[list][refn];
2927 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2928 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2929 qpix_put, chroma_put);
2931 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2932 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2933 if(h->use_weight_chroma){
2934 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2935 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2936 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2937 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch MC for one partition: weighted path when explicit weighting
 * is on, or when implicit bidirectional weights differ from the trivial
 * 32/32 split (which would be a plain average); standard path otherwise. */
2942 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2943 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2944 int x_offset, int y_offset,
2945 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2946 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2947 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2948 int list0, int list1){
2949 if((h->use_weight==2 && list0 && list1
2950 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2951 || h->use_weight==1)
2952 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2953 x_offset, y_offset, qpix_put, chroma_put,
2954 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2956 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2957 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2960 static inline void prefetch_motion(H264Context *h, int list){
2961 /* fetch pixels for estimated mv 4 macroblocks ahead
2962 * optimized for 64byte cache lines */
2963 MpegEncContext * const s = &h->s;
2964 const int refn = h->ref_cache[list][scan8[0]];
/* full-pel position of the MB's first MV, advanced by half an MB width */
2966 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2967 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2968 uint8_t **src= h->ref_list[list][refn].data;
2969 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2970 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma is half resolution; cb/cr planes assumed contiguous, so the
 * cr-cb pointer difference serves as the prefetch stride */
2971 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2972 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter MC for a whole macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x4/4x8/4x4 sub-partitions) and runs
 * mc_part for each piece with the matching qpel/chroma/weight function
 * sizes.  Prefetches list-0 refs before and list-1 refs after. */
2976 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2977 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2978 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2979 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2980 MpegEncContext * const s = &h->s;
2981 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2982 const int mb_type= s->current_picture.mb_type[mb_xy];
2984 assert(IS_INTER(mb_type));
2986 prefetch_motion(h, 0);
2988 if(IS_16X16(mb_type)){
2989 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2990 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2991 &weight_op[0], &weight_avg[0],
2992 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2993 }else if(IS_16X8(mb_type)){
2994 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2995 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2996 &weight_op[1], &weight_avg[1],
2997 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2998 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2999 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3000 &weight_op[1], &weight_avg[1],
3001 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3002 }else if(IS_8X16(mb_type)){
3003 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3004 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3005 &weight_op[2], &weight_avg[2],
3006 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3007 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3008 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3009 &weight_op[2], &weight_avg[2],
3010 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitions: each of the four blocks carries its own sub_mb_type */
3014 assert(IS_8X8(mb_type));
3017 const int sub_mb_type= h->sub_mb_type[i];
3019 int x_offset= (i&1)<<2;
3020 int y_offset= (i&2)<<1;
3022 if(IS_SUB_8X8(sub_mb_type)){
3023 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3024 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3025 &weight_op[3], &weight_avg[3],
3026 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3027 }else if(IS_SUB_8X4(sub_mb_type)){
3028 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3029 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3030 &weight_op[4], &weight_avg[4],
3031 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3032 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3033 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3034 &weight_op[4], &weight_avg[4],
3035 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3036 }else if(IS_SUB_4X8(sub_mb_type)){
3037 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3038 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3039 &weight_op[5], &weight_avg[5],
3040 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3041 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3042 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3043 &weight_op[5], &weight_avg[5],
3044 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3047 assert(IS_SUB_4X4(sub_mb_type));
3049 int sub_x_offset= x_offset + 2*(j&1);
3050 int sub_y_offset= y_offset + (j&2);
3051 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3052 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3053 &weight_op[6], &weight_avg[6],
3054 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3060 prefetch_motion(h, 1);
/* One-time construction of the static CAVLC VLC tables (coeff_token,
 * total_zeros, run_before) from the length/bits arrays; guarded by a
 * 'done' flag so repeated decoder inits are cheap.
 * NOTE(review): not thread-safe as written — the done flag is a plain
 * static int with no locking; fine only if init is serialized. */
3063 static void decode_init_vlc(){
3064 static int done = 0;
3070 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3071 &chroma_dc_coeff_token_len [0], 1, 1,
3072 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3075 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3076 &coeff_token_len [i][0], 1, 1,
3077 &coeff_token_bits[i][0], 1, 1, 1);
3081 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3082 &chroma_dc_total_zeros_len [i][0], 1, 1,
3083 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3085 for(i=0; i<15; i++){
3086 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3087 &total_zeros_len [i][0], 1, 1,
3088 &total_zeros_bits[i][0], 1, 1, 1);
3092 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3093 &run_len [i][0], 1, 1,
3094 &run_bits[i][0], 1, 1, 1);
/* runs >= 7 share one table indexed with more bits */
3096 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3097 &run_len [6][0], 1, 1,
3098 &run_bits[6][0], 1, 1, 1);
3103 * Sets the intra prediction function pointers.
/**
 * Fills the per-mode dispatch tables with the C reference implementations:
 * pred4x4 (Intra_4x4 luma), pred8x8l (Intra_8x8 luma), pred8x8 (chroma)
 * and pred16x16 (Intra_16x16 luma).
 * Platform-optimized versions may presumably override these elsewhere --
 * the commented-out MpegEncContext alias suggests it was once needed here.
 */
3105 static void init_pred_ptrs(H264Context *h){
3106 // MpegEncContext * const s = &h->s;
// 4x4 luma intra prediction modes (9 directional + DC fallbacks)
3108 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3109 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3110 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3111 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3112 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3113 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3114 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3115 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3116 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
// DC variants used when left/top neighbours are unavailable
3117 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3118 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3119 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
// 8x8 luma intra prediction (High profile transform_8x8 mode)
3121 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3122 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3123 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3124 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3125 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3126 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3127 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3128 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3129 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3130 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3131 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3132 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
// 8x8 chroma intra prediction
3134 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
3135 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
3136 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
3137 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
3138 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3139 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3140 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
// 16x16 luma intra prediction (indexed with the 8x8 mode constants)
3142 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
3143 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
3144 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
3145 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
3146 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3147 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3148 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/**
 * Frees all per-sequence tables allocated by alloc_tables().
 * Safe to call on partially-allocated state: av_freep() tolerates NULL
 * and nulls each pointer, so a failed alloc_tables() can clean up here.
 */
3151 static void free_tables(H264Context *h){
3152 av_freep(&h->intra4x4_pred_mode);
3153 av_freep(&h->chroma_pred_mode_table);
3154 av_freep(&h->cbp_table);
3155 av_freep(&h->mvd_table[0]);
3156 av_freep(&h->mvd_table[1]);
3157 av_freep(&h->direct_table);
3158 av_freep(&h->non_zero_count);
3159 av_freep(&h->slice_table_base);
3160 av_freep(&h->top_borders[1]);
3161 av_freep(&h->top_borders[0]);
// slice_table is an offset view into slice_table_base (see alloc_tables),
// so it must only be cleared, never freed itself
3162 h->slice_table= NULL;
3164 av_freep(&h->mb2b_xy);
3165 av_freep(&h->mb2b8_xy);
3167 av_freep(&h->s.obmc_scratchpad);
/**
 * Builds the 8x8 dequantization tables (one per qp 0..51) for the two
 * scaling-matrix contexts (intra/inter), applying the PPS scaling matrix.
 * If both 8x8 scaling matrices are identical, context 1 aliases context 0
 * to skip the duplicate work.
 * NOTE(review): several interior lines (loop bodies / 'idx' setup) are
 * elided in this view -- verify against the full source.
 */
3170 static void init_dequant8_coeff_table(H264Context *h){
// entries are stored transposed when a non-reference (optimized) idct8 is
// in use, so the dequant layout matches that idct's input ordering
3172 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3173 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3174 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3176 for(i=0; i<2; i++ ){
// identical intra/inter matrices: share one buffer
3177 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3178 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3182 for(q=0; q<52; q++){
3183 int shift = div6[q];
// (x>>3)|((x&7)<<3) transposes the 8x8 position when required
3186 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3187 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3188 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Builds the 4x4 dequantization tables for the six scaling-matrix contexts
 * (3 intra + 3 inter, per color component), one table per qp 0..51.
 * Contexts with identical PPS scaling matrices share a single buffer.
 * NOTE(review): interior lines (inner loop headers, 'idx' setup) are
 * elided in this view -- verify against the full source.
 */
3193 static void init_dequant4_coeff_table(H264Context *h){
// entries are stored transposed when a non-reference (optimized) idct is used
3195 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3196 for(i=0; i<6; i++ ){
3197 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier context's buffer when the matrices match
3199 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3200 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3207 for(q=0; q<52; q++){
3208 int shift = div6[q] + 2;
// (x>>2)|((x<<2)&0xF) transposes the 4x4 position when required
3211 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3212 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3213 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initializes all dequantization tables: always the 4x4 set, the 8x8 set
 * only when the PPS enables 8x8 transforms. For lossless (transform
 * bypass) streams, qp 0 entries are overridden with the identity scale
 * (1<<6), so dequantization becomes a no-op at qp 0.
 * NOTE(review): loop headers around the bypass overrides are elided here.
 */
3218 static void init_dequant_tables(H264Context *h){
3220 init_dequant4_coeff_table(h);
3221 if(h->pps.transform_8x8_mode)
3222 init_dequant8_coeff_table(h);
3223 if(h->sps.transform_bypass){
// 1<<6 compensates the >>6 normalization of the idct path
3226 h->dequant4_coeff[i][0][x] = 1<<6;
3227 if(h->pps.transform_8x8_mode)
3230 h->dequant8_coeff[i][0][x] = 1<<6;
3237 * needs width/height
/**
 * Allocates every per-sequence table sized from mb_width/mb_height/mb_stride.
 * Must run after picture dimensions are known. Returns 0 on success; on
 * allocation failure CHECKED_ALLOCZ presumably jumps to a cleanup label
 * (elided from this view) -- confirm against full source.
 */
3239 static int alloc_tables(H264Context *h){
3240 MpegEncContext * const s = &h->s;
// +1 row of macroblocks so edge accesses above row 0 stay in bounds
3241 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3244 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3246 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3247 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3248 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3249 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3250 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC-only side tables (mvd, chroma pred mode, direct mode)
3252 if( h->pps.cabac ) {
3253 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3254 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3255 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3256 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table points past the guard border so
// negative neighbour offsets are valid
3259 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3260 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// macroblock-index -> motion-vector-block-index lookup tables
3262 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3263 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3264 for(y=0; y<s->mb_height; y++){
3265 for(x=0; x<s->mb_width; x++){
3266 const int mb_xy= x + y*s->mb_stride;
3267 const int b_xy = 4*x + 4*y*h->b_stride;
3268 const int b8_xy= 2*x + 2*y*h->b8_stride;
3270 h->mb2b_xy [mb_xy]= b_xy;
3271 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start() because it depends on linesize
3275 s->obmc_scratchpad = NULL;
3277 if(!h->dequant4_coeff[0])
3278 init_dequant_tables(h);
/**
 * Context initialization shared between encoder and decoder: copies
 * dimensions/codec id from the AVCodecContext and seeds the scaling
 * matrices with the spec's flat default (all 16, i.e. no scaling).
 */
3286 static void common_init(H264Context *h){
3287 MpegEncContext * const s = &h->s;
3289 s->width = s->avctx->width;
3290 s->height = s->avctx->height;
3291 s->codec_id= s->avctx->codec->id;
// -1 = dequant tables not yet built for any PPS
3295 h->dequant_coeff_pps= -1;
3296 s->unrestricted_mv=1;
3297 s->decode=1; //FIXME
// flat default scaling lists (value 16 == unity scale)
3299 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3300 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback for the H.264 decoder. Sets up the embedded
 * MpegEncContext, output format, and detects AVC ("avcC") style
 * extradata by its leading version byte 1.
 * NOTE(review): the function continues past this view (common_init call,
 * avcC handling, return) -- those lines are elided.
 */
3303 static int decode_init(AVCodecContext *avctx){
3304 H264Context *h= avctx->priv_data;
3305 MpegEncContext * const s = &h->s;
3307 MPV_decode_defaults(s);
3312 s->out_format = FMT_H264;
3313 s->workaround_bugs= avctx->workaround_bugs;
3316 // s->decode_mb= ff_h263_decode_mb;
3318 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1, unlike Annex B
3322 if(avctx->extradata_size > 0 && avctx->extradata &&
3323 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts MPV/error-resilience bookkeeping, precomputes
 * the block_offset table (pixel offsets per 4x4 block for both frame and
 * field linesizes), lazily allocates the bipred scratchpad, and resets
 * slice_table so not-yet-decoded macroblocks read as "no slice" (-1).
 */
3333 static int frame_start(H264Context *h){
3334 MpegEncContext * const s = &h->s;
3337 if(MPV_frame_start(s, s->avctx) < 0)
3339 ff_er_frame_start(s);
3341 assert(s->linesize && s->uvlinesize);
// offsets [0..15]: luma, frame coding; [24..39]: luma, field coding
// (field rows are 2*linesize apart, hence the 8* factor below)
3343 for(i=0; i<16; i++){
3344 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3345 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma: [16..23] frame, [24+16..24+23] field (cb and cr share offsets)
3348 h->block_offset[16+i]=
3349 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3350 h->block_offset[24+16+i]=
3351 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3354 /* can't be in alloc_tables because linesize isn't known there.
3355 * FIXME: redo bipred weight to not require extra buffer? */
3356 if(!s->obmc_scratchpad)
3357 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3359 /* some macroblocks will be accessed before they're available */
3361 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3363 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the bottom row and right column of the current (progressive)
 * macroblock into top_borders[0] / left_border, so the deblocking filter
 * can later restore the unfiltered samples needed for intra prediction
 * of the neighbours.
 */
3367 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3368 MpegEncContext * const s = &h->s;
3372 src_cb -= uvlinesize;
3373 src_cr -= uvlinesize;
3375 // There are two lines saved, the line above the top macroblock of a pair,
3376 // and the line above the bottom macroblock
// left_border[0] keeps the old top-left corner sample before overwriting
3377 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3378 for(i=1; i<17; i++){
3379 h->left_border[i]= src_y[15+i* linesize];
// save the macroblock's bottom luma row (16 bytes, as two uint64 copies)
3382 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3383 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders skipped entirely in grayscale-only decoding
3385 if(!(s->flags&CODEC_FLAG_GRAY)){
3386 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3387 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3389 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3390 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3392 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3393 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border samples
 * with the picture around the current macroblock. Called before intra
 * prediction (to present unfiltered neighbours) and again after, when
 * the in-loop deblocking filter is active.
 */
3397 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3398 MpegEncContext * const s = &h->s;
3401 int deblock_left = (s->mb_x > 0);
3402 int deblock_top = (s->mb_y > 0);
// step back to the row/column just outside the macroblock
3404 src_y -= linesize + 1;
3405 src_cb -= uvlinesize + 1;
3406 src_cr -= uvlinesize + 1;
// XCHG swaps a and b when xchg is set, otherwise copies b into a
// (body elided from this view)
3408 #define XCHG(a,b,t,xchg)\
// left luma column; skip row 0 when there is no top neighbour
3415 for(i = !deblock_top; i<17; i++){
3416 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3421 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3422 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// also touch the next macroblock's border for top-right prediction samples
3423 if(s->mb_x+1 < s->mb_width){
3424 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3428 if(!(s->flags&CODEC_FLAG_GRAY)){
3430 for(i = !deblock_top; i<9; i++){
3431 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3432 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3436 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3437 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock pair. Two bottom rows go into top_borders[0]/[1] and the
 * right columns of both macroblocks into left_border.
 */
3442 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3443 MpegEncContext * const s = &h->s;
3446 src_y -= 2 * linesize;
3447 src_cb -= 2 * uvlinesize;
3448 src_cr -= 2 * uvlinesize;
3450 // There are two lines saved, the line above the top macroblock of a pair,
3451 // and the line above the bottom macroblock
// preserve the previous top-left corner samples of both border rows
3452 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3453 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma rows of the pair (rows 2..33 relative to the adjusted src_y)
3454 for(i=2; i<34; i++){
3455 h->left_border[i]= src_y[15+i* linesize];
// bottom two luma rows of the pair (rows 32 and 33)
3458 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3459 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3460 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3461 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3463 if(!(s->flags&CODEC_FLAG_GRAY)){
3464 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3465 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3466 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3467 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3468 for(i=2; i<18; i++){
3469 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3470 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
// bottom two chroma rows of the pair (rows 16 and 17)
3472 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3473 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3474 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3475 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps (xchg=1) or restores (xchg=0)
 * the saved unfiltered border samples of a macroblock pair, covering the
 * two border rows (top_borders[0]/[1]) and both left columns.
 */
3479 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3480 MpegEncContext * const s = &h->s;
3483 int deblock_left = (s->mb_x > 0);
// a pair needs two rows above it, so top deblocking starts at mb_y > 1
3484 int deblock_top = (s->mb_y > 1);
3486 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3488 src_y -= 2 * linesize + 1;
3489 src_cb -= 2 * uvlinesize + 1;
3490 src_cr -= 2 * uvlinesize + 1;
// XCHG swaps a and b when xchg is set, otherwise copies b into a
// (body elided from this view)
3492 #define XCHG(a,b,t,xchg)\
// skip the two top rows when there is no top neighbour pair
3499 for(i = (!deblock_top)<<1; i<34; i++){
3500 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3505 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3506 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3507 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3508 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// next pair's border provides the top-right prediction samples
3509 if(s->mb_x+1 < s->mb_width){
3510 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3511 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3515 if(!(s->flags&CODEC_FLAG_GRAY)){
3517 for(i = (!deblock_top) << 1; i<18; i++){
3518 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3519 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3523 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3524 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3525 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3526 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: computes destination pointers,
 * selects idct function pointers, performs intra prediction or inter
 * motion compensation, adds residuals (H.264 or SVQ3 idct), and drives
 * the deblocking filter (with MBAFF pair handling).
 * NOTE(review): many branch/else/closing lines are elided in this view;
 * comments below describe only the visible statements.
 */
3531 static void hl_decode_mb(H264Context *h){
3532 MpegEncContext * const s = &h->s;
3533 const int mb_x= s->mb_x;
3534 const int mb_y= s->mb_y;
3535 const int mb_xy= mb_x + mb_y*s->mb_stride;
3536 const int mb_type= s->current_picture.mb_type[mb_xy];
3537 uint8_t *dest_y, *dest_cb, *dest_cr;
3538 int linesize, uvlinesize /*dct_offset*/;
3540 int *block_offset = &h->block_offset[0];
// in MBAFF, odd mb_y is the bottom macroblock of a pair
3541 const unsigned int bottom = mb_y & 1;
3542 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3543 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3544 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination of the current macroblock in the output picture
3549 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3550 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3551 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3553 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3554 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field decoding: double the strides and use the field block_offset set
3557 linesize = h->mb_linesize = s->linesize * 2;
3558 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3559 block_offset = &h->block_offset[24];
3560 if(mb_y&1){ //FIXME move out of this func?
3561 dest_y -= s->linesize*15;
3562 dest_cb-= s->uvlinesize*7;
3563 dest_cr-= s->uvlinesize*7;
// remap frame refs to field refs in the ref_cache (MBAFF)
3567 for(list=0; list<2; list++){
3568 if(!USES_LIST(mb_type, list))
3570 if(IS_16X16(mb_type)){
3571 int8_t *ref = &h->ref_cache[list][scan8[0]];
3572 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3574 for(i=0; i<16; i+=4){
3575 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3576 int ref = h->ref_cache[list][scan8[i]];
3578 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// frame (non-field) path: normal strides
3584 linesize = h->mb_linesize = s->linesize;
3585 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3586 // dct_offset = s->linesize * 16;
// select idct variants: bypass adds residuals directly, otherwise
// 8x8 or 4x4 idct depending on the transform size of this mb
3589 if(transform_bypass){
3591 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3592 }else if(IS_8x8DCT(mb_type)){
3593 idct_dc_add = s->dsp.h264_idct8_dc_add;
3594 idct_add = s->dsp.h264_idct8_add;
3596 idct_dc_add = s->dsp.h264_idct_dc_add;
3597 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: expose unfiltered pair borders first
3600 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3601 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3602 int mbt_y = mb_y&~1;
3603 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3604 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3605 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3606 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples are copied straight from h->mb to the picture
3609 if (IS_INTRA_PCM(mb_type)) {
3612 // The pixels are stored in h->mb array in the same order as levels,
3613 // copy them in output in the correct order.
3614 for(i=0; i<16; i++) {
3615 for (y=0; y<4; y++) {
3616 for (x=0; x<4; x++) {
3617 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3621 for(i=16; i<16+4; i++) {
3622 for (y=0; y<4; y++) {
3623 for (x=0; x<4; x++) {
3624 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3628 for(i=20; i<20+4; i++) {
3629 for (y=0; y<4; y++) {
3630 for (x=0; x<4; x++) {
3631 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra macroblock: predict from (unfiltered) neighbour samples
3636 if(IS_INTRA(mb_type)){
3637 if(h->deblocking_filter && !FRAME_MBAFF)
3638 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3640 if(!(s->flags&CODEC_FLAG_GRAY)){
3641 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3642 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3645 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: predict and add residual per 8x8 luma block
3647 if(IS_8x8DCT(mb_type)){
3648 for(i=0; i<16; i+=4){
3649 uint8_t * const ptr= dest_y + block_offset[i];
3650 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3651 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3652 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3653 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// DC-only blocks take the cheaper dc_add path
3655 if(nnz == 1 && h->mb[i*16])
3656 idct_dc_add(ptr, h->mb + i*16, linesize);
3658 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 transform: per 4x4 luma block
3662 for(i=0; i<16; i++){
3663 uint8_t * const ptr= dest_y + block_offset[i];
3665 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3668 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3669 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3670 assert(mb_y || linesize <= block_offset[i]);
// missing top-right samples are replicated from the last top sample
3671 if(!topright_avail){
3672 tr= ptr[3 - linesize]*0x01010101;
3673 topright= (uint8_t*) &tr;
3675 topright= ptr + 4 - linesize;
3679 h->pred4x4[ dir ](ptr, topright, linesize);
3680 nnz = h->non_zero_count_cache[ scan8[i] ];
3682 if(s->codec_id == CODEC_ID_H264){
3683 if(nnz == 1 && h->mb[i*16])
3684 idct_dc_add(ptr, h->mb + i*16, linesize);
3686 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 shares this decoder but uses its own idct/dequant
3688 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra_16x16: one full-mb prediction + separate luma DC transform
3693 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3694 if(s->codec_id == CODEC_ID_H264){
3695 if(!transform_bypass)
3696 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3698 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3700 if(h->deblocking_filter && !FRAME_MBAFF)
3701 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// inter macroblock: motion compensation (H.264 path only)
3702 }else if(s->codec_id == CODEC_ID_H264){
3703 hl_motion(h, dest_y, dest_cb, dest_cr,
3704 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3705 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3706 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add luma residuals for non-Intra4x4 macroblocks
3710 if(!IS_INTRA4x4(mb_type)){
3711 if(s->codec_id == CODEC_ID_H264){
3712 if(IS_INTRA16x16(mb_type)){
3713 for(i=0; i<16; i++){
3714 if(h->non_zero_count_cache[ scan8[i] ])
3715 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3716 else if(h->mb[i*16])
3717 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// step 4 blocks at a time for 8x8 transforms, 1 for 4x4
3720 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3721 for(i=0; i<16; i+=di){
3722 int nnz = h->non_zero_count_cache[ scan8[i] ];
3724 if(nnz==1 && h->mb[i*16])
3725 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3727 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3732 for(i=0; i<16; i++){
3733 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3734 uint8_t * const ptr= dest_y + block_offset[i];
3735 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residuals (skipped in grayscale mode)
3741 if(!(s->flags&CODEC_FLAG_GRAY)){
3742 uint8_t *dest[2] = {dest_cb, dest_cr};
3743 if(transform_bypass){
3744 idct_add = idct_dc_add = s->dsp.add_pixels4;
3746 idct_add = s->dsp.h264_idct_add;
3747 idct_dc_add = s->dsp.h264_idct_dc_add;
// chroma DC coefficients get their own 2x2 transform per component
3748 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3749 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3751 if(s->codec_id == CODEC_ID_H264){
// blocks 16..19 are cb, 20..23 cr; (i&4)>>2 selects the plane
3752 for(i=16; i<16+8; i++){
3753 if(h->non_zero_count_cache[ scan8[i] ])
3754 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3755 else if(h->mb[i*16])
3756 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3759 for(i=16; i<16+8; i++){
3760 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3761 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3762 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters the whole pair once the bottom mb is done
3768 if(h->deblocking_filter) {
3770 //FIXME try deblocking one mb at a time?
3771 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3772 const int mb_y = s->mb_y - 1;
3773 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3774 const int mb_xy= mb_x + mb_y*s->mb_stride;
3775 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3776 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// wait for the bottom macroblock before filtering the pair
3777 if (!bottom) return;
3778 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3779 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3780 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3782 if(IS_INTRA(mb_type_top | mb_type_bottom))
3783 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3785 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3789 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3790 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3791 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3792 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3795 tprintf("call mbaff filter_mb\n");
3796 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3797 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3798 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// progressive path: back up borders, then run the fast filter
3800 tprintf("call filter_mb\n");
3801 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3802 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3803 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3809 * fills the default_ref_list.
/**
 * Builds the default reference picture lists. For B slices, short-term
 * refs are first sorted by POC and L0/L1 are built walking that order in
 * opposite directions from the current POC; for P slices, short-term refs
 * are used in their existing (frame_num) order. Long-term refs follow in
 * index order in both cases. Unused tail entries are zeroed.
 * NOTE(review): several branch/loop lines are elided from this view.
 */
3811 static int fill_default_ref_list(H264Context *h){
3812 MpegEncContext * const s = &h->s;
3814 int smallest_poc_greater_than_current = -1;
3815 Picture sorted_short_ref[32];
3817 if(h->slice_type==B_TYPE){
3821 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest POC above 'limit'
3822 for(out_i=0; out_i<h->short_ref_count; out_i++){
3824 int best_poc=INT_MAX;
3826 for(i=0; i<h->short_ref_count; i++){
3827 const int poc= h->short_ref[i]->poc;
3828 if(poc > limit && poc < best_poc){
3834 assert(best_i != INT_MIN);
3837 sorted_short_ref[out_i]= *h->short_ref[best_i];
3838 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where refs with POC >= current start in the sorted array
3839 if (-1 == smallest_poc_greater_than_current) {
3840 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3841 smallest_poc_greater_than_current = out_i;
3847 if(s->picture_structure == PICT_FRAME){
3848 if(h->slice_type==B_TYPE){
3850 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3852 // find the largest poc
// L0 walks toward smaller POC first, L1 toward larger (step = +/-1)
3853 for(list=0; list<2; list++){
3856 int step= list ? -1 : 1;
3858 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// bounce off the array ends; j==-99 presumably marks exhaustion
3859 while(j<0 || j>= h->short_ref_count){
3860 if(j != -99 && step == (list ? -1 : 1))
3863 j= smallest_poc_greater_than_current + (step>>1);
// only frame refs (both fields reference) are usable here
3865 if(sorted_short_ref[j].reference != 3) continue;
3866 h->default_ref_list[list][index ]= sorted_short_ref[j];
3867 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// append long-term refs in ascending long_term index order
3870 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3871 if(h->long_ref[i] == NULL) continue;
3872 if(h->long_ref[i]->reference != 3) continue;
3874 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3875 h->default_ref_list[ list ][index++].pic_id= i;;
3878 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3879 // swap the two first elements of L1 when
3880 // L0 and L1 are identical
3881 Picture temp= h->default_ref_list[1][0];
3882 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3883 h->default_ref_list[1][1] = temp;
3886 if(index < h->ref_count[ list ])
3887 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P-slice (non-B) default list: short-term refs in stored order
3891 for(i=0; i<h->short_ref_count; i++){
3892 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3893 h->default_ref_list[0][index ]= *h->short_ref[i];
3894 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3896 for(i = 0; i < 16; i++){
3897 if(h->long_ref[i] == NULL) continue;
3898 if(h->long_ref[i]->reference != 3) continue;
3899 h->default_ref_list[0][index ]= *h->long_ref[i];
3900 h->default_ref_list[0][index++].pic_id= i;;
3902 if(index < h->ref_count[0])
3903 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index))
;
3906 if(h->slice_type==B_TYPE){
3908 //FIXME second field balh
3912 for (i=0; i<h->ref_count[0]; i++) {
3913 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3915 if(h->slice_type==B_TYPE){
3916 for (i=0; i<h->ref_count[1]; i++) {
3917 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3924 static void print_short_term(H264Context *h);
3925 static void print_long_term(H264Context *h);
/**
 * Parses the ref_pic_list_reordering() syntax and applies it on top of
 * the default lists: for each reordering command, the selected short- or
 * long-term reference is moved to the current index and later duplicates
 * are shifted down. Also pads holes in the lists and initializes the
 * direct-mode tables. Returns 0 on success, negative on bitstream errors.
 * NOTE(review): some branch/return lines are elided from this view.
 */
3927 static int decode_ref_pic_list_reordering(H264Context *h){
3928 MpegEncContext * const s = &h->s;
3931 print_short_term(h);
// I/SI slices carry no reference lists
3933 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3935 for(list=0; list<2; list++){
// start from the default list; reordering modifies it in place
3936 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
3938 if(get_bits1(&s->gb)){
3939 int pred= h->curr_pic_num;
3941 for(index=0; ; index++){
3942 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3945 Picture *ref = NULL;
// idc 3 terminates the reordering loop
3947 if(reordering_of_pic_nums_idc==3)
3950 if(index >= h->ref_count[list]){
3951 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3955 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term ref addressed by abs_diff_pic_num
3956 if(reordering_of_pic_nums_idc<2){
3957 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3959 if(abs_diff_pic_num >= h->max_pic_num){
3960 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num
3964 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3965 else pred+= abs_diff_pic_num;
3966 pred &= h->max_pic_num - 1;
3968 for(i= h->short_ref_count-1; i>=0; i--){
3969 ref = h->short_ref[i];
3970 assert(ref->reference == 3);
3971 assert(!ref->long_ref);
3972 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3976 ref->pic_id= ref->frame_num;
// idc 2: long-term ref addressed directly by index
3978 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3979 ref = h->long_ref[pic_id];
3980 ref->pic_id= pic_id;
3981 assert(ref->reference == 3);
3982 assert(ref->long_ref);
3987 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3988 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift entries down to insert 'ref' at 'index', dropping a
// later duplicate of the same picture if present
3990 for(i=index; i+1<h->ref_count[list]; i++){
3991 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3994 for(; i > index; i--){
3995 h->ref_list[list][i]= h->ref_list[list][i-1];
3997 h->ref_list[list][index]= *ref;
4000 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// only B slices have a second list
4006 if(h->slice_type!=B_TYPE) break;
// fill holes (missing pictures) with the current picture as fallback
4008 for(list=0; list<2; list++){
4009 for(index= 0; index < h->ref_count[list]; index++){
4010 if(!h->ref_list[list][index].data[0])
4011 h->ref_list[list][index]= s->current_picture;
4013 if(h->slice_type!=B_TYPE) break;
4016 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4017 direct_dist_scale_factor(h);
4018 direct_ref_list_init(h);
/**
 * For MBAFF decoding, derives field reference entries from each frame
 * reference: entries [16+2*i] / [16+2*i+1] are the top/bottom fields of
 * frame ref i (doubled linesize, bottom field offset by one line), and
 * the explicit/implicit weight tables are replicated accordingly.
 * NOTE(review): loop headers over 'j' are elided in this view.
 */
4022 static void fill_mbaff_ref_list(H264Context *h){
4024 for(list=0; list<2; list++){
4025 for(i=0; i<h->ref_count[list]; i++){
4026 Picture *frame = &h->ref_list[list][i];
4027 Picture *field = &h->ref_list[list][16+2*i];
// fields use double the frame stride
4030 field[0].linesize[j] <<= 1;
4031 field[1] = field[0];
// bottom field starts one (frame) line below the top field
4033 field[1].data[j] += frame->linesize[j];
// both fields inherit the frame ref's explicit weights/offsets
4035 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4036 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4038 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4039 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// replicate implicit weights across both field indices of each ref pair
4043 for(j=0; j<h->ref_count[1]; j++){
4044 for(i=0; i<h->ref_count[0]; i++)
4045 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4046 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4047 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table(): explicit luma/chroma weights and offsets
 * per reference, falling back to the default weight (1<<log2_denom) and
 * zero offset when the per-ref flag is absent. Sets use_weight /
 * use_weight_chroma when any non-default value appears.
 * NOTE(review): some lines (use_weight reset, inner 'j' loops, return)
 * are elided from this view.
 */
4051 static int pred_weight_table(H264Context *h){
4052 MpegEncContext * const s = &h->s;
4054 int luma_def, chroma_def;
4057 h->use_weight_chroma= 0;
4058 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4059 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weight == 1.0 in fixed point (1 << log2_denom)
4060 luma_def = 1<<h->luma_log2_weight_denom;
4061 chroma_def = 1<<h->chroma_log2_weight_denom;
4063 for(list=0; list<2; list++){
4064 for(i=0; i<h->ref_count[list]; i++){
4065 int luma_weight_flag, chroma_weight_flag;
4067 luma_weight_flag= get_bits1(&s->gb);
4068 if(luma_weight_flag){
4069 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4070 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// weighting is only "used" if it differs from the identity
4071 if( h->luma_weight[list][i] != luma_def
4072 || h->luma_offset[list][i] != 0)
4075 h->luma_weight[list][i]= luma_def;
4076 h->luma_offset[list][i]= 0;
4079 chroma_weight_flag= get_bits1(&s->gb);
4080 if(chroma_weight_flag){
// one weight/offset pair per chroma component (cb, cr)
4083 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4084 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4085 if( h->chroma_weight[list][i][j] != chroma_def
4086 || h->chroma_offset[list][i][j] != 0)
4087 h->use_weight_chroma= 1;
4092 h->chroma_weight[list][i][j]= chroma_def;
4093 h->chroma_offset[list][i][j]= 0;
// P slices only have list 0
4097 if(h->slice_type != B_TYPE) break;
4099 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weighted-prediction table for B slices: per
 * (ref0, ref1) pair, the weight is derived from the POC distances of the
 * two references to the current picture (td/tb scaling), clamped to the
 * default 32/32 split for degenerate cases.
 * NOTE(review): the fast-exit branch body and the td==0/equal-poc 'if'
 * lines around 4124-4126 are elided from this view.
 */
4103 static void implicit_weight_table(H264Context *h){
4104 MpegEncContext * const s = &h->s;
4106 int cur_poc = s->current_picture_ptr->poc;
// single symmetric pair around the current POC: implicit weighting
// degenerates to the default, so skip the table
4108 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4109 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4111 h->use_weight_chroma= 0;
4116 h->use_weight_chroma= 2;
// implicit mode uses a fixed denominator of 2^5 = 32
4117 h->luma_log2_weight_denom= 5;
4118 h->chroma_log2_weight_denom= 5;
4120 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4121 int poc0 = h->ref_list[0][ref0].poc;
4122 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4123 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/DistScaleFactor per the spec's implicit WP derivation
4124 int td = clip(poc1 - poc0, -128, 127);
4126 int tb = clip(cur_poc - poc0, -128, 127);
4127 int tx = (16384 + (FFABS(td) >> 1)) / td;
4128 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range factors fall back to the equal 32/32 weighting
4129 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4130 h->implicit_weight[ref0][ref1] = 32;
4132 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4134 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the reference status of a picture unless it is still needed for
 * delayed output (h->delayed_output_pic / h->delayed_pic[]).
 * NOTE(review): numbering jumps (4139->4142->4145) — the actual
 * pic->reference assignment and the loop body are missing from this listing.
 */
4139 static inline void unreference_pic(H264Context *h, Picture *pic){
4142 if(pic == h->delayed_output_pic)
4145 for(i = 0; h->delayed_pic[i]; i++)
4146 if(pic == h->delayed_pic[i]){
4154 * instantaneous decoder refresh.
/**
 * Handles an IDR (instantaneous decoder refresh): unreferences and clears
 * all long-term and short-term reference pictures and resets both counts.
 */
4156 static void idr(H264Context *h){
/* release all 16 possible long-term slots */
4159 for(i=0; i<16; i++){
4160 if (h->long_ref[i] != NULL) {
4161 unreference_pic(h, h->long_ref[i]);
4162 h->long_ref[i]= NULL;
4165 h->long_ref_count=0;
/* release every short-term reference */
4167 for(i=0; i<h->short_ref_count; i++){
4168 unreference_pic(h, h->short_ref[i]);
4169 h->short_ref[i]= NULL;
4171 h->short_ref_count=0;
4174 /* forget old pics after a seek */
/**
 * Flushes the decoded picture buffer after a seek: clears the delayed
 * picture queue, the delayed output picture and the current picture's
 * reference flag, then performs an IDR-style reference reset.
 * NOTE(review): numbering jumps after 4188 — the idr(h) call visible in
 * the original is not in this listing.
 */
4175 static void flush_dpb(AVCodecContext *avctx){
4176 H264Context *h= avctx->priv_data;
4178 for(i=0; i<16; i++) {
4179 if(h->delayed_pic[i])
4180 h->delayed_pic[i]->reference= 0;
4181 h->delayed_pic[i]= NULL;
4183 if(h->delayed_output_pic)
4184 h->delayed_output_pic->reference= 0;
4185 h->delayed_output_pic= NULL;
4187 if(h->s.current_picture_ptr)
4188 h->s.current_picture_ptr->reference= 0;
4193 * @return the removed picture or NULL if an error occurs
/**
 * Removes the short-term reference picture with the given frame_num from
 * h->short_ref[], compacting the array.
 * @return the removed picture, or (presumably) NULL when not found —
 *         the tail return statements are missing from this listing.
 */
4195 static Picture * remove_short(H264Context *h, int frame_num){
4196 MpegEncContext * const s = &h->s;
4199 if(s->avctx->debug&FF_DEBUG_MMCO)
4200 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4202 for(i=0; i<h->short_ref_count; i++){
4203 Picture *pic= h->short_ref[i];
4204 if(s->avctx->debug&FF_DEBUG_MMCO)
4205 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4206 if(pic->frame_num == frame_num){
4207 h->short_ref[i]= NULL;
/* close the gap left by the removed entry */
4208 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4209 h->short_ref_count--;
4218 * @return the removed picture or NULL if an error occurs
/**
 * Removes the long-term reference at index i, decrementing the count only
 * when the slot was occupied.
 * @return the removed picture or NULL if the slot was empty (the return
 *         statement itself is missing from this listing).
 */
4220 static Picture * remove_long(H264Context *h, int i){
4223 pic= h->long_ref[i];
4224 h->long_ref[i]= NULL;
4225 if(pic) h->long_ref_count--;
4231 * print short term list
/**
 * Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * logging is enabled.
 */
4233 static void print_short_term(H264Context *h) {
4235 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4236 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4237 for(i=0; i<h->short_ref_count; i++){
4238 Picture *pic= h->short_ref[i];
4239 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4245 * print long term list
/**
 * Debug helper: dumps the 16-slot long-term reference list when
 * FF_DEBUG_MMCO logging is enabled.
 * NOTE(review): numbering jumps 4252->4254 — the NULL-slot guard before
 * the av_log dereference is missing from this listing.
 */
4247 static void print_long_term(H264Context *h) {
4249 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4250 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4251 for(i = 0; i < 16; i++){
4252 Picture *pic= h->long_ref[i];
4254 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4261 * Executes the reference picture marking (memory management control operations).
/**
 * Executes reference picture marking: applies each decoded memory
 * management control operation (MMCO) to the short/long reference lists,
 * then inserts the current picture as a short-term reference if no MMCO
 * made it long-term.
 * NOTE(review): several embedded line numbers are skipped (e.g.
 * 4278->4280, 4297->4300, 4305->4309, 4324->4331) — `if(pic)` guards,
 * `break`s, `case` labels (MMCO_LONG, MMCO_RESET) and closing braces are
 * missing from this listing; the switch structure below is incomplete.
 */
4263 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4264 MpegEncContext * const s = &h->s;
4266 int current_is_long=0;
4269 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4270 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4272 for(i=0; i<mmco_count; i++){
4273 if(s->avctx->debug&FF_DEBUG_MMCO)
4274 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4276 switch(mmco[i].opcode){
4277 case MMCO_SHORT2UNUSED:
4278 pic= remove_short(h, mmco[i].short_frame_num);
4280 unreference_pic(h, pic);
4281 else if(s->avctx->debug&FF_DEBUG_MMCO)
4282 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4284 case MMCO_SHORT2LONG:
/* free whatever currently occupies the target long-term slot first */
4285 pic= remove_long(h, mmco[i].long_index);
4286 if(pic) unreference_pic(h, pic);
4288 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4289 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4290 h->long_ref_count++;
4292 case MMCO_LONG2UNUSED:
4293 pic= remove_long(h, mmco[i].long_index);
4295 unreference_pic(h, pic);
4296 else if(s->avctx->debug&FF_DEBUG_MMCO)
4297 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (MMCO_LONG: mark current picture long-term — case label missing from listing) */
4300 pic= remove_long(h, mmco[i].long_index);
4301 if(pic) unreference_pic(h, pic);
4303 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4304 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4305 h->long_ref_count++;
4309 case MMCO_SET_MAX_LONG:
4310 assert(mmco[i].long_index <= 16);
4311 // just remove the long term which index is greater than new max
4312 for(j = mmco[i].long_index; j<16; j++){
4313 pic = remove_long(h, j);
4314 if (pic) unreference_pic(h, pic);
/* (MMCO_RESET: drop every reference — case label missing from listing) */
4318 while(h->short_ref_count){
4319 pic= remove_short(h, h->short_ref[0]->frame_num);
4320 unreference_pic(h, pic);
4322 for(j = 0; j < 16; j++) {
4323 pic= remove_long(h, j);
4324 if(pic) unreference_pic(h, pic);
/* no MMCO made the current picture long-term -> it becomes short-term ref */
4331 if(!current_is_long){
4332 pic= remove_short(h, s->current_picture_ptr->frame_num);
4334 unreference_pic(h, pic);
4335 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture to the short-term list */
4338 if(h->short_ref_count)
4339 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4341 h->short_ref[0]= s->current_picture_ptr;
4342 h->short_ref[0]->long_ref=0;
4343 h->short_ref_count++;
4346 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices read no_output_of_prior_pics/long_term_reference flags;
 * other slices read an adaptive MMCO list, or fall back to sliding-window
 * marking when the reference buffer is full.
 * NOTE(review): numbering jumps (4358->4361, 4389->4394) — the IDR
 * opcode-count setup, break statements and the execute/return tail are
 * missing from this listing. The stray double semicolon on 4367 is a
 * harmless empty statement; left untouched to keep this listing faithful.
 */
4351 static int decode_ref_pic_marking(H264Context *h){
4352 MpegEncContext * const s = &h->s;
4355 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4356 s->broken_link= get_bits1(&s->gb) -1;
4357 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4358 if(h->mmco[0].long_index == -1)
4361 h->mmco[0].opcode= MMCO_LONG;
4365 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4366 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4367 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4369 h->mmco[i].opcode= opcode;
4370 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute frame_num, wrapped to its bit width */
4371 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4372 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4373 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4377 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4378 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4379 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4385 if(opcode > MMCO_LONG){
4386 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4389 if(opcode == MMCO_END)
/* sliding-window branch: evict the oldest short-term ref when the buffer is full */
4394 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4396 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4397 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4398 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Derives the picture order count (POC) for the current picture, per the
 * three poc_type modes of the H.264 spec: 0 = lsb/msb from the bitstream,
 * 1 = expected POC from a cycle of SPS offsets, 2 = frame_num-based.
 * Stores field_poc[0]/[1] and the frame poc on the current picture.
 * NOTE(review): numbering jumps (4414->4416, 4425->4430, 4446->4450,
 * 4463->4467, 4474->4477, 4481->4487) — else branches, the poc_type==2
 * header, and intermediate assignments are missing from this listing.
 */
4408 static int init_poc(H264Context *h){
4409 MpegEncContext * const s = &h->s;
4410 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4413 if(h->nal_unit_type == NAL_IDR_SLICE){
4414 h->frame_num_offset= 0;
/* frame_num wrapped -> bump the offset by one wrap period */
4416 if(h->frame_num < h->prev_frame_num)
4417 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4419 h->frame_num_offset= h->prev_frame_num_offset;
4422 if(h->sps.poc_type==0){
4423 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4425 if(h->nal_unit_type == NAL_IDR_SLICE){
/* poc_lsb wrap detection, spec 8.2.1.1 */
4430 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4431 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4432 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4433 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4435 h->poc_msb = h->prev_poc_msb;
4436 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4438 field_poc[1] = h->poc_msb + h->poc_lsb;
4439 if(s->picture_structure == PICT_FRAME)
4440 field_poc[1] += h->delta_poc_bottom;
4441 }else if(h->sps.poc_type==1){
4442 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4445 if(h->sps.poc_cycle_length != 0)
4446 abs_frame_num = h->frame_num_offset + h->frame_num;
4450 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4453 expected_delta_per_poc_cycle = 0;
4454 for(i=0; i < h->sps.poc_cycle_length; i++)
4455 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4457 if(abs_frame_num > 0){
4458 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4459 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4461 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4462 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4463 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4467 if(h->nal_ref_idc == 0)
4468 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4470 field_poc[0] = expectedpoc + h->delta_poc[0];
4471 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4473 if(s->picture_structure == PICT_FRAME)
4474 field_poc[1] += h->delta_poc[1];
/* (poc_type==2 branch — its `else` header is missing from this listing) */
4477 if(h->nal_unit_type == NAL_IDR_SLICE){
4480 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4481 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* write results onto the current Picture */
4487 if(s->picture_structure != PICT_BOTTOM_FIELD)
4488 s->current_picture_ptr->field_poc[0]= field_poc[0];
4489 if(s->picture_structure != PICT_TOP_FIELD)
4490 s->current_picture_ptr->field_poc[1]= field_poc[1];
4491 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4492 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4498 * decodes a slice header.
4499 * this will allso call MPV_common_init() and frame_start() as needed
/**
 * Decodes a slice header: slice type, PPS/SPS selection, picture
 * dimensions (initializing the MpegEncContext on first use), frame_num,
 * field/MBAFF structure, POC syntax, reference counts and list
 * reordering, weighted prediction, ref pic marking, QP, and deblocking
 * parameters. Also calls MPV_common_init()/frame_start() as needed.
 * NOTE(review): this listing is heavily gapped (numbering jumps such as
 * 4516->4521, 4536->4539, 4569->4573, 4617->4623, 4644->4647,
 * 4783->4788) — range checks, else branches, error returns and the
 * slice_group syntax (including the literal `?` placeholder on 4783,
 * which is inside a disabled/#if 0 region in the original) are missing
 * here. Do not treat this text as compilable.
 */
4501 static int decode_slice_header(H264Context *h){
4502 MpegEncContext * const s = &h->s;
4503 int first_mb_in_slice, pps_id;
4504 int num_ref_idx_active_override_flag;
4505 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4507 int default_ref_list_done = 0;
/* only pictures with nal_ref_idc != 0 are kept as references */
4509 s->current_picture.reference= h->nal_ref_idc != 0;
4510 s->dropable= h->nal_ref_idc == 0;
4512 first_mb_in_slice= get_ue_golomb(&s->gb);
4514 slice_type= get_ue_golomb(&s->gb);
4516 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type values 5..9 mean "fixed for the whole picture" */
4521 h->slice_type_fixed=1;
4523 h->slice_type_fixed=0;
4525 slice_type= slice_type_map[ slice_type ];
/* I slices, and repeats of the previous slice type, can reuse the default ref list */
4526 if (slice_type == I_TYPE
4527 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4528 default_ref_list_done = 1;
4530 h->slice_type= slice_type;
4532 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4534 pps_id= get_ue_golomb(&s->gb);
4536 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4539 h->pps= h->pps_buffer[pps_id];
/* slice_group_count==0 is used as the "PPS never parsed" marker */
4540 if(h->pps.slice_group_count == 0){
4541 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4545 h->sps= h->sps_buffer[ h->pps.sps_id ];
/* log2_max_frame_num==0 likewise marks an unparsed SPS */
4546 if(h->sps.log2_max_frame_num == 0){
4547 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4551 if(h->dequant_coeff_pps != pps_id){
4552 h->dequant_coeff_pps = pps_id;
4553 init_dequant_tables(h);
4556 s->mb_width= h->sps.mb_width;
4557 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4559 h->b_stride= s->mb_width*4;
4560 h->b8_stride= s->mb_width*2;
/* apply the SPS cropping rectangle to the coded size */
4562 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4563 if(h->sps.frame_mbs_only_flag)
4564 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4566 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4568 if (s->context_initialized
4569 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4573 if (!s->context_initialized) {
4574 if (MPV_common_init(s) < 0)
/* scan tables need permutation when the IDCT uses a non-reference layout */
4577 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4578 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4579 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4582 for(i=0; i<16; i++){
4583 #define T(x) (x>>2) | ((x<<2) & 0xF)
4584 h->zigzag_scan[i] = T(zigzag_scan[i]);
4585 h-> field_scan[i] = T( field_scan[i]);
4589 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4590 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4591 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4592 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4593 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4596 for(i=0; i<64; i++){
4597 #define T(x) (x>>3) | ((x&7)<<3)
4598 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4599 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4600 h->field_scan8x8[i] = T(field_scan8x8[i]);
4601 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* transform-bypass (lossless) blocks always use the unpermuted scan */
4605 if(h->sps.transform_bypass){ //FIXME same ugly
4606 h->zigzag_scan_q0 = zigzag_scan;
4607 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4608 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4609 h->field_scan_q0 = field_scan;
4610 h->field_scan8x8_q0 = field_scan8x8;
4611 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4613 h->zigzag_scan_q0 = h->zigzag_scan;
4614 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4615 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4616 h->field_scan_q0 = h->field_scan;
4617 h->field_scan8x8_q0 = h->field_scan8x8;
4618 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4623 s->avctx->width = s->width;
4624 s->avctx->height = s->height;
4625 s->avctx->sample_aspect_ratio= h->sps.sar;
4626 if(!s->avctx->sample_aspect_ratio.den)
4627 s->avctx->sample_aspect_ratio.den = 1;
4629 if(h->sps.timing_info_present_flag){
4630 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* old x264 builds wrote timestamps at half rate — compensate */
4631 if(h->x264_build > 0 && h->x264_build < 44)
4632 s->avctx->time_base.den *= 2;
4633 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4634 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* first slice of the picture: allocate/start the new frame */
4638 if(h->slice_num == 0){
4639 if(frame_start(h) < 0)
4643 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4644 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4647 h->mb_aff_frame = 0;
4648 if(h->sps.frame_mbs_only_flag){
4649 s->picture_structure= PICT_FRAME;
4651 if(get_bits1(&s->gb)) { //field_pic_flag
4652 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4653 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4655 s->picture_structure= PICT_FRAME;
4656 h->mb_aff_frame = h->sps.mb_aff;
/* MBAFF addresses macroblock pairs, hence the shift */
4660 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4661 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4662 if(s->mb_y >= s->mb_height){
4666 if(s->picture_structure==PICT_FRAME){
4667 h->curr_pic_num= h->frame_num;
4668 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4670 h->curr_pic_num= 2*h->frame_num;
4671 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4674 if(h->nal_unit_type == NAL_IDR_SLICE){
4675 get_ue_golomb(&s->gb); /* idr_pic_id */
4678 if(h->sps.poc_type==0){
4679 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4681 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4682 h->delta_poc_bottom= get_se_golomb(&s->gb);
4686 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4687 h->delta_poc[0]= get_se_golomb(&s->gb);
4689 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4690 h->delta_poc[1]= get_se_golomb(&s->gb);
4695 if(h->pps.redundant_pic_cnt_present){
4696 h->redundant_pic_count= get_ue_golomb(&s->gb);
4699 //set defaults, might be overriden a few line later
4700 h->ref_count[0]= h->pps.ref_count[0];
4701 h->ref_count[1]= h->pps.ref_count[1];
4703 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4704 if(h->slice_type == B_TYPE){
4705 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4706 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4707 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4709 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4711 if(num_ref_idx_active_override_flag){
4712 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4713 if(h->slice_type==B_TYPE)
4714 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4716 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4717 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4723 if(!default_ref_list_done){
4724 fill_default_ref_list(h);
4727 if(decode_ref_pic_list_reordering(h) < 0)
4730 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4731 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4732 pred_weight_table(h);
4733 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4734 implicit_weight_table(h);
4738 if(s->current_picture.reference)
4739 decode_ref_pic_marking(h);
4742 fill_mbaff_ref_list(h);
4744 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4745 h->cabac_init_idc = get_ue_golomb(&s->gb);
4747 h->last_qscale_diff = 0;
4748 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4749 if(s->qscale<0 || s->qscale>51){
4750 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4753 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4754 //FIXME qscale / qp ... stuff
4755 if(h->slice_type == SP_TYPE){
4756 get_bits1(&s->gb); /* sp_for_switch_flag */
4758 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4759 get_se_golomb(&s->gb); /* slice_qs_delta */
4762 h->deblocking_filter = 1;
4763 h->slice_alpha_c0_offset = 0;
4764 h->slice_beta_offset = 0;
4765 if( h->pps.deblocking_filter_parameters_present ) {
4766 h->deblocking_filter= get_ue_golomb(&s->gb);
/* bitstream: 0=on 1=off; internal: 1=on 0=off — swap */
4767 if(h->deblocking_filter < 2)
4768 h->deblocking_filter^= 1; // 1<->0
4770 if( h->deblocking_filter ) {
4771 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4772 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* honor the user's skip_loop_filter setting */
4775 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4776 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4777 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4778 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4779 h->deblocking_filter= 0;
4782 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4783 slice_group_change_cycle= get_bits(&s->gb, ?);
4788 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4789 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4791 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4792 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4794 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4796 av_get_pict_type_char(h->slice_type),
4797 pps_id, h->frame_num,
4798 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4799 h->ref_count[0], h->ref_count[1],
4801 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4803 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* non-reference frames may use the cheaper 2-tap qpel filters in "fast" mode */
4807 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4808 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4809 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4811 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4812 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: the count of leading zero bits before the
 * first 1 bit, decoded via av_log2 on the 32-bit bit cache.
 * NOTE(review): numbering jumps (4827->4829, 4836->...) — the TRACE
 * conditional around the debug logging and the return statement are
 * missing from this listing.
 */
4821 static inline int get_level_prefix(GetBitContext *gb){
4825 OPEN_READER(re, gb);
4826 UPDATE_CACHE(re, gb);
4827 buf=GET_CACHE(re, gb);
/* position of the terminating 1 bit; log-1 is the prefix value */
4829 log= 32 - av_log2(buf);
4831 print_bin(buf>>(32-log), log);
4832 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4835 LAST_SKIP_BITS(re, gb, log);
4836 CLOSE_READER(re, gb);
/**
 * Returns whether the 8x8 DCT may be used for the current macroblock:
 * every sub-partition must be 8x8 (or direct with the SPS
 * direct_8x8_inference_flag set).
 * NOTE(review): the loop header, return statements and closing brace are
 * missing from this listing (numbering jumps 4841->4844->4845->4852).
 */
4841 static inline int get_dct8x8_allowed(H264Context *h){
4844 if(!IS_SUB_8X8(h->sub_mb_type[i])
4845 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4852 * decodes a residual block.
4853 * @param n block index
4854 * @param scantable scantable
4855 * @param max_coeff number of coefficients in the block
4856 * @return <0 if an error occured
/**
 * Decodes one CAVLC residual block: coeff_token (total coeffs + trailing
 * ones), signed levels, total_zeros, and run_before values, writing
 * dequantized coefficients into block[] via the given scan table.
 * @param n          block index (LUMA_DC/CHROMA_DC or a scan8 AC index)
 * @param scantable  zigzag/field scan to map coeff order to block position
 * @param qmul       dequant multipliers; the level[0]-only path near 4963
 *                   suggests a qmul==NULL (DC) variant — the branch header
 *                   is missing from this listing, so confirm against the
 *                   full source
 * @param max_coeff  coefficient count of this block size
 * NOTE(review): many lines are missing (numbering jumps 4878->4882,
 * 4896->4900, 4905->4907, 4946->4951, 4974->4979, 4992->4997) — the
 * level[] declaration, early returns, suffix_length update and loop/brace
 * structure are incomplete below.
 */
4858 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4859 MpegEncContext * const s = &h->s;
4860 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4862 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4864 //FIXME put trailing_onex into the context
/* chroma DC has its own 4-entry VLC; luma blocks predict the table from neighbors */
4866 if(n == CHROMA_DC_BLOCK_INDEX){
4867 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4868 total_coeff= coeff_token>>2;
4870 if(n == LUMA_DC_BLOCK_INDEX){
4871 total_coeff= pred_non_zero_count(h, 0);
4872 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4873 total_coeff= coeff_token>>2;
4875 total_coeff= pred_non_zero_count(h, n);
4876 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4877 total_coeff= coeff_token>>2;
4878 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4882 //FIXME set last_non_zero?
4887 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y);
/* low two bits of coeff_token carry the trailing-ones count */
4891 trailing_ones= coeff_token&3;
4892 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4893 assert(total_coeff<=16);
/* trailing ones are +/-1, coded by a single sign bit each */
4895 for(i=0; i<trailing_ones; i++){
4896 level[i]= 1 - 2*get_bits1(gb);
4900 int level_code, mask;
4901 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4902 int prefix= get_level_prefix(gb);
4904 //first coefficient has suffix_length equal to 0 or 1
4905 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4907 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4909 level_code= (prefix<<suffix_length); //part
4910 }else if(prefix==14){
4912 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4914 level_code= prefix + get_bits(gb, 4); //part
4915 }else if(prefix==15){
4916 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4917 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4919 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4923 if(trailing_ones < 3) level_code += 2;
/* branchless zigzag of level_code into a signed level */
4928 mask= -(level_code&1);
4929 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4932 //remaining coefficients have suffix_length > 0
4933 for(;i<total_coeff;i++) {
4934 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4935 prefix = get_level_prefix(gb);
4937 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4938 }else if(prefix==15){
4939 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4941 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4944 mask= -(level_code&1);
4945 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* grow suffix_length as levels grow (adaptive Rice-style coding) */
4946 if(level_code > suffix_limit[suffix_length])
/* full block -> no total_zeros element in the stream */
4951 if(total_coeff == max_coeff)
4954 if(n == CHROMA_DC_BLOCK_INDEX)
4955 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4957 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place coefficients back-to-front, consuming run_before gaps */
4960 coeff_num = zeros_left + total_coeff - 1;
4961 j = scantable[coeff_num];
4963 block[j] = level[0];
4964 for(i=1;i<total_coeff;i++) {
4967 else if(zeros_left < 7){
4968 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4970 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4972 zeros_left -= run_before;
4973 coeff_num -= 1 + run_before;
4974 j= scantable[ coeff_num ];
/* dequantizing variant: level * qmul with rounding */
4979 block[j] = (level[0] * qmul[j] + 32)>>6;
4980 for(i=1;i<total_coeff;i++) {
4983 else if(zeros_left < 7){
4984 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4986 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4988 zeros_left -= run_before;
4989 coeff_num -= 1 + run_before;
4990 j= scantable[ coeff_num ];
4992 block[j]= (level[i] * qmul[j] + 32)>>6;
4997 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair from the left
 * neighbour if it belongs to this slice, else the top neighbour, else a
 * fallback (the final ':' arm is missing from this listing — numbering
 * jumps 5010->5012).
 */
5004 static void predict_field_decoding_flag(H264Context *h){
5005 MpegEncContext * const s = &h->s;
5006 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5007 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5008 ? s->current_picture.mb_type[mb_xy-1]
5009 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5010 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5012 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5016 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears coefficient counts,
 * derives the skip motion (direct prediction for B, pskip prediction for
 * P), and writes motion/type/qscale back to the picture.
 * NOTE(review): numbering jumps (5024->5027, 5036->5041, 5046->5049) —
 * the mb_type initialization, else branch and some declarations are
 * missing from this listing.
 */
5018 static void decode_mb_skip(H264Context *h){
5019 MpegEncContext * const s = &h->s;
5020 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* skipped MBs carry no residual at all */
5023 memset(h->non_zero_count[mb_xy], 0, 16);
5024 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5027 mb_type|= MB_TYPE_INTERLACED;
5029 if( h->slice_type == B_TYPE )
5031 // just for fill_caches. pred_direct_motion will set the real mb_type
5032 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5034 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5035 pred_direct_motion(h, &mb_type);
5036 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: single 16x16 partition from list 0, ref 0 */
5041 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5043 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5044 pred_pskip_motion(h, &mx, &my);
5045 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5046 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5049 write_back_motion(h, mb_type);
5050 s->current_picture.mb_type[mb_xy]= mb_type;
5051 s->current_picture.qscale_table[mb_xy]= s->qscale;
5052 h->slice_table[ mb_xy ]= h->slice_num;
/* remembered so the next MB knows a skip run preceded it */
5053 h->prev_mb_skipped= 1;
5057 * decodes a macroblock
5058 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5060 static int decode_mb_cavlc(H264Context *h){
5061 MpegEncContext * const s = &h->s;
5062 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5063 int mb_type, partition_count, cbp;
5064 int dct8x8_allowed= h->pps.transform_8x8_mode;
5066 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5068 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5069 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5071 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5072 if(s->mb_skip_run==-1)
5073 s->mb_skip_run= get_ue_golomb(&s->gb);
5075 if (s->mb_skip_run--) {
5076 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5077 if(s->mb_skip_run==0)
5078 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5080 predict_field_decoding_flag(h);
5087 if( (s->mb_y&1) == 0 )
5088 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5090 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5092 h->prev_mb_skipped= 0;
5094 mb_type= get_ue_golomb(&s->gb);
5095 if(h->slice_type == B_TYPE){
5097 partition_count= b_mb_type_info[mb_type].partition_count;
5098 mb_type= b_mb_type_info[mb_type].type;
5101 goto decode_intra_mb;
5103 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5105 partition_count= p_mb_type_info[mb_type].partition_count;
5106 mb_type= p_mb_type_info[mb_type].type;
5109 goto decode_intra_mb;
5112 assert(h->slice_type == I_TYPE);
5115 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5119 cbp= i_mb_type_info[mb_type].cbp;
5120 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5121 mb_type= i_mb_type_info[mb_type].type;
5125 mb_type |= MB_TYPE_INTERLACED;
5127 h->slice_table[ mb_xy ]= h->slice_num;
5129 if(IS_INTRA_PCM(mb_type)){
5132 // we assume these blocks are very rare so we dont optimize it
5133 align_get_bits(&s->gb);
5135 // The pixels are stored in the same order as levels in h->mb array.
5136 for(y=0; y<16; y++){
5137 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5138 for(x=0; x<16; x++){
5139 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5140 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5144 const int index= 256 + 4*(y&3) + 32*(y>>2);
5146 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5147 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5151 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5153 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5154 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5158 // In deblocking, the quantizer is 0
5159 s->current_picture.qscale_table[mb_xy]= 0;
5160 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5161 // All coeffs are present
5162 memset(h->non_zero_count[mb_xy], 16, 16);
5164 s->current_picture.mb_type[mb_xy]= mb_type;
5169 h->ref_count[0] <<= 1;
5170 h->ref_count[1] <<= 1;
5173 fill_caches(h, mb_type, 0);
5176 if(IS_INTRA(mb_type)){
5177 // init_top_left_availability(h);
5178 if(IS_INTRA4x4(mb_type)){
5181 if(dct8x8_allowed && get_bits1(&s->gb)){
5182 mb_type |= MB_TYPE_8x8DCT;
5186 // fill_intra4x4_pred_table(h);
5187 for(i=0; i<16; i+=di){
5188 int mode= pred_intra_mode(h, i);
5190 if(!get_bits1(&s->gb)){
5191 const int rem_mode= get_bits(&s->gb, 3);
5192 mode = rem_mode + (rem_mode >= mode);
5196 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5198 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5200 write_back_intra_pred_mode(h);
5201 if( check_intra4x4_pred_mode(h) < 0)
5204 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5205 if(h->intra16x16_pred_mode < 0)
5208 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5210 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5211 if(h->chroma_pred_mode < 0)
5213 }else if(partition_count==4){
5214 int i, j, sub_partition_count[4], list, ref[2][4];
5216 if(h->slice_type == B_TYPE){
5218 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5219 if(h->sub_mb_type[i] >=13){
5220 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5223 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5224 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5226 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5227 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5228 pred_direct_motion(h, &mb_type);
5229 h->ref_cache[0][scan8[4]] =
5230 h->ref_cache[1][scan8[4]] =
5231 h->ref_cache[0][scan8[12]] =
5232 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5235 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5237 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5238 if(h->sub_mb_type[i] >=4){
5239 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5242 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5243 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5247 for(list=0; list<2; list++){
5248 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5249 if(ref_count == 0) continue;
5251 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5252 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5253 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5262 dct8x8_allowed = get_dct8x8_allowed(h);
5264 for(list=0; list<2; list++){
5265 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5266 if(ref_count == 0) continue;
5269 if(IS_DIRECT(h->sub_mb_type[i])) {
5270 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5273 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5274 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5276 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5277 const int sub_mb_type= h->sub_mb_type[i];
5278 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5279 for(j=0; j<sub_partition_count[i]; j++){
5281 const int index= 4*i + block_width*j;
5282 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5283 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5284 mx += get_se_golomb(&s->gb);
5285 my += get_se_golomb(&s->gb);
5286 tprintf("final mv:%d %d\n", mx, my);
5288 if(IS_SUB_8X8(sub_mb_type)){
5289 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5290 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5291 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5292 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5293 }else if(IS_SUB_8X4(sub_mb_type)){
5294 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5295 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5296 }else if(IS_SUB_4X8(sub_mb_type)){
5297 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5298 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5300 assert(IS_SUB_4X4(sub_mb_type));
5301 mv_cache[ 0 ][0]= mx;
5302 mv_cache[ 0 ][1]= my;
5306 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5312 }else if(IS_DIRECT(mb_type)){
5313 pred_direct_motion(h, &mb_type);
5314 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5316 int list, mx, my, i;
5317 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5318 if(IS_16X16(mb_type)){
5319 for(list=0; list<2; list++){
5320 if(h->ref_count[list]>0){
5321 if(IS_DIR(mb_type, 0, list)){
5322 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5323 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5325 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5328 for(list=0; list<2; list++){
5329 if(IS_DIR(mb_type, 0, list)){
5330 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5331 mx += get_se_golomb(&s->gb);
5332 my += get_se_golomb(&s->gb);
5333 tprintf("final mv:%d %d\n", mx, my);
5335 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5337 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5340 else if(IS_16X8(mb_type)){
5341 for(list=0; list<2; list++){
5342 if(h->ref_count[list]>0){
5344 if(IS_DIR(mb_type, i, list)){
5345 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5346 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5348 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5352 for(list=0; list<2; list++){
5354 if(IS_DIR(mb_type, i, list)){
5355 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5356 mx += get_se_golomb(&s->gb);
5357 my += get_se_golomb(&s->gb);
5358 tprintf("final mv:%d %d\n", mx, my);
5360 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5362 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5366 assert(IS_8X16(mb_type));
5367 for(list=0; list<2; list++){
5368 if(h->ref_count[list]>0){
5370 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5371 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5372 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5374 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5378 for(list=0; list<2; list++){
5380 if(IS_DIR(mb_type, i, list)){
5381 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5382 mx += get_se_golomb(&s->gb);
5383 my += get_se_golomb(&s->gb);
5384 tprintf("final mv:%d %d\n", mx, my);
5386 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5388 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5394 if(IS_INTER(mb_type))
5395 write_back_motion(h, mb_type);
5397 if(!IS_INTRA16x16(mb_type)){
5398 cbp= get_ue_golomb(&s->gb);
5400 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5404 if(IS_INTRA4x4(mb_type))
5405 cbp= golomb_to_intra4x4_cbp[cbp];
5407 cbp= golomb_to_inter_cbp[cbp];
5411 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5412 if(get_bits1(&s->gb))
5413 mb_type |= MB_TYPE_8x8DCT;
5415 s->current_picture.mb_type[mb_xy]= mb_type;
5417 if(cbp || IS_INTRA16x16(mb_type)){
5418 int i8x8, i4x4, chroma_idx;
5419 int chroma_qp, dquant;
5420 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5421 const uint8_t *scan, *scan8x8, *dc_scan;
5423 // fill_non_zero_count_cache(h);
5425 if(IS_INTERLACED(mb_type)){
5426 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5427 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5428 dc_scan= luma_dc_field_scan;
5430 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5431 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5432 dc_scan= luma_dc_zigzag_scan;
5435 dquant= get_se_golomb(&s->gb);
5437 if( dquant > 25 || dquant < -26 ){
5438 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5442 s->qscale += dquant;
5443 if(((unsigned)s->qscale) > 51){
5444 if(s->qscale<0) s->qscale+= 52;
5445 else s->qscale-= 52;
5448 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5449 if(IS_INTRA16x16(mb_type)){
5450 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5451 return -1; //FIXME continue if partitioned and other return -1 too
5454 assert((cbp&15) == 0 || (cbp&15) == 15);
5457 for(i8x8=0; i8x8<4; i8x8++){
5458 for(i4x4=0; i4x4<4; i4x4++){
5459 const int index= i4x4 + 4*i8x8;
5460 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5466 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5469 for(i8x8=0; i8x8<4; i8x8++){
5470 if(cbp & (1<<i8x8)){
5471 if(IS_8x8DCT(mb_type)){
5472 DCTELEM *buf = &h->mb[64*i8x8];
5474 for(i4x4=0; i4x4<4; i4x4++){
5475 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5476 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5479 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5480 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5482 for(i4x4=0; i4x4<4; i4x4++){
5483 const int index= i4x4 + 4*i8x8;
5485 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5491 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5492 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5498 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5499 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5505 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5506 for(i4x4=0; i4x4<4; i4x4++){
5507 const int index= 16 + 4*chroma_idx + i4x4;
5508 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5514 uint8_t * const nnz= &h->non_zero_count_cache[0];
5515 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5516 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5519 uint8_t * const nnz= &h->non_zero_count_cache[0];
5520 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5521 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5522 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5524 s->current_picture.qscale_table[mb_xy]= s->qscale;
5525 write_back_non_zero_count(h);
5528 h->ref_count[0] >>= 1;
5529 h->ref_count[1] >>= 1;
/* Decode mb_field_decoding_flag (MBAFF) with CABAC.
 * ctx (0..2) is derived from whether the left and top MB pairs, when in
 * the current slice, are field (interlaced) coded; the flag is then read
 * from cabac_state[70 + ctx].
 * NOTE(review): the embedded line numbering jumps (5540->5542->5544...),
 * so the ctx increments and closing braces are missing from this view. */
5535 static int decode_cabac_field_decoding_flag(H264Context *h) {
5536 MpegEncContext * const s = &h->s;
5537 const int mb_x = s->mb_x;
5538 const int mb_y = s->mb_y & ~1;
5539 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5540 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5542 unsigned int ctx = 0;
/* left neighbour MB pair: in current slice and field-coded? */
5544 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
/* top neighbour MB pair: in current slice and field-coded? */
5547 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5551 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode an intra macroblock type (I_4x4 / I_16x16 / I_PCM) with CABAC.
 * ctx_base selects the context bank (3 for I slices; 17/32 when called
 * for intra MBs inside P/B slices) and intra_slice selects the
 * state-index layout within that bank.
 * Returns 0 for I4x4, 25 for PCM, otherwise an I16x16 index built from
 * the cbp_luma, cbp_chroma and pred-mode bins decoded below. */
5554 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5555 uint8_t *state= &h->cabac_state[ctx_base];
5559 MpegEncContext * const s = &h->s;
5560 const int mba_xy = h->left_mb_xy[0];
5561 const int mbb_xy = h->top_mb_xy;
/* ctx grows for each non-I4x4 neighbour in the same slice
 * (the increment statements fall in a gap of this extraction) */
5563 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5565 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5567 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5568 return 0; /* I4x4 */
5571 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5572 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM from I16x16 */
5575 if( get_cabac_terminate( &h->cabac ) )
5576 return 25; /* PCM */
5578 mb_type = 1; /* I16x16 */
5579 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5580 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5581 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* two bins selecting the intra16x16 prediction mode */
5582 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5583 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode mb_type for the current slice type (I / P / B) with CABAC.
 * Returns an index into the i_/p_/b_mb_type_info tables (mapped by the
 * caller), delegating to decode_cabac_intra_mb_type() for intra MBs in
 * P/B slices.  SI/SP slices are not handled (see TODO at the end).
 * NOTE(review): several lines are missing from this extraction (gaps in
 *  the embedded line numbers), e.g. ctx increments and closing braces. */
5587 static int decode_cabac_mb_type( H264Context *h ) {
5588 MpegEncContext * const s = &h->s;
5590 if( h->slice_type == I_TYPE ) {
5591 return decode_cabac_intra_mb_type(h, 3, 1);
5592 } else if( h->slice_type == P_TYPE ) {
/* first bin (state 14): inter vs intra */
5593 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5595 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5596 /* P_L0_D16x16, P_8x8 */
5597 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5599 /* P_L0_D8x16, P_L0_D16x8 */
5600 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5603 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5605 } else if( h->slice_type == B_TYPE ) {
5606 const int mba_xy = h->left_mb_xy[0];
5607 const int mbb_xy = h->top_mb_xy;
/* ctx 0..2 counts non-direct neighbours in the same slice
 * (increment statements fall in an extraction gap) */
5611 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5613 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5616 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5617 return 0; /* B_Direct_16x16 */
5619 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5620 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bin suffix selecting among the remaining B partition types */
5623 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5624 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5625 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5626 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5628 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5629 else if( bits == 13 ) {
/* escape to intra types inside a B slice */
5630 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5631 } else if( bits == 14 )
5632 return 11; /* B_L1_L0_8x16 */
5633 else if( bits == 15 )
5634 return 22; /* B_8x8 */
/* one extra bin refines the remaining bi-directional types */
5636 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5637 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5639 /* TODO SI/SP frames? */
/* Decode mb_skip_flag with CABAC; ctx counts the non-skipped left/top
 * neighbours that belong to the current slice, with a slice-type offset
 * added for B slices before indexing cabac_state[11+ctx].
 * With FRAME_MBAFF the neighbour addresses are corrected for the
 * field/frame coding of the MB pairs (top/bottom of the pair).
 * NOTE(review): parts of the neighbour-selection conditionals are lost
 * in extraction gaps; the visible fragments show the pattern. */
5644 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5645 MpegEncContext * const s = &h->s;
5649 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5650 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* left neighbour: step to the bottom MB of the pair when the pair's
 * field/frame coding matches MB_FIELD (start of condition missing) */
5653 && h->slice_table[mba_xy] == h->slice_num
5654 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5655 mba_xy += s->mb_stride;
5657 mbb_xy = mb_xy - s->mb_stride;
5659 && h->slice_table[mbb_xy] == h->slice_num
5660 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5661 mbb_xy -= s->mb_stride;
5663 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* non-MBAFF: plain left / top neighbour addresses */
5665 int mb_xy = mb_x + mb_y*s->mb_stride;
5667 mbb_xy = mb_xy - s->mb_stride;
5670 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5672 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices use a separate context bank (offset added in a missing line) */
5675 if( h->slice_type == B_TYPE )
5677 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decode one intra4x4 prediction mode: a "use predicted mode" bin
 * (state 68); otherwise a 3-bit fixed-length code (state 69) gives the
 * remaining mode, bumped past the predicted mode when mode >= pred_mode.
 * NOTE(review): the early return for the predicted-mode case and the
 * final returns fall in extraction gaps. */
5680 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5683 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5686 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5687 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5688 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5690 if( mode >= pred_mode )
/* Decode intra_chroma_pred_mode: ctx 0..2 from neighbours (same slice)
 * whose stored chroma pred mode is nonzero, then the value is read as a
 * truncated unary code using states 64+ctx / 64+3.
 * NOTE(review): ctx increments and the returns are in extraction gaps. */
5696 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5697 const int mba_xy = h->left_mb_xy[0];
5698 const int mbb_xy = h->top_mb_xy;
5702 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5703 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5706 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5709 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5712 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5714 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Map from 4x4-block decode order (zig-zag of 8x8 quadrants, 4 blocks
 * per quadrant) to its (x, y) position within the macroblock, and the
 * inverse lookup.  block_idx_xy's initializer is lost to extraction. */
5720 static const uint8_t block_idx_x[16] = {
5721 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5723 static const uint8_t block_idx_y[16] = {
5724 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5726 static const uint8_t block_idx_xy[4][4] = {
/* Decode the luma coded_block_pattern: one CABAC bin per 8x8 block
 * (states 73..76), where each bin's ctx depends on whether the 8x8
 * blocks to the left and above were coded — looked up in this MB's
 * partially-built cbp or in the neighbour's left_cbp/top_cbp.
 * NOTE(review): loop body assembly of the cbp result and several
 * intermediate lines are missing from this extraction. */
5733 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5738 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5740 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5743 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* (x, y) of this 8x8 block's first 4x4 sub-block */
5748 x = block_idx_x[4*i8x8];
5749 y = block_idx_y[4*i8x8];
5753 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5754 cbp_a = h->left_cbp;
5755 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5761 /* No need to test for skip as we put 0 for skip block */
5762 /* No need to test for IPCM as we put 1 for IPCM block */
5764 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5765 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5770 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5771 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5775 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode the chroma coded_block_pattern (0 = none, 1 = DC only,
 * 2 = DC + AC).  First bin: any chroma coefficients, ctx from the
 * neighbours' chroma cbp > 0; second bin: AC present, ctx (offset by 4,
 * set in a missing line) from the neighbours' chroma cbp == 2.
 * Both bins use the state bank starting at 77. */
5781 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5785 cbp_a = (h->left_cbp>>4)&0x03;
5786 cbp_b = (h-> top_cbp>>4)&0x03;
5789 if( cbp_a > 0 ) ctx++;
5790 if( cbp_b > 0 ) ctx += 2;
5791 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5795 if( cbp_a == 2 ) ctx++;
5796 if( cbp_b == 2 ) ctx += 2;
5797 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta: unary binarization over states 60..63, with the
 * first bin's ctx depending on whether the previous MB in decode order
 * had a nonzero delta (last_qscale_diff).  val is capped at 102 so a
 * corrupt stream cannot loop forever; the unary count is then mapped
 * back to a signed delta.
 * NOTE(review): the positive-delta branch and ctx updates fall in
 * extraction gaps; only the negative mapping is visible below. */
5799 static int decode_cabac_mb_dqp( H264Context *h) {
5800 MpegEncContext * const s = &h->s;
/* neighbour in decode order: previous MB in this row, else end of the
 * previous row */
5806 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5808 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5810 if( h->last_qscale_diff != 0 )
5813 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5819 if(val > 102) //prevent infinite loop
5826 return -(val + 1)/2;
/* Decode a P-slice sub_mb_type (index into p_sub_mb_type_info) from the
 * three-bin tree over CABAC states 21..23.
 * NOTE(review): the return statements fall in extraction gaps. */
5828 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5829 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5831 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5833 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode a B-slice sub_mb_type (index into b_sub_mb_type_info) from the
 * bin tree over CABAC states 36..39: 0 = B_Direct_8x8, 1/2 = one-list
 * 8x8, larger values built up from further bins on state 39. */
5837 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5839 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5840 return 0; /* B_Direct_8x8 */
5841 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5842 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5844 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5845 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5846 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: two refinement bins added to a base set above
 * (base assignment lost in an extraction gap) */
5849 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5850 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; ctx 399..401 is selected by
 * h->neighbor_transform_size (count of neighbouring MBs using the 8x8
 * transform, computed elsewhere). */
5854 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5855 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of reference list `list`: ctx is derived
 * from the left/top ref_cache entries (in B slices, direct-predicted
 * neighbours do not contribute), then the index is read as a unary code
 * over states 54+ctx.
 * NOTE(review): ctx assembly and the final return fall in gaps. */
5858 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5859 int refa = h->ref_cache[list][scan8[n] - 1];
5860 int refb = h->ref_cache[list][scan8[n] - 8];
5864 if( h->slice_type == B_TYPE) {
5865 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5867 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5876 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decode one motion-vector-difference component (l: 0 = x, 1 = y) for
 * block n.  ctx is chosen from the summed |mvd| of the left and top
 * neighbours; the magnitude is a unary prefix (states ctxbase+ctx,
 * capped at 9) followed by an exp-Golomb bypass suffix, and the sign is
 * bypass-coded via get_cabac_bypass_sign().
 * NOTE(review): ctx selection for small amvd, the early return for
 * mvd == 0, and parts of the suffix loop are lost in extraction gaps. */
5886 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5887 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5888 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5889 int ctxbase = (l == 0) ? 40 : 47;
5894 else if( amvd > 32 )
5899 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5904 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb escape: count the prefix, then read suffix bits bypass */
5912 while( get_cabac_bypass( &h->cabac ) ) {
5917 if( get_cabac_bypass( &h->cabac ) )
5921 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context for the coded_block_flag of block idx in category cat
 * (0: luma DC, 1/2: luma AC / 4x4, 3: chroma DC, 4: chroma AC).
 * nza/nzb are the left/top non-zero indicators — taken from the
 * neighbour cbp bits for DC categories, from non_zero_count_cache
 * otherwise — and the result is combined as ctx + 4*cat.
 * NOTE(review): the cat==0 branch head and ctx assembly from nza/nzb
 * fall in extraction gaps. */
5924 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5929 nza = h->left_cbp&0x100;
5930 nzb = h-> top_cbp&0x100;
5931 } else if( cat == 1 || cat == 2 ) {
5932 nza = h->non_zero_count_cache[scan8[idx] - 1];
5933 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5934 } else if( cat == 3 ) {
5935 nza = (h->left_cbp>>(6+idx))&0x01;
5936 nzb = (h-> top_cbp>>(6+idx))&0x01;
5939 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5940 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5949 return ctx + 4 * cat;
/* Context offsets for the last_significant_coeff flag of 8x8 blocks,
 * one entry per coefficient position 0..62 (position 63 needs none). */
5952 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
5953 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5954 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5955 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5956 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual coefficient block with CABAC:
 *   1. coded_block_flag (state 85 + get_cabac_cbf_ctx()): early out,
 *      zeroing the non_zero_count_cache entry, when the block is empty;
 *   2. significance map + last-coefficient flags (x86 asm fast paths
 *      where available, generic DECODE_SIGNIFICANCE macro otherwise);
 *   3. magnitudes — coeff_abs_level_minus1 with unary prefix and
 *      exp-Golomb bypass escape at 15 — and bypass-coded signs, scaled
 *      through qmul unless qmul == NULL (the DC-block calls pass NULL
 *      and store unscaled levels).
 * Context banks differ between frame and field coding (MB_FIELD index
 * into the offset tables).  With CABAC_ON_STACK the decoder state is
 * copied to a local and written back around the hot loops —
 * NOTE(review): presumably so it can stay in registers; confirm.
 * NOTE(review): this extraction has gaps (see jumps in the embedded
 * line numbers): some declarations, #else/#endif lines and braces are
 * missing from this view. */
5959 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5960 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* significance / last / level context offsets, [frame=0 / field=1][cat] */
5961 static const int significant_coeff_flag_offset[2][6] = {
5962 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5963 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5965 static const int last_coeff_flag_offset[2][6] = {
5966 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5967 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5969 static const int coeff_abs_level_m1_offset[6] = {
5970 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position significance ctx for 8x8 blocks, frame vs field scan */
5972 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5973 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5974 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5975 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5976 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5977 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5978 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5979 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5980 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5986 int coeff_count = 0;
5989 int abslevelgt1 = 0;
5991 uint8_t *significant_coeff_ctx_base;
5992 uint8_t *last_coeff_ctx_base;
5993 uint8_t *abs_level_m1_ctx_base;
5996 #define CABAC_ON_STACK
5998 #ifdef CABAC_ON_STACK
/* copy decoder state to the stack for the duration of this block */
6001 cc.range = h->cabac.range;
6002 cc.low = h->cabac.low;
6003 cc.bytestream= h->cabac.bytestream;
6005 #define CC &h->cabac
6009 /* cat: 0-> DC 16x16 n = 0
6010 * 1-> AC 16x16 n = luma4x4idx
6011 * 2-> Luma4x4 n = luma4x4idx
6012 * 3-> DC Chroma n = iCbCr
6013 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6014 * 5-> Luma8x8 n = 4 * luma8x8idx
6017 /* read coded block flag */
6019 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* empty block: clear the cached non-zero count and bail out */
6020 if( cat == 1 || cat == 2 )
6021 h->non_zero_count_cache[scan8[n]] = 0;
6023 h->non_zero_count_cache[scan8[16+n]] = 0;
6024 #ifdef CABAC_ON_STACK
6025 h->cabac.range = cc.range ;
6026 h->cabac.low = cc.low ;
6027 h->cabac.bytestream= cc.bytestream;
6033 significant_coeff_ctx_base = h->cabac_state
6034 + significant_coeff_flag_offset[MB_FIELD][cat];
6035 last_coeff_ctx_base = h->cabac_state
6036 + last_coeff_flag_offset[MB_FIELD][cat];
6037 abs_level_m1_ctx_base = h->cabac_state
6038 + coeff_abs_level_m1_offset[cat];
/* generic significance-map decoder; records each significant position
 * in index[] and stops when the "last coefficient" flag fires */
6041 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6042 for(last= 0; last < coefs; last++) { \
6043 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6044 if( get_cabac( CC, sig_ctx )) { \
6045 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6046 index[coeff_count++] = last; \
6047 if( get_cabac( CC, last_ctx ) ) { \
6053 if( last == max_coeff -1 ) {\
6054 index[coeff_count++] = last;\
6056 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6057 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
6058 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6060 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6062 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6064 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6067 assert(coeff_count > 0);
/* record the non-zero count for this block per category */
6070 h->cbp_table[mb_xy] |= 0x100;
6071 else if( cat == 1 || cat == 2 )
6072 h->non_zero_count_cache[scan8[n]] = coeff_count;
6074 h->cbp_table[mb_xy] |= 0x40 << n;
6076 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6079 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode magnitudes and signs, last significant coefficient first */
6082 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6083 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6084 int j= scantable[index[coeff_count]];
6086 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign only, dequantize through qmul when present */
6088 block[j] = get_cabac_bypass_sign( CC, -1);
6090 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6096 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6097 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* escape: exp-Golomb suffix in bypass mode for |level| >= 15 */
6101 if( coeff_abs >= 15 ) {
6103 while( get_cabac_bypass( CC ) ) {
6109 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6115 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6116 else block[j] = coeff_abs;
6118 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6119 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the (possibly stack-local) decoder state back */
6125 #ifdef CABAC_ON_STACK
6126 h->cabac.range = cc.range ;
6127 h->cabac.low = cc.low ;
6128 h->cabac.bytestream= cc.bytestream;
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * Non-MBAFF: simply mb_xy - mb_stride and mb_xy - 1.  With FRAME_MBAFF
 * the addresses are adjusted according to the frame/field coding of the
 * current MB pair versus the top and left pairs, and whether this is
 * the top or bottom MB of its pair.
 * NOTE(review): the FRAME_MBAFF guard and parts of the top-neighbour
 * condition fall in extraction gaps. */
6133 static void inline compute_mb_neighbors(H264Context *h)
6135 MpegEncContext * const s = &h->s;
6136 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6137 h->top_mb_xy = mb_xy - s->mb_stride;
6138 h->left_mb_xy[0] = mb_xy - 1;
6140 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6141 const int top_pair_xy = pair_xy - s->mb_stride;
6142 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6143 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6144 const int curr_mb_frame_flag = !MB_FIELD;
6145 const int bottom = (s->mb_y & 1);
6147 ? !curr_mb_frame_flag // bottom macroblock
6148 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6150 h->top_mb_xy -= s->mb_stride;
6152 if (left_mb_frame_flag != curr_mb_frame_flag) {
6153 h->left_mb_xy[0] = pair_xy - 1;
6160 * decodes a macroblock
6161 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6163 static int decode_mb_cabac(H264Context *h) {
6164 MpegEncContext * const s = &h->s;
6165 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6166 int mb_type, partition_count, cbp = 0;
6167 int dct8x8_allowed= h->pps.transform_8x8_mode;
6169 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6171 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6172 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6174 /* a skipped mb needs the aff flag from the following mb */
6175 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6176 predict_field_decoding_flag(h);
6177 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6178 skip = h->next_mb_skipped;
6180 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6181 /* read skip flags */
6183 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6184 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6185 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6186 if(h->next_mb_skipped)
6187 predict_field_decoding_flag(h);
6189 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6194 h->cbp_table[mb_xy] = 0;
6195 h->chroma_pred_mode_table[mb_xy] = 0;
6196 h->last_qscale_diff = 0;
6203 if( (s->mb_y&1) == 0 )
6205 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6207 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6209 h->prev_mb_skipped = 0;
6211 compute_mb_neighbors(h);
6212 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6213 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6217 if( h->slice_type == B_TYPE ) {
6219 partition_count= b_mb_type_info[mb_type].partition_count;
6220 mb_type= b_mb_type_info[mb_type].type;
6223 goto decode_intra_mb;
6225 } else if( h->slice_type == P_TYPE ) {
6227 partition_count= p_mb_type_info[mb_type].partition_count;
6228 mb_type= p_mb_type_info[mb_type].type;
6231 goto decode_intra_mb;
6234 assert(h->slice_type == I_TYPE);
6236 partition_count = 0;
6237 cbp= i_mb_type_info[mb_type].cbp;
6238 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6239 mb_type= i_mb_type_info[mb_type].type;
6242 mb_type |= MB_TYPE_INTERLACED;
6244 h->slice_table[ mb_xy ]= h->slice_num;
6246 if(IS_INTRA_PCM(mb_type)) {
6250 // We assume these blocks are very rare so we dont optimize it.
6251 // FIXME The two following lines get the bitstream position in the cabac
6252 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6253 ptr= h->cabac.bytestream;
6254 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6256 // The pixels are stored in the same order as levels in h->mb array.
6257 for(y=0; y<16; y++){
6258 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6259 for(x=0; x<16; x++){
6260 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6261 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6265 const int index= 256 + 4*(y&3) + 32*(y>>2);
6267 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6268 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6272 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6274 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6275 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6279 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6281 // All blocks are present
6282 h->cbp_table[mb_xy] = 0x1ef;
6283 h->chroma_pred_mode_table[mb_xy] = 0;
6284 // In deblocking, the quantizer is 0
6285 s->current_picture.qscale_table[mb_xy]= 0;
6286 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6287 // All coeffs are present
6288 memset(h->non_zero_count[mb_xy], 16, 16);
6289 s->current_picture.mb_type[mb_xy]= mb_type;
6294 h->ref_count[0] <<= 1;
6295 h->ref_count[1] <<= 1;
6298 fill_caches(h, mb_type, 0);
6300 if( IS_INTRA( mb_type ) ) {
6302 if( IS_INTRA4x4( mb_type ) ) {
6303 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6304 mb_type |= MB_TYPE_8x8DCT;
6305 for( i = 0; i < 16; i+=4 ) {
6306 int pred = pred_intra_mode( h, i );
6307 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6308 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6311 for( i = 0; i < 16; i++ ) {
6312 int pred = pred_intra_mode( h, i );
6313 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6315 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6318 write_back_intra_pred_mode(h);
6319 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6321 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6322 if( h->intra16x16_pred_mode < 0 ) return -1;
6324 h->chroma_pred_mode_table[mb_xy] =
6325 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6327 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6328 if( h->chroma_pred_mode < 0 ) return -1;
6329 } else if( partition_count == 4 ) {
6330 int i, j, sub_partition_count[4], list, ref[2][4];
6332 if( h->slice_type == B_TYPE ) {
6333 for( i = 0; i < 4; i++ ) {
6334 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6335 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6336 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6338 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6339 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6340 pred_direct_motion(h, &mb_type);
6341 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6342 for( i = 0; i < 4; i++ )
6343 if( IS_DIRECT(h->sub_mb_type[i]) )
6344 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6348 for( i = 0; i < 4; i++ ) {
6349 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6350 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6351 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6355 for( list = 0; list < 2; list++ ) {
6356 if( h->ref_count[list] > 0 ) {
6357 for( i = 0; i < 4; i++ ) {
6358 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6359 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6360 if( h->ref_count[list] > 1 )
6361 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6367 h->ref_cache[list][ scan8[4*i]+1 ]=
6368 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6374 dct8x8_allowed = get_dct8x8_allowed(h);
6376 for(list=0; list<2; list++){
6378 if(IS_DIRECT(h->sub_mb_type[i])){
6379 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6382 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6384 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6385 const int sub_mb_type= h->sub_mb_type[i];
6386 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6387 for(j=0; j<sub_partition_count[i]; j++){
6390 const int index= 4*i + block_width*j;
6391 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6392 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6393 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6395 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6396 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6397 tprintf("final mv:%d %d\n", mx, my);
6399 if(IS_SUB_8X8(sub_mb_type)){
6400 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6401 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6402 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6403 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6405 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6406 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6407 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6408 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6409 }else if(IS_SUB_8X4(sub_mb_type)){
6410 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6411 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6413 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6414 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6415 }else if(IS_SUB_4X8(sub_mb_type)){
6416 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6417 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6419 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6420 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6422 assert(IS_SUB_4X4(sub_mb_type));
6423 mv_cache[ 0 ][0]= mx;
6424 mv_cache[ 0 ][1]= my;
6426 mvd_cache[ 0 ][0]= mx - mpx;
6427 mvd_cache[ 0 ][1]= my - mpy;
6431 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6432 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6433 p[0] = p[1] = p[8] = p[9] = 0;
6434 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6438 } else if( IS_DIRECT(mb_type) ) {
6439 pred_direct_motion(h, &mb_type);
6440 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6441 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6442 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6444 int list, mx, my, i, mpx, mpy;
6445 if(IS_16X16(mb_type)){
6446 for(list=0; list<2; list++){
6447 if(IS_DIR(mb_type, 0, list)){
6448 if(h->ref_count[list] > 0 ){
6449 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6450 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6453 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6455 for(list=0; list<2; list++){
6456 if(IS_DIR(mb_type, 0, list)){
6457 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6459 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6460 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6461 tprintf("final mv:%d %d\n", mx, my);
6463 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6464 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6466 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6469 else if(IS_16X8(mb_type)){
6470 for(list=0; list<2; list++){
6471 if(h->ref_count[list]>0){
6473 if(IS_DIR(mb_type, i, list)){
6474 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6475 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6477 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6481 for(list=0; list<2; list++){
6483 if(IS_DIR(mb_type, i, list)){
6484 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6485 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6486 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6487 tprintf("final mv:%d %d\n", mx, my);
6489 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6490 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6492 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6493 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6498 assert(IS_8X16(mb_type));
6499 for(list=0; list<2; list++){
6500 if(h->ref_count[list]>0){
6502 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6503 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6504 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6506 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6510 for(list=0; list<2; list++){
6512 if(IS_DIR(mb_type, i, list)){
6513 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6514 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6515 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6517 tprintf("final mv:%d %d\n", mx, my);
6518 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6521 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6522 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6529 if( IS_INTER( mb_type ) ) {
6530 h->chroma_pred_mode_table[mb_xy] = 0;
6531 write_back_motion( h, mb_type );
6534 if( !IS_INTRA16x16( mb_type ) ) {
6535 cbp = decode_cabac_mb_cbp_luma( h );
6536 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6539 h->cbp_table[mb_xy] = h->cbp = cbp;
6541 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6542 if( decode_cabac_mb_transform_size( h ) )
6543 mb_type |= MB_TYPE_8x8DCT;
6545 s->current_picture.mb_type[mb_xy]= mb_type;
6547 if( cbp || IS_INTRA16x16( mb_type ) ) {
6548 const uint8_t *scan, *scan8x8, *dc_scan;
6551 if(IS_INTERLACED(mb_type)){
6552 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6553 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6554 dc_scan= luma_dc_field_scan;
6556 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6557 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6558 dc_scan= luma_dc_zigzag_scan;
6561 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6562 if( dqp == INT_MIN ){
6563 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6567 if(((unsigned)s->qscale) > 51){
6568 if(s->qscale<0) s->qscale+= 52;
6569 else s->qscale-= 52;
6571 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6573 if( IS_INTRA16x16( mb_type ) ) {
6575 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6576 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6579 for( i = 0; i < 16; i++ ) {
6580 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6581 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6585 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6589 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6590 if( cbp & (1<<i8x8) ) {
6591 if( IS_8x8DCT(mb_type) ) {
6592 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6593 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6596 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6597 const int index = 4*i8x8 + i4x4;
6598 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6600 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6602 //STOP_TIMER("decode_residual")
6605 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6606 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6613 for( c = 0; c < 2; c++ ) {
6614 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6615 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6622 for( c = 0; c < 2; c++ ) {
6623 for( i = 0; i < 4; i++ ) {
6624 const int index = 16 + 4 * c + i;
6625 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6626 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6631 uint8_t * const nnz= &h->non_zero_count_cache[0];
6632 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6633 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6636 uint8_t * const nnz= &h->non_zero_count_cache[0];
6637 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6638 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6639 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6640 h->last_qscale_diff = 0;
6643 s->current_picture.qscale_table[mb_xy]= s->qscale;
6644 write_back_non_zero_count(h);
6647 h->ref_count[0] >>= 1;
6648 h->ref_count[1] >>= 1;
6655 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one vertical luma edge (16 pixels tall) whose right side starts at pix.
 * bS[4] holds the boundary strength of each 4-pixel segment; qp selects the
 * alpha/beta clipping thresholds via the +52-biased lookup tables.
 * NOTE(review): this paste is elided (gaps in the embedded line numbers), so
 * declarations, braces and else-branches between the statements below are
 * missing from this view; do not treat it as compilable as-is. */
6657 const int index_a = qp + h->slice_alpha_c0_offset;
6658 const int alpha = (alpha_table+52)[index_a];
6659 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Normal filtering path (bS < 4): build per-segment tc and hand off to the
 * optimized dsputil routine; tc = -1 marks a segment as "do not filter". */
6664 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6665 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6667 /* 16px edge length, because bS=4 is triggered by being at
6668 * the edge of an intra MB, so all 4 bS are the same */
6669 for( d = 0; d < 16; d++ ) {
6670 const int p0 = pix[-1];
6671 const int p1 = pix[-2];
6672 const int p2 = pix[-3];
6674 const int q0 = pix[0];
6675 const int q1 = pix[1];
6676 const int q2 = pix[2];
/* Only filter where the step across the edge looks like a blocking artifact
 * (small gradient), not a genuine image edge. */
6678 if( FFABS( p0 - q0 ) < alpha &&
6679 FFABS( p1 - p0 ) < beta &&
6680 FFABS( q1 - q0 ) < beta ) {
/* Strong (bS==4) filtering per the H.264 spec, 8.7: extra closeness test
 * selects the 4/5-tap smoothing of p0..p2 / q0..q2. */
6682 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6683 if( FFABS( p2 - p0 ) < beta)
6685 const int p3 = pix[-4];
6687 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6688 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6689 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6692 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6694 if( FFABS( q2 - q0 ) < beta)
6696 const int q3 = pix[3];
6698 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6699 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6700 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6703 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Fallback when the closeness test fails: replace only p0/q0 with 3-tap. */
6707 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6708 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6710 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6716 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one vertical chroma edge. Chroma uses the simpler 2-pixel filter:
 * bS < 4 goes through the tc-clipped normal filter, bS == 4 (intra edge)
 * through the parameter-free intra filter. Note chroma tc is tc0 + 1.
 * NOTE(review): elided paste — the bS[0] < 4 branch structure and the tc[]
 * declaration/loop between these lines are missing from this view. */
6718 const int index_a = qp + h->slice_alpha_c0_offset;
6719 const int alpha = (alpha_table+52)[index_a];
6720 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6725 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6726 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6728 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6732 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Deblock the special first vertical luma edge of an MBAFF macroblock pair.
 * Unlike the non-MBAFF case there are 8 boundary strengths (bS[8]) and two
 * quantizers (qp[2], one per neighboring field/frame MB), so the filter runs
 * scalar, one row at a time, instead of via the dsputil fast path.
 * NOTE(review): elided paste — variable declarations, braces and else-
 * branches between the numbered lines are missing from this view. */
6734 for( i = 0; i < 16; i++, pix += stride) {
/* Map row i to its bS entry; the mapping differs between field and frame
 * decoding of the current MB (see MB_FIELD use below for qp_index). */
6740 int bS_index = (i >> 1);
6743 bS_index |= (i & 1);
6746 if( bS[bS_index] == 0 ) {
/* Pick which of the two neighbor QPs applies to this row. */
6750 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6751 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6752 alpha = (alpha_table+52)[index_a];
6753 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (bS < 4) filtering: tc-clipped deltas on p0/q0 and optionally
 * p1/q1 when the secondary gradients are small. */
6755 if( bS[bS_index] < 4 ) {
6756 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6757 const int p0 = pix[-1];
6758 const int p1 = pix[-2];
6759 const int p2 = pix[-3];
6760 const int q0 = pix[0];
6761 const int q1 = pix[1];
6762 const int q2 = pix[2];
6764 if( FFABS( p0 - q0 ) < alpha &&
6765 FFABS( p1 - p0 ) < beta &&
6766 FFABS( q1 - q0 ) < beta ) {
6770 if( FFABS( p2 - p0 ) < beta ) {
6771 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6774 if( FFABS( q2 - q0 ) < beta ) {
6775 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6779 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6780 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6781 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6782 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) filtering: same smoothing taps as filter_mb_edgev. */
6785 const int p0 = pix[-1];
6786 const int p1 = pix[-2];
6787 const int p2 = pix[-3];
6789 const int q0 = pix[0];
6790 const int q1 = pix[1];
6791 const int q2 = pix[2];
6793 if( FFABS( p0 - q0 ) < alpha &&
6794 FFABS( p1 - p0 ) < beta &&
6795 FFABS( q1 - q0 ) < beta ) {
6797 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6798 if( FFABS( p2 - p0 ) < beta)
6800 const int p3 = pix[-4];
6802 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6803 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6804 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6807 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6809 if( FFABS( q2 - q0 ) < beta)
6811 const int q3 = pix[3];
6813 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6814 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6815 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6818 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6822 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6823 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6825 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6830 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
/* Chroma counterpart of filter_mb_mbaff_edgev: deblock the first vertical
 * chroma edge of an MBAFF pair (8 rows), scalar, with two neighbor QPs.
 * Chroma only ever touches p0/q0; p1/q1 are read but never written.
 * NOTE(review): elided paste — the bS_index computation and several braces
 * between the numbered lines are missing from this view. */
6832 for( i = 0; i < 8; i++, pix += stride) {
6840 if( bS[bS_index] == 0 ) {
/* Field decoding splits the 8 rows 4/4 between the two QPs; frame
 * decoding alternates rows. */
6844 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6845 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6846 alpha = (alpha_table+52)[index_a];
6847 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (bS < 4) filtering; chroma tc is tc0 + 1. */
6849 if( bS[bS_index] < 4 ) {
6850 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6851 const int p0 = pix[-1];
6852 const int p1 = pix[-2];
6853 const int q0 = pix[0];
6854 const int q1 = pix[1];
6856 if( FFABS( p0 - q0 ) < alpha &&
6857 FFABS( p1 - p0 ) < beta &&
6858 FFABS( q1 - q0 ) < beta ) {
6859 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6861 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6862 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6863 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) chroma filtering: fixed 3-tap replacement of p0/q0. */
6866 const int p0 = pix[-1];
6867 const int p1 = pix[-2];
6868 const int q0 = pix[0];
6869 const int q1 = pix[1];
6871 if( FFABS( p0 - q0 ) < alpha &&
6872 FFABS( p1 - p0 ) < beta &&
6873 FFABS( q1 - q0 ) < beta ) {
6875 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6876 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6877 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6883 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one horizontal luma edge (16 pixels wide) at pix; same algorithm
 * as filter_mb_edgev but addressing samples across rows via pix_next.
 * NOTE(review): elided paste — declarations, braces and else-branches
 * between the numbered lines are missing from this view. */
6885 const int index_a = qp + h->slice_alpha_c0_offset;
6886 const int alpha = (alpha_table+52)[index_a];
6887 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6888 const int pix_next = stride;
/* Normal path (bS < 4): per-segment tc, dispatched to dsputil; tc = -1
 * marks an unfiltered segment. */
6893 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6894 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6896 /* 16px edge length, see filter_mb_edgev */
6897 for( d = 0; d < 16; d++ ) {
6898 const int p0 = pix[-1*pix_next];
6899 const int p1 = pix[-2*pix_next];
6900 const int p2 = pix[-3*pix_next];
6901 const int q0 = pix[0];
6902 const int q1 = pix[1*pix_next];
6903 const int q2 = pix[2*pix_next];
6905 if( FFABS( p0 - q0 ) < alpha &&
6906 FFABS( p1 - p0 ) < beta &&
6907 FFABS( q1 - q0 ) < beta ) {
6909 const int p3 = pix[-4*pix_next];
6910 const int q3 = pix[ 3*pix_next];
/* Strong (bS == 4) filtering, same taps as the vertical variant. */
6912 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6913 if( FFABS( p2 - p0 ) < beta) {
6915 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6916 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6917 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6920 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6922 if( FFABS( q2 - q0 ) < beta) {
6924 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6925 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6926 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6929 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6933 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6934 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6936 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6943 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* Deblock one horizontal chroma edge; mirror of filter_mb_edgecv using the
 * vertical-direction dsputil chroma filters. Chroma tc is tc0 + 1.
 * NOTE(review): elided paste — the bS[0] < 4 branch structure and the tc[]
 * declaration/loop between these lines are missing from this view. */
6945 const int index_a = qp + h->slice_alpha_c0_offset;
6946 const int alpha = (alpha_table+52)[index_a];
6947 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6952 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6953 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6955 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6959 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
/* Fast-path deblocking of one macroblock: precomputes boundary strengths
 * in bulk (via dsp.h264_loop_filter_strength or the fixed intra values)
 * instead of the per-edge derivation done by filter_mb(). Falls back to
 * filter_mb() on picture borders or when the dsp hook is unavailable;
 * never used with MBAFF (asserted below).
 * NOTE(review): elided paste — early returns, braces and the tail of the
 * FILTER() macro expansion region are missing from this view. */
6960 MpegEncContext * const s = &h->s;
6962 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6964 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
6965 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6968 assert(!FRAME_MBAFF);
6970 mb_xy = mb_x + mb_y*s->mb_stride;
6971 mb_type = s->current_picture.mb_type[mb_xy];
/* Average the current QP with the left (qp0) and top (qp1) neighbors for
 * the MB-border edges, for both luma and chroma. */
6972 qp = s->current_picture.qscale_table[mb_xy];
6973 qp0 = s->current_picture.qscale_table[mb_xy-1];
6974 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6975 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
6976 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
6977 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
6978 qp0 = (qp + qp0 + 1) >> 1;
6979 qp1 = (qp + qp1 + 1) >> 1;
6980 qpc0 = (qpc + qpc0 + 1) >> 1;
6981 qpc1 = (qpc + qpc1 + 1) >> 1;
/* For sufficiently low QP, alpha/beta are zero and filtering is a no-op,
 * so skip the whole MB (conservative estimate, ignores beta_offset). */
6982 qp_thresh = 15 - h->slice_alpha_c0_offset;
6983 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6984 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: bS is fixed (4 on the MB border, 3 inside), so filter all
 * edges directly; 8x8 transform skips the interior 4x4 edges. */
6987 if( IS_INTRA(mb_type) ) {
6988 int16_t bS4[4] = {4,4,4,4};
6989 int16_t bS3[4] = {3,3,3,3};
6990 if( IS_8x8DCT(mb_type) ) {
6991 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6992 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6993 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
6994 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6996 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6997 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6998 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6999 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7000 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7001 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7002 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7003 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
7005 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7006 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7007 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7008 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7009 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7010 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7011 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7012 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute all bS values at once into an aligned 2 (dir) x 4
 * (edge) x 4 (segment) table, viewed as uint64_t per edge for fast
 * zero/constant tests. */
7015 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7016 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7018 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7020 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
7022 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7023 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7024 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7025 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7027 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7028 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7029 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7030 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbors force bS=4 on the corresponding MB-border edge. */
7032 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7033 bSv[0][0] = 0x0004000400040004ULL;
7034 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7035 bSv[1][0] = 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): apply luma + (even edges only) chroma filtering for
 * one edge, choosing border QP (qp0/qp1/qpc0/qpc1) for edge 0. */
7037 #define FILTER(hv,dir,edge)\
7038 if(bSv[dir][edge]) {\
7039 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7041 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7042 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7048 } else if( IS_8x8DCT(mb_type) ) {
7067 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
/* Full (reference) deblocking of one macroblock: derives the boundary
 * strength bS for every 4-pixel segment of every internal and border edge,
 * then calls the edge filters. Handles the MBAFF special cases that
 * filter_mb_fast() cannot.
 * NOTE(review): elided paste — many declarations, braces and else-branches
 * between the numbered lines are missing from this view. */
7068 MpegEncContext * const s = &h->s;
7069 const int mb_xy= mb_x + mb_y*s->mb_stride;
7070 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs use a halved vertical MV threshold for bS derivation. */
7071 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7072 int first_vertical_edge_done = 0;
7074 /* FIXME: A given frame may occupy more than one position in
7075 * the reference list. So ref2frm should be populated with
7076 * frame numbers, not indices. */
7077 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7078 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7080 //for sufficiently low qp, filtering wouldn't do anything
7081 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7083 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7084 int qp = s->current_picture.qscale_table[mb_xy];
7086 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7087 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left MB pair has a different interlacing type,
 * so the first vertical edge needs 8 bS values and 2 QPs. */
7093 // left mb is in picture
7094 && h->slice_table[mb_xy-1] != 255
7095 // and current and left pair do not have the same interlaced type
7096 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7097 // and left mb is in the same slice if deblocking_filter == 2
7098 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7099 /* First vertical edge is different in MBAFF frames
7100 * There are 8 different bS to compute and 2 different Qp
7102 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7103 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7107 int mb_qp, mbn0_qp, mbn1_qp;
7109 first_vertical_edge_done = 1;
7111 if( IS_INTRA(mb_type) )
7112 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7114 for( i = 0; i < 8; i++ ) {
/* Map each of the 8 segments to the correct left neighbor of the pair,
 * depending on whether the current MB is field- or frame-coded. */
7115 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7117 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7119 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7120 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7121 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two QPs: one averaged with each left-pair MB, for luma and chroma. */
7128 mb_qp = s->current_picture.qscale_table[mb_xy];
7129 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7130 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7131 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7132 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7133 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7134 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7135 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7136 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7139 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7140 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7141 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7142 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7143 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7145 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7146 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbor MB across the dir-0 edge (left for vertical,
 * top for horizontal); start=1 skips the border edge when the neighbor
 * lies outside the slice/picture. */
7149 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7150 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7151 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7153 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7154 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7155 // how often to recheck mv-based bS when iterating between edges
7156 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7157 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7158 // how often to recheck mv-based bS when iterating along each edge
7159 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7161 if (first_vertical_edge_done) {
7163 first_vertical_edge_done = 0;
7166 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* Norm special case: a frame MB above an interlaced pair must filter its
 * top border twice, once against each field, at doubled line stride. */
7169 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7170 && !IS_INTERLACED(mb_type)
7171 && IS_INTERLACED(mbm_type)
7173 // This is a special case in the norm where the filtering must
7174 // be done twice (one each of the field) even if we are in a
7175 // frame macroblock.
7177 static const int nnz_idx[4] = {4,5,6,3};
7178 unsigned int tmp_linesize = 2 * linesize;
7179 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7180 int mbn_xy = mb_xy - 2 * s->mb_stride;
7185 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7186 if( IS_INTRA(mb_type) ||
7187 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7188 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7190 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7191 for( i = 0; i < 4; i++ ) {
7192 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7193 mbn_nnz[nnz_idx[i]] != 0 )
7199 // Do not use s->qscale as luma quantizer because it has not the same
7200 // value in IPCM macroblocks.
7201 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7202 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7203 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7204 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7205 chroma_qp = ( h->chroma_qp +
7206 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7207 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7208 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Main loop: derive bS for each of the (up to 4) edges in this direction
 * and filter the ones with nonzero strength. */
7215 for( edge = start; edge < edges; edge++ ) {
7216 /* mbn_xy: neighbor macroblock */
7217 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7218 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform has no coefficients on odd internal 4x4 edges. */
7222 if( (edge&1) && IS_8x8DCT(mb_type) )
7225 if( IS_INTRA(mb_type) ||
7226 IS_INTRA(mbn_type) ) {
7229 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7230 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7239 bS[0] = bS[1] = bS[2] = bS[3] = value;
7244 if( edge & mask_edge ) {
7245 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed field/frame neighbors in MBAFF always get bS=1. */
7248 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7249 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut: if the partition layout guarantees all four
 * segments share the same mv/ref comparison, compute bS once. */
7252 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7253 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7254 int bn_idx= b_idx - (dir ? 8:1);
7256 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7257 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7258 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7259 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7261 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: derive bS per 4-pixel segment from nnz and mv/ref. */
7267 for( i = 0; i < 4; i++ ) {
7268 int x = dir == 0 ? edge : i;
7269 int y = dir == 0 ? i : edge;
7270 int b_idx= 8 + 4 + x + 8*y;
7271 int bn_idx= b_idx - (dir ? 8:1);
7273 if( h->non_zero_count_cache[b_idx] != 0 ||
7274 h->non_zero_count_cache[bn_idx] != 0 ) {
7280 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7281 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7282 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7283 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7291 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7296 // Do not use s->qscale as luma quantizer because it has not the same
7297 // value in IPCM macroblocks.
7298 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7299 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7300 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7301 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Chroma has half the resolution, so only even luma edges exist there. */
7303 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7304 if( (edge&1) == 0 ) {
7305 int chroma_qp = ( h->chroma_qp +
7306 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7307 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7308 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7311 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7312 if( (edge&1) == 0 ) {
7313 int chroma_qp = ( h->chroma_qp +
7314 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7315 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7316 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7323 static int decode_slice(H264Context *h){
7324 MpegEncContext * const s = &h->s;
7325 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7329 if( h->pps.cabac ) {
7333 align_get_bits( &s->gb );
7336 ff_init_cabac_states( &h->cabac);
7337 ff_init_cabac_decoder( &h->cabac,
7338 s->gb.buffer + get_bits_count(&s->gb)/8,
7339 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7340 /* calculate pre-state */
7341 for( i= 0; i < 460; i++ ) {
7343 if( h->slice_type == I_TYPE )
7344 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7346 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7349 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7351 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7356 int ret = decode_mb_cabac(h);
7358 //STOP_TIMER("decode_mb_cabac")
7360 if(ret>=0) hl_decode_mb(h);
7362 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7365 if(ret>=0) ret = decode_mb_cabac(h);
7367 if(ret>=0) hl_decode_mb(h);
7370 eos = get_cabac_terminate( &h->cabac );
7372 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7373 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7374 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7378 if( ++s->mb_x >= s->mb_width ) {
7380 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7387 if( eos || s->mb_y >= s->mb_height ) {
7388 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7389 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7396 int ret = decode_mb_cavlc(h);
7398 if(ret>=0) hl_decode_mb(h);
7400 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7402 ret = decode_mb_cavlc(h);
7404 if(ret>=0) hl_decode_mb(h);
7409 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7410 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7415 if(++s->mb_x >= s->mb_width){
7417 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7422 if(s->mb_y >= s->mb_height){
7423 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7425 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7426 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7430 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7437 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7438 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7439 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7440 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7444 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7453 for(;s->mb_y < s->mb_height; s->mb_y++){
7454 for(;s->mb_x < s->mb_width; s->mb_x++){
7455 int ret= decode_mb(h);
7460 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7461 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7466 if(++s->mb_x >= s->mb_width){
7468 if(++s->mb_y >= s->mb_height){
7469 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7470 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7474 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7481 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7482 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7483 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7487 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7494 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7497 return -1; //not reached
/**
 * Parses an SEI "unregistered user data" payload of 'size' bytes.
 * Scans the payload for an x264 version banner and, if found, stores the
 * build number in h->x264_build (used elsewhere for encoder-bug workarounds).
 * NOTE(review): interior lines of this function are missing from this
 * extract; the visible code is kept byte-identical.
 */
7500 static int decode_unregistered_user_data(H264Context *h, int size){
7501 MpegEncContext * const s = &h->s;
7502 uint8_t user_data[16+256];
// Copy at most sizeof(user_data)-1 bytes so the buffer can stay NUL-terminated.
7508 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7509 user_data[i]= get_bits(&s->gb, 8);
// The first 16 bytes are the SEI UUID; the banner text starts at offset 16.
7513 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7514 if(e==1 && build>=0)
7515 h->x264_build= build;
7517 if(s->avctx->debug & FF_DEBUG_BUGS)
7518 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skip any payload bytes that did not fit into user_data.
7521 skip_bits(&s->gb, 8);
/**
 * Parses the SEI NAL unit.
 * Each SEI message encodes its payload type and size as sequences of 0xFF
 * bytes followed by a terminating byte (each 0xFF adds 255 to the value).
 * NOTE(review): interior lines (type/size initialization, payload dispatch)
 * are missing from this extract; visible code kept byte-identical.
 */
7526 static int decode_sei(H264Context *h){
7527 MpegEncContext * const s = &h->s;
// Loop while at least one more message header (16 bits) could fit.
7529 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate payload type: every 0xFF byte adds 255, the last byte terminates.
7534 type+= show_bits(&s->gb, 8);
7535 }while(get_bits(&s->gb, 8) == 255);
// Same accumulation scheme for the payload size (in bytes).
7539 size+= show_bits(&s->gb, 8);
7540 }while(get_bits(&s->gb, 8) == 255);
7544 if(decode_unregistered_user_data(h, size) < 0)
// Unhandled payload types are skipped wholesale.
7548 skip_bits(&s->gb, 8*size);
7551 //FIXME check bits here
7552 align_get_bits(&s->gb);
/**
 * Parses HRD (Hypothetical Reference Decoder) parameters from the VUI.
 * All fields are read and discarded; the decoder does not use them.
 */
7558 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7559 MpegEncContext * const s = &h->s;
7561 cpb_count = get_ue_golomb(&s->gb) + 1;
7562 get_bits(&s->gb, 4); /* bit_rate_scale */
7563 get_bits(&s->gb, 4); /* cpb_size_scale */
// One bitrate/size/cbr triple per coded picture buffer.
7564 for(i=0; i<cpb_count; i++){
7565 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7566 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7567 get_bits1(&s->gb); /* cbr_flag */
7569 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7570 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7571 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7572 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses VUI (Video Usability Information) from the SPS.
 * Fills sps->sar, timing info, bitstream-restriction fields; most other
 * syntax elements are parsed and discarded.
 * NOTE(review): return statements / error paths are missing from this
 * extract; visible code kept byte-identical.
 */
7575 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7576 MpegEncContext * const s = &h->s;
7577 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7578 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7580 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7582 if( aspect_ratio_info_present_flag ) {
7583 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: the sample aspect ratio is coded explicitly as num/den.
7584 if( aspect_ratio_idc == EXTENDED_SAR ) {
7585 sps->sar.num= get_bits(&s->gb, 16);
7586 sps->sar.den= get_bits(&s->gb, 16);
// idc values below 14 index the predefined aspect-ratio table.
7587 }else if(aspect_ratio_idc < 14){
7588 sps->sar= pixel_aspect[aspect_ratio_idc];
7590 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7597 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7599 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7600 get_bits1(&s->gb); /* overscan_appropriate_flag */
7603 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7604 get_bits(&s->gb, 3); /* video_format */
7605 get_bits1(&s->gb); /* video_full_range_flag */
7606 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7607 get_bits(&s->gb, 8); /* colour_primaries */
7608 get_bits(&s->gb, 8); /* transfer_characteristics */
7609 get_bits(&s->gb, 8); /* matrix_coefficients */
7613 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7614 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7615 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// Timing info is kept: it drives the stream's frame-rate reporting.
7618 sps->timing_info_present_flag = get_bits1(&s->gb);
7619 if(sps->timing_info_present_flag){
7620 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7621 sps->time_scale = get_bits_long(&s->gb, 32);
7622 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7625 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7626 if(nal_hrd_parameters_present_flag)
7627 decode_hrd_parameters(h, sps);
7628 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7629 if(vcl_hrd_parameters_present_flag)
7630 decode_hrd_parameters(h, sps);
7631 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7632 get_bits1(&s->gb); /* low_delay_hrd_flag */
7633 get_bits1(&s->gb); /* pic_struct_present_flag */
// Bitstream restriction: num_reorder_frames bounds required B-frame delay.
7635 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7636 if(sps->bitstream_restriction_flag){
7637 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7638 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7639 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7640 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7641 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7642 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7643 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Parses one scaling list (quantization matrix) of 'size' entries (16 or 64).
 * If the list is absent from the bitstream, 'fallback_list' is copied in;
 * if present but its first delta yields 0, the JVT default 'jvt_list' is used.
 * Deltas are signed Golomb codes accumulated modulo 256 in zigzag order.
 */
7649 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7650 const uint8_t *jvt_list, const uint8_t *fallback_list){
7651 MpegEncContext * const s = &h->s;
7652 int i, last = 8, next = 8;
// 4x4 lists use the 4x4 zigzag; 8x8 lists use the 8x8 zigzag.
7653 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7654 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7655 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7657 for(i=0;i<size;i++){
7659 next = (last + get_se_golomb(&s->gb)) & 0xff;
7660 if(!i && !next){ /* matrix not written, we use the preset one */
7661 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// A delta of 0 (next==0 after first entry) repeats the previous value.
7664 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the full set of scaling matrices (six 4x4, optionally two 8x8).
 * Fallback chain per H.264 spec: each list falls back either to the SPS
 * matrices (when parsing a PPS and the SPS carried matrices), to the JVT
 * defaults, or to the previously decoded list of the same class.
 */
7668 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7669 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7670 MpegEncContext * const s = &h->s;
// In a PPS, fall back to SPS matrices only if the SPS actually sent some.
7671 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7672 const uint8_t *fallback[4] = {
7673 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7674 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7675 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7676 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7678 if(get_bits1(&s->gb)){
7679 sps->scaling_matrix_present |= is_sps;
7680 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7681 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7682 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7683 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7684 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7685 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only in the SPS or when the PPS enables 8x8 transforms.
7686 if(is_sps || pps->transform_8x8_mode){
7687 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7688 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// No matrices in the bitstream: inherit the SPS set wholesale.
7690 } else if(fallback_sps) {
7691 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7692 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a Sequence Parameter Set NAL and stores it in h->sps_buffer[sps_id].
 * NOTE(review): several error-return and validation lines are missing from
 * this extract (e.g. sps_id bounds check against MAX_SPS_COUNT is not
 * visible); visible code kept byte-identical.
 */
7696 static inline int decode_seq_parameter_set(H264Context *h){
7697 MpegEncContext * const s = &h->s;
7698 int profile_idc, level_idc;
7702 profile_idc= get_bits(&s->gb, 8);
7703 get_bits1(&s->gb); //constraint_set0_flag
7704 get_bits1(&s->gb); //constraint_set1_flag
7705 get_bits1(&s->gb); //constraint_set2_flag
7706 get_bits1(&s->gb); //constraint_set3_flag
7707 get_bits(&s->gb, 4); // reserved
7708 level_idc= get_bits(&s->gb, 8);
7709 sps_id= get_ue_golomb(&s->gb);
7711 sps= &h->sps_buffer[ sps_id ];
7712 sps->profile_idc= profile_idc;
7713 sps->level_idc= level_idc;
// High profile (>=100) adds chroma format, bit depth and scaling matrices.
7715 if(sps->profile_idc >= 100){ //high profile
7716 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7717 get_bits1(&s->gb); //residual_color_transform_flag
7718 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7719 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7720 sps->transform_bypass = get_bits1(&s->gb);
7721 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7723 sps->scaling_matrix_present = 0;
7725 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7726 sps->poc_type= get_ue_golomb(&s->gb);
7728 if(sps->poc_type == 0){ //FIXME #define
7729 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7730 } else if(sps->poc_type == 1){//FIXME #define
7731 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7732 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7733 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7734 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7736 for(i=0; i<sps->poc_cycle_length; i++)
7737 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
// poc_type > 2 is not a valid value per the spec.
7739 if(sps->poc_type > 2){
7740 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7744 sps->ref_frame_count= get_ue_golomb(&s->gb);
7745 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7746 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7748 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7749 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7750 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guard against integer overflow in the 16*mb_width/height pixel sizes.
7751 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7752 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7755 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7756 if(!sps->frame_mbs_only_flag)
7757 sps->mb_aff= get_bits1(&s->gb);
7761 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7763 #ifndef ALLOW_INTERLACE
7765 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7767 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7768 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7770 sps->crop= get_bits1(&s->gb);
7772 sps->crop_left = get_ue_golomb(&s->gb);
7773 sps->crop_right = get_ue_golomb(&s->gb);
7774 sps->crop_top = get_ue_golomb(&s->gb);
7775 sps->crop_bottom= get_ue_golomb(&s->gb);
// Left/top cropping shifts the picture origin, which this decoder only
// partially supports; warn rather than fail.
7776 if(sps->crop_left || sps->crop_top){
7777 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7783 sps->crop_bottom= 0;
7786 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7787 if( sps->vui_parameters_present_flag )
7788 decode_vui_parameters(h, sps);
7790 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7791 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7792 sps_id, sps->profile_idc, sps->level_idc,
7794 sps->ref_frame_count,
7795 sps->mb_width, sps->mb_height,
7796 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7797 sps->direct_8x8_inference_flag ? "8B8" : "",
7798 sps->crop_left, sps->crop_right,
7799 sps->crop_top, sps->crop_bottom,
7800 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a Picture Parameter Set NAL and stores it in h->pps_buffer[pps_id].
 * FMO (slice groups > 1) is parsed but unsupported; the map-type syntax is
 * kept here only as commented spec excerpts.
 * NOTE(review): pps_id bounds check and several return paths are missing
 * from this extract; visible code kept byte-identical.
 */
7806 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7807 MpegEncContext * const s = &h->s;
7808 int pps_id= get_ue_golomb(&s->gb);
7809 PPS *pps= &h->pps_buffer[pps_id];
7811 pps->sps_id= get_ue_golomb(&s->gb);
7812 pps->cabac= get_bits1(&s->gb);
7813 pps->pic_order_present= get_bits1(&s->gb);
7814 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7815 if(pps->slice_group_count > 1 ){
7816 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7817 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7818 switch(pps->mb_slice_group_map_type){
7821 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7822 | run_length[ i ] |1 |ue(v) |
7827 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7829 | top_left_mb[ i ] |1 |ue(v) |
7830 | bottom_right_mb[ i ] |1 |ue(v) |
7838 | slice_group_change_direction_flag |1 |u(1) |
7839 | slice_group_change_rate_minus1 |1 |ue(v) |
7844 | slice_group_id_cnt_minus1 |1 |ue(v) |
7845 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7847 | slice_group_id[ i ] |1 |u(v) |
7852 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7853 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// 32 is the maximum reference index count allowed by the spec (with MBAFF).
7854 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7855 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7859 pps->weighted_pred= get_bits1(&s->gb);
7860 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7861 pps->init_qp= get_se_golomb(&s->gb) + 26;
7862 pps->init_qs= get_se_golomb(&s->gb) + 26;
7863 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7864 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7865 pps->constrained_intra_pred= get_bits1(&s->gb);
7866 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7868 pps->transform_8x8_mode= 0;
7869 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Default all scaling matrices to flat 16 before optional trailing syntax.
7870 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7871 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// Trailing PPS extension (8x8 transform flag, matrices, 2nd chroma QP offset)
// is only present if bits remain before bit_length.
7873 if(get_bits_count(&s->gb) < bit_length){
7874 pps->transform_8x8_mode= get_bits1(&s->gb);
7875 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7876 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7879 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7880 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7881 pps_id, pps->sps_id,
7882 pps->cabac ? "CABAC" : "CAVLC",
7883 pps->slice_group_count,
7884 pps->ref_count[0], pps->ref_count[1],
7885 pps->weighted_pred ? "weighted" : "",
7886 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7887 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7888 pps->constrained_intra_pred ? "CONSTR" : "",
7889 pps->redundant_pic_cnt_present ? "REDU" : "",
7890 pps->transform_8x8_mode ? "8x8DCT" : ""
7898 * finds the end of the current frame in the bitstream.
7899 * @return the position of the first byte of the next frame, or -1
/**
 * Scans for NAL start codes to delimit an access unit (frame).
 * A slice NAL (types 1/2/5) whose first_mb_in_slice == 0 marks the start of
 * a new frame; SPS/PPS/AUD NALs (types 7/8/9) also terminate a started frame.
 * State is kept across calls in pc->frame_start_found / pc->state.
 * NOTE(review): state initialization and some branch bodies are missing
 * from this extract; visible code kept byte-identical.
 */
7901 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7904 ParseContext *pc = &(h->s.parse_context);
7905 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7906 // mb_addr= pc->mb_addr - 1;
7908 for(i=0; i<=buf_size; i++){
// Mask 0xFFFFFF1F: 00 00 01 start code + 5-bit nal_unit_type (types 1/2/5 = slices).
7909 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7910 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7911 if(pc->frame_start_found){
7912 // If there isn't one more byte in the buffer
7913 // the test on first_mb_in_slice cannot be done yet
7914 // do it at next call.
7915 if (i >= buf_size) break;
// MSB set means the ue(v) first_mb_in_slice starts with a 1 bit, i.e. value 0.
7916 if (buf[i] & 0x80) {
7917 // first_mb_in_slice is 0, probably the first nal of a new
7919 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7921 pc->frame_start_found= 0;
7925 pc->frame_start_found = 1;
// SPS (7), PPS (8) or access-unit delimiter (9) also ends a pending frame.
7927 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7928 if(pc->frame_start_found){
7930 pc->frame_start_found= 0;
7935 state= (state<<8) | buf[i];
7939 return END_NOT_FOUND;
7942 #ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: accumulates input until a full frame is available
 * (via find_frame_end / ff_combine_frame) and returns it in *poutbuf.
 * NOTE(review): the error-path body and return statements are missing from
 * this extract; visible code kept byte-identical.
 */
7943 static int h264_parse(AVCodecParserContext *s,
7944 AVCodecContext *avctx,
7945 uint8_t **poutbuf, int *poutbuf_size,
7946 const uint8_t *buf, int buf_size)
7948 H264Context *h = s->priv_data;
7949 ParseContext *pc = &h->s.parse_context;
7952 next= find_frame_end(h, buf, buf_size);
// ff_combine_frame may buffer the data and adjust buf/buf_size in place.
7954 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7960 *poutbuf = (uint8_t *)buf;
7961 *poutbuf_size = buf_size;
/**
 * Locates the extradata/header portion of the stream: scans until an SPS
 * (NAL type 7) has been seen and the first non-SPS/PPS/AUD NAL begins,
 * returning the byte offset of that boundary.
 * NOTE(review): some branch bodies and the final return are missing from
 * this extract; visible code kept byte-identical.
 */
7965 static int h264_split(AVCodecContext *avctx,
7966 const uint8_t *buf, int buf_size)
7969 uint32_t state = -1;
7972 for(i=0; i<=buf_size; i++){
// 0x107 == start code + NAL type 7 (SPS).
7973 if((state&0xFFFFFF1F) == 0x107)
7975 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
// Any start code that is not SPS/PPS/AUD ends the header section.
7977 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
// Back up over any zero bytes that belong to the next start code.
7979 while(i>4 && buf[i-5]==0) i--;
7984 state= (state<<8) | buf[i];
/**
 * Main NAL-unit dispatch loop: splits the buffer into NAL units (either
 * length-prefixed AVC format or Annex-B start codes), unescapes each via
 * decode_nal(), and dispatches on nal_unit_type (slices, DPA/DPB/DPC
 * partitions, SEI, SPS, PPS, ...). Finalizes the current picture's POC /
 * reference state at the end.
 * NOTE(review): many interior lines (loop header, several case labels,
 * break/continue statements) are missing from this extract; visible code
 * kept byte-identical.
 */
7990 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7991 MpegEncContext * const s = &h->s;
7992 AVCodecContext * const avctx= s->avctx;
7996 for(i=0; i<50; i++){
7997 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8001 s->current_picture_ptr= NULL;
8010 if(buf_index >= buf_size) break;
// AVC (ISO-BMFF) mode: NALs are prefixed with a big-endian length field.
8012 for(i = 0; i < h->nal_length_size; i++)
8013 nalsize = (nalsize << 8) | buf[buf_index++];
8019 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8024 // start code prefix search
8025 for(; buf_index + 3 < buf_size; buf_index++){
8026 // this should allways succeed in the first iteration
8027 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8031 if(buf_index+3 >= buf_size) break;
// decode_nal strips emulation-prevention bytes and returns the RBSP.
8036 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8037 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8039 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8041 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8042 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8045 if (h->is_avc && (nalsize != consumed))
8046 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8048 buf_index += consumed;
// Skip non-reference NALs entirely when frame skipping is requested.
8050 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8051 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8054 switch(h->nal_unit_type){
8056 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8058 init_get_bits(&s->gb, ptr, bit_length);
8060 h->inter_gb_ptr= &s->gb;
8061 s->data_partitioning = 0;
8063 if(decode_slice_header(h) < 0){
8064 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8067 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
// Decode the slice only if it passes all skip/hurry-up filters.
8068 if(h->redundant_pic_count==0 && s->hurry_up < 5
8069 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8070 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8071 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8072 && avctx->skip_frame < AVDISCARD_ALL)
// Data partitioning: DPA carries the slice header, DPB intra, DPC inter data.
8076 init_get_bits(&s->gb, ptr, bit_length);
8078 h->inter_gb_ptr= NULL;
8079 s->data_partitioning = 1;
8081 if(decode_slice_header(h) < 0){
8082 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8086 init_get_bits(&h->intra_gb, ptr, bit_length);
8087 h->intra_gb_ptr= &h->intra_gb;
8090 init_get_bits(&h->inter_gb, ptr, bit_length);
8091 h->inter_gb_ptr= &h->inter_gb;
8093 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8095 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8096 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8097 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8098 && avctx->skip_frame < AVDISCARD_ALL)
8102 init_get_bits(&s->gb, ptr, bit_length);
8106 init_get_bits(&s->gb, ptr, bit_length);
8107 decode_seq_parameter_set(h);
8109 if(s->flags& CODEC_FLAG_LOW_DELAY)
8112 if(avctx->has_b_frames < 2)
8113 avctx->has_b_frames= !s->low_delay;
8116 init_get_bits(&s->gb, ptr, bit_length);
8118 decode_picture_parameter_set(h, bit_length);
8122 case NAL_END_SEQUENCE:
8123 case NAL_END_STREAM:
8124 case NAL_FILLER_DATA:
8126 case NAL_AUXILIARY_SLICE:
8129 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8133 if(!s->current_picture_ptr) return buf_index; //no frame
8135 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8136 s->current_picture_ptr->pict_type= s->pict_type;
// Carry frame_num/POC state forward for the next picture's POC computation.
8138 h->prev_frame_num_offset= h->frame_num_offset;
8139 h->prev_frame_num= h->frame_num;
8140 if(s->current_picture_ptr->reference){
8141 h->prev_poc_msb= h->poc_msb;
8142 h->prev_poc_lsb= h->poc_lsb;
8144 if(s->current_picture_ptr->reference)
8145 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8155 * returns the number of bytes consumed for building the current frame
/**
 * In truncated mode the parse context may have buffered part of the input,
 * so the consumed count is adjusted by last_index; otherwise the position
 * is clamped into (0, buf_size] to keep the caller's loop progressing.
 */
8157 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8158 if(s->flags&CODEC_FLAG_TRUNCATED){
8159 pos -= s->parse_context.last_index;
8160 if(pos<0) pos=0; // FIXME remove (unneeded?)
8164 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8165 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * AVCodec decode callback. Handles optional truncated-input reassembly,
 * one-time parsing of avcC extradata (AVC/MP4 mode), then decodes the NAL
 * units of one access unit and reorders decoded pictures into display
 * order via the delayed_pic[] queue before emitting one into *pict.
 * NOTE(review): numerous interior lines (returns, loop/branch bodies) are
 * missing from this extract; visible code kept byte-identical.
 */
8171 static int decode_frame(AVCodecContext *avctx,
8172 void *data, int *data_size,
8173 uint8_t *buf, int buf_size)
8175 H264Context *h = avctx->priv_data;
8176 MpegEncContext *s = &h->s;
8177 AVFrame *pict = data;
8180 s->flags= avctx->flags;
8181 s->flags2= avctx->flags2;
8183 /* no supplementary picture */
8184 if (buf_size == 0) {
// Truncated mode: buffer input until a full frame boundary is found.
8188 if(s->flags&CODEC_FLAG_TRUNCATED){
8189 int next= find_frame_end(h, buf, buf_size);
8191 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8193 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// One-time avcC extradata parsing: extract and decode the embedded SPS/PPS.
8196 if(h->is_avc && !h->got_avcC) {
8197 int i, cnt, nalsize;
8198 unsigned char *p = avctx->extradata;
8199 if(avctx->extradata_size < 7) {
8200 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8204 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8207 /* sps and pps in the avcC always have length coded with 2 bytes,
8208 so put a fake nal_length_size = 2 while parsing them */
8209 h->nal_length_size = 2;
8210 // Decode sps from avcC
8211 cnt = *(p+5) & 0x1f; // Number of sps
8213 for (i = 0; i < cnt; i++) {
8214 nalsize = BE_16(p) + 2;
8215 if(decode_nal_units(h, p, nalsize) < 0) {
8216 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8221 // Decode pps from avcC
8222 cnt = *(p++); // Number of pps
8223 for (i = 0; i < cnt; i++) {
8224 nalsize = BE_16(p) + 2;
8225 if(decode_nal_units(h, p, nalsize) != nalsize) {
8226 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8231 // Now store right nal length size, that will be use to parse all other nals
8232 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8233 // Do not reparse avcC
// Annex-B extradata (if any) is decoded once, before the first picture.
8237 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8238 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8242 buf_index=decode_nal_units(h, buf, buf_size);
8246 //FIXME do something with unavailable reference frames
8248 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8249 if(!s->current_picture_ptr){
8250 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8255 Picture *out = s->current_picture_ptr;
8256 #if 0 //decode order
8257 *data_size = sizeof(AVFrame);
8259 /* Sort B-frames into display order */
8260 Picture *cur = s->current_picture_ptr;
8261 Picture *prev = h->delayed_output_pic;
8262 int i, pics, cross_idr, out_of_order, out_idx;
// The VUI's num_reorder_frames bounds the required output delay.
8264 if(h->sps.bitstream_restriction_flag
8265 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8266 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8271 while(h->delayed_pic[pics]) pics++;
8272 h->delayed_pic[pics++] = cur;
8273 if(cur->reference == 0)
8277 for(i=0; h->delayed_pic[i]; i++)
8278 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// Pick the delayed picture with the smallest POC as the next output.
8281 out = h->delayed_pic[0];
8283 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8284 if(h->delayed_pic[i]->poc < out->poc){
8285 out = h->delayed_pic[i];
8289 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8290 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8292 else if(prev && pics <= s->avctx->has_b_frames)
// Heuristic: grow has_b_frames when output order violations indicate more
// reordering depth than currently assumed.
8294 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8296 ((!cross_idr && prev && out->poc > prev->poc + 2)
8297 || cur->pict_type == B_TYPE)))
8300 s->avctx->has_b_frames++;
8303 else if(out_of_order)
8306 if(out_of_order || pics > s->avctx->has_b_frames){
8307 for(i=out_idx; h->delayed_pic[i]; i++)
8308 h->delayed_pic[i] = h->delayed_pic[i+1];
8314 *data_size = sizeof(AVFrame);
8315 if(prev && prev != out && prev->reference == 1)
8316 prev->reference = 0;
8317 h->delayed_output_pic = out;
8321 *pict= *(AVFrame*)out;
8323 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8326 assert(pict->data[0] || !*data_size);
8327 ff_print_debug_info(s, pict);
8328 //printf("out %d\n", (int)pict->data[0]);
8331 /* Return the Picture timestamp as the frame number */
8332 /* we substract 1 because it is added on utils.c */
8333 avctx->frame_number = s->picture_number - 1;
8335 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with neighbour-macroblock availability flags for the
 * current MB: a neighbour is available only if it lies inside the picture
 * and belongs to the same slice (slice_table match).
 * Index layout per the visible assignments: 0..2 = top-left/top/top-right,
 * 3 = left; 4/5 look like current/right placeholders (marked FIXME).
 */
8338 static inline void fill_mb_avail(H264Context *h){
8339 MpegEncContext * const s = &h->s;
8340 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8343 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8344 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8345 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8351 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8352 h->mb_avail[4]= 1; //FIXME move out
8353 h->mb_avail[5]= 0; //FIXME move out
8359 #define SIZE (COUNT*40)
8365 // int int_temp[10000];
8367 AVCodecContext avctx;
8369 dsputil_init(&dsp, &avctx);
8371 init_put_bits(&pb, temp, SIZE);
8372 printf("testing unsigned exp golomb\n");
8373 for(i=0; i<COUNT; i++){
8375 set_ue_golomb(&pb, i);
8376 STOP_TIMER("set_ue_golomb");
8378 flush_put_bits(&pb);
8380 init_get_bits(&gb, temp, 8*SIZE);
8381 for(i=0; i<COUNT; i++){
8384 s= show_bits(&gb, 24);
8387 j= get_ue_golomb(&gb);
8389 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8392 STOP_TIMER("get_ue_golomb");
8396 init_put_bits(&pb, temp, SIZE);
8397 printf("testing signed exp golomb\n");
8398 for(i=0; i<COUNT; i++){
8400 set_se_golomb(&pb, i - COUNT/2);
8401 STOP_TIMER("set_se_golomb");
8403 flush_put_bits(&pb);
8405 init_get_bits(&gb, temp, 8*SIZE);
8406 for(i=0; i<COUNT; i++){
8409 s= show_bits(&gb, 24);
8412 j= get_se_golomb(&gb);
8413 if(j != i - COUNT/2){
8414 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8417 STOP_TIMER("get_se_golomb");
8420 printf("testing 4x4 (I)DCT\n");
8423 uint8_t src[16], ref[16];
8424 uint64_t error= 0, max_error=0;
8426 for(i=0; i<COUNT; i++){
8428 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8429 for(j=0; j<16; j++){
8430 ref[j]= random()%255;
8431 src[j]= random()%255;
8434 h264_diff_dct_c(block, src, ref, 4);
8437 for(j=0; j<16; j++){
8438 // printf("%d ", block[j]);
8439 block[j]= block[j]*4;
8440 if(j&1) block[j]= (block[j]*4 + 2)/5;
8441 if(j&4) block[j]= (block[j]*4 + 2)/5;
8445 s->dsp.h264_idct_add(ref, block, 4);
8446 /* for(j=0; j<16; j++){
8447 printf("%d ", ref[j]);
8451 for(j=0; j<16; j++){
8452 int diff= FFABS(src[j] - ref[j]);
8455 max_error= FFMAX(max_error, diff);
8458 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8460 printf("testing quantizer\n");
8461 for(qp=0; qp<52; qp++){
8463 src1_block[i]= src2_block[i]= random()%255;
8467 printf("Testing NAL layer\n");
8469 uint8_t bitstream[COUNT];
8470 uint8_t nal[COUNT*2];
8472 memset(&h, 0, sizeof(H264Context));
8474 for(i=0; i<COUNT; i++){
8482 for(j=0; j<COUNT; j++){
8483 bitstream[j]= (random() % 255) + 1;
8486 for(j=0; j<zeros; j++){
8487 int pos= random() % COUNT;
8488 while(bitstream[pos] == 0){
8497 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8499 printf("encoding failed\n");
8503 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8507 if(out_length != COUNT){
8508 printf("incorrect length %d %d\n", out_length, COUNT);
8512 if(consumed != nal_length){
8513 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8517 if(memcmp(bitstream, out, COUNT)){
8518 printf("missmatch\n");
8523 printf("Testing RBSP\n");
/**
 * AVCodec close callback: releases the RBSP scratch buffer and the
 * per-context tables; remaining MpegEncContext teardown is handled by
 * lines not visible in this extract.
 */
8531 static int decode_end(AVCodecContext *avctx)
8533 H264Context *h = avctx->priv_data;
8534 MpegEncContext *s = &h->s;
8536 av_freep(&h->rbsp_buffer);
8537 free_tables(h); //FIXME cleanup init stuff perhaps
8540 // memset(h, 0, sizeof(H264Context));
8546 AVCodec h264_decoder = {
8550 sizeof(H264Context),
8555 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8559 #ifdef CONFIG_H264_PARSER
8560 AVCodecParser h264_parser = {
8562 sizeof(H264Context),