2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
57 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE
61 #ifdef ALLOW_INTERLACE
62 #define MB_MBAFF h->mb_mbaff
63 #define MB_FIELD h->mb_field_decoding_flag
64 #define FRAME_MBAFF h->mb_aff_frame
70 #define IS_INTERLACED(mb_type) 0
74 * Sequence parameter set
80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
82 int poc_type; ///< pic_order_cnt_type
83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag;
85 int offset_for_non_ref_pic;
86 int offset_for_top_to_bottom_field;
87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag;
90 int mb_width; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag;
93 int mb_aff; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag;
95 int crop; ///< frame_cropping_flag
96 int crop_left; ///< frame_cropping_rect_left_offset
97 int crop_right; ///< frame_cropping_rect_right_offset
98 int crop_top; ///< frame_cropping_rect_top_offset
99 int crop_bottom; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag;
102 int timing_info_present_flag;
103 uint32_t num_units_in_tick;
105 int fixed_frame_rate_flag;
106 short offset_for_ref_frame[256]; //FIXME dyn aloc?
107 int bitstream_restriction_flag;
108 int num_reorder_frames;
109 int scaling_matrix_present;
110 uint8_t scaling_matrix4[6][16];
111 uint8_t scaling_matrix8[2][64];
115 * Picture parameter set
119 int cabac; ///< entropy_coding_mode_flag
120 int pic_order_present; ///< pic_order_present_flag
121 int slice_group_count; ///< num_slice_groups_minus1 + 1
122 int mb_slice_group_map_type;
123 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
124 int weighted_pred; ///< weighted_pred_flag
125 int weighted_bipred_idc;
126 int init_qp; ///< pic_init_qp_minus26 + 26
127 int init_qs; ///< pic_init_qs_minus26 + 26
128 int chroma_qp_index_offset;
129 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
130 int constrained_intra_pred; ///< constrained_intra_pred_flag
131 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
132 int transform_8x8_mode; ///< transform_8x8_mode_flag
133 uint8_t scaling_matrix4[6][16];
134 uint8_t scaling_matrix8[2][64];
138 * Memory management control operation opcode.
140 typedef enum MMCOOpcode{
151 * Memory management control operation.
162 typedef struct H264Context{
170 #define NAL_IDR_SLICE 5
175 #define NAL_END_SEQUENCE 10
176 #define NAL_END_STREAM 11
177 #define NAL_FILLER_DATA 12
178 #define NAL_SPS_EXT 13
179 #define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer;
181 unsigned int rbsp_buffer_size;
184 * Used to parse AVC variant of h264
186 int is_avc; ///< this flag is != 0 if codec is avc1
187 int got_avcC; ///< flag used to parse avcC data only once
188 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
196 int chroma_pred_mode;
197 int intra16x16_pred_mode;
202 int8_t intra4x4_pred_mode_cache[5*8];
203 int8_t (*intra4x4_pred_mode)[8];
204 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
205 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
206 void (*pred8x8 [4+3])(uint8_t *src, int stride);
207 void (*pred16x16[4+3])(uint8_t *src, int stride);
208 unsigned int topleft_samples_available;
209 unsigned int top_samples_available;
210 unsigned int topright_samples_available;
211 unsigned int left_samples_available;
212 uint8_t (*top_borders[2])[16+2*8];
213 uint8_t left_border[2*(17+2*9)];
216 * non zero coeff count cache.
217 * is 64 if not available.
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
220 uint8_t (*non_zero_count)[16];
223 * Motion vector cache.
225 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
227 #define LIST_NOT_USED -1 //FIXME rename?
228 #define PART_NOT_AVAILABLE -2
231 * is 1 if the specific list MV&references are set to 0,0,-2.
233 int mv_cache_clean[2];
236 * number of neighbors (top and/or left) that used 8x8 dct
238 int neighbor_transform_size;
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
244 int block_offset[2*(16+8)];
246 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
248 int b_stride; //FIXME use s->b4_stride
251 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
260 int unknown_svq3_flag;
261 int next_slice_index;
263 SPS sps_buffer[MAX_SPS_COUNT];
264 SPS sps; ///< current sps
266 PPS pps_buffer[MAX_PPS_COUNT];
270 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
272 uint32_t dequant4_buffer[6][52][16];
273 uint32_t dequant8_buffer[2][52][64];
274 uint32_t (*dequant4_coeff[6])[16];
275 uint32_t (*dequant8_coeff[2])[64];
276 int dequant_coeff_pps; ///< reinit tables when pps changes
279 uint8_t *slice_table_base;
280 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
282 int slice_type_fixed;
284 //interlacing specific flags
286 int mb_field_decoding_flag;
287 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
294 int delta_poc_bottom;
297 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset; ///< for POC type 2
300 int prev_frame_num_offset; ///< for POC type 2
301 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
304 * frame_num for frames or 2*frame_num for field pics.
309 * max_frame_num or 2*max_frame_num for field pics.
313 //Weighted pred stuff
315 int use_weight_chroma;
316 int luma_log2_weight_denom;
317 int chroma_log2_weight_denom;
318 int luma_weight[2][48];
319 int luma_offset[2][48];
320 int chroma_weight[2][48][2];
321 int chroma_offset[2][48][2];
322 int implicit_weight[48][48];
325 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset;
327 int slice_beta_offset;
329 int redundant_pic_count;
331 int direct_spatial_mv_pred;
332 int dist_scale_factor[16];
333 int dist_scale_factor_field[32];
334 int map_col_to_list0[2][16];
335 int map_col_to_list0_field[2][32];
338 * num_ref_idx_l0/1_active_minus1 + 1
340 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
341 Picture *short_ref[32];
342 Picture *long_ref[32];
343 Picture default_ref_list[2][32];
344 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture *delayed_pic[16]; //FIXME size?
346 Picture *delayed_output_pic;
349 * memory management control operations buffer.
351 MMCO mmco[MAX_MMCO_COUNT];
354 int long_ref_count; ///< number of actual long term references
355 int short_ref_count; ///< number of actual short term references
358 GetBitContext intra_gb;
359 GetBitContext inter_gb;
360 GetBitContext *intra_gb_ptr;
361 GetBitContext *inter_gb_ptr;
363 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
369 uint8_t cabac_state[460];
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
376 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
377 uint8_t *chroma_pred_mode_table;
378 int last_qscale_diff;
379 int16_t (*mvd_table[2])[2];
380 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
381 uint8_t *direct_table;
382 uint8_t direct_cache[5*8];
384 uint8_t zigzag_scan[16];
385 uint8_t zigzag_scan8x8[64];
386 uint8_t zigzag_scan8x8_cavlc[64];
387 uint8_t field_scan[16];
388 uint8_t field_scan8x8[64];
389 uint8_t field_scan8x8_cavlc[64];
390 const uint8_t *zigzag_scan_q0;
391 const uint8_t *zigzag_scan8x8_q0;
392 const uint8_t *zigzag_scan8x8_cavlc_q0;
393 const uint8_t *field_scan_q0;
394 const uint8_t *field_scan8x8_q0;
395 const uint8_t *field_scan8x8_cavlc_q0;
400 static VLC coeff_token_vlc[4];
401 static VLC chroma_dc_coeff_token_vlc;
403 static VLC total_zeros_vlc[15];
404 static VLC chroma_dc_total_zeros_vlc[3];
406 static VLC run_vlc[6];
409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two values into a single 32-bit word, 16 bits each.
 *
 * When WORDS_BIGENDIAN is defined, a is shifted into the upper half and b is
 * masked into the lower half. The second return swaps the two roles
 * (presumably the little-endian branch; the preprocessor lines separating
 * the two returns are not visible in this view).
 * Only the operand placed in the lower half is masked with 0xFFFF.
 *
 * @param a first value
 * @param b second value
 * @return the combined 32-bit word
 */
413 static always_inline uint32_t pack16to32(int a, int b){
414 #ifdef WORDS_BIGENDIAN
415 return (b&0xFFFF) + (a<<16);
417 return (a&0xFFFF) + (b<<16);
423 * @param h height of the rectangle, should be a constant
424 * @param w width of the rectangle, should be a constant
425 * @param size the size of val (1 or 4), should be a constant
/*
 * Fills a small rectangle of memory with val, writing up to four rows that
 * are stride apart (rows 0..3 are the only ones addressed below).
 *
 * vp     start of the rectangle; accessed through a byte pointer
 * stride distance between rows (NOTE(review): the statements that scale w
 *        and stride by size are not visible in this view - confirm units)
 * val    value to store; for size==1 the 16- and 32-bit stores replicate it
 *        into every byte (val*0x0101, val*0x01010101)
 * size   must be 1 or 4 (asserted)
 *
 * The branch conditions selecting between the store widths below are not
 * visible here; each group of stores handles one row width.
 */
427 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
428 uint8_t *p= (uint8_t*)vp;
429 assert(size==1 || size==4);
// The start address must be aligned to min(w, STRIDE_ALIGN), and stride must be a multiple of w.
// NOTE(review): the pointer is cast to long; on targets where long is narrower than a pointer
// this conversion is not portable (uintptr_t would be) - only the low bits are tested here.
435 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
436 assert((stride&(w-1))==0);
// 16-bit stores, one per row.
438 const uint16_t v= size==4 ? val : val*0x0101;
439 *(uint16_t*)(p + 0*stride)= v;
441 *(uint16_t*)(p + 1*stride)= v;
443 *(uint16_t*)(p + 2*stride)=
444 *(uint16_t*)(p + 3*stride)= v;
// 32-bit stores, one per row.
446 const uint32_t v= size==4 ? val : val*0x01010101;
447 *(uint32_t*)(p + 0*stride)= v;
449 *(uint32_t*)(p + 1*stride)= v;
451 *(uint32_t*)(p + 2*stride)=
452 *(uint32_t*)(p + 3*stride)= v;
454 //gcc can't optimize 64bit math on x86_32
455 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// 64-bit stores: val is duplicated into both 32-bit halves of v.
456 const uint64_t v= val*0x0100000001ULL;
457 *(uint64_t*)(p + 0*stride)= v;
459 *(uint64_t*)(p + 1*stride)= v;
461 *(uint64_t*)(p + 2*stride)=
462 *(uint64_t*)(p + 3*stride)= v;
// Two 64-bit stores per row (byte offsets 0 and 8).
464 const uint64_t v= val*0x0100000001ULL;
465 *(uint64_t*)(p + 0+0*stride)=
466 *(uint64_t*)(p + 8+0*stride)=
467 *(uint64_t*)(p + 0+1*stride)=
468 *(uint64_t*)(p + 8+1*stride)= v;
470 *(uint64_t*)(p + 0+2*stride)=
471 *(uint64_t*)(p + 8+2*stride)=
472 *(uint64_t*)(p + 0+3*stride)=
473 *(uint64_t*)(p + 8+3*stride)= v;
// Same row widths using 32-bit stores only (presumably the non-64-bit build; the #else line is not visible).
475 *(uint32_t*)(p + 0+0*stride)=
476 *(uint32_t*)(p + 4+0*stride)= val;
478 *(uint32_t*)(p + 0+1*stride)=
479 *(uint32_t*)(p + 4+1*stride)= val;
481 *(uint32_t*)(p + 0+2*stride)=
482 *(uint32_t*)(p + 4+2*stride)=
483 *(uint32_t*)(p + 0+3*stride)=
484 *(uint32_t*)(p + 4+3*stride)= val;
// Four 32-bit stores per row (byte offsets 0, 4, 8, 12).
486 *(uint32_t*)(p + 0+0*stride)=
487 *(uint32_t*)(p + 4+0*stride)=
488 *(uint32_t*)(p + 8+0*stride)=
489 *(uint32_t*)(p +12+0*stride)=
490 *(uint32_t*)(p + 0+1*stride)=
491 *(uint32_t*)(p + 4+1*stride)=
492 *(uint32_t*)(p + 8+1*stride)=
493 *(uint32_t*)(p +12+1*stride)= val;
495 *(uint32_t*)(p + 0+2*stride)=
496 *(uint32_t*)(p + 4+2*stride)=
497 *(uint32_t*)(p + 8+2*stride)=
498 *(uint32_t*)(p +12+2*stride)=
499 *(uint32_t*)(p + 0+3*stride)=
500 *(uint32_t*)(p + 4+3*stride)=
501 *(uint32_t*)(p + 8+3*stride)=
502 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads the per-macroblock caches in h from data stored for the neighbouring
 * macroblocks (top-left, top, top-right and two left neighbours).
 *
 * Visible effects: records h->top_mb_xy and h->left_mb_xy[]; for intra
 * macroblocks sets the *_samples_available masks and the edge entries of
 * intra4x4_pred_mode_cache; fills the edge entries of non_zero_count_cache;
 * sets top_cbp/left_cbp; for inter/direct macroblocks fills the edge entries
 * of mv_cache, ref_cache, mvd_cache and (in B slices) direct_cache; finally
 * sets neighbor_transform_size from the top and first left neighbour.
 *
 * @param h           decoder context; the current position is taken from h->s.mb_x/mb_y
 * @param mb_type     type of the current macroblock
 * @param for_deblock tested at two visible points (the early check on slice_num
 *                    and the check before the top-left/top-right loads); the
 *                    statements those checks guard are not visible in this view
 */
509 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
510 MpegEncContext * const s = &h->s;
511 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
512 int topleft_xy, top_xy, topright_xy, left_xy[2];
513 int topleft_type, top_type, topright_type, left_type[2];
517 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
518 // the actual condition is whether we're on the edge of a slice,
519 // and even then the intra and nnz parts are unnecessary.
520 if(for_deblock && h->slice_num == 1 && !FRAME_MBAFF)
523 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default neighbour addresses: the row above is mb_stride entries back, the left neighbour is mb_xy-1.
525 top_xy = mb_xy - s->mb_stride;
526 topleft_xy = top_xy - 1;
527 topright_xy= top_xy + 1;
528 left_xy[1] = left_xy[0] = mb_xy-1;
// pair_xy: same column, row index with bit 0 cleared (the upper macroblock of the current vertical pair).
538 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
539 const int top_pair_xy = pair_xy - s->mb_stride;
540 const int topleft_pair_xy = top_pair_xy - 1;
541 const int topright_pair_xy = top_pair_xy + 1;
// *_mb_frame_flag is 1 when the corresponding pair is not interlaced.
542 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
543 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
544 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
545 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
546 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
547 const int bottom = (s->mb_y & 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// Each of the three adjustments below moves a neighbour address one more row up.
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
553 top_xy -= s->mb_stride;
556 ? !curr_mb_frame_flag // bottom macroblock
557 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
559 topleft_xy -= s->mb_stride;
562 ? !curr_mb_frame_flag // bottom macroblock
563 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
565 topright_xy -= s->mb_stride;
// Left pair coded differently from the current macroblock: both left addresses restart from the left pair.
567 if (left_mb_frame_flag != curr_mb_frame_flag) {
568 left_xy[1] = left_xy[0] = pair_xy - 1;
569 if (curr_mb_frame_flag) {
590 left_xy[1] += s->mb_stride;
// Remember the chosen top/left neighbour addresses in the context.
603 h->top_mb_xy = top_xy;
604 h->left_mb_xy[0] = left_xy[0];
605 h->left_mb_xy[1] = left_xy[1];
// Here a neighbour's mb_type is taken only if its slice_table entry is below 255; otherwise the type is 0.
607 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
608 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
609 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
610 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
611 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
613 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Unpack the 16-bit word stored at non_zero_count[mb_xy][14] (written by write_back_non_zero_count):
// bit i becomes the 0/1 cache entry at scan8[i].
615 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
617 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
// One pass per reference list: list 0 only, or lists 0 and 1 in B slices.
618 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619 if(USES_LIST(mb_type,list)){
620 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622 uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
629 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
632 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
// List not used by this macroblock: zero the 4x4 mv area and mark its references LIST_NOT_USED.
635 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Here a neighbour's mb_type is taken only if it lies in the current slice (slice_table entry equals slice_num); otherwise 0.
641 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
642 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
643 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
644 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
645 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
648 if(IS_INTRA(mb_type)){
// Start from the default masks, then clear bits for neighbours that cannot be used.
649 h->topleft_samples_available=
650 h->top_samples_available=
651 h->left_samples_available= 0xFFFF;
652 h->topright_samples_available= 0xEEEA;
// A neighbour is unusable when it is not intra and is either absent (type 0) or constrained_intra_pred is set.
654 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
655 h->topleft_samples_available= 0xB3FF;
656 h->top_samples_available= 0x33FF;
657 h->topright_samples_available= 0x26EA;
660 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
661 h->topleft_samples_available&= 0xDF5F;
662 h->left_samples_available&= 0x5F5F;
666 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
667 h->topleft_samples_available&= 0x7FFF;
669 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
670 h->topright_samples_available&= 0xFBFF;
672 if(IS_INTRA4x4(mb_type)){
// Top neighbour is intra4x4: copy its stored modes (entries 4,5,6,3) into cache row 0, columns 4..7.
673 if(IS_INTRA4x4(top_type)){
674 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
675 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
676 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
677 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
// Otherwise all four entries receive one substitute value, pred (its assignment is not visible in this view).
680 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
685 h->intra4x4_pred_mode_cache[4+8*0]=
686 h->intra4x4_pred_mode_cache[5+8*0]=
687 h->intra4x4_pred_mode_cache[6+8*0]=
688 h->intra4x4_pred_mode_cache[7+8*0]= pred;
// Same scheme for each of the two left neighbours (cache column 3).
691 if(IS_INTRA4x4(left_type[i])){
692 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
693 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
696 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
701 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
702 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
717 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Copy the top neighbour's stored non-zero counts into the cache entries above the current macroblock.
719 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
720 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
721 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
722 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
724 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
725 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
727 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
728 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// Fallback value for the same entries (presumably the top-unavailable branch; the condition is not visible):
// 0 for a non-intra macroblock when CABAC is enabled, 64 otherwise.
731 h->non_zero_count_cache[4+8*0]=
732 h->non_zero_count_cache[5+8*0]=
733 h->non_zero_count_cache[6+8*0]=
734 h->non_zero_count_cache[7+8*0]=
736 h->non_zero_count_cache[1+8*0]=
737 h->non_zero_count_cache[2+8*0]=
739 h->non_zero_count_cache[1+8*3]=
740 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Left neighbours: same copy-or-fallback scheme, rows selected through left_block[].
744 for (i=0; i<2; i++) {
746 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
747 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
748 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
749 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
751 h->non_zero_count_cache[3+8*1 + 2*8*i]=
752 h->non_zero_count_cache[3+8*2 + 2*8*i]=
753 h->non_zero_count_cache[0+8*1 + 8*i]=
754 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded block patterns of the top and left neighbours, read from cbp_table.
761 h->top_cbp = h->cbp_table[top_xy];
762 } else if(IS_INTRA(mb_type)) {
769 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type)) {
// Bits 1 and 3 of left_cbp come from one bit of each left neighbour's cbp, chosen via left_block[0] / left_block[2].
776 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
779 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
784 //FIXME direct mb can skip much of this
785 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
// One pass per reference list: list 0 only, or lists 0 and 1 in B slices.
787 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
788 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
789 /*if(!h->mv_cache_clean[list]){
790 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
791 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
792 h->mv_cache_clean[list]= 1;
796 h->mv_cache_clean[list]= 0;
798 if(USES_LIST(top_type, list)){
799 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
800 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
805 h->ref_cache[list][scan8[0] + 0 - 1*8]=
806 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
807 h->ref_cache[list][scan8[0] + 2 - 1*8]=
808 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
814 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
817 //FIXME unify cleanup or sth
818 if(USES_LIST(left_type[0], list)){
819 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
820 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
821 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
823 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
824 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
826 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
828 h->ref_cache[list][scan8[0] - 1 + 0*8]=
829 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
832 if(USES_LIST(left_type[1], list)){
833 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
834 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
835 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
837 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
838 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
840 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
842 h->ref_cache[list][scan8[0] - 1 + 2*8]=
// NOTE(review): this second-left-neighbour fallback tests left_type[0], not left_type[1]; the assert on the next line requires both to be zero or both non-zero, so the result is the same when it holds.
843 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
844 assert((!left_type[0]) == (!left_type[1]));
847 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
850 if(USES_LIST(topleft_type, list)){
851 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
852 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
853 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
854 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
856 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
857 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
860 if(USES_LIST(topright_type, list)){
861 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
862 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
863 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
864 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
866 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
867 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 h->ref_cache[list][scan8[5 ]+1] =
872 h->ref_cache[list][scan8[7 ]+1] =
873 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
874 h->ref_cache[list][scan8[4 ]] =
875 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
876 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
877 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
878 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
879 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
880 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
883 /* XXX beurk, Load mvd */
// mvd_cache edge entries: copied from mvd_table when the neighbour uses this list, otherwise zeroed.
884 if(USES_LIST(top_type, list)){
885 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
886 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
891 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
896 if(USES_LIST(left_type[0], list)){
897 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
898 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
899 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
901 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
902 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
904 if(USES_LIST(left_type[1], list)){
905 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
906 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
907 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
909 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
910 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
912 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
913 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
914 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
915 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
916 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices only: zero direct_cache for the current macroblock, then load the top and left neighbour entries.
918 if(h->slice_type == B_TYPE){
919 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
921 if(IS_DIRECT(top_type)){
922 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
923 }else if(IS_8X8(top_type)){
924 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
925 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
926 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
928 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
931 if(IS_DIRECT(left_type[0]))
932 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
933 else if(IS_8X8(left_type[0]))
934 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
936 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
938 if(IS_DIRECT(left_type[1]))
939 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
940 else if(IS_8X8(left_type[1]))
941 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
943 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
949 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
950 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
955 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
958 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// Two alternative MAP_F2F definitions follow, applied to the ten neighbour cache slots listed above.
// First: for a non-interlaced neighbour with a valid reference, double the reference index and halve the vertical mv/mvd component.
// Second: for an interlaced neighbour with a valid reference, halve the reference index and double the vertical mv/mvd component.
960 #define MAP_F2F(idx, mb_type)\
961 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
962 h->ref_cache[list][idx] <<= 1;\
963 h->mv_cache[list][idx][1] /= 2;\
964 h->mvd_cache[list][idx][1] /= 2;\
969 #define MAP_F2F(idx, mb_type)\
970 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
971 h->ref_cache[list][idx] >>= 1;\
972 h->mv_cache[list][idx][1] <<= 1;\
973 h->mvd_cache[list][idx][1] <<= 1;\
983 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Stores seven entries of intra4x4_pred_mode_cache into the per-macroblock
 * table h->intra4x4_pred_mode at the current macroblock address.
 *
 * Entries 0..3 come from cache column 7 of rows 1..4; entries 4..6 come from
 * cache row 4, columns 4..6 (presumably the right column and bottom row of
 * the current macroblock's 4x4 blocks, which is what the neighbour loads in
 * fill_caches read back).
 */
986 static inline void write_back_intra_pred_mode(H264Context *h){
987 MpegEncContext * const s = &h->s;
988 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
990 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
991 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
992 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
993 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
994 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
995 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
996 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1000 * Checks whether the top and left blocks are available where needed, and changes the DC mode so that it uses only the available blocks.
/*
 * Validates the cached intra4x4 prediction modes along the top row and left
 * column of the current macroblock when the corresponding neighbour samples
 * are flagged unavailable (bit 0x8000 clear in top/left_samples_available).
 *
 * Each affected mode is looked up in a small table; the table result is
 * written back into the cache and is also what the error message prints.
 * The conditions deciding between "log an error" and "write back" are not
 * visible in this view (presumably a negative entry is the error case and a
 * non-zero entry is a replacement mode).
 */
1002 static inline int check_intra4x4_pred_mode(H264Context *h){
1003 MpegEncContext * const s = &h->s;
// Lookup tables indexed by the cached prediction mode; entries not listed in the initialisers are zero.
1004 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1005 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1008 if(!(h->top_samples_available&0x8000)){
1010 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
// NOTE(review): the value printed as "mode" is the table result (status), not the mode read from the cache.
1012 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1015 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1020 if(!(h->left_samples_available&0x8000)){
// Left column entries are 8 cache slots apart.
1022 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1024 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1027 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1033 } //FIXME cleanup like next
1036 * Checks whether the top and left blocks are available where needed, and changes the DC mode so that it uses only the available blocks.
/*
 * Validates an intra prediction mode against neighbour availability.
 *
 * mode must lie in 0..6; anything else is reported as an out-of-range
 * chroma prediction mode. When the top or left samples are flagged
 * unavailable (bit 0x8000 clear), an error may be logged for modes that
 * need them. The statements that consult the top[]/left[] tables and
 * produce the return value are not visible in this view.
 */
1038 static inline int check_intra_pred_mode(H264Context *h, int mode){
1039 MpegEncContext * const s = &h->s;
// Seven-entry lookup tables; entries not listed in the initialisers are zero.
1040 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1041 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1043 if(mode < 0 || mode > 6) {
1044 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1048 if(!(h->top_samples_available&0x8000)){
1051 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1056 if(!(h->left_samples_available&0x8000)){
1059 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1068 * gets the predicted intra4x4 prediction mode.
/* Predicts the intra4x4 mode of block n from its cached neighbours.
 * @param n block index (mapped to a cache position through scan8[])
 * Returns DC_PRED when the smaller of the two neighbour modes is negative.
 * NOTE(review): line 1079 is missing from this listing, so the return value for
 * the non-negative case is not visible; presumably it is `min` -- confirm. */
1070 static inline int pred_intra_mode(H264Context *h, int n){
1071 const int index8= scan8[n];
/* index8-1 is the entry to the left, index8-8 the entry one cache row up. */
1072 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1073 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1074 const int min= FFMIN(left, top);
1076 tprintf("mode:%d %d min:%d\n", left ,top, min);
1078 if(min<0) return DC_PRED;
/* Copies selected entries of non_zero_count_cache into the per-macroblock
 * array h->non_zero_count[mb_xy] for the macroblock at (s->mb_x, s->mb_y).
 * Cache positions are written as column + 8*row. */
1082 static inline void write_back_non_zero_count(H264Context *h){
1083 MpegEncContext * const s = &h->s;
1084 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Entries 0..3: column 7 of cache rows 1..4; entries 4..6: columns 4..6 of row 4. */
1086 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1087 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1088 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1089 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1090 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1091 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1092 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Entries 7..9 come from columns 1..2 of cache rows 1..2. */
1094 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1095 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1096 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* Entries 10..12 come from columns 1..2 of cache rows 4..5. */
1098 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1099 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1100 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/* NOTE(review): lines 1102, 1104 and 1105 are missing from this listing, so the
 * condition guarding this section, the declaration of `v` and the loop over `i`
 * are not visible. */
1103 // store all luma nnzs, for deblocking
/* Bit i of v is set when the cache entry at scan8[i] is non-zero. */
1106 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
/* The mask is stored as one 16-bit value starting at entry 14. */
1107 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1112 * gets the predicted number of non zero coefficients.
1113 * @param n block index
/* Predicts the number of non-zero coefficients of block n from the cached
 * counts of its left and top neighbours.
 * NOTE(review): lines 1119 and 1125 are missing from this listing, so the
 * definition of `i` and the return statement are not visible; the trace below
 * prints i&31 as the prediction. */
1115 static inline int pred_non_zero_count(H264Context *h, int n){
1116 const int index8= scan8[n];
1117 const int left= h->non_zero_count_cache[index8 - 1];
1118 const int top = h->non_zero_count_cache[index8 - 8];
/* Halve with rounding up, but only while i is below 64. */
1121 if(i<64) i= (i+1)>>1;
1123 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Selects the diagonal neighbour ("C") used for motion vector prediction.
 * @param C          receives a pointer to the chosen motion vector
 * @param i          cache index of the current partition
 * @param list       reference list
 * @param part_width partition width in 4-sample units
 * @return the reference index that belongs to *C
 * The default candidate is the cache entry one row up and part_width columns to
 * the right; when that entry is PART_NOT_AVAILABLE the entry one row up and one
 * column to the left is used instead (see the last lines of the function).
 * NOTE(review): lines 1133, 1136, 1140, 1160, 1164, 1171 and several closing
 * braces are missing from this listing, so the condition guarding the MBAFF
 * section, the declaration of `mv` and part of each `if` are not visible. */
1128 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1129 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1131 /* there is no consistent mapping of mvs to neighboring locations that will
1132 * make mbaff happy, so we can't move all this logic to fill_caches */
1134 MpegEncContext *s = &h->s;
1135 const int *mb_types = s->current_picture_ptr->mb_type;
/* The cache slot at scan8[0]-2 is used as scratch storage: it is zeroed and *C
 * is pointed at it before any of the special cases below run. */
1137 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1138 *C = h->mv_cache[list][scan8[0]-2];
1141 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1142 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1143 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV(MV_OP, REF_OP, X4, Y4): reads the motion vector stored in the
 * current picture at 4x4 position (X4,Y4), copies it into the scratch slot with
 * MV_OP applied to the vertical component, and returns the matching ref_index
 * with REF_OP applied. It returns LIST_NOT_USED early when the macroblock at
 * that position neither uses `list` nor is 8x8. The macro contains `return`
 * statements, so every use exits fetch_diagonal_mv. */
1144 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1145 const int x4 = X4, y4 = Y4;\
1146 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1147 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1148 return LIST_NOT_USED;\
1149 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1150 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1151 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1152 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* Interlaced top-right neighbour: vertical component doubled, ref index halved. */
1154 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* Top-right unavailable but the left neighbour of the macroblock is available:
 * candidates are taken from the macroblock to the left (column mb_x*4-1). */
1157 if(topright_ref == PART_NOT_AVAILABLE
1158 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1159 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1161 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1162 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1165 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1166 && i >= scan8[0]+8){
1167 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1168 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Default path: use the top-right cache entry when it is available ... */
1174 if(topright_ref != PART_NOT_AVAILABLE){
1175 *C= h->mv_cache[list][ i - 8 + part_width ];
1176 return topright_ref;
1178 tprintf("topright MV not available\n");
/* ... otherwise fall back to the top-left cache entry. */
1180 *C= h->mv_cache[list][ i - 8 - 1 ];
1181 return h->ref_cache[list][ i - 8 - 1 ];
1186 * gets the predicted MV.
1187 * @param n the block index
1188 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1189 * @param mx the x component of the predicted motion vector
1190 * @param my the y component of the predicted motion vector
/* Predicts the motion vector of block n from three neighbours: A (left),
 * B (top) and C (diagonal, chosen by fetch_diagonal_mv). The number of
 * neighbours whose reference index equals `ref` selects the predictor.
 * NOTE(review): lines 1198, 1218-1220, 1222-1228 and 1230-1232 are missing from
 * this listing, so the declaration of C and the assignments made in the
 * single-match and left-only branches are not visible. */
1192 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1193 const int index8= scan8[n];
1194 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1195 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1196 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1197 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1199 int diagonal_ref, match_count;
1201 assert(part_width==1 || part_width==2 || part_width==4);
1211 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Each comparison contributes 0 or 1, so match_count is in 0..3. */
1212 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1213 tprintf("pred_motion match_count=%d\n", match_count);
/* Two or more matching neighbours: component-wise median of A, B and C. */
1214 if(match_count > 1){ //most common
1215 *mx= mid_pred(A[0], B[0], C[0]);
1216 *my= mid_pred(A[1], B[1], C[1]);
1217 }else if(match_count==1){
1221 }else if(top_ref==ref){
/* No match: special case when only the left neighbour is available;
 * the lines after this condition end with the median again. */
1229 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1233 *mx= mid_pred(A[0], B[0], C[0]);
1234 *my= mid_pred(A[1], B[1], C[1]);
1238 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1242 * gets the directionally predicted 16x8 MV.
1243 * @param n the block index
1244 * @param mx the x component of the predicted motion vector
1245 * @param my the y component of the predicted motion vector
/* Directional motion vector prediction for a 16x8 partition.
 * Looks at the neighbour above block 0 (B) and the neighbour left of block 8 (A)
 * and finally defers to pred_motion() with part_width 4.
 * NOTE(review): lines 1248, 1251, 1253-1259, 1262, 1264 and 1266-1272 are
 * missing from this listing, so the test that selects between the two
 * partitions and the assignments to *mx/*my are not visible. */
1247 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1249 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1250 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1252 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1260 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1261 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1263 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1265 if(left_ref == ref){
/* Generic predictor over the full macroblock width. */
1273 pred_motion(h, n, 4, list, ref, mx, my);
1277 * gets the directionally predicted 8x16 MV.
1278 * @param n the block index
1279 * @param mx the x component of the predicted motion vector
1280 * @param my the y component of the predicted motion vector
/* Directional motion vector prediction for an 8x16 partition.
 * Looks at the neighbour left of block 0 (A) and the diagonal neighbour of
 * block 4 (C, via fetch_diagonal_mv with part_width 2) and finally defers to
 * pred_motion() with part_width 2.
 * NOTE(review): lines 1283, 1286, 1288, 1290-1297, 1299, 1301 and 1303-1309 are
 * missing from this listing, so the partition test, the declarations of C and
 * diagonal_ref, and the assignments to *mx/*my are not visible. */
1282 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1284 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1285 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1287 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1289 if(left_ref == ref){
1298 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1300 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1302 if(diagonal_ref == ref){
/* Generic predictor for an 8-sample wide partition. */
1310 pred_motion(h, n, 2, list, ref, mx, my);
/* Motion vector prediction for a skipped macroblock, always in list 0.
 * @param mx receives the x component of the predicted motion vector
 * @param my receives the y component of the predicted motion vector
 * NOTE(review): lines 1322-1326 are missing from this listing, so what happens
 * when the condition below holds is not visible; presumably the vector is set
 * to zero and the function returns -- confirm against the full file. */
1313 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1314 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1315 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1317 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* Special case: a neighbour is unavailable, or a neighbour uses reference 0
 * with an all-zero motion vector (both 16-bit components tested as one 32-bit word). */
1319 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1320 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1321 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* Otherwise: regular 16x16 prediction for block 0, list 0, reference 0. */
1327 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Fills h->dist_scale_factor[] with one scale factor per list-0 reference,
 * derived from picture order counts: poc of the current picture, poc1 of the
 * first list-1 reference and poc0 of the list-0 reference.
 * NOTE(review): lines 1335, 1341, 1345-1347 and 1351-1353 are missing from this
 * listing (declaration of `i`, the else, and whatever guards the second loop). */
1332 static inline void direct_dist_scale_factor(H264Context * const h){
1333 const int poc = h->s.current_picture_ptr->poc;
1334 const int poc1 = h->ref_list[1][0].poc;
1336 for(i=0; i<h->ref_count[0]; i++){
1337 int poc0 = h->ref_list[0][i].poc;
/* td: distance between the two references, clipped to -128..127. */
1338 int td = clip(poc1 - poc0, -128, 127);
/* Zero distance would divide by zero below; 256 is stored instead. */
1339 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1340 h->dist_scale_factor[i] = 256;
/* tb: distance from the list-0 reference to the current picture. */
1342 int tb = clip(poc - poc0, -128, 127);
/* tx approximates 16384/td with rounding. */
1343 int tx = (16384 + (ABS(td) >> 1)) / td;
1344 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* The field variant gets the same factor at entries 2*i and 2*i+1. */
1348 for(i=0; i<h->ref_count[0]; i++){
1349 h->dist_scale_factor_field[2*i] =
1350 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the reference counts and reference POCs of the current picture and
 * builds h->map_col_to_list0[][], which maps each reference index of the first
 * list-1 picture (ref1) to the index of the reference with the same POC in the
 * current lists.
 * NOTE(review): lines 1358, 1367, 1369, 1377-1381 and 1387-1391 are missing
 * from this listing (declarations, the statement after the B_TYPE test, the
 * tail of the search loop and whatever guards the field mapping). */
1354 static inline void direct_ref_list_init(H264Context * const h){
1355 MpegEncContext * const s = &h->s;
1356 Picture * const ref1 = &h->ref_list[1][0];
1357 Picture * const cur = s->current_picture_ptr;
/* NOTE(review): as far as the visible lines show, both ref_count entries are
 * assigned again from h->ref_count[] in the loop right below, which would make
 * these two assignments dead -- confirm against the full file. */
1359 if(cur->pict_type == I_TYPE)
1360 cur->ref_count[0] = 0;
1361 if(cur->pict_type != B_TYPE)
1362 cur->ref_count[1] = 0;
1363 for(list=0; list<2; list++){
1364 cur->ref_count[list] = h->ref_count[list];
1365 for(j=0; j<h->ref_count[list]; j++)
1366 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1368 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
/* For every reference of ref1, search the current list for the same POC. */
1370 for(list=0; list<2; list++){
1371 for(i=0; i<ref1->ref_count[list]; i++){
1372 const int poc = ref1->ref_poc[list][i];
1373 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1374 for(j=0; j<h->ref_count[list]; j++)
1375 if(h->ref_list[list][j].poc == poc){
1376 h->map_col_to_list0[list][i] = j;
/* Field variant: frame index j maps to field indices 2*j and 2*j+1. */
1382 for(list=0; list<2; list++){
1383 for(i=0; i<ref1->ref_count[list]; i++){
1384 j = h->map_col_to_list0[list][i];
1385 h->map_col_to_list0_field[list][2*i] = 2*j;
1386 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Fills ref_cache / mv_cache (and h->sub_mb_type[]) for a direct-predicted
 * macroblock and updates *mb_type accordingly.
 * Two modes are handled: spatial direct (h->direct_spatial_mv_pred set), where
 * references and vectors come from the neighbours of the current macroblock,
 * and temporal direct, where the vectors of the co-located macroblock in the
 * first list-1 reference are scaled by dist_scale_factor.
 * @param mb_type in/out macroblock type
 * NOTE(review): the embedded line numbers show many gaps in this function
 * (declarations, else branches, `continue` statements, closing braces), so the
 * comments below only describe what the visible lines do. */
1392 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1393 MpegEncContext * const s = &h->s;
1394 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1395 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1396 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* Data of the co-located macroblock in the first list-1 reference picture:
 * its type, its list-0/list-1 motion vectors and its reference indices. */
1397 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1398 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1399 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1400 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1401 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1402 const int is_b8x8 = IS_8X8(*mb_type);
/* Type bits for which the co-located macroblock is handled as one 16x16 unit. */
1406 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose the partitioning of the direct macroblock from the co-located type. */
1407 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1408 /* FIXME save sub mb types from previous frames (or derive from MVs)
1409 * so we know exactly what block size to use */
1410 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1411 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1412 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1413 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1414 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1420 *mb_type |= MB_TYPE_DIRECT2;
1422 *mb_type |= MB_TYPE_INTERLACED;
1424 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1426 if(h->direct_spatial_mv_pred){
1431 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1433 /* ref = min(neighbors) */
1434 for(list=0; list<2; list++){
/* refa/refb/refc: left, top and top-right neighbours; the top-left entry is
 * the substitute for refc (the guarding condition is not visible). */
1435 int refa = h->ref_cache[list][scan8[0] - 1];
1436 int refb = h->ref_cache[list][scan8[0] - 8];
1437 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1439 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1441 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1443 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* No usable neighbour reference in either list: reference 0, zero vectors. */
1449 if(ref[0] < 0 && ref[1] < 0){
1450 ref[0] = ref[1] = 0;
1451 mv[0][0] = mv[0][1] =
1452 mv[1][0] = mv[1][1] = 0;
1454 for(list=0; list<2; list++){
1456 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1458 mv[list][0] = mv[list][1] = 0;
/* Drop the list flag of a list that ends up without a reference. */
1463 *mb_type &= ~MB_TYPE_P0L1;
1464 sub_mb_type &= ~MB_TYPE_P0L1;
1465 }else if(ref[0] < 0){
1466 *mb_type &= ~MB_TYPE_P0L0;
1467 sub_mb_type &= ~MB_TYPE_P0L0;
1470 if(IS_16X16(*mb_type)){
1471 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1472 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* Co-located block is inter-coded, uses reference 0 and has both vector
 * components within +-1: the visible lines of this branch choose between the
 * predicted vector and a zero vector per list. */
1473 if(!IS_INTRA(mb_type_col)
1474 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1475 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1476 && (h->x264_build>33 || !h->x264_build)))){
1478 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1480 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1482 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1484 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1486 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* Per-8x8 handling: i8 selects the quadrant (x8 = column, y8 = row). */
1490 for(i8=0; i8<4; i8++){
1491 const int x8 = i8&1;
1492 const int y8 = i8>>1;
1494 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1496 h->sub_mb_type[i8] = sub_mb_type;
1498 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1499 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1500 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1501 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1504 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1505 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1506 && (h->x264_build>33 || !h->x264_build)))){
/* Use the list-0 vectors of the co-located block when its list-0 reference
 * is 0, otherwise its list-1 vectors. */
1507 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1508 if(IS_SUB_8X8(sub_mb_type)){
/* One vector per quadrant, sampled at offset 3*x8, 3*y8 in 4x4 units. */
1509 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1510 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1512 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1514 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* Otherwise each of the four 4x4 blocks of the quadrant is tested separately. */
1517 for(i4=0; i4<4; i4++){
1518 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1519 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1521 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1523 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1529 }else{ /* direct temporal mv pred */
1530 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1531 const int *dist_scale_factor = h->dist_scale_factor;
/* Interlaced macroblocks use the field variants of both tables. */
1534 if(IS_INTERLACED(*mb_type)){
1535 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1536 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1537 dist_scale_factor = h->dist_scale_factor_field;
/* Current and co-located macroblock differ in frame/field coding. */
1539 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1540 /* FIXME assumes direct_8x8_inference == 1 */
/* pair_xy addresses the top macroblock of the current macroblock pair. */
1541 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1542 int mb_types_col[2];
1545 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1546 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1547 | (*mb_type & MB_TYPE_INTERLACED);
1548 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1550 if(IS_INTERLACED(*mb_type)){
1551 /* frame to field scaling */
1552 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1553 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* Move the co-located pointers up by one macroblock row (2 rows of 8x8
 * entries, 4 rows of 4x4 entries); the guarding condition is not visible. */
1555 l1ref0 -= 2*h->b8_stride;
1556 l1ref1 -= 2*h->b8_stride;
1557 l1mv0 -= 4*h->b_stride;
1558 l1mv1 -= 4*h->b_stride;
1562 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1563 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1565 *mb_type |= MB_TYPE_16x8;
1567 *mb_type |= MB_TYPE_8x8;
1569 /* field to frame scaling */
1570 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1571 * but in MBAFF, top and bottom POC are equal */
1572 int dy = (s->mb_y&1) ? 1 : 2;
1574 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1575 l1ref0 += dy*h->b8_stride;
1576 l1ref1 += dy*h->b8_stride;
1577 l1mv0 += 2*dy*h->b_stride;
1578 l1mv1 += 2*dy*h->b_stride;
1581 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1583 *mb_type |= MB_TYPE_16x16;
1585 *mb_type |= MB_TYPE_8x8;
1588 for(i8=0; i8<4; i8++){
1589 const int x8 = i8&1;
1590 const int y8 = i8>>1;
1592 const int16_t (*l1mv)[2]= l1mv0;
1594 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1596 h->sub_mb_type[i8] = sub_mb_type;
/* List 1 always uses reference 0 in temporal direct mode. */
1598 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* Intra co-located block: reference 0 and zero vectors for both lists. */
1599 if(IS_INTRA(mb_types_col[y8])){
1600 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1601 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1602 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* NOTE(review): y_shift is declared in a line that is not visible here. */
1606 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1608 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1610 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1613 scale = dist_scale_factor[ref0];
1614 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1617 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
/* The vertical component is rescaled between frame and field units first. */
1618 int my_col = (mv_col[1]<<y_shift)/2;
/* list-0 vector = scale * mv_col / 256 (rounded); list-1 vector = list-0 - mv_col. */
1619 int mx = (scale * mv_col[0] + 128) >> 8;
1620 int my = (scale * my_col + 128) >> 8;
1621 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1622 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1629 /* one-to-one mv scaling */
1631 if(IS_16X16(*mb_type)){
1632 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1633 if(IS_INTRA(mb_type_col)){
1634 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1635 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1636 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
/* Prefer the list-0 reference/vector of the co-located block; fall back to
 * its list-1 data when the list-0 reference is negative. */
1638 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1639 : map_col_to_list0[1][l1ref1[0]];
1640 const int scale = dist_scale_factor[ref0];
1641 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1643 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1644 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1645 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1646 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1647 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1650 for(i8=0; i8<4; i8++){
1651 const int x8 = i8&1;
1652 const int y8 = i8>>1;
1654 const int16_t (*l1mv)[2]= l1mv0;
1656 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1658 h->sub_mb_type[i8] = sub_mb_type;
1659 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1660 if(IS_INTRA(mb_type_col)){
1661 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1662 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1663 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1667 ref0 = l1ref0[x8 + y8*h->b8_stride];
1669 ref0 = map_col_to_list0[0][ref0];
1671 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1674 scale = dist_scale_factor[ref0];
1676 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1677 if(IS_SUB_8X8(sub_mb_type)){
1678 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1679 int mx = (scale * mv_col[0] + 128) >> 8;
1680 int my = (scale * mv_col[1] + 128) >> 8;
1681 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1682 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
/* 4x4 granularity: scale each co-located 4x4 vector on its own. */
1684 for(i4=0; i4<4; i4++){
1685 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1686 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1687 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1688 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1689 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1690 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the motion data of the current macroblock from the per-macroblock
 * caches into the picture-wide arrays: motion vectors, (with CABAC) motion
 * vector differences, reference indices and the direct flags.
 * @param mb_type type of the current macroblock
 * NOTE(review): lines 1701-1702, 1705, 1707, 1709-1711, 1714, 1716 and several
 * closing lines are missing from this listing, so the loop over `y` and what
 * happens for a list that is not used are not visible. */
1697 static inline void write_back_motion(H264Context *h, int mb_type){
1698 MpegEncContext * const s = &h->s;
/* Position of the macroblock in 4x4-block and 8x8-block units. */
1699 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1700 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* List 0 unused: mark its four 8x8 reference entries as LIST_NOT_USED. */
1703 if(!USES_LIST(mb_type, 0))
1704 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1706 for(list=0; list<2; list++){
1708 if(!USES_LIST(mb_type, list))
/* Each 64-bit copy moves two adjacent entries of row y (offsets +0 and +2). */
1712 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1713 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* The mvd table is only written when CABAC is enabled. */
1715 if( h->pps.cabac ) {
1717 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1718 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 quadrant, read at blocks 0, 4, 8 and 12. */
1723 uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1724 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1725 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1726 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1727 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC: record which of quadrants 1..3 are direct-predicted.
 * Quadrant 0 is not written in the visible lines. */
1731 if(h->slice_type == B_TYPE && h->pps.cabac){
1732 if(IS_8X8(mb_type)){
1733 uint8_t *direct_table = &h->direct_table[b8_xy];
1734 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1735 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1736 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1742 * Decodes a network abstraction layer unit.
1743 * @param consumed is the number of bytes used as input
1744 * @param length is the length of the array
1745 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1746 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the one-byte NAL header and removes escape sequences from the payload.
 * nal_ref_idc and nal_unit_type are stored in h. When no 00 00 0x (x <= 3)
 * pattern is found, *dst_length and *consumed are set without copying;
 * otherwise the payload is copied into h->rbsp_buffer while escapes are removed.
 * NOTE(review): lines 1749-1751, 1755-1757, 1760, 1765, 1767-1772, 1776-1778,
 * 1783-1784, 1788-1791, 1793-1795, 1797-1799 and 1802 onwards are missing from
 * this listing, so declarations, the advance past the header byte, the loop
 * bodies and the return statements are not visible. */
1748 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1752 // src[0]&0x80; //forbidden bit
/* Header byte: bits 5..7 are nal_ref_idc, bits 0..4 are nal_unit_type. */
1753 h->nal_ref_idc= src[0]>>5;
1754 h->nal_unit_type= src[0]&0x1F;
/* NOTE(review): hex dump of the payload to stdout; presumably compiled out by
 * preprocessor lines that are not visible here -- confirm. */
1758 for(i=0; i<length; i++)
1759 printf("%2X ", src[i]);
/* Scan two bytes at a time for a zero byte, then step back one byte if the
 * previous byte was zero too, so that a 00 00 pair is not missed. */
1761 for(i=0; i+1<length; i+=2){
1762 if(src[i]) continue;
1763 if(i>0 && src[i-1]==0) i--;
1764 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1766 /* startcode, so we must be past the end */
1773 if(i>=length-1){ //no escaped 0
1774 *dst_length= length;
1775 *consumed= length+1; //+1 for the header
/* NOTE(review): no check of the av_fast_realloc() result is visible before
 * `dst` is used; line 1781 is missing from this listing -- confirm. */
1779 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1780 dst= h->rbsp_buffer;
1782 //printf("decoding esc\n");
1785 //remove escapes (very rare 1:2^22)
1786 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1787 if(src[si+2]==3){ //escape
1792 }else //next start code
/* Ordinary byte: copy it through unchanged. */
1796 dst[di++]= src[si++];
1800 *consumed= si + 1;//+1 for the header
1801 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1807 * @param src the data which should be escaped
1808 * @param dst the target buffer, dst+1 == src is allowed as a special case
1809 * @param length the length of the src data
1810 * @param dst_length the length of the dst array
1811 * @returns length of escaped data in bytes or -1 if an error occurred
/* Writes a NAL unit into dst: one header byte built from h->nal_ref_idc and
 * h->nal_unit_type, followed by the payload from src. Payloads without any
 * 00 00 0x (x <= 3) pattern are copied directly; otherwise the escaped payload
 * is assembled in h->rbsp_buffer and copied to dst+1 afterwards.
 * NOTE(review): lines 1815-1817, 1819, 1821, 1823-1824, 1828, 1830-1834, 1836,
 * 1838-1840, 1842-1843, 1849-1852, 1856, 1858-1859, 1861, 1863 and 1865
 * onwards are missing from this listing, so the declaration of `temp`, the
 * escape counting, the escape byte insertion and all but one return statement
 * are not visible. */
1813 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1814 int i, escape_count, si, di;
1818 assert(dst_length>0);
/* Header byte: nal_ref_idc in bits 5 and up, nal_unit_type below. */
1820 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
/* Empty payload: only the header byte is produced. */
1822 if(length==0) return 1;
/* Same two-byte-stride zero scan as in decode_nal. */
1825 for(i=0; i<length; i+=2){
1826 if(src[i]) continue;
1827 if(i>0 && src[i-1]==0)
1829 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1835 if(escape_count==0){
1837 memcpy(dst+1, src, length);
/* Output needs the payload, one byte per escape and the header byte. */
1841 if(length + escape_count + 1> dst_length)
1844 //this should be damn rare (hopefully)
/* NOTE(review): no check of the av_fast_realloc() result is visible before
 * `temp` is used -- confirm against the full file. */
1846 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1847 temp= h->rbsp_buffer;
1848 //printf("encoding esc\n");
1853 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1854 temp[di++]= 0; si++;
1855 temp[di++]= 0; si++;
1857 temp[di++]= src[si++];
1860 temp[di++]= src[si++];
1862 memcpy(dst+1, temp, length+escape_count);
1864 assert(di == length+escape_count);
1870 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of mpeg4
/* Pads the bitstream writer up to the next byte boundary with zero bits.
 * NOTE(review): lines 1873-1874 are missing from this listing, so the
 * declaration of `length` and anything written before the padding are not
 * visible. */
1872 static void encode_rbsp_trailing(PutBitContext *pb){
/* Number of bits needed to reach the next multiple of 8 (0..7). */
1875 length= (-put_bits_count(pb))&7;
1876 if(length) put_bits(pb, length, 0);
1881 * identifies the exact end of the bitstream
1882 * @return the length of the trailing, or 0 if damaged
/* Inspects the trailing bits that start at *src.
 * NOTE(review): only the signature and the trace call are visible; lines
 * 1885-1887 and 1889 onwards are missing from this listing, so the definition
 * of `v` and the search itself cannot be described from here. */
1884 static int decode_rbsp_trailing(uint8_t *src){
1888 tprintf("rbsp trailing %X\n", v);
1898 * IDCT-transforms the 16 DC values and dequantizes them.
1899 * @param qp quantization parameter
/* Two-pass 4x4 butterfly transform over 16 values of `block`, with the result
 * scaled by qmul: each output is (sum*qmul + 128) >> 8, written back in place.
 * @param qmul multiplier applied to every output value
 * NOTE(review): `qp` is not used in any visible line. `stride` is not declared
 * in the visible lines; presumably it is a macro defined in a missing line.
 * Lines 1902-1903, 1907, 1909-1910, 1916-1923, 1929 and 1934 onwards are
 * missing from this listing, so the loop headers and the stores into temp[]
 * are not visible. */
1901 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1904 int temp[16]; //FIXME check if this is a good idea
/* Offsets of the four positions of one row / one column, in units of stride. */
1905 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1906 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1908 //memset(block, 64, 2*256);
/* First pass: sums and differences of the four values at offset + stride*{0,4,1,5}. */
1911 const int offset= y_offset[i];
1912 const int z0= block[offset+stride*0] + block[offset+stride*4];
1913 const int z1= block[offset+stride*0] - block[offset+stride*4];
1914 const int z2= block[offset+stride*1] - block[offset+stride*5];
1915 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass: same butterfly over column i of temp[]. */
1924 const int offset= x_offset[i];
1925 const int z0= temp[4*0+i] + temp[4*2+i];
1926 const int z1= temp[4*0+i] - temp[4*2+i];
1927 const int z2= temp[4*1+i] - temp[4*3+i];
1928 const int z3= temp[4*1+i] + temp[4*3+i];
1930 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1931 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1932 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1933 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1939 * DCT-transforms the 16 DC values.
1940 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of the luma DC transform: the same two-pass butterfly
 * over 16 values of `block`, with every output halved (>>1) and written back
 * in place.
 * NOTE(review): `stride` is not declared in the visible lines; presumably it is
 * a macro defined elsewhere. Lines 1944, 1948-1949, 1955-1962, 1968 and 1973
 * onwards are missing from this listing, so the loop headers and the stores
 * into temp[] are not visible. */
1942 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1943 // const int qmul= dequant_coeff[qp][0];
1945 int temp[16]; //FIXME check if this is a good idea
1946 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1947 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* First pass over the four values at offset + stride*{0,4,1,5}. */
1950 const int offset= y_offset[i];
1951 const int z0= block[offset+stride*0] + block[offset+stride*4];
1952 const int z1= block[offset+stride*0] - block[offset+stride*4];
1953 const int z2= block[offset+stride*1] - block[offset+stride*5];
1954 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass over column i of temp[]. */
1963 const int offset= x_offset[i];
1964 const int z0= temp[4*0+i] + temp[4*2+i];
1965 const int z1= temp[4*0+i] - temp[4*2+i];
1966 const int z2= temp[4*1+i] - temp[4*3+i];
1967 const int z3= temp[4*1+i] + temp[4*3+i];
1969 block[stride*0 +offset]= (z0 + z3)>>1;
1970 block[stride*2 +offset]= (z1 + z2)>>1;
1971 block[stride*8 +offset]= (z1 - z2)>>1;
1972 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 transform of the four chroma DC values with dequantisation: the values
 * at block[0], block[16], block[32] and block[48] are combined and each result
 * is multiplied by qmul and shifted right by 7, in place.
 * NOTE(review): `qp` is not used in any visible line. Lines 1983-1984 and
 * 1989-1994 are missing from this listing, so the declarations and the
 * intermediate butterfly step (including the definition of `e`) are not
 * visible. */
1980 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* Row step is 32 entries, column step is 16 entries. */
1981 const int stride= 16*2;
1982 const int xStride= 16;
1985 a= block[stride*0 + xStride*0];
1986 b= block[stride*0 + xStride*1];
1987 c= block[stride*1 + xStride*0];
1988 d= block[stride*1 + xStride*1];
1995 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1996 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1997 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1998 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* 2x2 transform of the four chroma DC values without scaling: same access
 * pattern as chroma_dc_dequant_idct_c, results written back in place.
 * NOTE(review): lines 2005-2006 and 2011-2016 are missing from this listing, so
 * the declarations and the intermediate butterfly step (including the
 * definition of `e`) are not visible. */
2002 static void chroma_dc_dct_c(DCTELEM *block){
/* Row step is 32 entries, column step is 16 entries. */
2003 const int stride= 16*2;
2004 const int xStride= 16;
2007 a= block[stride*0 + xStride*0];
2008 b= block[stride*0 + xStride*1];
2009 c= block[stride*1 + xStride*0];
2010 d= block[stride*1 + xStride*1];
2017 block[stride*0 + xStride*0]= (a+c);
2018 block[stride*0 + xStride*1]= (e+b);
2019 block[stride*1 + xStride*0]= (a-c);
2020 block[stride*1 + xStride*1]= (e-b);
2025 * gets the chroma qp.
/* Maps a luma quantiser to the chroma quantiser.
 * @param chroma_qp_index_offset offset added to qscale before the lookup
 * @param qscale                 luma quantiser
 * @return chroma_qp[] entry at qscale + offset, with the index clipped to 0..51 */
2027 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2029 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Forward 4x4 transform of the difference between two 4x4 sample blocks.
 * @param block  receives the 16 coefficients
 * @param src1   first sample block
 * @param src2   second sample block, subtracted from src1
 * @param stride distance in bytes between rows of src1 and of src2
 * NOTE(review): lines 2035, 2037-2038, 2047, 2052-2054, 2059 and 2064 onwards
 * are missing from this listing, so the declaration of `i` and both loop
 * headers are not visible. */
2034 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2036 //FIXME try int temp instead of block
/* First pass: differences of row i, then a 4-point butterfly into block[4*i..]. */
2039 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2040 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2041 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2042 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2043 const int z0= d0 + d3;
2044 const int z3= d0 - d3;
2045 const int z1= d1 + d2;
2046 const int z2= d1 - d2;
2048 block[0 + 4*i]= z0 + z1;
2049 block[1 + 4*i]= 2*z3 + z2;
2050 block[2 + 4*i]= z0 - z1;
2051 block[3 + 4*i]= z3 - 2*z2;
/* Second pass: the same butterfly down column i of block, in place. */
2055 const int z0= block[0*4 + i] + block[3*4 + i];
2056 const int z3= block[0*4 + i] - block[3*4 + i];
2057 const int z1= block[1*4 + i] + block[2*4 + i];
2058 const int z2= block[1*4 + i] - block[2*4 + i];
2060 block[0*4 + i]= z0 + z1;
2061 block[1*4 + i]= 2*z3 + z2;
2062 block[2*4 + i]= z0 - z1;
2063 block[3*4 + i]= z3 - 2*z2;
2068 //FIXME need to check that this doesn't overflow signed 32 bit for low qp, I am not sure, it's very close
2069 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Quantises the coefficients of `block` in place using quant_coeff[qscale].
 * @param block       coefficients, overwritten with quantised levels
 * @param scantable   maps scan position i to coefficient index j
 * @param qscale      quantiser, selects the row of quant_coeff[]
 * @param intra       non-zero selects the larger rounding bias (1/3 instead of 1/6)
 * @param seperate_dc non-zero enables the separate handling of block[0] below
 * @return last_non_zero
 * NOTE(review): lines 2071, 2076-2080, 2087, 2089-2090, 2092-2093, 2095-2098,
 * 2105, 2107-2108, 2110-2111, 2113-2124, 2131, 2133-2134 and 2136-2143 are
 * missing from this listing, so the branch structure, the stores into block[]
 * and the updates of last_non_zero are not visible. */
2070 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2072 const int * const quant_table= quant_coeff[qscale];
2073 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/threshold2 implement a one-comparison magnitude test:
 * (unsigned)(level+threshold1) > threshold2 holds exactly when level lies
 * outside -threshold1..threshold1. */
2074 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2075 const unsigned int threshold2= (threshold1<<1);
/* DC variant with 2 bits less precision; it multiplies by quant_coeff[qscale+18]. */
2081 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2082 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2083 const unsigned int dc_threshold2= (dc_threshold1<<1);
2085 int level= block[0]*quant_coeff[qscale+18][0];
2086 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2088 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2091 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2094 // last_non_zero = i;
/* DC variant with 1 bit more precision; it multiplies by quant_table[0]. */
2099 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2100 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2101 const unsigned int dc_threshold2= (dc_threshold1<<1);
2103 int level= block[0]*quant_table[0];
2104 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2106 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2109 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2112 // last_non_zero = i;
/* Remaining coefficients, visited in scan order. */
2125 const int j= scantable[i];
2126 int level= block[j]*quant_table[j];
2128 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2129 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2130 if(((unsigned)(level+threshold1))>threshold2){
/* Both lines round the magnitude: bias is added to |level| before the shift. */
2132 level= (bias + level)>>QUANT_SHIFT;
2135 level= (bias - level)>>QUANT_SHIFT;
2144 return last_non_zero;
/**
 * 4x4 intra prediction, vertical mode: every row of the block becomes a copy
 * of the four samples directly above it.
 * @param src      top-left sample of the 4x4 block; src[-stride .. -stride+3] is read
 * @param topright unused by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    uint8_t above[4];
    int y;

    (void)topright;

    /* Byte-wise copies instead of uint32_t loads/stores through a casted
     * pointer: no alignment requirement on src and no type punning. */
    memcpy(above, src - stride, sizeof(above));
    for(y=0; y<4; y++)
        memcpy(src + y*stride, above, sizeof(above));
}
/**
 * 4x4 intra prediction, horizontal mode: every row of the block is filled
 * with the sample immediately to its left.
 * @param src      top-left sample of the 4x4 block; src[-1 + y*stride] is read
 * @param topright unused by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    (void)topright;

    for(y=0; y<4; y++){
        uint8_t * const row= src + y*stride;
        /* memset replicates the byte; the former sample*0x01010101 was an int
         * multiplication that overflows for samples >= 128. */
        memset(row, row[-1], 4);
    }
}
/**
 * 4x4 intra prediction, DC mode: the block is filled with the rounded mean of
 * the four samples above and the four samples to the left.
 * @param src      top-left sample of the 4x4 block
 * @param topright unused by this mode
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    int y;

    (void)topright;

    /* dc is at most 255; memset replicates it without the signed overflow of
     * the former dc*0x01010101 and without a casted uint32_t store. */
    for(y=0; y<4; y++)
        memset(src + y*stride, dc, 4);
}
/**
 * 4x4 intra prediction, DC from the left column only: fills the block with the
 * rounded mean of the four pixels to its left.
 * @param src      top-left pixel of the block to fill (written as 32-bit words)
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const unsigned int sum= src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride];
    /* unsigned arithmetic: dc*0x01010101 overflows a signed int for dc >= 128 */
    const uint32_t fill= ((sum + 2)>>2) * 0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 intra prediction, DC from the top row only: fills the block with the
 * rounded mean of the four pixels above it.
 * @param src      top-left pixel of the block to fill (written as 32-bit words)
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const unsigned int sum= src[-stride] + src[1-stride] + src[2-stride] + src[3-stride];
    /* unsigned arithmetic: dc*0x01010101 overflows a signed int for dc >= 128 */
    const uint32_t fill= ((sum + 2)>>2) * 0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 intra prediction, DC without neighbours: fills the block with the
 * constant 128.
 * @param src      top-left pixel of the block to fill (written as 32-bit words)
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/*
 * Neighbour-loading helpers for the 4x4 predictors. Each expands to a run of
 * complete declarations (trailing ';' included), so it is invoked without a
 * semicolon. They expect `src`, `stride` and, for the top-right edge,
 * `topright` to be in scope.
 * The final line of each macro deliberately has no '\' so the next #define is
 * not spliced into the macro body.
 */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];

#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];
/**
 * 4x4 intra prediction, diagonal down-right mode. Every pixel on a diagonal
 * (constant x-y) receives the same 3-tap (1,2,1) filtered value taken from the
 * left column, the top-left corner and the top row.
 * @param src      top-left pixel of the block to fill
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride], t3= src[3-stride];
    const int l0= src[-1+0*stride], l1= src[-1+1*stride];
    const int l2= src[-1+2*stride], l3= src[-1+3*stride];
    uint8_t diag[7];   /* diag[k] belongs to the diagonal x - y == k - 3 */
    int x, y;

    /* all neighbours are read above, before the block is written */
    diag[0]= (l3 + 2*l2 + l1 + 2)>>2;
    diag[1]= (l2 + 2*l1 + l0 + 2)>>2;
    diag[2]= (l1 + 2*l0 + lt + 2)>>2;
    diag[3]= (l0 + 2*lt + t0 + 2)>>2;
    diag[4]= (lt + 2*t0 + t1 + 2)>>2;
    diag[5]= (t0 + 2*t1 + t2 + 2)>>2;
    diag[6]= (t1 + 2*t2 + t3 + 2)>>2;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x+y*stride]= diag[x-y+3];
}
/**
 * 4x4 intra prediction, diagonal down-left mode. Every pixel on an
 * anti-diagonal (constant x+y) receives the same 3-tap (1,2,1) filtered value
 * of the top / top-right row; the last one uses (t6 + 3*t7 + 2)>>2.
 * @param src      top-left pixel of the block to fill
 * @param topright four pixels above and to the right of the block
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[8];
    uint8_t diag[7];   /* diag[k] belongs to the anti-diagonal x + y == k */
    int x, y, k;

    /* read every neighbour before the block is written */
    for(k=0; k<4; k++){
        t[k  ]= src[k-stride];
        t[k+4]= topright[k];
    }

    for(k=0; k<6; k++)
        diag[k]= (t[k] + t[k+2] + 2*t[k+1] + 2)>>2;
    diag[6]= (t[6] + 3*t[7] + 2)>>2;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x+y*stride]= diag[x+y];
}
/**
 * 4x4 intra prediction, vertical-right mode. Even rows use 2-tap averages of
 * the top edge, odd rows use 3-tap (1,2,1) filters; rows 2 and 3 are rows 0
 * and 1 shifted right by one pixel with a left-column value shifted in.
 * @param src      top-left pixel of the block to fill
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride], t3= src[3-stride];
    const int l0= src[-1+0*stride], l1= src[-1+1*stride], l2= src[-1+2*stride];
    /* 2-tap values of row 0 */
    const uint8_t a0= (lt + t0 + 1)>>1;
    const uint8_t a1= (t0 + t1 + 1)>>1;
    const uint8_t a2= (t1 + t2 + 1)>>1;
    const uint8_t a3= (t2 + t3 + 1)>>1;
    /* 3-tap values of row 1 */
    const uint8_t b0= (l0 + 2*lt + t0 + 2)>>2;
    const uint8_t b1= (lt + 2*t0 + t1 + 2)>>2;
    const uint8_t b2= (t0 + 2*t1 + t2 + 2)>>2;
    const uint8_t b3= (t1 + 2*t2 + t3 + 2)>>2;
    /* values entering from the left column in rows 2 and 3 */
    const uint8_t c2= (lt + 2*l0 + l1 + 2)>>2;
    const uint8_t c3= (l0 + 2*l1 + l2 + 2)>>2;
    uint8_t *row;

    row= src;            row[0]= a0; row[1]= a1; row[2]= a2; row[3]= a3;
    row= src + stride;   row[0]= b0; row[1]= b1; row[2]= b2; row[3]= b3;
    row= src + 2*stride; row[0]= c2; row[1]= a0; row[2]= a1; row[3]= a2;
    row= src + 3*stride; row[0]= c3; row[1]= b0; row[2]= b1; row[3]= b2;
}
/**
 * 4x4 intra prediction, vertical-left mode. Even rows use 2-tap averages of
 * the top / top-right row, odd rows use 3-tap (1,2,1) filters; rows 2 and 3
 * are rows 0 and 1 shifted left by one pixel.
 * @param src      top-left pixel of the block to fill
 * @param topright four pixels above and to the right of the block (the last
 *                 one is not needed by this mode)
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride], t3= src[3-stride];
    const int t4= topright[0], t5= topright[1], t6= topright[2];
    /* 2-tap averages */
    const uint8_t a0= (t0 + t1 + 1)>>1;
    const uint8_t a1= (t1 + t2 + 1)>>1;
    const uint8_t a2= (t2 + t3 + 1)>>1;
    const uint8_t a3= (t3 + t4 + 1)>>1;
    const uint8_t a4= (t4 + t5 + 1)>>1;
    /* 3-tap filtered values */
    const uint8_t b0= (t0 + 2*t1 + t2 + 2)>>2;
    const uint8_t b1= (t1 + 2*t2 + t3 + 2)>>2;
    const uint8_t b2= (t2 + 2*t3 + t4 + 2)>>2;
    const uint8_t b3= (t3 + 2*t4 + t5 + 2)>>2;
    const uint8_t b4= (t4 + 2*t5 + t6 + 2)>>2;
    uint8_t *row;

    row= src;            row[0]= a0; row[1]= a1; row[2]= a2; row[3]= a3;
    row= src + stride;   row[0]= b0; row[1]= b1; row[2]= b2; row[3]= b3;
    row= src + 2*stride; row[0]= a1; row[1]= a2; row[2]= a3; row[3]= a4;
    row= src + 3*stride; row[0]= b1; row[1]= b2; row[2]= b3; row[3]= b4;
}
/**
 * 4x4 intra prediction, horizontal-up mode. The value at (x,y) depends only on
 * x + 2*y: alternating 2-tap and 3-tap filters of the left column, then
 * (l2 + 3*l3 + 2)>>2, then plain l3 for every remaining position.
 * @param src      top-left pixel of the block to fill
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1+0*stride], l1= src[-1+1*stride];
    const int l2= src[-1+2*stride], l3= src[-1+3*stride];
    uint8_t z[7];      /* z[k] is used where x + 2*y == k; z[6] where it is >= 6 */
    int x, y;

    z[0]= (l0 + l1 + 1)>>1;
    z[1]= (l0 + 2*l1 + l2 + 2)>>2;
    z[2]= (l1 + l2 + 1)>>1;
    z[3]= (l1 + 2*l2 + l3 + 2)>>2;
    z[4]= (l2 + l3 + 1)>>1;
    z[5]= (l2 + 2*l3 + l3 + 2)>>2;
    z[6]= l3;

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int k= x + 2*y;
            src[x+y*stride]= z[k < 6 ? k : 6];
        }
    }
}
/**
 * 4x4 intra prediction, horizontal-down mode. Each row starts with a 2-tap /
 * 3-tap pair taken from the left column, followed by the first two pixels of
 * the row above; the top row is completed with 3-tap values of the top edge.
 * @param src      top-left pixel of the block to fill
 * @param topright not read by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride];
    const int l0= src[-1+0*stride], l1= src[-1+1*stride];
    const int l2= src[-1+2*stride], l3= src[-1+3*stride];
    /* 2-tap averages down the left edge (first pixel of each row) */
    const uint8_t e0= (lt + l0 + 1)>>1;
    const uint8_t e1= (l0 + l1 + 1)>>1;
    const uint8_t e2= (l1 + l2 + 1)>>1;
    const uint8_t e3= (l2 + l3 + 1)>>1;
    /* 3-tap values down the left edge (second pixel of each row) */
    const uint8_t o0= (l0 + 2*lt + t0 + 2)>>2;
    const uint8_t o1= (lt + 2*l0 + l1 + 2)>>2;
    const uint8_t o2= (l0 + 2*l1 + l2 + 2)>>2;
    const uint8_t o3= (l1 + 2*l2 + l3 + 2)>>2;
    /* 3-tap values of the top edge (last two pixels of row 0) */
    const uint8_t u2= (lt + 2*t0 + t1 + 2)>>2;
    const uint8_t u3= (t0 + 2*t1 + t2 + 2)>>2;
    uint8_t *row;

    row= src;            row[0]= e0; row[1]= o0; row[2]= u2; row[3]= u3;
    row= src + stride;   row[0]= e1; row[1]= o1; row[2]= e0; row[3]= o0;
    row= src + 2*stride; row[0]= e2; row[1]= o2; row[2]= e1; row[3]= o1;
    row= src + 3*stride; row[0]= e3; row[1]= o3; row[2]= e2; row[3]= o2;
}
/**
 * 16x16 intra prediction, vertical mode: copies the 16 pixels above the block
 * into each of its 16 rows.
 * @param src    top-left pixel of the block to fill; rows are accessed as
 *               32-bit words
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];
    int i;

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= a;
        row[1]= b;
        row[2]= c;
        row[3]= d;
    }
}
/**
 * 16x16 intra prediction, horizontal mode: every row is filled with the pixel
 * immediately to its left.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        /* unsigned constant: pixel*0x01010101 does not fit a signed int for pixel >= 128 */
        const uint32_t fill= src[-1+i*stride]*0x01010101U;

        row[0]= row[1]= row[2]= row[3]= fill;
    }
}
/**
 * 16x16 intra prediction, DC mode: fills the block with the rounded mean of
 * the 16 pixels to the left and the 16 pixels above.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    unsigned int sum= 0;
    uint32_t fill;
    int i;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];
    }
    for(i=0; i<16; i++){
        sum+= src[i-stride];
    }

    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= fill;
    }
}
/**
 * 16x16 intra prediction, DC from the left column only: fills the block with
 * the rounded mean of the 16 pixels to its left.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    unsigned int sum= 0;
    uint32_t fill;
    int i;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];
    }

    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= fill;
    }
}
/**
 * 16x16 intra prediction, DC from the top row only: fills the block with the
 * rounded mean of the 16 pixels above it.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    unsigned int sum= 0;
    uint32_t fill;
    int i;

    for(i=0; i<16; i++){
        sum+= src[i-stride];
    }

    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    fill= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= fill;
    }
}
/**
 * 16x16 intra prediction, DC without neighbours: fills the block with the
 * constant 128.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t fill= 0x01010101U*128U;
    int i;

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= fill;
    }
}
2445 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2448 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2449 const uint8_t * const src0 = src+7-stride;
2450 const uint8_t *src1 = src+8*stride-1;
2451 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2452 int H = src0[1] - src0[-1];
2453 int V = src1[0] - src2[ 0];
2454 for(k=2; k<=8; ++k) {
2455 src1 += stride; src2 -= stride;
2456 H += k*(src0[k] - src0[-k]);
2457 V += k*(src1[0] - src2[ 0]);
2460 H = ( 5*(H/4) ) / 16;
2461 V = ( 5*(V/4) ) / 16;
2463 /* required for 100% accuracy */
2464 i = H; H = V; V = i;
2466 H = ( 5*H+32 ) >> 6;
2467 V = ( 5*V+32 ) >> 6;
2470 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2471 for(j=16; j>0; --j) {
2474 for(i=-16; i<0; i+=4) {
2475 src[16+i] = cm[ (b ) >> 5 ];
2476 src[17+i] = cm[ (b+ H) >> 5 ];
2477 src[18+i] = cm[ (b+2*H) >> 5 ];
2478 src[19+i] = cm[ (b+3*H) >> 5 ];
/**
 * 16x16 intra prediction, plane mode: pred16x16_plane_compat_c() with the
 * svq3 variant disabled.
 * @param src    top-left pixel of the block to fill
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
/**
 * 8x8 intra prediction, vertical mode: copies the eight pixels above the block
 * into each of its eight rows.
 * @param src    top-left pixel of the block to fill; rows are accessed as
 *               32-bit words
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
    }
}
/**
 * 8x8 intra prediction, horizontal mode: every row is filled with the pixel
 * immediately to its left.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        /* unsigned constant: pixel*0x01010101 does not fit a signed int for pixel >= 128 */
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101U;
    }
}
/**
 * 8x8 intra prediction, DC without neighbours: fills the block with the
 * constant 128.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
    }
}
/**
 * 8x8 intra prediction, DC from the left column only. The upper four rows are
 * filled with the rounded mean of the upper four left neighbours, the lower
 * four rows with the mean of the lower four.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    unsigned int sum_upper= 0, sum_lower= 0;
    uint32_t dc0, dc2;
    int i;

    for(i=0; i<4; i++){
        sum_upper+= src[-1+i*stride];
        sum_lower+= src[-1+(i+4)*stride];
    }
    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    dc0= 0x01010101U*((sum_upper + 2)>>2);
    dc2= 0x01010101U*((sum_lower + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/**
 * 8x8 intra prediction, DC from the top row only. The left four columns are
 * filled with the rounded mean of the left four top neighbours, the right four
 * columns with the mean of the right four.
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    unsigned int sum_left= 0, sum_right= 0;
    uint32_t dc0, dc1;
    int i;

    for(i=0; i<4; i++){
        sum_left += src[i-stride];
        sum_right+= src[4+i-stride];
    }
    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    dc0= 0x01010101U*((sum_left  + 2)>>2);
    dc1= 0x01010101U*((sum_right + 2)>>2);

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/**
 * 8x8 intra prediction, DC mode, computed per 4x4 quadrant:
 *  - top-left:     mean of the four left and four top neighbours next to it
 *  - top-right:    mean of the right four top neighbours
 *  - bottom-left:  mean of the lower four left neighbours
 *  - bottom-right: mean of the right four top and lower four left neighbours
 * @param src    top-left pixel of the block to fill (written as 32-bit words)
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    unsigned int s0= 0, s1= 0, s2= 0;
    uint32_t dc0, dc1, dc2, dc3;
    int i;

    for(i=0; i<4; i++){
        s0+= src[-1+i*stride] + src[i-stride];
        s1+= src[4+i-stride];
        s2+= src[-1+(i+4)*stride];
    }
    /* unsigned arithmetic: 0x01010101*dc overflows a signed int for dc >= 128 */
    dc3= 0x01010101U*((s1 + s2 + 4)>>3);
    dc0= 0x01010101U*((s0 + 4)>>3);
    dc1= 0x01010101U*((s1 + 2)>>2);
    dc2= 0x01010101U*((s2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2588 static void pred8x8_plane_c(uint8_t *src, int stride){
2591 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2592 const uint8_t * const src0 = src+3-stride;
2593 const uint8_t *src1 = src+4*stride-1;
2594 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2595 int H = src0[1] - src0[-1];
2596 int V = src1[0] - src2[ 0];
2597 for(k=2; k<=4; ++k) {
2598 src1 += stride; src2 -= stride;
2599 H += k*(src0[k] - src0[-k]);
2600 V += k*(src1[0] - src2[ 0]);
2602 H = ( 17*H+16 ) >> 5;
2603 V = ( 17*V+16 ) >> 5;
2605 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2606 for(j=8; j>0; --j) {
2609 src[0] = cm[ (b ) >> 5 ];
2610 src[1] = cm[ (b+ H) >> 5 ];
2611 src[2] = cm[ (b+2*H) >> 5 ];
2612 src[3] = cm[ (b+3*H) >> 5 ];
2613 src[4] = cm[ (b+4*H) >> 5 ];
2614 src[5] = cm[ (b+5*H) >> 5 ];
2615 src[6] = cm[ (b+6*H) >> 5 ];
2616 src[7] = cm[ (b+7*H) >> 5 ];
/*
 * Helpers for the 8x8 luma predictors. They expect `src`, `stride`,
 * `has_topleft` and `has_topright` to be in scope and declare the low-pass
 * filtered neighbours l0..l7 (left), t0..t7 (top), t8..t15 (top-right) and
 * lt (top-left). Where a neighbour is flagged unavailable the nearest
 * available edge pixel is used in its place.
 */
#define SRC(x,y) src[(x)+(y)*stride]
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fills eight rows of eight pixels with the 32-bit splat v; advances src. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = (v); \
        src += stride; \
    }
/**
 * 8x8 luma intra prediction, DC without neighbours: fills the block with 128.
 * @param src          top-left pixel of the block to fill
 * @param has_topleft  not used by this mode
 * @param has_topright not used by this mode
 * @param stride       byte distance between vertically adjacent pixels
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_DC(0x80808080);
}
2663 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2665 PREDICT_8x8_LOAD_LEFT;
2666 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2669 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2671 PREDICT_8x8_LOAD_TOP;
2672 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2675 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677 PREDICT_8x8_LOAD_LEFT;
2678 PREDICT_8x8_LOAD_TOP;
2679 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2680 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2683 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2685 PREDICT_8x8_LOAD_LEFT;
2686 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2687 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2688 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2691 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2694 PREDICT_8x8_LOAD_TOP;
2703 for( y = 1; y < 8; y++ )
2704 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2706 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2708 PREDICT_8x8_LOAD_TOP;
2709 PREDICT_8x8_LOAD_TOPRIGHT;
2710 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2711 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2712 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2713 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2714 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2715 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2716 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2717 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2718 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2719 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2720 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2721 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2722 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2723 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2724 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2726 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2728 PREDICT_8x8_LOAD_TOP;
2729 PREDICT_8x8_LOAD_LEFT;
2730 PREDICT_8x8_LOAD_TOPLEFT;
2731 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2732 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2733 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2734 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2735 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2736 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2737 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2738 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2739 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2740 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2741 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2742 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2743 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2744 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2745 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2748 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2750 PREDICT_8x8_LOAD_TOP;
2751 PREDICT_8x8_LOAD_LEFT;
2752 PREDICT_8x8_LOAD_TOPLEFT;
2753 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2754 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2755 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2756 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2757 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2758 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2759 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2760 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2761 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2762 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2763 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2764 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2765 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2766 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2767 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2768 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2769 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2770 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2771 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2772 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2773 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2774 SRC(7,0)= (t6 + t7 + 1) >> 1;
2776 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2778 PREDICT_8x8_LOAD_TOP;
2779 PREDICT_8x8_LOAD_LEFT;
2780 PREDICT_8x8_LOAD_TOPLEFT;
2781 SRC(0,7)= (l6 + l7 + 1) >> 1;
2782 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2783 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2784 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2785 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2786 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2787 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2788 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2789 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2790 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2791 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2792 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2793 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2794 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2795 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2796 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2797 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2798 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2799 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2800 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2801 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2802 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2804 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2806 PREDICT_8x8_LOAD_TOP;
2807 PREDICT_8x8_LOAD_TOPRIGHT;
2808 SRC(0,0)= (t0 + t1 + 1) >> 1;
2809 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2810 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2811 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2812 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2813 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2814 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2815 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2816 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2817 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2818 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2819 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2820 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2821 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2822 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2823 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2824 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2825 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2826 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2827 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2828 SRC(7,6)= (t10 + t11 + 1) >> 1;
2829 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2831 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2833 PREDICT_8x8_LOAD_LEFT;
2834 SRC(0,0)= (l0 + l1 + 1) >> 1;
2835 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2836 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2837 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2838 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2839 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2840 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2841 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2842 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2843 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2844 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2845 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2846 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2847 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2848 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2849 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2850 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2851 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2853 #undef PREDICT_8x8_LOAD_LEFT
2854 #undef PREDICT_8x8_LOAD_TOP
2855 #undef PREDICT_8x8_LOAD_TOPLEFT
2856 #undef PREDICT_8x8_LOAD_TOPRIGHT
2857 #undef PREDICT_8x8_DC
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * The motion vector of block n in list `list` is taken from h->mv_cache and
 * offset by the partition position. Luma is interpolated with
 * qpix_op[luma_xy], where luma_xy encodes the quarter-pel fraction of the
 * vector; unless CODEC_FLAG_GRAY is set both chroma planes are then
 * interpolated with chroma_op using the eighth-pel fraction.
 * When the full-pel position lies outside the picture extended by the
 * emu_edge margins, the source block is first copied through
 * ff_emulated_edge_mc() into s->edge_emu_buffer.
 *
 * NOTE(review): several short lines of this function (among them the
 * declaration of `emu`, the conditions guarding the second luma call and the
 * chroma edge emulation, and the closing braces) are not present in this copy
 * of the file; confirm against the complete source before changing the code.
 */
2863 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2864 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2865 int src_x_offset, int src_y_offset,
2866 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2867 MpegEncContext * const s = &h->s;
2868 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2869 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2870 const int luma_xy= (mx&3) + ((my&3)<<2);
2871 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2872 uint8_t * src_cb, * src_cr;
2873 int extra_width= h->emu_edge_width;
2874 int extra_height= h->emu_edge_height;
2876 const int full_mx= mx>>2;
2877 const int full_my= my>>2;
2878 const int pic_width = 16*s->mb_width;
2879 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2884 if(mx&7) extra_width -= 3;
2885 if(my&7) extra_height -= 3;
2887 if( full_mx < 0-extra_width
2888 || full_my < 0-extra_height
2889 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2890 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2891 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2892 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2896 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2898 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2901 if(s->flags&CODEC_FLAG_GRAY) return;
2904 // chroma offset when predicting from a field of opposite parity
2905 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2906 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2908 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2909 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2912 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2913 src_cb= s->edge_emu_buffer;
2915 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2918 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919 src_cr= s->edge_emu_buffer;
2921 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition.
 *
 * Advances the destination pointers to the partition (x_offset/y_offset are
 * in chroma-sample units, hence the factor 2 for luma), converts the offsets
 * to picture coordinates and calls mc_dir_part() with the list 0 and list 1
 * reference pictures looked up through h->ref_cache. The first call uses the
 * put functions; chroma_op is switched to chroma_avg before the list 1 call.
 *
 * NOTE(review): the conditions selecting list 0 / list 1 and the switch of
 * qpix_op are presumably on short lines that are missing from this copy of
 * the file; confirm against the complete source.
 */
2924 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2925 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2926 int x_offset, int y_offset,
2927 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2928 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2929 int list0, int list1){
2930 MpegEncContext * const s = &h->s;
2931 qpel_mc_func *qpix_op= qpix_put;
2932 h264_chroma_mc_func chroma_op= chroma_put;
2934 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2935 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2936 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2937 x_offset += 8*s->mb_x;
2938 y_offset += 8*(s->mb_y >> MB_MBAFF);
2941 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2942 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2943 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2944 qpix_op, chroma_op);
2947 chroma_op= chroma_avg;
2951 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2952 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2953 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2954 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition.
 *
 * Two-reference case: the list 0 prediction is written to the destination and
 * the list 1 prediction to s->obmc_scratchpad, then both are combined with
 * luma_weight_avg / chroma_weight_avg. With h->use_weight == 2 the weights
 * come from h->implicit_weight (weight1 = 64 - weight0, log2 denominator 5,
 * offset 0); otherwise the per-reference luma/chroma weights and the summed
 * offsets are used.
 * Single-reference case: the prediction is produced with mc_dir_part() and
 * then scaled in place with luma_weight_op and, if h->use_weight_chroma is
 * set, chroma_weight_op.
 *
 * NOTE(review): the branch lines separating these cases are not present in
 * this copy of the file; confirm against the complete source.
 */
2958 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2959 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2960 int x_offset, int y_offset,
2961 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2962 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2963 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2964 int list0, int list1){
2965 MpegEncContext * const s = &h->s;
2967 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2968 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2969 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2970 x_offset += 8*s->mb_x;
2971 y_offset += 8*(s->mb_y >> MB_MBAFF);
2974 /* don't optimize for luma-only case, since B-frames usually
2975 * use implicit weights => chroma too. */
2976 uint8_t *tmp_cb = s->obmc_scratchpad;
2977 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2978 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2979 int refn0 = h->ref_cache[0][ scan8[n] ];
2980 int refn1 = h->ref_cache[1][ scan8[n] ];
2982 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2983 dest_y, dest_cb, dest_cr,
2984 x_offset, y_offset, qpix_put, chroma_put);
2985 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2986 tmp_y, tmp_cb, tmp_cr,
2987 x_offset, y_offset, qpix_put, chroma_put);
2989 if(h->use_weight == 2){
2990 int weight0 = h->implicit_weight[refn0][refn1];
2991 int weight1 = 64 - weight0;
2992 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2993 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2994 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2996 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2997 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2998 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2999 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3000 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3001 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3002 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3003 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3004 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
3007 int list = list1 ? 1 : 0;
3008 int refn = h->ref_cache[list][ scan8[n] ];
3009 Picture *ref= &h->ref_list[list][refn];
3010 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3011 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3012 qpix_put, chroma_put);
3014 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3015 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3016 if(h->use_weight_chroma){
3017 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3018 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3019 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3020 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Motion-compensates one partition, choosing between weighted and unweighted
 * prediction.
 *
 * mc_part_weighted() is selected when explicit weighting is active
 * (h->use_weight == 1), or when implicit weighting is active
 * (h->use_weight == 2), both lists are used and the implicit weight of the
 * reference pair differs from 32. It receives entries 0 and 3 of weight_op /
 * weight_avg as luma and chroma functions. mc_part_std() is the other path.
 *
 * NOTE(review): the `else` joining the two calls is not present in this copy
 * of the file.
 */
3025 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3026 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3027 int x_offset, int y_offset,
3028 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3029 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3030 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3031 int list0, int list1){
3032 if((h->use_weight==2 && list0 && list1
3033 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3034 || h->use_weight==1)
3035 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3036 x_offset, y_offset, qpix_put, chroma_put,
3037 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3039 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3040 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues prefetches into the reference picture of the current macroblock's
 * first block for the given list.
 *
 * The target position is derived from the full-pel part of that block's
 * motion vector plus the macroblock position, with an additional row offset
 * that depends on s->mb_x. One s->dsp.prefetch() call covers luma, a second
 * covers chroma; the second uses src[2]-src[1] as its stride argument.
 *
 * NOTE(review): refn is read but the check that presumably guards its use is
 * not present in this copy of the file.
 */
3043 static inline void prefetch_motion(H264Context *h, int list){
3044 /* fetch pixels for estimated mv 4 macroblocks ahead
3045 * optimized for 64byte cache lines */
3046 MpegEncContext * const s = &h->s;
3047 const int refn = h->ref_cache[list][scan8[0]];
3049 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3050 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3051 uint8_t **src= h->ref_list[list][refn].data;
3052 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3053 s->dsp.prefetch(src[0]+off, s->linesize, 4);
3054 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3055 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Inter prediction for the current macroblock.
 *
 * Looks up the macroblock type and dispatches to mc_part() once per
 * partition: one 16x16, two 16x8, two 8x16, or per 8x8 sub-macroblock one
 * 8x8, two 8x4, two 4x8 or four 4x4 partitions. Each call passes the
 * matching entries of the qpel / chroma / weight function tables and the
 * list-usage flags obtained with IS_DIR(). prefetch_motion() is called for
 * list 0 before and for list 1 after the prediction.
 *
 * NOTE(review): the loop headers over the sub-macroblocks and 4x4 blocks and
 * the declarations of i, j and n are not present in this copy of the file.
 */
3059 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3060 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3061 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3062 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3063 MpegEncContext * const s = &h->s;
3064 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3065 const int mb_type= s->current_picture.mb_type[mb_xy];
3067 assert(IS_INTER(mb_type));
3069 prefetch_motion(h, 0);
3071 if(IS_16X16(mb_type)){
3072 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3073 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3074 &weight_op[0], &weight_avg[0],
3075 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3076 }else if(IS_16X8(mb_type)){
3077 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3078 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3079 &weight_op[1], &weight_avg[1],
3080 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3081 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3082 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3083 &weight_op[1], &weight_avg[1],
3084 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3085 }else if(IS_8X16(mb_type)){
3086 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3087 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3088 &weight_op[2], &weight_avg[2],
3089 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3090 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3091 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3092 &weight_op[2], &weight_avg[2],
3093 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3097 assert(IS_8X8(mb_type));
3100 const int sub_mb_type= h->sub_mb_type[i];
3102 int x_offset= (i&1)<<2;
3103 int y_offset= (i&2)<<1;
3105 if(IS_SUB_8X8(sub_mb_type)){
3106 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3107 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3108 &weight_op[3], &weight_avg[3],
3109 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3110 }else if(IS_SUB_8X4(sub_mb_type)){
3111 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3112 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3113 &weight_op[4], &weight_avg[4],
3114 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3115 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3116 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3117 &weight_op[4], &weight_avg[4],
3118 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3119 }else if(IS_SUB_4X8(sub_mb_type)){
3120 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3121 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3122 &weight_op[5], &weight_avg[5],
3123 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3124 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3125 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3126 &weight_op[5], &weight_avg[5],
3127 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3130 assert(IS_SUB_4X4(sub_mb_type));
3132 int sub_x_offset= x_offset + 2*(j&1);
3133 int sub_y_offset= y_offset + (j&2);
3134 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3135 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3136 &weight_op[6], &weight_avg[6],
3137 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3143 prefetch_motion(h, 1);
/**
 * Builds the variable-length-code lookup tables for the coefficient-token,
 * total-zeros and run codes (plus the chroma-DC variants) from the static
 * length/bit tables defined elsewhere in this file.
 * NOTE(review): the static "done" flag looks like a run-once guard, but the
 * statement that tests it is not visible in this excerpt -- confirm.
 */
3146 static void decode_init_vlc(H264Context *h){
3147 static int done = 0;
// chroma DC coefficient tokens: a single table with 4*5 codes
3153 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3154 &chroma_dc_coeff_token_len [0], 1, 1,
3155 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// regular coefficient tokens: one table per index i, 4*17 codes each
3158 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3159 &coeff_token_len [i][0], 1, 1,
3160 &coeff_token_bits[i][0], 1, 1, 1);
// chroma DC total_zeros: 4 codes per table
3164 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3165 &chroma_dc_total_zeros_len [i][0], 1, 1,
3166 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// regular total_zeros: 15 tables of 16 codes
3168 for(i=0; i<15; i++){
3169 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3170 &total_zeros_len [i][0], 1, 1,
3171 &total_zeros_bits[i][0], 1, 1, 1);
// run tables: 7 codes each, looked up with RUN_VLC_BITS bits
3175 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3176 &run_len [i][0], 1, 1,
3177 &run_bits[i][0], 1, 1, 1);
// run7_vlc is built from row 6 of run_len/run_bits, but with 16 codes and
// the wider RUN7_VLC_BITS lookup
3179 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3180 &run_len [6][0], 1, 1,
3181 &run_bits[6][0], 1, 1, 1);
3186 * Sets the intra prediction function pointers.
/**
 * Installs the C implementations (the *_c functions) of every intra
 * predictor into the four function-pointer tables of the context:
 * pred4x4 and pred8x8l (indexed by the *_PRED constants) and
 * pred8x8 and pred16x16 (both indexed by the *_PRED8x8 constants).
 */
3188 static void init_pred_ptrs(H264Context *h){
3189 // MpegEncContext * const s = &h->s;
// 4x4 luma predictors
3191 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3192 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3193 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3194 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3195 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3196 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3197 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3198 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3199 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3200 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3201 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3202 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
// 8x8 luma predictors, same mode indices as the 4x4 table
3204 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3205 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3206 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3207 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3208 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3209 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3210 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3211 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3212 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3213 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3214 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3215 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
// 8x8 predictors (hl_decode_mb applies this table to the chroma planes)
3217 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3218 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3219 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3220 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3221 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3222 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3223 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
// 16x16 predictors; note they reuse the *_PRED8x8 mode indices
3225 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3226 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3227 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3228 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3229 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3230 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3231 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Releases the per-stream tables with av_freep(), which is handed the
 * address of each pointer.
 * slice_table is not freed on its own: alloc_tables() sets it to an offset
 * inside slice_table_base, so it is simply reset to NULL here.
 */
3234 static void free_tables(H264Context *h){
3235 av_freep(&h->intra4x4_pred_mode);
3236 av_freep(&h->chroma_pred_mode_table);
3237 av_freep(&h->cbp_table);
3238 av_freep(&h->mvd_table[0]);
3239 av_freep(&h->mvd_table[1]);
3240 av_freep(&h->direct_table);
3241 av_freep(&h->non_zero_count);
3242 av_freep(&h->slice_table_base);
3243 av_freep(&h->top_borders[1]);
3244 av_freep(&h->top_borders[0]);
// derived pointer into slice_table_base, which was freed above
3245 h->slice_table= NULL;
3247 av_freep(&h->mb2b_xy);
3248 av_freep(&h->mb2b8_xy);
// scratch buffer allocated lazily by frame_start()
3250 av_freep(&h->s.obmc_scratchpad);
/**
 * Fills the 8x8 dequantisation tables h->dequant8_coeff[0..1] for all 52
 * values of q from the PPS 8x8 scaling matrices.
 * Each entry is dequant8_coeff_init[idx][...] * scaling_matrix8[i][x],
 * shifted left by div6[q].
 * NOTE(review): the definitions of idx and of the x loop are not visible in
 * this excerpt.
 */
3253 static void init_dequant8_coeff_table(H264Context *h){
// When the DSP context does not use the C reference 8x8 IDCT, entries are
// stored at the transposed position (rows and columns of x swapped).
3255 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3256 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3257 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3259 for(i=0; i<2; i++ ){
// identical scaling matrices: let table 1 alias buffer 0 instead of
// keeping a second copy
3260 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3261 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3265 for(q=0; q<52; q++){
3266 int shift = div6[q];
3269 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3270 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3271 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Fills the six 4x4 dequantisation tables h->dequant4_coeff[0..5] for all 52
 * values of q from the PPS 4x4 scaling matrices.
 * Each entry is dequant4_coeff_init[idx][...] * scaling_matrix4[i][x],
 * shifted left by div6[q] + 2.
 * NOTE(review): the definitions of idx, j and the x loop are not visible in
 * this excerpt.
 */
3276 static void init_dequant4_coeff_table(H264Context *h){
// When the DSP context does not use the C reference 4x4 IDCT, entries are
// stored at the transposed position.
3278 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3279 for(i=0; i<6; i++ ){
3280 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// a matrix equal to matrix j shares j's buffer instead of getting its own
3282 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3283 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3290 for(q=0; q<52; q++){
3291 int shift = div6[q] + 2;
3294 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3295 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3296 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Rebuilds the dequantisation tables for the current parameter sets:
 * the 4x4 tables always, the 8x8 tables only when pps.transform_8x8_mode is
 * set. When sps.transform_bypass is set, the q == 0 entries are afterwards
 * overwritten with the constant 1<<6.
 */
3301 static void init_dequant_tables(H264Context *h){
3303 init_dequant4_coeff_table(h);
3304 if(h->pps.transform_8x8_mode)
3305 init_dequant8_coeff_table(h);
3306 if(h->sps.transform_bypass){
// q == 0 row of every 4x4 table
3309 h->dequant4_coeff[i][0][x] = 1<<6;
3310 if(h->pps.transform_8x8_mode)
// q == 0 row of every 8x8 table
3313 h->dequant8_coeff[i][0][x] = 1<<6;
3320 * needs width/height
/**
 * Allocates (zero-initialised, via CHECKED_ALLOCZ) the per-macroblock tables
 * of the decoder. Sizes are derived from s->mb_stride, s->mb_height and
 * s->mb_width, so those must be set before calling.
 * NOTE(review): the return statements and the CHECKED_ALLOCZ failure path
 * are not visible in this excerpt.
 */
3322 static int alloc_tables(H264Context *h){
3323 MpegEncContext * const s = &h->s;
// one extra macroblock row on top of the picture's mb_height rows
3324 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3327 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3329 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3330 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
// per macroblock column: 16 luma + 8 + 8 chroma bytes of saved border
3331 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3332 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3333 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// these tables are only allocated for CABAC streams
3335 if( h->pps.cabac ) {
3336 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3338 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3339 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// every slice table byte starts as -1 (0xFF); slice_table itself points
// 2*mb_stride+1 entries into the base allocation
3342 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3343 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// maps from macroblock index to the index of its first 4x4 block (b_xy)
// and first 8x8 block (b8_xy)
3345 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3346 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3347 for(y=0; y<s->mb_height; y++){
3348 for(x=0; x<s->mb_width; x++){
3349 const int mb_xy= x + y*s->mb_stride;
3350 const int b_xy = 4*x + 4*y*h->b_stride;
3351 const int b8_xy= 2*x + 2*y*h->b8_stride;
3353 h->mb2b_xy [mb_xy]= b_xy;
3354 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated later by frame_start(), once the line sizes are known
3358 s->obmc_scratchpad = NULL;
// build the dequant tables only if they have not been set up yet
3360 if(!h->dequant4_coeff[0])
3361 init_dequant_tables(h);
/**
 * Initialisation of the context fields set here: copies the picture
 * dimensions and codec id from the AVCodecContext, resets
 * dequant_coeff_pps to -1 and fills the PPS scaling matrices with the
 * value 16.
 * NOTE(review): some statements of this function are not visible in this
 * excerpt.
 */
3369 static void common_init(H264Context *h){
3370 MpegEncContext * const s = &h->s;
3372 s->width = s->avctx->width;
3373 s->height = s->avctx->height;
3374 s->codec_id= s->avctx->codec->id;
3378 h->dequant_coeff_pps= -1;
3379 s->unrestricted_mv=1;
3380 s->decode=1; //FIXME
// every byte of both scaling matrix sets becomes 16
3382 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3383 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * Decoder init entry point: takes the H264Context from avctx->priv_data,
 * applies the MpegEncContext decode defaults, marks the output format as
 * FMT_H264 and selects PIX_FMT_YUV420P as the pixel format.
 * NOTE(review): further statements and the return value are not visible in
 * this excerpt.
 */
3386 static int decode_init(AVCodecContext *avctx){
3387 H264Context *h= avctx->priv_data;
3388 MpegEncContext * const s = &h->s;
3390 MPV_decode_defaults(s);
3395 s->out_format = FMT_H264;
3396 s->workaround_bugs= avctx->workaround_bugs;
3399 // s->decode_mb= ff_h263_decode_mb;
3401 avctx->pix_fmt= PIX_FMT_YUV420P;
// Extradata whose first byte is 1 is treated specially.
// NOTE(review): the branch body is not visible here; presumably this
// detects "avcC"-style extradata -- confirm.
3405 if(avctx->extradata_size > 0 && avctx->extradata &&
3406 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the frame in the MpegEncContext and the error
 * resilience code, recomputes h->block_offset[] for the current line sizes,
 * makes sure the bi-prediction scratch buffer exists and resets the slice
 * table.
 * @return a negative value on failure (MPV_frame_start() failing or the
 *         scratch buffer allocation failing)
 */
3416 static int frame_start(H264Context *h){
3417 MpegEncContext * const s = &h->s;
3420 if(MPV_frame_start(s, s->avctx) < 0)
3422 ff_er_frame_start(s);
3424 assert(s->linesize && s->uvlinesize);
// Byte offset of every 4x4 block inside its macroblock, derived from the
// scan8 layout. Entries 0..23 use the frame line size; entries 24..47 use
// twice the line size (hl_decode_mb selects those together with a doubled
// line size).
3426 for(i=0; i<16; i++){
3427 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3428 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3431 h->block_offset[16+i]=
3432 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3433 h->block_offset[24+16+i]=
3434 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3437 /* can't be in alloc_tables because linesize isn't known there.
3438 * FIXME: redo bipred weight to not require extra buffer? */
3439 if(!s->obmc_scratchpad)
3440 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
// av_malloc() can fail; do not continue into decoding with a NULL scratch
// buffer
if(!s->obmc_scratchpad)
return -1;
3442 /* some macroblocks will be accessed before they're available */
3444 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3446 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the border pixels of the current macroblock for later use:
 * the right-most column goes into h->left_border[], the row at offset 16
 * (luma) / 8 (chroma) from the adjusted source pointers goes into
 * h->top_borders[0][mb_x] (bytes 0..15 luma, 16..23 Cb, 24..31 Cr).
 * Chroma is skipped when CODEC_FLAG_GRAY is set.
 */
3450 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3451 MpegEncContext * const s = &h->s;
// step the chroma pointers back by one line so that index 0 is the row
// above the macroblock
3455 src_cb -= uvlinesize;
3456 src_cr -= uvlinesize;
3458 // There are two lines saved, the line above the top macroblock of a pair,
3459 // and the line above the bottom macroblock
// entry 0 of the left border is the last byte of the previously saved top row
3460 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3461 for(i=1; i<17; i++){
3462 h->left_border[i]= src_y[15+i* linesize];
// 16 luma bytes copied as two 64-bit words
3465 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3466 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3468 if(!(s->flags&CODEC_FLAG_GRAY)){
3469 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3470 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3472 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3473 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3475 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3476 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchanges (or copies, depending on the xchg argument) the pixels
 * bordering the current macroblock in the picture with the values saved in
 * h->left_border[] and h->top_borders[0][].
 * hl_decode_mb() calls it with xchg=1 before intra prediction and xchg=0
 * afterwards, when deblocking is enabled.
 * NOTE(review): the body of the XCHG macro and the statements using
 * deblock_left are not visible in this excerpt.
 */
3480 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3481 MpegEncContext * const s = &h->s;
// no left / top neighbour exists in the first column / first row
3484 int deblock_left = (s->mb_x > 0);
3485 int deblock_top = (s->mb_y > 0);
// move to the pixel one row above and one column left of the macroblock
3487 src_y -= linesize + 1;
3488 src_cb -= uvlinesize + 1;
3489 src_cr -= uvlinesize + 1;
3491 #define XCHG(a,b,t,xchg)\
3498 for(i = !deblock_top; i<17; i++){
3499 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top row: first 8 bytes honour xchg, the rest is always exchanged;
// the next column's first 8 bytes are included when a right neighbour exists
3504 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3505 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3506 if(s->mb_x+1 < s->mb_width){
3507 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3511 if(!(s->flags&CODEC_FLAG_GRAY)){
3513 for(i = !deblock_top; i<9; i++){
3514 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3515 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3519 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3520 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Macroblock-pair variant of backup_mb_border(): saves the right-most
 * column of a 32-line macroblock pair into h->left_border[] and the two
 * rows at offsets 32 and 33 (luma) / 16 and 17 (chroma) from the adjusted
 * source pointers into h->top_borders[0] and h->top_borders[1].
 * Chroma is skipped when CODEC_FLAG_GRAY is set.
 */
3525 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3526 MpegEncContext * const s = &h->s;
// step back two lines so that indices 0 and 1 are the rows above the pair
3529 src_y -= 2 * linesize;
3530 src_cb -= 2 * uvlinesize;
3531 src_cr -= 2 * uvlinesize;
3533 // There are two lines saved, the line above the top macroblock of a pair,
3534 // and the line above the bottom macroblock
3535 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3536 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3537 for(i=2; i<34; i++){
3538 h->left_border[i]= src_y[15+i* linesize];
3541 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3542 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3543 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3544 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3546 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma part of left_border: Cb starts at index 34, Cr at 34+18
3547 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3548 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3549 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3550 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3551 for(i=2; i<18; i++){
3552 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3553 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3555 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3556 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3557 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3558 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * Macroblock-pair variant of xchg_mb_border(): exchanges (or copies,
 * depending on xchg) the two rows above and the column left of a macroblock
 * pair with the values saved in h->left_border[] and h->top_borders[0..1][].
 * NOTE(review): the body of the XCHG macro and the statements using
 * deblock_left are not visible in this excerpt.
 */
3562 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3563 MpegEncContext * const s = &h->s;
3566 int deblock_left = (s->mb_x > 0);
// the first pair occupies macroblock rows 0 and 1, hence "> 1"
3567 int deblock_top = (s->mb_y > 1);
3569 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// move two rows up and one column left of the pair
3571 src_y -= 2 * linesize + 1;
3572 src_cb -= 2 * uvlinesize + 1;
3573 src_cr -= 2 * uvlinesize + 1;
3575 #define XCHG(a,b,t,xchg)\
3582 for(i = (!deblock_top)<<1; i<34; i++){
3583 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3588 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3589 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3590 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3591 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3592 if(s->mb_x+1 < s->mb_width){
3593 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3594 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3598 if(!(s->flags&CODEC_FLAG_GRAY)){
3600 for(i = (!deblock_top) << 1; i<18; i++){
3601 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3602 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3606 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3607 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3608 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3609 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: copies PCM samples, or runs intra prediction / motion
 * compensation and adds the residual, then runs the deblocking filter when
 * h->deblocking_filter is set.
 * NOTE(review): several branch headers (else branches, some if conditions)
 * are not visible in this excerpt; comments below only describe the visible
 * statements.
 */
3614 static void hl_decode_mb(H264Context *h){
3615 MpegEncContext * const s = &h->s;
3616 const int mb_x= s->mb_x;
3617 const int mb_y= s->mb_y;
3618 const int mb_xy= mb_x + mb_y*s->mb_stride;
3619 const int mb_type= s->current_picture.mb_type[mb_xy];
3620 uint8_t *dest_y, *dest_cb, *dest_cr;
3621 int linesize, uvlinesize /*dct_offset*/;
3623 int *block_offset = &h->block_offset[0];
// 1 for the lower macroblock of a vertical pair (odd row)
3624 const unsigned int bottom = mb_y & 1;
// bypass applies only at qscale 0 and when the SPS enables it
3625 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3626 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3627 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma)
3632 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3633 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3634 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Doubled line size and the second half of block_offset[]: rows of this
// macroblock are interleaved with those of its pair partner.
// NOTE(review): the condition selecting this path is not visible here;
// presumably it is the field-macroblock case -- confirm.
3637 linesize = h->mb_linesize = s->linesize * 2;
3638 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3639 block_offset = &h->block_offset[24];
3640 if(mb_y&1){ //FIXME move out of this func?
// the bottom macroblock starts on the second picture line of the pair
3641 dest_y -= s->linesize*15;
3642 dest_cb-= s->uvlinesize*7;
3643 dest_cr-= s->uvlinesize*7;
// Remap the cached reference indices of each used list.
// Note on precedence: 16+*ref^(s->mb_y&1) parses as (16+*ref)^(s->mb_y&1).
3647 for(list=0; list<2; list++){
3648 if(!USES_LIST(mb_type, list))
3650 if(IS_16X16(mb_type)){
3651 int8_t *ref = &h->ref_cache[list][scan8[0]];
3652 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3654 for(i=0; i<16; i+=4){
3655 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3656 int ref = h->ref_cache[list][scan8[i]];
3658 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// plain case: picture line sizes are used unchanged
3664 linesize = h->mb_linesize = s->linesize;
3665 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3666 // dct_offset = s->linesize * 16;
// pick the luma residual functions: plain pixel add for transform bypass,
// otherwise the 8x8 or 4x4 IDCT
3669 if(transform_bypass){
3671 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3672 }else if(IS_8x8DCT(mb_type)){
3673 idct_dc_add = s->dsp.h264_idct8_dc_add;
3674 idct_add = s->dsp.h264_idct8_add;
3676 idct_dc_add = s->dsp.h264_idct_dc_add;
3677 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: exchange the pair's borders before prediction.
// Done for the top macroblock, or for the bottom one when the macroblock
// above it is not intra.
3680 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3681 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3682 int mbt_y = mb_y&~1;
3683 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3684 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3685 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3686 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3689 if (IS_INTRA_PCM(mb_type)) {
3692 // The pixels are stored in h->mb array in the same order as levels,
3693 // copy them in output in the correct order.
// blocks 0..15 luma, 16..19 Cb, 20..23 Cr; 16 samples per 4x4 block
3694 for(i=0; i<16; i++) {
3695 for (y=0; y<4; y++) {
3696 for (x=0; x<4; x++) {
3697 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3701 for(i=16; i<16+4; i++) {
3702 for (y=0; y<4; y++) {
3703 for (x=0; x<4; x++) {
3704 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3708 for(i=20; i<20+4; i++) {
3709 for (y=0; y<4; y++) {
3710 for (x=0; x<4; x++) {
3711 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3716 if(IS_INTRA(mb_type)){
// non-MBAFF: swap in the saved border pixels before intra prediction;
// the matching call with xchg=0 follows after prediction
3717 if(h->deblocking_filter && !FRAME_MBAFF)
3718 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
// chroma prediction, same mode for both planes
3720 if(!(s->flags&CODEC_FLAG_GRAY)){
3721 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3722 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3725 if(IS_INTRA4x4(mb_type)){
3727 if(IS_8x8DCT(mb_type)){
// 8x8 intra: predict each 8x8 block, then add its residual right away
3728 for(i=0; i<16; i+=4){
3729 uint8_t * const ptr= dest_y + block_offset[i];
3730 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3731 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3732 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3733 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// a single non-zero coefficient that is the DC one takes the cheaper DC path
3735 if(nnz == 1 && h->mb[i*16])
3736 idct_dc_add(ptr, h->mb + i*16, linesize);
3738 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 intra: predict each 4x4 block, then add its residual right away
3742 for(i=0; i<16; i++){
3743 uint8_t * const ptr= dest_y + block_offset[i];
3745 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these two modes read the top-right neighbour; when it is unavailable
// the last available top pixel is replicated four times instead
3748 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3749 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3750 assert(mb_y || linesize <= block_offset[i]);
3751 if(!topright_avail){
3752 tr= ptr[3 - linesize]*0x01010101;
3753 topright= (uint8_t*) &tr;
3755 topright= ptr + 4 - linesize;
3759 h->pred4x4[ dir ](ptr, topright, linesize);
3760 nnz = h->non_zero_count_cache[ scan8[i] ];
3762 if(s->codec_id == CODEC_ID_H264){
3763 if(nnz == 1 && h->mb[i*16])
3764 idct_dc_add(ptr, h->mb + i*16, linesize);
3766 idct_add(ptr, h->mb + i*16, linesize);
3768 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: predict the whole luma block, then dequantise and transform
// the luma DC coefficients
3773 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3774 if(s->codec_id == CODEC_ID_H264){
3775 if(!transform_bypass)
3776 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3778 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3780 if(h->deblocking_filter && !FRAME_MBAFF)
3781 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// inter macroblock: motion compensation
3782 }else if(s->codec_id == CODEC_ID_H264){
3783 hl_motion(h, dest_y, dest_cb, dest_cr,
3784 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3785 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3786 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except 4x4 intra (handled per block above)
3790 if(!IS_INTRA4x4(mb_type)){
3791 if(s->codec_id == CODEC_ID_H264){
3792 if(IS_INTRA16x16(mb_type)){
3793 for(i=0; i<16; i++){
3794 if(h->non_zero_count_cache[ scan8[i] ])
3795 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3796 else if(h->mb[i*16])
3797 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// step over the 16 4x4 blocks four at a time when the 8x8 transform is used
3800 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3801 for(i=0; i<16; i+=di){
3802 int nnz = h->non_zero_count_cache[ scan8[i] ];
3804 if(nnz==1 && h->mb[i*16])
3805 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3807 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3812 for(i=0; i<16; i++){
3813 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3814 uint8_t * const ptr= dest_y + block_offset[i];
3815 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual
3821 if(!(s->flags&CODEC_FLAG_GRAY)){
3822 uint8_t *dest[2] = {dest_cb, dest_cr};
3823 if(transform_bypass){
3824 idct_add = idct_dc_add = s->dsp.add_pixels4;
3826 idct_add = s->dsp.h264_idct_add;
3827 idct_dc_add = s->dsp.h264_idct_dc_add;
// chroma DC for Cb (coefficients at 16*16) and Cr (at 16*16+4*16);
// dequant table index is 1/2 for intra and 4/5 otherwise
3828 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3829 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3831 if(s->codec_id == CODEC_ID_H264){
// blocks 16..19 go to dest[0] (Cb), 20..23 to dest[1] (Cr)
3832 for(i=16; i<16+8; i++){
3833 if(h->non_zero_count_cache[ scan8[i] ])
3834 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3835 else if(h->mb[i*16])
3836 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3839 for(i=16; i<16+8; i++){
3840 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3841 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3842 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3848 if(h->deblocking_filter) {
3850 //FIXME try deblocking one mb at a time?
3851 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
// These locals shadow the outer mb_y/mb_xy: they address the row above the
// current one, i.e. the top macroblock of the pair once "bottom" is set.
3852 const int mb_y = s->mb_y - 1;
3853 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3854 const int mb_xy= mb_x + mb_y*s->mb_stride;
3855 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3856 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// the pair is filtered only once its bottom macroblock has been decoded
3857 if (!bottom) return;
3858 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3859 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3860 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3862 if(IS_INTRA(mb_type_top | mb_type_bottom))
3863 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3865 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// filter the top macroblock of the pair with its own qp ...
3869 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3870 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3871 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3872 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// ... then the bottom one
3875 tprintf("call mbaff filter_mb\n");
3876 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3878 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// single-macroblock path: save the unfiltered border, then filter
3880 tprintf("call filter_mb\n");
3881 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3882 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3883 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3889 * fills the default_ref_list.
/**
 * Builds h->default_ref_list[] for the current slice from the short-term
 * and long-term reference pictures.
 * For B slices the short-term references are first sorted by POC; for frame
 * pictures list 0 / list 1 are then filled from the sorted array, followed
 * by the long-term references. Otherwise list 0 is filled from short_ref[]
 * in stored order followed by the long-term references. Unused tail entries
 * are zeroed.
 * NOTE(review): several statements (loop initialisers, else branches, the
 * return statements) are not visible in this excerpt.
 */
3891 static int fill_default_ref_list(H264Context *h){
3892 MpegEncContext * const s = &h->s;
3894 int smallest_poc_greater_than_current = -1;
3895 Picture sorted_short_ref[32];
3897 if(h->slice_type==B_TYPE){
3901 /* sort frame according to poc in B slice */
// selection sort: each pass picks the smallest poc above "limit"
3902 for(out_i=0; out_i<h->short_ref_count; out_i++){
3904 int best_poc=INT_MAX;
3906 for(i=0; i<h->short_ref_count; i++){
3907 const int poc= h->short_ref[i]->poc;
3908 if(poc > limit && poc < best_poc){
3914 assert(best_i != INT_MIN);
3917 sorted_short_ref[out_i]= *h->short_ref[best_i];
3918 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember the first sorted position whose poc is not below the current one
3919 if (-1 == smallest_poc_greater_than_current) {
3920 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3921 smallest_poc_greater_than_current = out_i;
3927 if(s->picture_structure == PICT_FRAME){
3928 if(h->slice_type==B_TYPE){
3930 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3932 // find the largest poc
3933 for(list=0; list<2; list++){
// the initial walking direction through the sorted array depends on the list
3936 int step= list ? -1 : 1;
3938 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3939 while(j<0 || j>= h->short_ref_count){
3940 if(j != -99 && step == (list ? -1 : 1))
3943 j= smallest_poc_greater_than_current + (step>>1);
// only pictures with reference == 3 are used
3945 if(sorted_short_ref[j].reference != 3) continue;
3946 h->default_ref_list[list][index ]= sorted_short_ref[j];
3947 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// long-term references follow; their pic_id is the long_ref slot index
3950 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3951 if(h->long_ref[i] == NULL) continue;
3952 if(h->long_ref[i]->reference != 3) continue;
3954 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3955 h->default_ref_list[ list ][index++].pic_id= i;
3958 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3959 // swap the two first elements of L1 when
3960 // L0 and L1 are identical
3961 Picture temp= h->default_ref_list[1][0];
3962 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3963 h->default_ref_list[1][1] = temp;
// zero the entries that could not be filled
3966 if(index < h->ref_count[ list ])
3967 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
3971 for(i=0; i<h->short_ref_count; i++){
3972 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3973 h->default_ref_list[0][index ]= *h->short_ref[i];
3974 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3976 for(i = 0; i < 16; i++){
3977 if(h->long_ref[i] == NULL) continue;
3978 if(h->long_ref[i]->reference != 3) continue;
3979 h->default_ref_list[0][index ]= *h->long_ref[i];
3980 h->default_ref_list[0][index++].pic_id= i;
3982 if(index < h->ref_count[0])
3983 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3986 if(h->slice_type==B_TYPE){
3988 //FIXME second field blah
// trace output of the resulting lists
3992 for (i=0; i<h->ref_count[0]; i++) {
3993 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3995 if(h->slice_type==B_TYPE){
3996 for (i=0; i<h->ref_count[1]; i++) {
// print list 1's own data pointer (this used to print list 0's)
3997 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
/* Forward declarations: decode_ref_pic_list_reordering() below calls
 * print_short_term() before its definition appears in this file. */
4004 static void print_short_term(H264Context *h);
4005 static void print_long_term(H264Context *h);
/**
 * Parses the reference picture list reordering syntax of a slice header and
 * builds h->ref_list[] from h->default_ref_list[].
 * Entries that end up without picture data are replaced by the current
 * picture.
 * @return 0 when nothing has to be done for I/SI slices; -1 when the
 *         bitstream carries an unusable long_term_pic_idx
 * NOTE(review): the other return statements are not visible in this excerpt.
 */
4007 static int decode_ref_pic_list_reordering(H264Context *h){
4008 MpegEncContext * const s = &h->s;
4011 print_short_term(h);
4013 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4015 for(list=0; list<2; list++){
// start from the default list; reordering commands modify the copy
4016 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// one flag bit tells whether reordering commands follow for this list
4018 if(get_bits1(&s->gb)){
4019 int pred= h->curr_pic_num;
4021 for(index=0; ; index++){
4022 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4025 Picture *ref = NULL;
// idc 3 terminates the command list
4027 if(reordering_of_pic_nums_idc==3)
4030 if(index >= h->ref_count[list]){
4031 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4035 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term picture addressed by a picture number difference
4036 if(reordering_of_pic_nums_idc<2){
4037 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4039 if(abs_diff_pic_num >= h->max_pic_num){
4040 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4044 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4045 else pred+= abs_diff_pic_num;
// wrap into [0, max_pic_num)
4046 pred &= h->max_pic_num - 1;
4048 for(i= h->short_ref_count-1; i>=0; i--){
4049 ref = h->short_ref[i];
4050 assert(ref->reference == 3);
4051 assert(!ref->long_ref);
4052 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4056 ref->pic_id= ref->frame_num;
// idc 2: long-term picture addressed by its index
4058 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
// The index comes straight from the bitstream. Reject values outside the
// 16 long_ref slots this file iterates over, and empty slots, before the
// pointer is dereferenced below.
if((unsigned)pic_id >= 16 || !h->long_ref[pic_id]){
av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx out of range or reference missing\n");
return -1;
}
4059 ref = h->long_ref[pic_id];
4060 ref->pic_id= pic_id;
4061 assert(ref->reference == 3);
4062 assert(ref->long_ref);
4067 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4068 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// find the picture's current position, shift the entries in between down by
// one and insert the picture at "index"
4070 for(i=index; i+1<h->ref_count[list]; i++){
4071 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4074 for(; i > index; i--){
4075 h->ref_list[list][i]= h->ref_list[list][i-1];
4077 h->ref_list[list][index]= *ref;
4080 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// list 1 exists only for B slices
4086 if(h->slice_type!=B_TYPE) break;
// replace entries without picture data by the current picture
4088 for(list=0; list<2; list++){
4089 for(index= 0; index < h->ref_count[list]; index++){
4090 if(!h->ref_list[list][index].data[0])
4091 h->ref_list[list][index]= s->current_picture;
4093 if(h->slice_type!=B_TYPE) break;
// temporal direct prediction needs the distance scale factors
4096 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4097 direct_dist_scale_factor(h);
4098 direct_ref_list_init(h);
/**
 * Derives per-field reference entries from the frame reference lists.
 * For frame entry i of each list, two entries are set up at indices
 * 16+2*i and 16+2*i+1: the line sizes are doubled and the second entry's
 * data pointers are advanced by one frame line. The explicit weights and
 * offsets and the implicit weights of entry i are duplicated for both.
 * NOTE(review): the j loops and at least one statement initialising
 * field[0] are not visible in this excerpt.
 */
4102 static void fill_mbaff_ref_list(H264Context *h){
4104 for(list=0; list<2; list++){
4105 for(i=0; i<h->ref_count[list]; i++){
4106 Picture *frame = &h->ref_list[list][i];
4107 Picture *field = &h->ref_list[list][16+2*i];
// a field uses every second line of the frame
4110 field[0].linesize[j] <<= 1;
4111 field[1] = field[0];
// the second field starts one frame line further down
4113 field[1].data[j] += frame->linesize[j];
4115 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4116 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4118 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4119 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// implicit weights: duplicate along the inner index first, then copy the
// whole row to the two field rows
4123 for(j=0; j<h->ref_count[1]; j++){
4124 for(i=0; i<h->ref_count[0]; i++)
4125 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4126 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4127 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the explicit weighted-prediction table of a slice header:
 * the luma/chroma log2 weight denominators followed, for every reference of
 * list 0 (and list 1 for B slices), by optional luma and chroma
 * weight/offset pairs. References without an explicit entry get the default
 * weight 1<<denom and offset 0.
 * h->use_weight_chroma is set when any chroma weight/offset differs from
 * its default.
 * Out-of-range denominators are logged and treated as 0.
 * NOTE(review): the return statements and the statement that sets
 * h->use_weight for luma are not visible in this excerpt.
 */
4131 static int pred_weight_table(H264Context *h){
4132 MpegEncContext * const s = &h->s;
4134 int luma_def, chroma_def;
4137 h->use_weight_chroma= 0;
4138 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4139 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Both denominators come straight from the bitstream and are used as shift
// counts below; the H.264 syntax limits them to 0..7. Anything else would
// make the shifts undefined, so fall back to 0.
if((unsigned)h->luma_log2_weight_denom > 7){
av_log(h->s.avctx, AV_LOG_ERROR, "luma_log2_weight_denom out of range\n");
h->luma_log2_weight_denom= 0;
}
if((unsigned)h->chroma_log2_weight_denom > 7){
av_log(h->s.avctx, AV_LOG_ERROR, "chroma_log2_weight_denom out of range\n");
h->chroma_log2_weight_denom= 0;
}
// default weights: 1.0 expressed with the given denominators
4140 luma_def = 1<<h->luma_log2_weight_denom;
4141 chroma_def = 1<<h->chroma_log2_weight_denom;
4143 for(list=0; list<2; list++){
4144 for(i=0; i<h->ref_count[list]; i++){
4145 int luma_weight_flag, chroma_weight_flag;
4147 luma_weight_flag= get_bits1(&s->gb);
4148 if(luma_weight_flag){
4149 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4150 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4151 if( h->luma_weight[list][i] != luma_def
4152 || h->luma_offset[list][i] != 0)
// no explicit luma entry: defaults
4155 h->luma_weight[list][i]= luma_def;
4156 h->luma_offset[list][i]= 0;
4159 chroma_weight_flag= get_bits1(&s->gb);
4160 if(chroma_weight_flag){
4163 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4164 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4165 if( h->chroma_weight[list][i][j] != chroma_def
4166 || h->chroma_offset[list][i][j] != 0)
4167 h->use_weight_chroma= 1;
// no explicit chroma entry: defaults
4172 h->chroma_weight[list][i][j]= chroma_def;
4173 h->chroma_offset[list][i][j]= 0;
// list 1 is only present for B slices
4177 if(h->slice_type != B_TYPE) break;
4179 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit bi-prediction weights from picture order counts.
 *
 * For every (list0, list1) reference pair a distance scale factor is derived
 * from the POC distances and stored as 64 - dist_scale_factor, or 32 when
 * the factor falls outside -64..128.
 */
4183 static void implicit_weight_table(H264Context *h){
4184 MpegEncContext * const s = &h->s;
4186 int cur_poc = s->current_picture_ptr->poc;
// special case: one reference per list, placed symmetrically around the current picture
4188 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4189 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4191 h->use_weight_chroma= 0;
4196 h->use_weight_chroma= 2;
4197 h->luma_log2_weight_denom= 5;
4198 h->chroma_log2_weight_denom= 5;
4200 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4201 int poc0 = h->ref_list[0][ref0].poc;
4202 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4203 int poc1 = h->ref_list[1][ref1].poc;
// td: clipped POC distance between the two references
4204 int td = clip(poc1 - poc0, -128, 127);
// tb: clipped POC distance from the list0 reference to the current picture
4206 int tb = clip(cur_poc - poc0, -128, 127);
// NOTE(review): divides by td; the guard against td == 0 is not visible in this excerpt — confirm
4207 int tx = (16384 + (ABS(td) >> 1)) / td;
4208 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4209 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4210 h->implicit_weight[ref0][ref1] = 32;
4212 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4214 h->implicit_weight[ref0][ref1] = 32;
/**
 * Releases a picture from reference use.
 * The picture is compared against h->delayed_output_pic and against the
 * entries of h->delayed_pic; that scan stops at the first NULL entry.
 * NOTE(review): the statements run on a match are not visible in this
 * excerpt — presumably the picture is kept alive while it still awaits
 * output; confirm.
 */
4219 static inline void unreference_pic(H264Context *h, Picture *pic){
4222 if(pic == h->delayed_output_pic)
4225 for(i = 0; h->delayed_pic[i]; i++)
4226 if(pic == h->delayed_pic[i]){
4234 * instantaneous decoder refresh.
4236 static void idr(H264Context *h){
4239 for(i=0; i<16; i++){
4240 if (h->long_ref[i] != NULL) {
4241 unreference_pic(h, h->long_ref[i]);
4242 h->long_ref[i]= NULL;
4245 h->long_ref_count=0;
4247 for(i=0; i<h->short_ref_count; i++){
4248 unreference_pic(h, h->short_ref[i]);
4249 h->short_ref[i]= NULL;
4251 h->short_ref_count=0;
4254 /* forget old pics after a seek */
// Clears the reference flag of every picture still queued for output and of
// the current picture, and resets the delayed-output bookkeeping.
4255 static void flush_dpb(AVCodecContext *avctx){
4256 H264Context *h= avctx->priv_data;
// drop all 16 delayed-picture slots
4258 for(i=0; i<16; i++) {
4259 if(h->delayed_pic[i])
4260 h->delayed_pic[i]->reference= 0;
4261 h->delayed_pic[i]= NULL;
4263 if(h->delayed_output_pic)
4264 h->delayed_output_pic->reference= 0;
4265 h->delayed_output_pic= NULL;
// current_picture_ptr may be NULL, hence the check
4267 if(h->s.current_picture_ptr)
4268 h->s.current_picture_ptr->reference= 0;
4273 * @return the removed picture or NULL if an error occurs
// Searches the short term reference list for a picture with the given
// frame_num and removes it from the list, closing the gap it leaves.
// With FF_DEBUG_MMCO set, the search is traced to the log.
4275 static Picture * remove_short(H264Context *h, int frame_num){
4276 MpegEncContext * const s = &h->s;
4279 if(s->avctx->debug&FF_DEBUG_MMCO)
4280 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4282 for(i=0; i<h->short_ref_count; i++){
4283 Picture *pic= h->short_ref[i];
4284 if(s->avctx->debug&FF_DEBUG_MMCO)
4285 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4286 if(pic->frame_num == frame_num){
4287 h->short_ref[i]= NULL;
// shift the following entries down by one; the entry count is decremented afterwards
4288 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4289 h->short_ref_count--;
4298 * @return the removed picture or NULL if an error occurs
4300 static Picture * remove_long(H264Context *h, int i){
4303 pic= h->long_ref[i];
4304 h->long_ref[i]= NULL;
4305 if(pic) h->long_ref_count--;
4311 * print short term list
// Logs every entry of the short term reference list (index, frame_num, poc,
// first data pointer) at debug level; does nothing unless FF_DEBUG_MMCO is set.
4313 static void print_short_term(H264Context *h) {
4315 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4316 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4317 for(i=0; i<h->short_ref_count; i++){
4318 Picture *pic= h->short_ref[i];
4319 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4325 * print long term list
// Logs the long term reference table at debug level; does nothing unless
// FF_DEBUG_MMCO is set. All 16 slots are visited.
4327 static void print_long_term(H264Context *h) {
4329 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4330 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4331 for(i = 0; i < 16; i++){
4332 Picture *pic= h->long_ref[i];
// NOTE(review): long term slots can be NULL (see remove_long); the NULL check
// guarding this dereference is not visible in this excerpt — confirm
4334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4341 * Executes the reference picture marking (memory management control operations).
4343 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4344 MpegEncContext * const s = &h->s;
4346 int current_is_long=0;
4349 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4350 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4352 for(i=0; i<mmco_count; i++){
4353 if(s->avctx->debug&FF_DEBUG_MMCO)
4354 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4356 switch(mmco[i].opcode){
4357 case MMCO_SHORT2UNUSED:
4358 pic= remove_short(h, mmco[i].short_frame_num);
4360 unreference_pic(h, pic);
4361 else if(s->avctx->debug&FF_DEBUG_MMCO)
4362 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4364 case MMCO_SHORT2LONG:
4365 pic= remove_long(h, mmco[i].long_index);
4366 if(pic) unreference_pic(h, pic);
4368 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4369 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4370 h->long_ref_count++;
4372 case MMCO_LONG2UNUSED:
4373 pic= remove_long(h, mmco[i].long_index);
4375 unreference_pic(h, pic);
4376 else if(s->avctx->debug&FF_DEBUG_MMCO)
4377 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
4380 pic= remove_long(h, mmco[i].long_index);
4381 if(pic) unreference_pic(h, pic);
4383 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4384 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4385 h->long_ref_count++;
4389 case MMCO_SET_MAX_LONG:
4390 assert(mmco[i].long_index <= 16);
4391 // just remove the long term which index is greater than new max
4392 for(j = mmco[i].long_index; j<16; j++){
4393 pic = remove_long(h, j);
4394 if (pic) unreference_pic(h, pic);
4398 while(h->short_ref_count){
4399 pic= remove_short(h, h->short_ref[0]->frame_num);
4400 unreference_pic(h, pic);
4402 for(j = 0; j < 16; j++) {
4403 pic= remove_long(h, j);
4404 if(pic) unreference_pic(h, pic);
4411 if(!current_is_long){
4412 pic= remove_short(h, s->current_picture_ptr->frame_num);
4414 unreference_pic(h, pic);
4415 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4418 if(h->short_ref_count)
4419 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4421 h->short_ref[0]= s->current_picture_ptr;
4422 h->short_ref[0]->long_ref=0;
4423 h->short_ref_count++;
4426 print_short_term(h);
4431 static int decode_ref_pic_marking(H264Context *h){
4432 MpegEncContext * const s = &h->s;
4435 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4436 s->broken_link= get_bits1(&s->gb) -1;
4437 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4438 if(h->mmco[0].long_index == -1)
4441 h->mmco[0].opcode= MMCO_LONG;
4445 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4446 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4447 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4449 h->mmco[i].opcode= opcode;
4450 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4451 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4452 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4453 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4457 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4458 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4459 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4460 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4465 if(opcode > MMCO_LONG){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4469 if(opcode == MMCO_END)
4474 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4476 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4477 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4478 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Derives the picture order count (POC) of the current picture.
 *
 * Three derivations are visible, selected by h->sps.poc_type: type 0 builds
 * the POC from an explicitly coded LSB plus a tracked MSB, type 1 from an
 * expected POC cycle described in the SPS, and the remaining type directly
 * from the frame number. The results are stored in the field_poc[] entries
 * of the current picture, and for frame pictures their minimum is stored as
 * the picture's poc.
 */
4488 static int init_poc(H264Context *h){
4489 MpegEncContext * const s = &h->s;
4490 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates frame_num wrap-arounds; it restarts at an IDR slice
4493 if(h->nal_unit_type == NAL_IDR_SLICE){
4494 h->frame_num_offset= 0;
4496 if(h->frame_num < h->prev_frame_num)
4497 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4499 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: POC = msb + lsb, the msb follows wrap-arounds of the coded lsb
4502 if(h->sps.poc_type==0){
4503 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4505 if(h->nal_unit_type == NAL_IDR_SLICE){
// the lsb jumped down by at least half its range: it wrapped forwards
4510 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4511 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
// the lsb jumped up by more than half its range: it wrapped backwards
4512 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4513 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4515 h->poc_msb = h->prev_poc_msb;
4516 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4518 field_poc[1] = h->poc_msb + h->poc_lsb;
4519 if(s->picture_structure == PICT_FRAME)
4520 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC predicted from the offset_for_ref_frame cycle in the SPS
4521 }else if(h->sps.poc_type==1){
4522 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4525 if(h->sps.poc_cycle_length != 0)
4526 abs_frame_num = h->frame_num_offset + h->frame_num;
4530 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// sum of all offsets of one POC cycle
4533 expected_delta_per_poc_cycle = 0;
4534 for(i=0; i < h->sps.poc_cycle_length; i++)
4535 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// NOTE(review): the division below needs poc_cycle_length != 0; the only
// visible protection is that abs_frame_num is assigned under that condition
// at 4525 — confirm the hidden else branch leaves abs_frame_num at 0
4537 if(abs_frame_num > 0){
4538 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4539 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4541 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4542 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4543 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4547 if(h->nal_ref_idc == 0)
4548 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4550 field_poc[0] = expectedpoc + h->delta_poc[0];
4551 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4553 if(s->picture_structure == PICT_FRAME)
4554 field_poc[1] += h->delta_poc[1];
// remaining poc_type: POC is twice the absolute frame number, minus one for non-reference pictures
4557 if(h->nal_unit_type == NAL_IDR_SLICE){
4560 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4561 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store only the field POCs that belong to the current picture structure
4567 if(s->picture_structure != PICT_BOTTOM_FIELD)
4568 s->current_picture_ptr->field_poc[0]= field_poc[0];
4569 if(s->picture_structure != PICT_TOP_FIELD)
4570 s->current_picture_ptr->field_poc[1]= field_poc[1];
4571 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4572 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4578 * decodes a slice header.
4579 * this will also call MPV_common_init() and frame_start() as needed
// Parses one slice header from s->gb into the H264Context: slice type,
// active PPS/SPS, picture geometry, frame number, POC syntax elements,
// reference counts and lists, prediction weights, reference marking,
// quantizer and deblocking parameters. Several steps log an error when a
// syntax element is out of range.
4581 static int decode_slice_header(H264Context *h){
4582 MpegEncContext * const s = &h->s;
4583 int first_mb_in_slice, pps_id;
4584 int num_ref_idx_active_override_flag;
4585 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4587 int default_ref_list_done = 0;
// nal_ref_idc == 0 marks a picture that is not used as a reference
4589 s->current_picture.reference= h->nal_ref_idc != 0;
4590 s->dropable= h->nal_ref_idc == 0;
4592 first_mb_in_slice= get_ue_golomb(&s->gb);
4594 slice_type= get_ue_golomb(&s->gb);
// NOTE(review): this message prints h->slice_type, which is only assigned at
// 4610 and so still holds the previous value; the offending value is the
// local slice_type read at 4594
4596 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4601 h->slice_type_fixed=1;
4603 h->slice_type_fixed=0;
4605 slice_type= slice_type_map[ slice_type ];
// the default reference list is not rebuilt for I slices, nor for a later
// slice of the picture that has the same type as the previous one
4606 if (slice_type == I_TYPE
4607 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4608 default_ref_list_done = 1;
4610 h->slice_type= slice_type;
4612 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// look up the PPS and, through it, the SPS
4614 pps_id= get_ue_golomb(&s->gb);
4616 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4619 h->pps= h->pps_buffer[pps_id];
// slice_group_count == 0 is treated as "this PPS slot was never filled"
4620 if(h->pps.slice_group_count == 0){
4621 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4625 h->sps= h->sps_buffer[ h->pps.sps_id ];
// log2_max_frame_num == 0 is treated as "this SPS slot was never filled"
4626 if(h->sps.log2_max_frame_num == 0){
4627 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// dequantization tables are rebuilt only when the PPS id changes
4631 if(h->dequant_coeff_pps != pps_id){
4632 h->dequant_coeff_pps = pps_id;
4633 init_dequant_tables(h);
// picture geometry; mb_height is doubled when frame_mbs_only_flag is 0
4636 s->mb_width= h->sps.mb_width;
4637 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4639 h->b_stride= s->mb_width*4;
4640 h->b8_stride= s->mb_width*2;
// displayed size: coded size minus the SPS cropping
4642 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4643 if(h->sps.frame_mbs_only_flag)
4644 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4646 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
// a size change while the context is already initialized is handled here
4648 if (s->context_initialized
4649 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4653 if (!s->context_initialized) {
4654 if (MPV_common_init(s) < 0)
// 4x4 scan tables: copied unchanged when the C IDCT is selected, otherwise
// permuted with T()
4657 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4658 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4659 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4662 for(i=0; i<16; i++){
// T() swaps the two low bits with the two high bits of a 4-bit index
4663 #define T(x) (x>>2) | ((x<<2) & 0xF)
4664 h->zigzag_scan[i] = T(zigzag_scan[i]);
4665 h-> field_scan[i] = T( field_scan[i]);
// 8x8 scan tables: same scheme, keyed on the 8x8 IDCT
4669 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4670 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4671 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4672 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4673 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4676 for(i=0; i<64; i++){
// T() swaps the low and the high 3 bits of a 6-bit index
4677 #define T(x) (x>>3) | ((x&7)<<3)
4678 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4679 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4680 h->field_scan8x8[i] = T(field_scan8x8[i]);
4681 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// the *_q0 tables point at the unpermuted tables when the SPS enables
// transform bypass, and at the context's own tables otherwise
4685 if(h->sps.transform_bypass){ //FIXME same ugly
4686 h->zigzag_scan_q0 = zigzag_scan;
4687 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4688 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4689 h->field_scan_q0 = field_scan;
4690 h->field_scan8x8_q0 = field_scan8x8;
4691 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4693 h->zigzag_scan_q0 = h->zigzag_scan;
4694 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4695 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4696 h->field_scan_q0 = h->field_scan;
4697 h->field_scan8x8_q0 = h->field_scan8x8;
4698 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
// export size and aspect ratio to the codec context; a zero denominator is replaced by 1
4703 s->avctx->width = s->width;
4704 s->avctx->height = s->height;
4705 s->avctx->sample_aspect_ratio= h->sps.sar;
4706 if(!s->avctx->sample_aspect_ratio.den)
4707 s->avctx->sample_aspect_ratio.den = 1;
// time base from the SPS timing info; the denominator is doubled for x264 builds 1..43
4709 if(h->sps.timing_info_present_flag){
4710 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4711 if(h->x264_build > 0 && h->x264_build < 44)
4712 s->avctx->time_base.den *= 2;
4713 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4714 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// the first slice of a picture starts a new frame
4718 if(h->slice_num == 0){
4719 if(frame_start(h) < 0)
4723 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4724 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// picture structure: frame, or a single field (reported as not implemented)
4727 h->mb_aff_frame = 0;
4728 if(h->sps.frame_mbs_only_flag){
4729 s->picture_structure= PICT_FRAME;
4731 if(get_bits1(&s->gb)) { //field_pic_flag
4732 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4733 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4735 s->picture_structure= PICT_FRAME;
4736 h->mb_aff_frame = h->sps.mb_aff;
// first macroblock position; the row is doubled when mb_aff_frame is set
4740 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4741 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4742 if(s->mb_y >= s->mb_height){
// picture numbers are counted in frames for frame pictures, and doubled otherwise
4746 if(s->picture_structure==PICT_FRAME){
4747 h->curr_pic_num= h->frame_num;
4748 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4750 h->curr_pic_num= 2*h->frame_num;
4751 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4754 if(h->nal_unit_type == NAL_IDR_SLICE){
4755 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax elements; init_poc() (defined above) turns them into the POC
4758 if(h->sps.poc_type==0){
4759 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4761 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4762 h->delta_poc_bottom= get_se_golomb(&s->gb);
4766 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4767 h->delta_poc[0]= get_se_golomb(&s->gb);
4769 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4770 h->delta_poc[1]= get_se_golomb(&s->gb);
4775 if(h->pps.redundant_pic_cnt_present){
4776 h->redundant_pic_count= get_ue_golomb(&s->gb);
4779 //set defaults, might be overridden a few lines later
4780 h->ref_count[0]= h->pps.ref_count[0];
4781 h->ref_count[1]= h->pps.ref_count[1];
4783 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4784 if(h->slice_type == B_TYPE){
4785 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4786 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4787 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4789 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4791 if(num_ref_idx_active_override_flag){
4792 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4793 if(h->slice_type==B_TYPE)
4794 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// NOTE(review): counts up to 32 pass this check, while fill_mbaff_ref_list
// indexes its arrays with 16+2*i+1 — confirm the array sizes cover this
4796 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4797 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4803 if(!default_ref_list_done){
4804 fill_default_ref_list(h);
4807 if(decode_ref_pic_list_reordering(h) < 0)
// explicit weights for P/SP slices with weighted_pred and for B slices with
// weighted_bipred_idc 1; implicit weights for B slices with weighted_bipred_idc 2
// NOTE(review): the return value of pred_weight_table() is not checked
4810 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4811 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4812 pred_weight_table(h);
4813 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4814 implicit_weight_table(h);
// reference marking is parsed only for reference pictures
// NOTE(review): the return value of decode_ref_pic_marking() is not checked
4818 if(s->current_picture.reference)
4819 decode_ref_pic_marking(h);
4822 fill_mbaff_ref_list(h);
4824 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4825 h->cabac_init_idc = get_ue_golomb(&s->gb);
// slice quantizer = PPS init_qp + coded delta, valid range 0..51
4827 h->last_qscale_diff = 0;
4828 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4829 if(s->qscale<0 || s->qscale>51){
4830 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4833 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4834 //FIXME qscale / qp ... stuff
4835 if(h->slice_type == SP_TYPE){
4836 get_bits1(&s->gb); /* sp_for_switch_flag */
4838 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4839 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking defaults: enabled, zero offsets
4842 h->deblocking_filter = 1;
4843 h->slice_alpha_c0_offset = 0;
4844 h->slice_beta_offset = 0;
4845 if( h->pps.deblocking_filter_parameters_present ) {
4846 h->deblocking_filter= get_ue_golomb(&s->gb);
// coded values 0 and 1 are swapped, so that 0 means "filter off" in h->deblocking_filter
4847 if(h->deblocking_filter < 2)
4848 h->deblocking_filter^= 1; // 1<->0
4850 if( h->deblocking_filter ) {
// the coded offsets are stored doubled
4851 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4852 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// the user's skip_loop_filter setting can switch the filter off
4855 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4856 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4857 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4858 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4859 h->deblocking_filter= 0;
// NOTE(review): "get_bits(&s->gb, ?)" is not valid C; presumably these two
// lines are compiled out by a preprocessor guard that is not visible in
// this excerpt — confirm
4862 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4863 slice_group_change_cycle= get_bits(&s->gb, ?);
4868 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4869 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
// optional one-line summary of the parsed header
4871 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4872 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4874 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4876 av_get_pict_type_char(h->slice_type),
4877 pps_id, h->frame_num,
4878 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4879 h->ref_count[0], h->ref_count[1],
4881 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4883 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads a level prefix from the bitstream: the bit reader cache is
// inspected, the width to consume is derived from the position of the
// highest set bit of the cached word (32 - av_log2(buf)), and that many
// bits are skipped.
4893 static inline int get_level_prefix(GetBitContext *gb){
4897 OPEN_READER(re, gb);
4898 UPDATE_CACHE(re, gb);
4899 buf=GET_CACHE(re, gb);
// log = number of bits up to and including the first set bit
4901 log= 32 - av_log2(buf);
// NOTE(review): the next two lines look like debug tracing, presumably under
// a preprocessor guard that is not visible here — confirm
4903 print_bin(buf>>(32-log), log);
4904 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4907 LAST_SKIP_BITS(re, gb, log);
4908 CLOSE_READER(re, gb);
// Tests the sub macroblock types in h->sub_mb_type[]: an entry that is not
// IS_SUB_8X8, or that is IS_DIRECT while the SPS has
// direct_8x8_inference_flag cleared, satisfies the condition below.
// NOTE(review): the loop and the return statements are not visible in this
// excerpt; presumably such an entry makes the function return 0 — confirm.
4913 static inline int get_dct8x8_allowed(H264Context *h){
4916 if(!IS_SUB_8X8(h->sub_mb_type[i])
4917 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4924 * decodes a residual block.
4925 * @param n block index
4926 * @param scantable scantable
4927 * @param max_coeff number of coefficients in the block
4928 * @return <0 if an error occurred
// Decodes the CAVLC residual of one block: the coefficient token, the signs
// of the trailing ones, the remaining levels, the number of zeros and the
// run lengths, then writes the coefficients to block[] at scantable
// positions. One of the two visible write loops stores the levels
// unchanged, the other scales them with qmul[].
4930 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4931 MpegEncContext * const s = &h->s;
// selects one of the coeff_token_vlc tables from the predicted coefficient count (0..16)
4932 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4934 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4936 //FIXME put trailing_ones into the context
// coeff_token: total_coeff sits in the upper bits (>>2), trailing_ones in the low 2 bits.
// Chroma DC has its own table; the other blocks pick a table from the
// predicted non-zero count.
4938 if(n == CHROMA_DC_BLOCK_INDEX){
4939 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4940 total_coeff= coeff_token>>2;
4942 if(n == LUMA_DC_BLOCK_INDEX){
4943 total_coeff= pred_non_zero_count(h, 0);
4944 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4945 total_coeff= coeff_token>>2;
4947 total_coeff= pred_non_zero_count(h, n);
4948 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4949 total_coeff= coeff_token>>2;
// only this branch records the count in the non-zero-count cache
4950 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4954 //FIXME set last_non_zero?
4959 trailing_ones= coeff_token&3;
4960 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4961 assert(total_coeff<=16);
// trailing ones are +1 or -1; one sign bit each
4963 for(i=0; i<trailing_ones; i++){
4964 level[i]= 1 - 2*get_bits1(gb);
4968 int level_code, mask;
// initial suffix_length is 1 only for more than 10 coefficients with fewer than 3 trailing ones
4969 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4970 int prefix= get_level_prefix(gb);
4972 //first coefficient has suffix_length equal to 0 or 1
4973 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4975 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4977 level_code= (prefix<<suffix_length); //part
4978 }else if(prefix==14){
4980 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4982 level_code= prefix + get_bits(gb, 4); //part
4983 }else if(prefix==15){
4984 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4985 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4987 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4991 if(trailing_ones < 3) level_code += 2;
// map level_code to a signed level: odd codes become negative
4996 mask= -(level_code&1);
4997 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5000 //remaining coefficients have suffix_length > 0
5001 for(;i<total_coeff;i++) {
// level_code is compared against the entry for the current suffix_length below
5002 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5003 prefix = get_level_prefix(gb);
5005 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5006 }else if(prefix==15){
5007 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5009 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5012 mask= -(level_code&1);
5013 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5014 if(level_code > suffix_limit[suffix_length])
// the total_zeros tables below are only consulted after this full-block test
5019 if(total_coeff == max_coeff)
5022 if(n == CHROMA_DC_BLOCK_INDEX)
5023 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5025 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// coefficients are placed from the highest scan position downwards
5028 coeff_num = zeros_left + total_coeff - 1;
5029 j = scantable[coeff_num];
// write loop that stores the levels unchanged
5031 block[j] = level[0];
5032 for(i=1;i<total_coeff;i++) {
5035 else if(zeros_left < 7){
5036 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5038 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
// NOTE(review): coeff_num is reduced by a run length taken from the
// bitstream and then used to index scantable; whether the negative-zeros
// check (5065) runs before these accesses is not visible here — confirm
5040 zeros_left -= run_before;
5041 coeff_num -= 1 + run_before;
5042 j= scantable[ coeff_num ];
// write loop that dequantizes: (level * qmul + 32) >> 6
5047 block[j] = (level[0] * qmul[j] + 32)>>6;
5048 for(i=1;i<total_coeff;i++) {
5051 else if(zeros_left < 7){
5052 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5054 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5056 zeros_left -= run_before;
5057 coeff_num -= 1 + run_before;
5058 j= scantable[ coeff_num ];
5060 block[j]= (level[i] * qmul[j] + 32)>>6;
5065 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts the field decoding flag of the current macroblock from a
// neighbour: the macroblock at mb_xy-1 is used if it belongs to the current
// slice, otherwise the one at mb_xy-mb_stride if that belongs to the
// current slice. Both h->mb_mbaff and h->mb_field_decoding_flag are set to
// 1 when the chosen mb_type is interlaced and to 0 otherwise.
5072 static void predict_field_decoding_flag(H264Context *h){
5073 MpegEncContext * const s = &h->s;
5074 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// NOTE(review): the fallback value used when neither neighbour is in the
// slice is not visible in this excerpt
5075 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5076 ? s->current_picture.mb_type[mb_xy-1]
5077 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5078 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5080 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5084 * decodes a P_SKIP or B_SKIP macroblock
// Sets up a skipped macroblock: the non-zero counts are cleared, motion is
// predicted (direct prediction in B slices, P-skip prediction otherwise)
// and the result is written back to the picture tables.
5086 static void decode_mb_skip(H264Context *h){
5087 MpegEncContext * const s = &h->s;
5088 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// a skipped macroblock has no coefficients
5091 memset(h->non_zero_count[mb_xy], 0, 16);
5092 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5095 mb_type|= MB_TYPE_INTERLACED;
// B slice: direct prediction for both lists
5097 if( h->slice_type == B_TYPE )
5099 // just for fill_caches. pred_direct_motion will set the real mb_type
5100 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5102 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5103 pred_direct_motion(h, &mb_type);
// clear the mvd cache of both lists
5105 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5106 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// other slice types: 16x16 prediction from list 0, reference index 0
5112 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5114 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5115 pred_pskip_motion(h, &mx, &my);
5116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5117 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5119 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// publish the result for this macroblock
5122 write_back_motion(h, mb_type);
5123 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
5124 s->current_picture.qscale_table[mb_xy]= s->qscale;
5125 h->slice_table[ mb_xy ]= h->slice_num;
5126 h->prev_mb_skipped= 1;
5130 * decodes a macroblock
5131 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5133 static int decode_mb_cavlc(H264Context *h){
5134 MpegEncContext * const s = &h->s;
5135 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5136 int mb_type, partition_count, cbp;
5137 int dct8x8_allowed= h->pps.transform_8x8_mode;
5139 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5141 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5142 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5144 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5145 if(s->mb_skip_run==-1)
5146 s->mb_skip_run= get_ue_golomb(&s->gb);
5148 if (s->mb_skip_run--) {
5149 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5150 if(s->mb_skip_run==0)
5151 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5153 predict_field_decoding_flag(h);
5160 if( (s->mb_y&1) == 0 )
5161 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5163 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5165 h->prev_mb_skipped= 0;
5167 mb_type= get_ue_golomb(&s->gb);
5168 if(h->slice_type == B_TYPE){
5170 partition_count= b_mb_type_info[mb_type].partition_count;
5171 mb_type= b_mb_type_info[mb_type].type;
5174 goto decode_intra_mb;
5176 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5178 partition_count= p_mb_type_info[mb_type].partition_count;
5179 mb_type= p_mb_type_info[mb_type].type;
5182 goto decode_intra_mb;
5185 assert(h->slice_type == I_TYPE);
5188 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5192 cbp= i_mb_type_info[mb_type].cbp;
5193 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5194 mb_type= i_mb_type_info[mb_type].type;
5198 mb_type |= MB_TYPE_INTERLACED;
5200 h->slice_table[ mb_xy ]= h->slice_num;
5202 if(IS_INTRA_PCM(mb_type)){
5205 // we assume these blocks are very rare so we dont optimize it
5206 align_get_bits(&s->gb);
5208 // The pixels are stored in the same order as levels in h->mb array.
5209 for(y=0; y<16; y++){
5210 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5211 for(x=0; x<16; x++){
5212 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5213 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5217 const int index= 256 + 4*(y&3) + 32*(y>>2);
5219 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5220 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5224 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5226 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5227 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5231 // In deblocking, the quantizer is 0
5232 s->current_picture.qscale_table[mb_xy]= 0;
5233 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5234 // All coeffs are present
5235 memset(h->non_zero_count[mb_xy], 16, 16);
5237 s->current_picture.mb_type[mb_xy]= mb_type;
5242 h->ref_count[0] <<= 1;
5243 h->ref_count[1] <<= 1;
5246 fill_caches(h, mb_type, 0);
5249 if(IS_INTRA(mb_type)){
5250 // init_top_left_availability(h);
5251 if(IS_INTRA4x4(mb_type)){
5254 if(dct8x8_allowed && get_bits1(&s->gb)){
5255 mb_type |= MB_TYPE_8x8DCT;
5259 // fill_intra4x4_pred_table(h);
5260 for(i=0; i<16; i+=di){
5261 int mode= pred_intra_mode(h, i);
5263 if(!get_bits1(&s->gb)){
5264 const int rem_mode= get_bits(&s->gb, 3);
5265 mode = rem_mode + (rem_mode >= mode);
5269 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5271 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5273 write_back_intra_pred_mode(h);
5274 if( check_intra4x4_pred_mode(h) < 0)
5277 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5278 if(h->intra16x16_pred_mode < 0)
5281 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5283 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5284 if(h->chroma_pred_mode < 0)
5286 }else if(partition_count==4){
5287 int i, j, sub_partition_count[4], list, ref[2][4];
5289 if(h->slice_type == B_TYPE){
5291 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5292 if(h->sub_mb_type[i] >=13){
5293 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5296 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5297 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5299 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5300 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5301 pred_direct_motion(h, &mb_type);
5302 h->ref_cache[0][scan8[4]] =
5303 h->ref_cache[1][scan8[4]] =
5304 h->ref_cache[0][scan8[12]] =
5305 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5308 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5310 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5311 if(h->sub_mb_type[i] >=4){
5312 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5315 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5316 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5320 for(list=0; list<2; list++){
5321 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5322 if(ref_count == 0) continue;
5324 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5325 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5326 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5335 dct8x8_allowed = get_dct8x8_allowed(h);
5337 for(list=0; list<2; list++){
5338 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5339 if(ref_count == 0) continue;
5342 if(IS_DIRECT(h->sub_mb_type[i])) {
5343 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5346 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5347 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5349 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5350 const int sub_mb_type= h->sub_mb_type[i];
5351 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5352 for(j=0; j<sub_partition_count[i]; j++){
5354 const int index= 4*i + block_width*j;
5355 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5356 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5357 mx += get_se_golomb(&s->gb);
5358 my += get_se_golomb(&s->gb);
5359 tprintf("final mv:%d %d\n", mx, my);
5361 if(IS_SUB_8X8(sub_mb_type)){
5362 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5363 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5364 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5365 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5366 }else if(IS_SUB_8X4(sub_mb_type)){
5367 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5368 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5369 }else if(IS_SUB_4X8(sub_mb_type)){
5370 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5371 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5373 assert(IS_SUB_4X4(sub_mb_type));
5374 mv_cache[ 0 ][0]= mx;
5375 mv_cache[ 0 ][1]= my;
5379 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5385 }else if(IS_DIRECT(mb_type)){
5386 pred_direct_motion(h, &mb_type);
5387 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5389 int list, mx, my, i;
5390 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5391 if(IS_16X16(mb_type)){
5392 for(list=0; list<2; list++){
5393 if(h->ref_count[list]>0){
5394 if(IS_DIR(mb_type, 0, list)){
5395 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5396 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5398 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5401 for(list=0; list<2; list++){
5402 if(IS_DIR(mb_type, 0, list)){
5403 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5404 mx += get_se_golomb(&s->gb);
5405 my += get_se_golomb(&s->gb);
5406 tprintf("final mv:%d %d\n", mx, my);
5408 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5410 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5413 else if(IS_16X8(mb_type)){
5414 for(list=0; list<2; list++){
5415 if(h->ref_count[list]>0){
5417 if(IS_DIR(mb_type, i, list)){
5418 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5419 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5421 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5425 for(list=0; list<2; list++){
5427 if(IS_DIR(mb_type, i, list)){
5428 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5429 mx += get_se_golomb(&s->gb);
5430 my += get_se_golomb(&s->gb);
5431 tprintf("final mv:%d %d\n", mx, my);
5433 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5435 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5439 assert(IS_8X16(mb_type));
5440 for(list=0; list<2; list++){
5441 if(h->ref_count[list]>0){
5443 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5444 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5445 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5447 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5451 for(list=0; list<2; list++){
5453 if(IS_DIR(mb_type, i, list)){
5454 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5455 mx += get_se_golomb(&s->gb);
5456 my += get_se_golomb(&s->gb);
5457 tprintf("final mv:%d %d\n", mx, my);
5459 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5461 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5467 if(IS_INTER(mb_type))
5468 write_back_motion(h, mb_type);
5470 if(!IS_INTRA16x16(mb_type)){
5471 cbp= get_ue_golomb(&s->gb);
5473 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5477 if(IS_INTRA4x4(mb_type))
5478 cbp= golomb_to_intra4x4_cbp[cbp];
5480 cbp= golomb_to_inter_cbp[cbp];
5483 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5484 if(get_bits1(&s->gb))
5485 mb_type |= MB_TYPE_8x8DCT;
5487 s->current_picture.mb_type[mb_xy]= mb_type;
5489 if(cbp || IS_INTRA16x16(mb_type)){
5490 int i8x8, i4x4, chroma_idx;
5491 int chroma_qp, dquant;
5492 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5493 const uint8_t *scan, *scan8x8, *dc_scan;
5495 // fill_non_zero_count_cache(h);
5497 if(IS_INTERLACED(mb_type)){
5498 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5499 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5500 dc_scan= luma_dc_field_scan;
5502 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5503 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5504 dc_scan= luma_dc_zigzag_scan;
5507 dquant= get_se_golomb(&s->gb);
5509 if( dquant > 25 || dquant < -26 ){
5510 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5514 s->qscale += dquant;
5515 if(((unsigned)s->qscale) > 51){
5516 if(s->qscale<0) s->qscale+= 52;
5517 else s->qscale-= 52;
5520 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5521 if(IS_INTRA16x16(mb_type)){
5522 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5523 return -1; //FIXME continue if partitioned and other return -1 too
5526 assert((cbp&15) == 0 || (cbp&15) == 15);
5529 for(i8x8=0; i8x8<4; i8x8++){
5530 for(i4x4=0; i4x4<4; i4x4++){
5531 const int index= i4x4 + 4*i8x8;
5532 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5538 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5541 for(i8x8=0; i8x8<4; i8x8++){
5542 if(cbp & (1<<i8x8)){
5543 if(IS_8x8DCT(mb_type)){
5544 DCTELEM *buf = &h->mb[64*i8x8];
5546 for(i4x4=0; i4x4<4; i4x4++){
5547 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5548 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5551 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5552 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5554 for(i4x4=0; i4x4<4; i4x4++){
5555 const int index= i4x4 + 4*i8x8;
5557 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5563 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5564 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5570 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5571 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5577 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5578 for(i4x4=0; i4x4<4; i4x4++){
5579 const int index= 16 + 4*chroma_idx + i4x4;
5580 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5586 uint8_t * const nnz= &h->non_zero_count_cache[0];
5587 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5588 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5591 uint8_t * const nnz= &h->non_zero_count_cache[0];
5592 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5593 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5594 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5596 s->current_picture.qscale_table[mb_xy]= s->qscale;
5597 write_back_non_zero_count(h);
5600 h->ref_count[0] >>= 1;
5601 h->ref_count[1] >>= 1;
/*
 * Decodes one CABAC bin with context state 70 + ctx and returns it; the
 * callers in decode_mb_cabac() store the result as the field decoding flag.
 * ctx starts at 0. Two neighbours are examined, both addressed relative to
 * the even-aligned row (mb_y & ~1): the macroblock one column to the left,
 * and the macroblock two rows above. A neighbour only qualifies when it is
 * in the current slice (slice_table entry equals slice_num) and its mb_type
 * is interlaced.
 * NOTE(review): the bodies of the two if-branches are not visible here;
 * presumably each one increments ctx -- confirm.
 */
5607 static int decode_cabac_field_decoding_flag(H264Context *h) {
5608 MpegEncContext * const s = &h->s;
5609 const int mb_x = s->mb_x;
/* round the row down to an even value */
5610 const int mb_y = s->mb_y & ~1;
5611 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5612 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5614 unsigned int ctx = 0;
/* left neighbour: same slice and interlaced */
5616 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
/* neighbour two rows up: same slice and interlaced */
5619 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5623 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/*
 * Decodes an intra macroblock type from CABAC bins.
 *   ctx_base    index into h->cabac_state of the first context used here.
 *   intra_slice used as an offset when picking the contexts of the later
 *               bins (state[2+intra_slice], state[3+intra_slice],
 *               state[3+2*intra_slice]); decode_cabac_mb_type() passes 1
 *               for I slices and 0 for the intra types of P and B slices.
 * Visible return values: 0 for I4x4 and 25 for PCM; otherwise mb_type is
 * accumulated starting from 1 (I16x16).
 * NOTE(review): the declarations of ctx and mb_type, the condition that
 * selects between the two first-bin paths, and the final return are not
 * visible here -- presumably the function ends by returning mb_type.
 */
5626 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5627 uint8_t *state= &h->cabac_state[ctx_base];
5631 MpegEncContext * const s = &h->s;
5632 const int mba_xy = h->left_mb_xy[0];
5633 const int mbb_xy = h->top_mb_xy;
/* first-bin context depends on left/top neighbours that are in this slice and are not I4x4 */
5635 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5637 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5639 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5640 return 0; /* I4x4 */
/* alternative path: the first bin always uses state[0] */
5643 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5644 return 0; /* I4x4 */
/* a set terminate bin selects PCM */
5647 if( get_cabac_terminate( &h->cabac ) )
5648 return 25; /* PCM */
/* the remaining bins add fixed weights (12, 4 or 8, 2, 1) onto the base value 1 */
5650 mb_type = 1; /* I16x16 */
5651 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5652 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5653 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5654 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5655 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
/*
 * Decodes the macroblock type index, dispatching on h->slice_type.
 *   I slices: delegates to decode_cabac_intra_mb_type() with ctx_base 3.
 *   P slices: contexts 14..17 select among the inter types (return values
 *             0..3); otherwise an intra type is decoded with ctx_base 17
 *             and offset by 5.
 *   B slices: contexts 27..32 are used; intra types are decoded with
 *             ctx_base 32 and offset by 23.
 * decode_mb_cabac() treats a negative result as failure.
 * NOTE(review): the handling of other slice types (see the TODO below) and
 * the final return are not visible here.
 */
5659 static int decode_cabac_mb_type( H264Context *h ) {
5660 MpegEncContext * const s = &h->s;
5662 if( h->slice_type == I_TYPE ) {
5663 return decode_cabac_intra_mb_type(h, 3, 1);
5664 } else if( h->slice_type == P_TYPE ) {
5665 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5667 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5668 /* P_L0_D16x16, P_8x8 */
5669 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5671 /* P_L0_D8x16, P_L0_D16x8 */
5672 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
/* intra macroblock inside a P slice: intra type index shifted by 5 */
5675 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5677 } else if( h->slice_type == B_TYPE ) {
5678 const int mba_xy = h->left_mb_xy[0];
5679 const int mbb_xy = h->top_mb_xy;
/* context for the first bin depends on left/top neighbours that are in this slice and are not direct */
5683 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5685 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5688 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5689 return 0; /* B_Direct_16x16 */
5691 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5692 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* assemble a 4-bit code, most significant bit first */
5695 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5696 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5697 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5698 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
/* NOTE(review): the condition guarding the next return is not visible here */
5700 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5701 else if( bits == 13 ) {
/* intra macroblock inside a B slice: intra type index shifted by 23 */
5702 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5703 } else if( bits == 14 )
5704 return 11; /* B_L1_L0_8x16 */
5705 else if( bits == 15 )
5706 return 22; /* B_8x8 */
/* remaining codes take one more bin */
5708 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5709 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5711 /* TODO SI/SP frames? */
/*
 * Decodes the skip flag for the macroblock at (mb_x, mb_y): one CABAC bin
 * read with context state 11 + ctx, returned as-is.
 * mba_xy and mbb_xy are the indices of the left and top neighbours used to
 * derive ctx; in the FRAME_MBAFF case they are computed from the even-aligned
 * row and adjusted by one mb_stride depending on the interlaced status of
 * the neighbours. A neighbour contributes only when it is in the current
 * slice and is not itself a skipped macroblock.
 * NOTE(review): several lines (declarations, the first halves of the MBAFF
 * conditions, the ctx updates and the B-slice adjustment) are not visible
 * here; the description above covers only what is shown.
 */
5716 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5717 MpegEncContext * const s = &h->s;
5721 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
/* index of the top macroblock of the current pair */
5722 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5725 && h->slice_table[mba_xy] == h->slice_num
5726 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5727 mba_xy += s->mb_stride;
5729 mbb_xy = mb_xy - s->mb_stride;
5731 && h->slice_table[mbb_xy] == h->slice_num
5732 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5733 mbb_xy -= s->mb_stride;
5735 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* non-MBAFF case: plain left/top neighbours */
5737 int mb_xy = mb_x + mb_y*s->mb_stride;
5739 mbb_xy = mb_xy - s->mb_stride;
/* neighbours in this slice that are not skipped influence ctx */
5742 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5744 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* NOTE(review): B slices presumably use a different context range; the statement is not visible */
5747 if( h->slice_type == B_TYPE )
5749 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
/*
 * Decodes an intra 4x4 prediction mode given the predicted mode pred_mode.
 * A first bin is read with context 68; three further bins read with
 * context 69 form a 3-bit value, least significant bit first, which is then
 * compared against pred_mode.
 * NOTE(review): the return statements are not visible here. By analogy with
 * the CAVLC path in this file (mode = rem_mode + (rem_mode >= mode)) the
 * function presumably returns pred_mode when the first bin is set and
 * otherwise the 3-bit value, incremented when it is >= pred_mode -- confirm.
 */
5752 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5755 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5758 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5759 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5760 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5762 if( mode >= pred_mode )
/*
 * Decodes the intra chroma prediction mode; decode_mb_cabac() stores the
 * result in h->chroma_pred_mode and h->chroma_pred_mode_table[mb_xy].
 * The first bin uses context 64 + ctx, where ctx depends on whether the
 * left and top neighbours are in the current slice and have a non-zero
 * entry in chroma_pred_mode_table. Up to two further bins are read with
 * context 64 + 3.
 * NOTE(review): the ctx updates and the return values are not visible here;
 * presumably each zero bin terminates the code with the count of bins that
 * were set so far -- confirm.
 */
5768 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5769 const int mba_xy = h->left_mb_xy[0];
5770 const int mbb_xy = h->top_mb_xy;
5774 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5775 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5778 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5781 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5784 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5786 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/*
 * Lookup tables used by decode_cabac_mb_cbp_luma().
 * block_idx_x / block_idx_y: indexed by a block number 0..15 (the caller
 * uses 4*i8x8) and yield that block's x and y position, each in the range
 * 0..3.
 * block_idx_xy: indexed as [x][y]; the caller divides the entry by 4 to get
 * an 8x8 block index, so it presumably is the inverse mapping from position
 * back to block number (initializer not visible here -- confirm).
 */
5792 static const uint8_t block_idx_x[16] = {
5793 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5795 static const uint8_t block_idx_y[16] = {
5796 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5798 static const uint8_t block_idx_xy[4][4] = {
/*
 * Decodes the luma part of the coded block pattern, one bin per 8x8 block
 * (i8x8 = 0..3), each read with context 73 + ctx.
 * For every 8x8 block its position (x, y) is looked up, and ctx is derived
 * from the cbp bit of the block to the left (cbp_a) and of the block above
 * (cbp_b): a neighbouring bit equal to 0 influences ctx. cbp_a is taken
 * from h->left_cbp when the left macroblock is in the current slice; the
 * top value is only set up when the top macroblock is in the current slice.
 * NOTE(review): the declarations, the branches that pick between in-MB and
 * neighbouring-MB cbp values, the ctx increments, the accumulation of the
 * result and the return are not visible here.
 */
5805 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5810 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5812 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5815 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* position of the first 4x4 block inside this 8x8 block */
5820 x = block_idx_x[4*i8x8];
5821 y = block_idx_y[4*i8x8];
5825 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5826 cbp_a = h->left_cbp;
5827 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5833 /* No need to test for skip as we put 0 for skip block */
5834 /* No need to test for IPCM as we put 1 for IPCM block */
/* 8x8 index of the block to the left; (x-1)&3 wraps around into the neighbouring macroblock */
5836 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5837 if( ((cbp_a >> i8x8a)&0x01) == 0 )
/* 8x8 index of the block above; (y-1)&3 wraps around likewise */
5842 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5843 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5847 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/*
 * Decodes the chroma part of the coded block pattern.
 * cbp_a / cbp_b are the 2-bit chroma cbp values (bits 4..5) of the left and
 * top macroblocks. A first bin is read with context 77 + ctx, where ctx is
 * raised by 1 when cbp_a > 0 and by 2 when cbp_b > 0. If the function gets
 * past that bin, a second bin is read with a context raised by 1 when
 * cbp_a == 2 and by 2 when cbp_b == 2, and 1 + that bin is returned.
 * NOTE(review): the initial values given to ctx before each stage and the
 * early return after the first bin are not visible here; presumably the
 * function returns 0 when the first bin is 0 -- confirm.
 */
5853 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5857 cbp_a = (h->left_cbp>>4)&0x03;
5858 cbp_b = (h-> top_cbp>>4)&0x03;
5861 if( cbp_a > 0 ) ctx++;
5862 if( cbp_b > 0 ) ctx += 2;
5863 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5867 if( cbp_a == 2 ) ctx++;
5868 if( cbp_b == 2 ) ctx += 2;
5869 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/*
 * Decodes a quantizer delta from CABAC bins.
 * Bins are read in a loop with context 60 + ctx, counting into val; the
 * loop is guarded so that val cannot grow beyond 102. One visible return
 * maps val to the negative value -(val + 1)/2.
 * mbn_xy is set to the index of the preceding macroblock: either the one to
 * the left in the same row, or the last macroblock (mb_width - 1) of the
 * previous row.
 * NOTE(review): the declarations, the condition choosing between the two
 * mbn_xy forms, the effect of last_qscale_diff on ctx, the loop body, the
 * action taken when val > 102, and the return for the non-negative case are
 * not visible here.
 */
5871 static int decode_cabac_mb_dqp( H264Context *h) {
5872 MpegEncContext * const s = &h->s;
5878 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5880 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
/* the previous non-zero delta influences the first context */
5882 if( h->last_qscale_diff != 0 )
5885 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5891 if(val > 102) //prevent infinite loop
5898 return -(val + 1)/2;
/*
 * Decodes a P-slice sub-macroblock type index from up to three CABAC bins,
 * read with contexts 21, 22 and 23 in that order; decode_mb_cabac() uses
 * the result to index p_sub_mb_type_info[].
 * NOTE(review): none of the return statements are visible here, so the
 * mapping from bin values to indices cannot be stated from this view.
 */
5900 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5901 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5903 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5905 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/*
 * Decodes a B-slice sub-macroblock type index using contexts 36..39;
 * decode_mb_cabac() uses the result to index b_sub_mb_type_info[].
 * Visible results: 0 (B_Direct_8x8) when the first bin is 0, 1 or 2 when
 * the second bin is 0, and 11 or 12 on the path where the bins read with
 * contexts 38 and 39 are both set. On the remaining paths two further bins
 * are added to a running value "type" with weights 2 and 1.
 * NOTE(review): the declaration and base values of type and the final
 * return are not visible here.
 */
5909 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5911 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5912 return 0; /* B_Direct_8x8 */
5913 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5914 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5916 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5917 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5918 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5921 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5922 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/*
 * Decodes one CABAC bin with context 399 + h->neighbor_transform_size and
 * returns it. decode_mb_cabac() sets MB_TYPE_8x8DCT on the macroblock type
 * when the result is non-zero.
 */
5926 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5927 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/*
 * Decodes a reference index for block n of the given list.
 * refa / refb are the cached reference indices of the positions to the left
 * of (scan8[n] - 1) and above (scan8[n] - 8) the block. In B slices a
 * neighbour is only considered when its reference is > 0 and its
 * direct_cache entry is not set. Bins are then read in a loop, the first
 * one with context 54 + ctx.
 * NOTE(review): the declarations, the ctx updates, the non-B-slice branch,
 * the loop body and the return are not visible here; presumably the loop
 * counts set bins into the returned index -- confirm.
 */
5930 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5931 int refa = h->ref_cache[list][scan8[n] - 1];
5932 int refb = h->ref_cache[list][scan8[n] - 8];
5936 if( h->slice_type == B_TYPE) {
5937 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5939 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5948 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/*
 * Decodes one motion vector difference component for block n of the given
 * list; l selects the component (0 or 1) and the context base (40 for
 * l == 0, 47 otherwise).
 * amvd is the sum of the absolute cached differences of the left and top
 * neighbours for that component and is used to choose the first context
 * (one visible threshold is amvd > 32). After the first bin, further
 * context-coded bins are read while mvd < 9, followed by bypass-coded bins;
 * a final bypass bin selects the sign, -mvd being returned when it is set.
 * NOTE(review): the declarations, the remaining thresholds, the handling of
 * a zero first bin, the loop bodies and the positive return are not visible
 * here.
 */
5958 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5959 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5960 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5961 int ctxbase = (l == 0) ? 40 : 47;
5966 else if( amvd > 32 )
5971 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* context-coded prefix, limited to mvd < 9 */
5976 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded suffix */
5984 while( get_cabac_bypass( &h->cabac ) ) {
5989 if( get_cabac_bypass( &h->cabac ) )
/* sign bin */
5993 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/*
 * Computes the context offset for the coded block flag of a residual block;
 * decode_cabac_residual() adds 85 to the result to index h->cabac_state.
 * nza / nzb describe the left and top neighbours and come from a source
 * that depends on cat:
 *   first branch (condition not visible, presumably cat == 0):
 *                bit 0x100 of left_cbp / top_cbp
 *   cat 1 or 2:  non_zero_count_cache entries next to scan8[idx]
 *   cat 3:       bit (6 + idx) of left_cbp / top_cbp
 *   otherwise:   non_zero_count_cache entries next to scan8[16 + idx]
 * The return value is ctx + 4 * cat.
 * NOTE(review): the declarations and the derivation of ctx from nza / nzb
 * are not visible here.
 */
5997 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6002 nza = h->left_cbp&0x100;
6003 nzb = h-> top_cbp&0x100;
6004 } else if( cat == 1 || cat == 2 ) {
6005 nza = h->non_zero_count_cache[scan8[idx] - 1];
6006 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6007 } else if( cat == 3 ) {
6008 nza = (h->left_cbp>>(6+idx))&0x01;
6009 nzb = (h-> top_cbp>>(6+idx))&0x01;
6012 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6013 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6022 return ctx + 4 * cat;
/*
 * Decodes the residual coefficients of one block with CABAC.
 *   block      output coefficient array; entries are written at the
 *              positions given by scantable.
 *   cat        block category, see the table in the body.
 *   n          block index within its category, see the same table.
 *   scantable  maps a coefficient's scan position to its index in block.
 *   qmul       per-position multipliers; on the scaling paths a level is
 *              stored as (level * qmul[j] + 32) >> 6.
 *   max_coeff  number of coefficients in the block.
 * Steps visible below: read the coded block flag, pick the context bases
 * for this category (field or frame variant via MB_FIELD), decode the
 * significance map into index[], update cbp_table / non_zero_count_cache,
 * then decode the levels from the last significant coefficient backwards.
 * NOTE(review): many lines (declarations, branch conditions, the tail of the
 * DECODE_SIGNIFICANCE macro, the returns) are not visible here; the inline
 * notes describe only what is shown.
 */
6025 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6026 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context offsets per category; first index is MB_FIELD (see their use below) */
6027 static const int significant_coeff_flag_offset[2][6] = {
6028 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6029 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6031 static const int last_coeff_flag_offset[2][6] = {
6032 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6033 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6035 static const int coeff_abs_level_m1_offset[6] = {
6036 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position context offsets used for the 63-coefficient significance pass; first index is MB_FIELD */
6038 static const int significant_coeff_flag_offset_8x8[2][63] = {
6039 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6040 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6041 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6042 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6043 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6044 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6045 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6046 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6048 static const int last_coeff_flag_offset_8x8[63] = {
6049 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6050 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6051 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6052 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6058 int coeff_count = 0;
6061 int abslevelgt1 = 0;
6063 uint8_t *significant_coeff_ctx_base;
6064 uint8_t *last_coeff_ctx_base;
6065 uint8_t *abs_level_m1_ctx_base;
6067 /* cat: 0-> DC 16x16 n = 0
6068 * 1-> AC 16x16 n = luma4x4idx
6069 * 2-> Luma4x4 n = luma4x4idx
6070 * 3-> DC Chroma n = iCbCr
6071 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6072 * 5-> Luma8x8 n = 4 * luma8x8idx
6075 /* read coded block flag */
6077 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6078 if( cat == 1 || cat == 2 )
6079 h->non_zero_count_cache[scan8[n]] = 0;
6081 h->non_zero_count_cache[scan8[16+n]] = 0;
6087 significant_coeff_ctx_base = h->cabac_state
6088 + significant_coeff_flag_offset[MB_FIELD][cat];
6089 last_coeff_ctx_base = h->cabac_state
6090 + last_coeff_flag_offset[MB_FIELD][cat];
6091 abs_level_m1_ctx_base = h->cabac_state
6092 + coeff_abs_level_m1_offset[cat];
/* Significance pass: for each scan position read a "significant" bin and, when set, record the position in index[] and read a "last" bin. */
6095 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6096 for(last= 0; last < coefs; last++) { \
6097 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6098 if( get_cabac( &h->cabac, sig_ctx )) { \
6099 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6100 index[coeff_count++] = last; \
6101 if( get_cabac( &h->cabac, last_ctx ) ) { \
6107 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6108 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6110 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* the loop ran through every position: the final coefficient is recorded without reading a bin */
6112 if( last == max_coeff -1 ) {
6113 index[coeff_count++] = last;
6115 assert(coeff_count > 0);
/* record the presence of coefficients for later context derivation */
6118 h->cbp_table[mb_xy] |= 0x100;
6119 else if( cat == 1 || cat == 2 )
6120 h->non_zero_count_cache[scan8[n]] = coeff_count;
6122 h->cbp_table[mb_xy] |= 0x40 << n;
6124 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6127 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* level pass, from the last recorded coefficient back to the first */
6130 for( i = coeff_count - 1; i >= 0; i-- ) {
6131 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6132 int j= scantable[index[i]];
/* a zero bin here means the level has magnitude 1; the bypass bin that follows is the sign */
6134 if( get_cabac( &h->cabac, ctx ) == 0 ) {
6136 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6139 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6140 else block[j] = ( qmul[j] + 32) >> 6;
/* larger magnitudes: context-coded bins while coeff_abs < 15, then bypass-coded bins */
6146 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6147 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6151 if( coeff_abs >= 15 ) {
6153 while( get_cabac_bypass( &h->cabac ) ) {
6154 coeff_abs += 1 << j;
6159 if( get_cabac_bypass( &h->cabac ) )
6160 coeff_abs += 1 << j ;
6165 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6166 else block[j] = coeff_abs;
6168 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6169 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/*
 * Sets h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The defaults are the macroblock one row up and the one to the left. The
 * remaining code works on macroblock pairs (pair_xy is the current
 * macroblock's column on the even-aligned row): depending on the
 * frame/field status of the current, top and left pairs, top_mb_xy is moved
 * up by one more row, and left_mb_xy[0] is set to the top macroblock of the
 * left pair when the left pair's frame/field status differs from the
 * current one.
 * NOTE(review): the condition that enables the pair handling (presumably
 * FRAME_MBAFF) and the start of the top-neighbour condition are not visible
 * here -- confirm.
 */
6178 static void inline compute_mb_neighbors(H264Context *h)
6180 MpegEncContext * const s = &h->s;
6181 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6182 h->top_mb_xy = mb_xy - s->mb_stride;
6183 h->left_mb_xy[0] = mb_xy - 1;
6185 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6186 const int top_pair_xy = pair_xy - s->mb_stride;
/* a pair counts as "frame" when its mb_type is not interlaced */
6187 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6188 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6189 const int curr_mb_frame_flag = !MB_FIELD;
/* 1 for the lower macroblock of a pair (odd row) */
6190 const int bottom = (s->mb_y & 1);
6192 ? !curr_mb_frame_flag // bottom macroblock
6193 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6195 h->top_mb_xy -= s->mb_stride;
6197 if (left_mb_frame_flag != curr_mb_frame_flag) {
6198 h->left_mb_xy[0] = pair_xy - 1;
6205 * decodes a macroblock
6206 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6208 static int decode_mb_cabac(H264Context *h) {
6209 MpegEncContext * const s = &h->s;
6210 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6211 int mb_type, partition_count, cbp = 0;
6212 int dct8x8_allowed= h->pps.transform_8x8_mode;
6214 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6216 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6217 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6219 /* a skipped mb needs the aff flag from the following mb */
6220 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6221 predict_field_decoding_flag(h);
6222 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6223 skip = h->next_mb_skipped;
6225 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6226 /* read skip flags */
6228 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6229 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6230 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6231 if(h->next_mb_skipped)
6232 predict_field_decoding_flag(h);
6234 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6239 h->cbp_table[mb_xy] = 0;
6240 h->chroma_pred_mode_table[mb_xy] = 0;
6241 h->last_qscale_diff = 0;
6248 if( (s->mb_y&1) == 0 )
6250 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6252 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6254 h->prev_mb_skipped = 0;
6256 compute_mb_neighbors(h);
6257 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6258 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6262 if( h->slice_type == B_TYPE ) {
6264 partition_count= b_mb_type_info[mb_type].partition_count;
6265 mb_type= b_mb_type_info[mb_type].type;
6268 goto decode_intra_mb;
6270 } else if( h->slice_type == P_TYPE ) {
6272 partition_count= p_mb_type_info[mb_type].partition_count;
6273 mb_type= p_mb_type_info[mb_type].type;
6276 goto decode_intra_mb;
6279 assert(h->slice_type == I_TYPE);
6281 partition_count = 0;
6282 cbp= i_mb_type_info[mb_type].cbp;
6283 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6284 mb_type= i_mb_type_info[mb_type].type;
6287 mb_type |= MB_TYPE_INTERLACED;
6289 h->slice_table[ mb_xy ]= h->slice_num;
6291 if(IS_INTRA_PCM(mb_type)) {
6295 // We assume these blocks are very rare so we dont optimize it.
6296 // FIXME The two following lines get the bitstream position in the cabac
6297 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6298 ptr= h->cabac.bytestream;
6299 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6301 // The pixels are stored in the same order as levels in h->mb array.
6302 for(y=0; y<16; y++){
6303 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6304 for(x=0; x<16; x++){
6305 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6306 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6310 const int index= 256 + 4*(y&3) + 32*(y>>2);
6312 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6313 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6317 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6319 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6320 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6324 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6326 // All blocks are present
6327 h->cbp_table[mb_xy] = 0x1ef;
6328 h->chroma_pred_mode_table[mb_xy] = 0;
6329 // In deblocking, the quantizer is 0
6330 s->current_picture.qscale_table[mb_xy]= 0;
6331 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6332 // All coeffs are present
6333 memset(h->non_zero_count[mb_xy], 16, 16);
6334 s->current_picture.mb_type[mb_xy]= mb_type;
6339 h->ref_count[0] <<= 1;
6340 h->ref_count[1] <<= 1;
6343 fill_caches(h, mb_type, 0);
6345 if( IS_INTRA( mb_type ) ) {
6347 if( IS_INTRA4x4( mb_type ) ) {
6348 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6349 mb_type |= MB_TYPE_8x8DCT;
6350 for( i = 0; i < 16; i+=4 ) {
6351 int pred = pred_intra_mode( h, i );
6352 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6353 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6356 for( i = 0; i < 16; i++ ) {
6357 int pred = pred_intra_mode( h, i );
6358 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6360 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6363 write_back_intra_pred_mode(h);
6364 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6366 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6367 if( h->intra16x16_pred_mode < 0 ) return -1;
6369 h->chroma_pred_mode_table[mb_xy] =
6370 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6372 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6373 if( h->chroma_pred_mode < 0 ) return -1;
6374 } else if( partition_count == 4 ) {
6375 int i, j, sub_partition_count[4], list, ref[2][4];
6377 if( h->slice_type == B_TYPE ) {
6378 for( i = 0; i < 4; i++ ) {
6379 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6380 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6381 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6383 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
6384 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
6385 pred_direct_motion(h, &mb_type);
6386 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6387 for( i = 0; i < 4; i++ )
6388 if( IS_DIRECT(h->sub_mb_type[i]) )
6389 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6393 for( i = 0; i < 4; i++ ) {
6394 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6395 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6396 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6400 for( list = 0; list < 2; list++ ) {
6401 if( h->ref_count[list] > 0 ) {
6402 for( i = 0; i < 4; i++ ) {
6403 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6404 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6405 if( h->ref_count[list] > 1 )
6406 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6412 h->ref_cache[list][ scan8[4*i]+1 ]=
6413 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6419 dct8x8_allowed = get_dct8x8_allowed(h);
6421 for(list=0; list<2; list++){
6423 if(IS_DIRECT(h->sub_mb_type[i])){
6424 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6427 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6429 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6430 const int sub_mb_type= h->sub_mb_type[i];
6431 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6432 for(j=0; j<sub_partition_count[i]; j++){
6435 const int index= 4*i + block_width*j;
6436 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6437 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6438 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6440 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6441 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6442 tprintf("final mv:%d %d\n", mx, my);
6444 if(IS_SUB_8X8(sub_mb_type)){
6445 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6446 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6447 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6448 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6450 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6451 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6452 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6453 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6454 }else if(IS_SUB_8X4(sub_mb_type)){
6455 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6456 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6458 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6459 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6460 }else if(IS_SUB_4X8(sub_mb_type)){
6461 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6462 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6464 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6465 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6467 assert(IS_SUB_4X4(sub_mb_type));
6468 mv_cache[ 0 ][0]= mx;
6469 mv_cache[ 0 ][1]= my;
6471 mvd_cache[ 0 ][0]= mx - mpx;
6472 mvd_cache[ 0 ][1]= my - mpy;
6476 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6477 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6478 p[0] = p[1] = p[8] = p[9] = 0;
6479 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6483 } else if( IS_DIRECT(mb_type) ) {
6484 pred_direct_motion(h, &mb_type);
6485 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6486 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6487 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6489 int list, mx, my, i, mpx, mpy;
6490 if(IS_16X16(mb_type)){
6491 for(list=0; list<2; list++){
6492 if(IS_DIR(mb_type, 0, list)){
6493 if(h->ref_count[list] > 0 ){
6494 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6495 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6498 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6500 for(list=0; list<2; list++){
6501 if(IS_DIR(mb_type, 0, list)){
6502 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6504 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6505 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6506 tprintf("final mv:%d %d\n", mx, my);
6508 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6509 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6511 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6514 else if(IS_16X8(mb_type)){
6515 for(list=0; list<2; list++){
6516 if(h->ref_count[list]>0){
6518 if(IS_DIR(mb_type, i, list)){
6519 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6520 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6522 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6526 for(list=0; list<2; list++){
6528 if(IS_DIR(mb_type, i, list)){
6529 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6530 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6531 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6532 tprintf("final mv:%d %d\n", mx, my);
6534 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6535 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6537 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6538 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6543 assert(IS_8X16(mb_type));
6544 for(list=0; list<2; list++){
6545 if(h->ref_count[list]>0){
6547 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6548 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6549 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6551 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6555 for(list=0; list<2; list++){
6557 if(IS_DIR(mb_type, i, list)){
6558 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6559 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6560 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6562 tprintf("final mv:%d %d\n", mx, my);
6563 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6564 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6566 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6567 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6574 if( IS_INTER( mb_type ) ) {
6575 h->chroma_pred_mode_table[mb_xy] = 0;
6576 write_back_motion( h, mb_type );
6579 if( !IS_INTRA16x16( mb_type ) ) {
6580 cbp = decode_cabac_mb_cbp_luma( h );
6581 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6584 h->cbp_table[mb_xy] = cbp;
6586 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6587 if( decode_cabac_mb_transform_size( h ) )
6588 mb_type |= MB_TYPE_8x8DCT;
6590 s->current_picture.mb_type[mb_xy]= mb_type;
6592 if( cbp || IS_INTRA16x16( mb_type ) ) {
6593 const uint8_t *scan, *scan8x8, *dc_scan;
6596 if(IS_INTERLACED(mb_type)){
6597 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6598 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6599 dc_scan= luma_dc_field_scan;
6601 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6602 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6603 dc_scan= luma_dc_zigzag_scan;
6606 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6607 if( dqp == INT_MIN ){
6608 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6612 if(((unsigned)s->qscale) > 51){
6613 if(s->qscale<0) s->qscale+= 52;
6614 else s->qscale-= 52;
6616 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6618 if( IS_INTRA16x16( mb_type ) ) {
6620 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6621 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6624 for( i = 0; i < 16; i++ ) {
6625 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6626 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6630 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6634 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6635 if( cbp & (1<<i8x8) ) {
6636 if( IS_8x8DCT(mb_type) ) {
6637 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6638 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6641 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6642 const int index = 4*i8x8 + i4x4;
6643 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6644 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6648 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6649 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6656 for( c = 0; c < 2; c++ ) {
6657 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6658 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6665 for( c = 0; c < 2; c++ ) {
6666 for( i = 0; i < 4; i++ ) {
6667 const int index = 16 + 4 * c + i;
6668 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6669 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6674 uint8_t * const nnz= &h->non_zero_count_cache[0];
6675 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6676 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6679 uint8_t * const nnz= &h->non_zero_count_cache[0];
6680 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6681 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6682 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6683 h->last_qscale_diff = 0;
6686 s->current_picture.qscale_table[mb_xy]= s->qscale;
6687 write_back_non_zero_count(h);
6690 h->ref_count[0] >>= 1;
6691 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge. The samples read and written are
 * pix[-4] .. pix[3], i.e. horizontally adjacent pixels with the edge
 * lying between pix[-1] (p0) and pix[0] (q0).
 *
 * @param h      decoder context; supplies the slice alpha/beta offsets and
 *               the dsp function table
 * @param pix    first sample on the q side of the edge
 * @param stride handed unchanged to the dsp loop filter
 * @param bS     four boundary strengths along the edge
 * @param qp     quantizer used to index alpha_table, beta_table and tc0_table
 *
 * NOTE(review): this copy of the function is missing some lines (local
 * declarations, the test that selects between the two paths, closing
 * braces). The comments describe only the statements that are visible.
 */
6698 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6700 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6701 const int alpha = alpha_table[index_a];
6702 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Clipping value per bS entry; -1 is passed to the dsp filter where bS is 0. */
6707 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6708 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6710 /* 16px edge length, because bS=4 is triggered by being at
6711 * the edge of an intra MB, so all 4 bS are the same */
6712 for( d = 0; d < 16; d++ ) {
/* p* are the samples left of the edge, q* the samples right of it. */
6713 const int p0 = pix[-1];
6714 const int p1 = pix[-2];
6715 const int p2 = pix[-3];
6717 const int q0 = pix[0];
6718 const int q1 = pix[1];
6719 const int q2 = pix[2];
/* Filter this line only when the step across the edge is below alpha
 * and the gradient on each side is below beta. */
6721 if( ABS( p0 - q0 ) < alpha &&
6722 ABS( p1 - p0 ) < beta &&
6723 ABS( q1 - q0 ) < beta ) {
/* A small step across the edge enables the three-sample smoothing below. */
6725 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* p side: three samples are rewritten when |p2 - p0| < beta. */
6726 if( ABS( p2 - p0 ) < beta)
6728 const int p3 = pix[-4];
6730 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6731 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6732 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side, single-sample form. */
6735 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: mirror image of the p side handling. */
6737 if( ABS( q2 - q0 ) < beta)
6739 const int q3 = pix[3];
6741 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6742 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6743 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side, single-sample form. */
6746 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Single-sample form applied to both p0 and q0. */
6750 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6751 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Trace output: samples before filtering followed by the current pixel values. */
6753 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge by delegating to the dsp chroma filters.
 *
 * @param h      decoder context; supplies the slice alpha/beta offsets and
 *               the dsp function table
 * @param pix    sample position handed to the dsp filter
 * @param stride handed unchanged to the dsp filter
 * @param bS     four boundary strengths along the edge
 * @param qp     quantizer used to index alpha_table, beta_table and tc0_table
 *
 * NOTE(review): the lines choosing between the two dsp calls are not present
 * in this copy; presumably the intra variant is the bS=4 case, as in
 * filter_mb_edgev - confirm against the full source.
 */
6759 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6761 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6762 const int alpha = alpha_table[index_a];
6763 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Chroma clipping value is the tc0 table entry plus one; 0 where bS is 0. */
6768 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6769 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Variant without a tc array. */
6771 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the left luma edge of a macroblock row by row, with a separate
 * boundary strength and quantizer selected for each of the 16 rows.
 *
 * @param h      decoder context; supplies slice alpha/beta offsets and, via
 *               the MB_FIELD macro, the field decoding flag
 * @param pix    first sample on the q side of the edge in row 0; advanced by
 *               stride after every row
 * @param stride distance between consecutive rows
 * @param bS     eight boundary strengths
 * @param qp     two quantizers; the one used for a row is picked by qp_index
 *
 * NOTE(review): this copy is missing some lines (local declarations, parts of
 * the bS_index computation, the tc derivation, else/brace lines). Comments
 * describe only what is visible.
 */
6775 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6777 for( i = 0; i < 16; i++, pix += stride) {
/* bS_index starts as i/2 and gets the low bit of i OR-ed in; the condition
 * guarding that adjustment is not visible here. */
6783 int bS_index = (i >> 1);
6786 bS_index |= (i & 1);
/* Strength 0 is tested first; the statement taken is not visible here
 * (presumably the row is skipped - confirm). */
6789 if( bS[bS_index] == 0 ) {
/* With MB_FIELD rows 0-7 use qp[0] and rows 8-15 use qp[1]; otherwise
 * even rows use qp[0] and odd rows use qp[1]. */
6793 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6794 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6795 alpha = alpha_table[index_a];
6796 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Strengths 1..3: clipped correction of the samples next to the edge. */
6798 if( bS[bS_index] < 4 ) {
6799 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6800 const int p0 = pix[-1];
6801 const int p1 = pix[-2];
6802 const int p2 = pix[-3];
6803 const int q0 = pix[0];
6804 const int q1 = pix[1];
6805 const int q2 = pix[2];
/* Filter only when the step across the edge is below alpha and the
 * gradient on each side is below beta. */
6807 if( ABS( p0 - q0 ) < alpha &&
6808 ABS( p1 - p0 ) < beta &&
6809 ABS( q1 - q0 ) < beta ) {
/* Smooth p side: p1 is also adjusted, limited to +-tc0. */
6813 if( ABS( p2 - p0 ) < beta ) {
6814 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
/* Smooth q side: q1 is also adjusted, limited to +-tc0. */
6817 if( ABS( q2 - q0 ) < beta ) {
6818 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* Correction for p0/q0, limited to +-tc and applied with opposite signs. */
6822 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6823 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6824 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6825 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (the trace below labels it bS:4): same arithmetic as the
 * 16-row loop in filter_mb_edgev, applied to this single row. */
6828 const int p0 = pix[-1];
6829 const int p1 = pix[-2];
6830 const int p2 = pix[-3];
6832 const int q0 = pix[0];
6833 const int q1 = pix[1];
6834 const int q2 = pix[2];
6836 if( ABS( p0 - q0 ) < alpha &&
6837 ABS( p1 - p0 ) < beta &&
6838 ABS( q1 - q0 ) < beta ) {
/* A small step across the edge enables the three-sample smoothing below. */
6840 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* p side: three samples are rewritten when |p2 - p0| < beta. */
6841 if( ABS( p2 - p0 ) < beta)
6843 const int p3 = pix[-4];
6845 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6846 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6847 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side, single-sample form. */
6850 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: mirror image of the p side handling. */
6852 if( ABS( q2 - q0 ) < beta)
6854 const int q3 = pix[3];
6856 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6857 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6858 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side, single-sample form. */
6861 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Single-sample form applied to both p0 and q0. */
6865 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6866 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6868 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock the left chroma edge of a macroblock row by row (8 rows), with a
 * separate boundary strength and quantizer selected for each row. Only the
 * two samples adjacent to the edge (pix[-1] and pix[0]) are modified.
 *
 * @param h      decoder context; supplies slice alpha/beta offsets and, via
 *               the MB_FIELD macro, the field decoding flag
 * @param pix    first sample on the q side of the edge in row 0; advanced by
 *               stride after every row
 * @param stride distance between consecutive rows
 * @param bS     eight boundary strengths
 * @param qp     two quantizers; the one used for a row is picked by qp_index
 *
 * NOTE(review): this copy is missing some lines (local declarations, the
 * bS_index computation, else/brace lines). Comments describe only what is
 * visible.
 */
6873 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6875 for( i = 0; i < 8; i++, pix += stride) {
/* Strength 0 is tested first; the statement taken is not visible here
 * (presumably the row is skipped - confirm). */
6883 if( bS[bS_index] == 0 ) {
/* With MB_FIELD rows 0-3 use qp[0] and rows 4-7 use qp[1]; otherwise
 * even rows use qp[0] and odd rows use qp[1]. */
6887 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6888 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6889 alpha = alpha_table[index_a];
6890 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Strengths 1..3: correction limited to +-tc, where tc is the table entry plus one. */
6892 if( bS[bS_index] < 4 ) {
6893 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6894 const int p0 = pix[-1];
6895 const int p1 = pix[-2];
6896 const int q0 = pix[0];
6897 const int q1 = pix[1];
/* Filter only when the step across the edge is below alpha and the
 * gradient on each side is below beta. */
6899 if( ABS( p0 - q0 ) < alpha &&
6900 ABS( p1 - p0 ) < beta &&
6901 ABS( q1 - q0 ) < beta ) {
6902 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6904 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6905 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6906 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Remaining case (the trace below labels it bS:4): unclipped weighted
 * average for p0 and q0. */
6909 const int p0 = pix[-1];
6910 const int p1 = pix[-2];
6911 const int q0 = pix[0];
6912 const int q1 = pix[1];
6914 if( ABS( p0 - q0 ) < alpha &&
6915 ABS( p1 - p0 ) < beta &&
6916 ABS( q1 - q0 ) < beta ) {
6918 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6919 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6920 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge. Samples are addressed in steps of
 * stride (pix_next), i.e. vertically adjacent pixels, with the edge lying
 * between pix[-pix_next] (p0) and pix[0] (q0).
 *
 * @param h      decoder context; supplies the slice alpha/beta offsets and
 *               the dsp function table
 * @param pix    first sample on the q side of the edge
 * @param stride distance between vertically adjacent samples
 * @param bS     four boundary strengths along the edge
 * @param qp     quantizer used to index alpha_table, beta_table and tc0_table
 *
 * NOTE(review): this copy of the function is missing some lines (local
 * declarations, the test that selects between the two paths, else/brace
 * lines). The comments describe only the statements that are visible.
 */
6926 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6928 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6929 const int alpha = alpha_table[index_a];
6930 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6931 const int pix_next = stride;
/* Clipping value per bS entry; -1 is passed to the dsp filter where bS is 0. */
6936 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6937 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6939 /* 16px edge length, see filter_mb_edgev */
6940 for( d = 0; d < 16; d++ ) {
/* p* are the samples above the edge, q* the samples below it. */
6941 const int p0 = pix[-1*pix_next];
6942 const int p1 = pix[-2*pix_next];
6943 const int p2 = pix[-3*pix_next];
6944 const int q0 = pix[0];
6945 const int q1 = pix[1*pix_next];
6946 const int q2 = pix[2*pix_next];
/* Filter this column only when the step across the edge is below alpha
 * and the gradient on each side is below beta. */
6948 if( ABS( p0 - q0 ) < alpha &&
6949 ABS( p1 - p0 ) < beta &&
6950 ABS( q1 - q0 ) < beta ) {
/* Unlike filter_mb_edgev, p3 and q3 are fetched before the sub-tests. */
6952 const int p3 = pix[-4*pix_next];
6953 const int q3 = pix[ 3*pix_next];
/* A small step across the edge enables the three-sample smoothing below. */
6955 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* p side: three samples are rewritten when |p2 - p0| < beta. */
6956 if( ABS( p2 - p0 ) < beta) {
6958 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6959 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6960 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side, single-sample form. */
6963 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: mirror image of the p side handling. */
6965 if( ABS( q2 - q0 ) < beta) {
6967 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6968 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6969 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side, single-sample form. */
6972 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Single-sample form applied to both p0 and q0. */
6976 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6977 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Trace output: samples before filtering followed by the current pixel values. */
6979 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge by delegating to the dsp chroma filters
 * (the h264_v_* counterparts of the calls made by filter_mb_edgecv).
 *
 * @param h      decoder context; supplies the slice alpha/beta offsets and
 *               the dsp function table
 * @param pix    sample position handed to the dsp filter
 * @param stride handed unchanged to the dsp filter
 * @param bS     four boundary strengths along the edge
 * @param qp     quantizer used to index alpha_table, beta_table and tc0_table
 *
 * NOTE(review): the lines choosing between the two dsp calls are not present
 * in this copy; presumably the intra variant is the bS=4 case - confirm
 * against the full source.
 */
6986 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* Table indices are qp plus the slice offset, clamped to 0..51. */
6988 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6989 const int alpha = alpha_table[index_a];
6990 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Chroma clipping value is the tc0 table entry plus one; 0 where bS is 0. */
6995 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6996 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Variant without a tc array. */
6998 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the macroblock at (mb_x, mb_y). For each edge it derives four
 * boundary strengths (eight for the MBAFF first vertical edge) and an
 * averaged quantizer, then calls the filter_mb_edge* / filter_mb_mbaff_edge*
 * helpers on the luma plane and on both chroma planes.
 *
 * Visible stages, in order:
 *  - a low-qp test against qp_thresh using the current, left and top
 *    quantizers (see the existing comments at that point);
 *  - the MBAFF first vertical edge, taken when the left macroblock exists,
 *    differs in interlaced type and satisfies the deblocking_filter==2 slice
 *    rule: eight bS values are computed against the two macroblocks of the
 *    left pair (left_mb_xy[0..1]) and two luma/chroma quantizer averages;
 *  - the generic loop over dir (0 = vertical edges, 1 = horizontal edges)
 *    and over the edges of the macroblock.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma stride handed to the edge filters
 * @param uvlinesize chroma stride handed to the edge filters
 *
 * NOTE(review): this copy of the function is missing a number of lines
 * (declarations, some conditions and assignments, else/brace lines), and the
 * block comment that starts at "First vertical edge is different" is not
 * terminated where it originally was. Comments describe only visible code.
 */
7002 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7003 MpegEncContext * const s = &h->s;
7004 const int mb_xy= mb_x + mb_y*s->mb_stride;
7005 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Vertical motion-vector difference threshold: 2 for interlaced macroblocks, else 4. */
7006 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7007 int first_vertical_edge_done = 0;
7009 /* FIXME: A given frame may occupy more than one position in
7010 * the reference list. So ref2frm should be populated with
7011 * frame numbers, not indices. */
7012 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7013 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7015 //for sufficiently low qp, filtering wouldn't do anything
7016 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7018 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7019 int qp = s->current_picture.qscale_table[mb_xy];
7021 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7022 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7028 // left mb is in picture
7029 && h->slice_table[mb_xy-1] != 255
7030 // and current and left pair do not have the same interlaced type
7031 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7032 // and left mb is in the same slice if deblocking_filter == 2
7033 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7034 /* First vertical edge is different in MBAFF frames
7035 * There are 8 different bS to compute and 2 different Qp
7037 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7038 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7042 int mb_qp, mbn0_qp, mbn1_qp;
7044 first_vertical_edge_done = 1;
7046 if( IS_INTRA(mb_type) )
7047 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7049 for( i = 0; i < 8; i++ ) {
7050 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7052 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7054 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7055 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7056 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Rounded averages of the current quantizer with each left neighbour,
 * for luma (qp[]) and, via get_chroma_qp(), for chroma (chroma_qp[]). */
7063 mb_qp = s->current_picture.qscale_table[mb_xy];
7064 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7065 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7066 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7067 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7068 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7069 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7070 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7071 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7074 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7075 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* The same eight strengths drive the luma edge and both chroma edges. */
7076 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7077 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7078 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7080 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7081 for( dir = 0; dir < 2; dir++ )
/* mbm_xy is the macroblock across edge 0: the left one for dir 0,
 * h->top_mb_xy for dir 1. */
7084 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7085 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* Begin at edge 1 when that neighbour has slice_table value 255. */
7086 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* Macroblocks flagged both 16x16 and SKIP get one edge, all others four. */
7088 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7089 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7090 // how often to recheck mv-based bS when iterating between edges
7091 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7092 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7093 // how often to recheck mv-based bS when iterating along each edge
7094 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* The flag set by the MBAFF first-edge code above is consumed here. */
7096 if (first_vertical_edge_done) {
7098 first_vertical_edge_done = 0;
/* deblocking_filter == 2: test whether the neighbour belongs to another slice. */
7101 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7104 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7105 && !IS_INTERLACED(mb_type)
7106 && IS_INTERLACED(mbm_type)
7108 // This is a special case in the norm where the filtering must
7109 // be done twice (one each of the field) even if we are in a
7110 // frame macroblock.
7112 static const int nnz_idx[4] = {4,5,6,3};
/* Doubled strides are passed to the edge filters in this case. */
7113 unsigned int tmp_linesize = 2 * linesize;
7114 unsigned int tmp_uvlinesize = 2 * uvlinesize;
/* First neighbour is two macroblock rows up; the loop then steps one row down. */
7115 int mbn_xy = mb_xy - 2 * s->mb_stride;
7120 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7121 if( IS_INTRA(mb_type) ||
7122 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7123 bS[0] = bS[1] = bS[2] = bS[3] = 3;
/* Otherwise the strength depends on non-zero coefficient counts on either
 * side of the edge; the assigned values are not visible here. */
7125 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7126 for( i = 0; i < 4; i++ ) {
7127 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7128 mbn_nnz[nnz_idx[i]] != 0 )
7134 // Do not use s->qscale as luma quantizer because it has not the same
7135 // value in IPCM macroblocks.
7136 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7137 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7138 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Pass j starts at picture row j of the macroblock. */
7139 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7140 chroma_qp = ( h->chroma_qp +
7141 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7142 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7143 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Generic edge loop. */
7150 for( edge = start; edge < edges; edge++ ) {
7151 /* mbn_xy: neighbor macroblock */
7152 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7153 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* Odd edges of macroblocks using the 8x8 transform are tested here;
 * the statement taken is not visible (presumably they are skipped). */
7157 if( (edge&1) && IS_8x8DCT(mb_type) )
/* Intra on either side: all four strengths get one common value. */
7160 if( IS_INTRA(mb_type) ||
7161 IS_INTRA(mbn_type) ) {
7164 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7165 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7174 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Edges selected by mask_edge start from strength 0. */
7179 if( edge & mask_edge ) {
7180 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* MBAFF with differing interlaced type across the edge: strength 1. */
7183 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7184 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* One reference/motion-vector comparison decides all four strengths. */
7187 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7188 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7189 int bn_idx= b_idx - (dir ? 8:1);
/* One list is checked, or two when the slice type is B_TYPE; v becomes
 * non-zero if the references differ or the motion vectors differ by at
 * least 4 horizontally or mvy_limit vertically. */
7191 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7192 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7193 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7194 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7196 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-4x4-block evaluation along the edge. */
7202 for( i = 0; i < 4; i++ ) {
7203 int x = dir == 0 ? edge : i;
7204 int y = dir == 0 ? i : edge;
7205 int b_idx= 8 + 4 + x + 8*y;
7206 int bn_idx= b_idx - (dir ? 8:1);
/* Non-zero coefficients on either side are tested first. */
7208 if( h->non_zero_count_cache[b_idx] != 0 ||
7209 h->non_zero_count_cache[bn_idx] != 0 ) {
/* Otherwise the same reference/motion-vector test as above, per block. */
7215 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7216 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7217 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7218 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* All four strengths zero; the statement taken is not visible here
 * (presumably the edge is skipped - confirm). */
7226 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7231 // Do not use s->qscale as luma quantizer because it has not the same
7232 // value in IPCM macroblocks.
7233 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7234 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7235 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7236 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Vertical edges: luma edge at column 4*edge; chroma only on even edges,
 * at column 2*edge. */
7238 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7239 if( (edge&1) == 0 ) {
7240 int chroma_qp = ( h->chroma_qp +
7241 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7242 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7243 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* Horizontal edges: luma edge at row 4*edge; chroma only on even edges,
 * at row 2*edge. */
7246 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7247 if( (edge&1) == 0 ) {
7248 int chroma_qp = ( h->chroma_qp +
7249 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7250 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7251 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decode the macroblocks of the current slice. When h->pps.cabac is set the
 * CABAC decoder is initialised and decode_mb_cabac() is used; the other
 * visible path uses decode_mb_cavlc(). Each successfully parsed macroblock
 * is reconstructed with hl_decode_mb(), and the covered macroblock range is
 * reported to error resilience through ff_er_add_slice() with END or ERROR
 * flags masked by part_mask.
 *
 * @param h decoder context
 * @return the only return statement visible in this copy is the trailing
 *         "return -1"; the returns of the main paths are on lines that are
 *         not present here.
 *
 * NOTE(review): loop headers, several conditions, else/brace lines and
 * return statements are missing from this copy. Comments describe only the
 * visible statements.
 */
7258 static int decode_slice(H264Context *h){
7259 MpegEncContext * const s = &h->s;
/* Only the AC flags are kept for partitioned frames, otherwise all flags in 0x7F. */
7260 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7264 if( h->pps.cabac ) {
/* The CABAC decoder is started on the byte-aligned remainder of the bitstream buffer. */
7268 align_get_bits( &s->gb );
7271 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7272 ff_init_cabac_decoder( &h->cabac,
7273 s->gb.buffer + get_bits_count(&s->gb)/8,
7274 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7275 /* calculate pre-state */
7276 for( i= 0; i < 460; i++ ) {
/* pre = ((m * qscale) >> 4) + n clamped to 1..126, with (m, n) taken from
 * the I table or from the P/B table selected by cabac_init_idc. */
7278 if( h->slice_type == I_TYPE )
7279 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7281 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* The state is stored as 2*(63-pre) or 2*(pre-64)+1; the condition
 * selecting between them is not visible here. */
7284 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7286 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop body. */
7290 int ret = decode_mb_cabac(h);
7293 if(ret>=0) hl_decode_mb(h);
/* In MBAFF frames a second macroblock is decoded right after the first. */
7295 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7298 if(ret>=0) ret = decode_mb_cabac(h);
7300 if(ret>=0) hl_decode_mb(h);
7303 eos = get_cabac_terminate( &h->cabac );
/* Error if parsing failed or the CABAC reader ran more than one byte past the end. */
7305 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
7306 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7307 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* End of a macroblock row: hand the finished 16-line band to the caller. */
7311 if( ++s->mb_x >= s->mb_width ) {
7313 ff_draw_horiz_band(s, 16*s->mb_y, 16);
/* Slice ends on the CABAC terminate flag or at the bottom of the picture. */
7320 if( eos || s->mb_y >= s->mb_height ) {
7321 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7322 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop body. */
7329 int ret = decode_mb_cavlc(h);
7331 if(ret>=0) hl_decode_mb(h);
7333 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7335 ret = decode_mb_cavlc(h);
7337 if(ret>=0) hl_decode_mb(h);
7342 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7343 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7348 if(++s->mb_x >= s->mb_width){
7350 ff_draw_horiz_band(s, 16*s->mb_y, 16);
/* Bottom of the picture reached: the slice is reported as ended, with the
 * range depending on whether the bitstream was consumed exactly. */
7355 if(s->mb_y >= s->mb_height){
7356 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7358 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7359 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7363 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Bitstream exhausted with no pending skip run: END if consumed exactly,
 * ERROR otherwise. */
7370 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7371 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7372 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7373 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7377 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below calls decode_mb() and contains tokens that
 * are not valid C (see the get_bits_count lines), so it is presumably
 * disabled code whose preprocessor guard is not visible in this copy -
 * confirm against the full source. */
7386 for(;s->mb_y < s->mb_height; s->mb_y++){
7387 for(;s->mb_x < s->mb_width; s->mb_x++){
7388 int ret= decode_mb(h);
7393 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7394 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7399 if(++s->mb_x >= s->mb_width){
7401 if(++s->mb_y >= s->mb_height){
7402 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7403 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7407 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7414 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7415 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7416 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7420 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7427 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7430 return -1; //not reached
/**
 * Read an unregistered user data payload and look for an x264 version
 * string in it. When the text after the first 16 bytes matches
 * "x264 - core <number>" and the number is non-negative, it is stored in
 * h->x264_build.
 *
 * @param h    decoder context; payload bytes are read from h->s.gb
 * @param size payload size in bytes
 * @return not visible in this copy (the return statements are on lines that
 *         are not present here); the caller treats a negative value as
 *         failure.
 *
 * NOTE(review): the first 16 bytes are presumably the payload UUID, and the
 * buffer is presumably NUL-terminated on a line not shown before sscanf is
 * called - confirm against the full source.
 */
7433 static int decode_unregistered_user_data(H264Context *h, int size){
7434 MpegEncContext * const s = &h->s;
/* Room for the 16 leading bytes plus up to 256 further bytes. */
7435 uint8_t user_data[16+256];
/* Copy at most sizeof(user_data)-1 payload bytes, leaving one spare byte. */
7441 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7442 user_data[i]= get_bits(&s->gb, 8);
7446 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7447 if(e==1 && build>=0)
7448 h->x264_build= build;
7450 if(s->avctx->debug & FF_DEBUG_BUGS)
7451 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* One further byte is skipped per iteration of a loop whose header is not
 * visible here (presumably the bytes that did not fit in user_data). */
7454 skip_bits(&s->gb, 8);
/**
 * Walk the SEI messages in the current bitstream buffer. For each message
 * the type and size are read, then the payload is either passed to
 * decode_unregistered_user_data() or skipped.
 *
 * @param h decoder context; bits are read from h->s.gb
 * @return not visible in this copy (the return statements are on lines that
 *         are not present here).
 *
 * NOTE(review): the switch on the message type, the initialisation of
 * type/size and the do-loop headers are missing from this copy.
 */
7459 static int decode_sei(H264Context *h){
7460 MpegEncContext * const s = &h->s;
/* Continue while more than 16 bits remain in the buffer. */
7462 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type and size are sums of bytes; a byte equal to 255 means another byte follows. */
7467 type+= show_bits(&s->gb, 8);
7468 }while(get_bits(&s->gb, 8) == 255);
7472 size+= show_bits(&s->gb, 8);
7473 }while(get_bits(&s->gb, 8) == 255);
7477 if(decode_unregistered_user_data(h, size) < 0)
/* Payloads handled here are skipped whole (size is in bytes). */
7481 skip_bits(&s->gb, 8*size);
7484 //FIXME check bits here
7485 align_get_bits(&s->gb);
/**
 * Read one set of HRD parameters from the bitstream. In the visible code
 * every field is consumed and its value discarded; only cpb_count is kept,
 * to drive the per-CPB loop. Nothing is stored in sps here.
 *
 * @param h   decoder context; bits are read from h->s.gb
 * @param sps not referenced by any visible statement
 *
 * NOTE(review): cpb_count comes straight from the bitstream and no range
 * check is visible before the loop - confirm whether one is needed.
 */
7491 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7492 MpegEncContext * const s = &h->s;
/* The coded value is the count minus one. */
7494 cpb_count = get_ue_golomb(&s->gb) + 1;
7495 get_bits(&s->gb, 4); /* bit_rate_scale */
7496 get_bits(&s->gb, 4); /* cpb_size_scale */
7497 for(i=0; i<cpb_count; i++){
7498 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7499 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7500 get_bits1(&s->gb); /* cbr_flag */
/* Four 5-bit fields follow the per-CPB entries. */
7502 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7503 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7504 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7505 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI parameters that follow a sequence parameter set.
 * Stored in sps: sar, timing_info_present_flag, num_units_in_tick, time_scale,
 * fixed_frame_rate_flag, bitstream_restriction_flag and num_reorder_frames.
 * All other fields are read and discarded.
 * @param h   decoder context that owns the bit reader (h->s.gb)
 * @param sps sequence parameter set that receives the stored values
 * @return int status. NOTE(review): confirm the values; the caller in
 *         decode_seq_parameter_set() does not look at the result.
 */
7508 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7509 MpegEncContext * const s = &h->s;
7510 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7511 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7513 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7515 if( aspect_ratio_info_present_flag ) {
7516 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit 16 bit numerator and denominator
7517 if( aspect_ratio_idc == EXTENDED_SAR ) {
7518 sps->sar.num= get_bits(&s->gb, 16);
7519 sps->sar.den= get_bits(&s->gb, 16);
// values below 14 index the pixel_aspect table
7520 }else if(aspect_ratio_idc < 14){
7521 sps->sar= pixel_aspect[aspect_ratio_idc];
// any other value is reported as an error
7523 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7530 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7532 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7533 get_bits1(&s->gb); /* overscan_appropriate_flag */
7536 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7537 get_bits(&s->gb, 3); /* video_format */
7538 get_bits1(&s->gb); /* video_full_range_flag */
7539 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7540 get_bits(&s->gb, 8); /* colour_primaries */
7541 get_bits(&s->gb, 8); /* transfer_characteristics */
7542 get_bits(&s->gb, 8); /* matrix_coefficients */
7546 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7547 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7548 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// timing information is kept in the SPS
7551 sps->timing_info_present_flag = get_bits1(&s->gb);
7552 if(sps->timing_info_present_flag){
7553 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7554 sps->time_scale = get_bits_long(&s->gb, 32);
7555 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameters share one parser; it runs once per flag that is set
7558 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7559 if(nal_hrd_parameters_present_flag)
7560 decode_hrd_parameters(h, sps);
7561 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7562 if(vcl_hrd_parameters_present_flag)
7563 decode_hrd_parameters(h, sps);
// low_delay_hrd_flag is only coded when at least one HRD structure was present
7564 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7565 get_bits1(&s->gb); /* low_delay_hrd_flag */
7566 get_bits1(&s->gb); /* pic_struct_present_flag */
7568 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7569 if(sps->bitstream_restriction_flag){
7570 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7571 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7572 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7573 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7574 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// NOTE(review): stored without a range check; decode_frame() copies it into
// avctx->has_b_frames -- confirm that an oversized value cannot cause harm.
7575 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7576 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Decodes one scaling list from h->s.gb into factors.
 * @param h             decoder context that owns the bit reader
 * @param factors       destination, size entries
 * @param size          number of entries; 16 selects zigzag_scan, any other
 *                      value selects zigzag_scan8x8
 * @param jvt_list      preset list, copied when the first decoded value is 0
 * @param fallback_list list copied when the presence bit is 0
 */
7582 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7583 const uint8_t *jvt_list, const uint8_t *fallback_list){
7584 MpegEncContext * const s = &h->s;
// both running values start at 8
7585 int i, last = 8, next = 8;
7586 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7587 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7588 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7590 for(i=0;i<size;i++){
// each value is coded as a signed delta to the previous one, wrapped to 8 bits
7592 next = (last + get_se_golomb(&s->gb)) & 0xff;
7593 if(!i && !next){ /* matrix not written, we use the preset one */
7594 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// entries are written in scan order; a decoded 0 repeats the previous value
7597 last = factors[scan[i]] = next ? next : last;
/**
 * Decodes the six 4x4 and two 8x8 scaling lists of an SPS or a PPS.
 * @param h               decoder context that owns the bit reader
 * @param sps             SPS being parsed, or the SPS a PPS refers to
 * @param pps             PPS being parsed; the SPS caller passes NULL, which is
 *                        safe because pps is only read when is_sps is 0
 * @param is_sps          nonzero when called for an SPS
 * @param scaling_matrix4 destination for the six 16 entry lists
 * @param scaling_matrix8 destination for the two 64 entry lists
 */
7601 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7602 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7603 MpegEncContext * const s = &h->s;
// a PPS falls back to the lists of its SPS when the SPS carried any,
// everything else falls back to the default tables
7604 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7605 const uint8_t *fallback[4] = {
7606 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7607 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7608 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7609 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// one bit tells whether any list is coded at all
7611 if(get_bits1(&s->gb)){
7612 sps->scaling_matrix_present |= is_sps;
// lists 1, 2 and 4, 5 fall back to the list decoded just before them
7613 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7614 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7615 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7616 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7617 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7618 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// the 8x8 lists are always coded in an SPS, in a PPS only with transform_8x8_mode
7619 if(is_sps || pps->transform_8x8_mode){
7620 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7621 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// nothing coded in the PPS: inherit every list from the SPS
7623 } else if(fallback_sps) {
7624 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7625 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a sequence parameter set from h->s.gb into h->sps_buffer[sps_id].
 * @param h decoder context
 * @return int status. NOTE(review): confirm the values; the call in
 *         decode_nal_units() does not look at the result.
 */
7629 static inline int decode_seq_parameter_set(H264Context *h){
7630 MpegEncContext * const s = &h->s;
7631 int profile_idc, level_idc;
7635 profile_idc= get_bits(&s->gb, 8);
7636 get_bits1(&s->gb); //constraint_set0_flag
7637 get_bits1(&s->gb); //constraint_set1_flag
7638 get_bits1(&s->gb); //constraint_set2_flag
7639 get_bits1(&s->gb); //constraint_set3_flag
7640 get_bits(&s->gb, 4); // reserved
7641 level_idc= get_bits(&s->gb, 8);
7642 sps_id= get_ue_golomb(&s->gb);
// NOTE(review): sps_id is taken from the bitstream and used as an array index
// right away; no comparison against MAX_SPS_COUNT is made here -- confirm.
7644 sps= &h->sps_buffer[ sps_id ];
7645 sps->profile_idc= profile_idc;
7646 sps->level_idc= level_idc;
// profiles >= 100 carry the chroma format, bit depths, transform bypass flag
// and the scaling matrices
7648 if(sps->profile_idc >= 100){ //high profile
7649 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7650 get_bits1(&s->gb); //residual_color_transform_flag
7651 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7652 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7653 sps->transform_bypass = get_bits1(&s->gb);
// pps is NULL and is_sps is 1 for the SPS variant of the matrix parser
7654 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7656 sps->scaling_matrix_present = 0;
// stored as the actual log2 value (coded value + 4)
7658 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7659 sps->poc_type= get_ue_golomb(&s->gb);
7661 if(sps->poc_type == 0){ //FIXME #define
7662 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7663 } else if(sps->poc_type == 1){//FIXME #define
7664 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7665 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7666 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7667 sps->poc_cycle_length= get_ue_golomb(&s->gb);
// NOTE(review): poc_cycle_length bounds the writes into offset_for_ref_frame[]
// and is not compared against the array size here -- confirm.
7669 for(i=0; i<sps->poc_cycle_length; i++)
7670 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
// only POC types 0, 1 and 2 are accepted
7672 if(sps->poc_type > 2){
7673 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7677 sps->ref_frame_count= get_ue_golomb(&s->gb);
7678 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7679 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7681 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
// picture size in macroblocks (coded value + 1)
7682 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7683 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// reject sizes for which 16*mb_width or 16*mb_height could overflow, then let
// avcodec_check_dimensions() judge the pixel dimensions
7684 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7685 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7688 sps->frame_mbs_only_flag= get_bits1(&s->gb);
// the MBAFF flag is only coded when field coding is possible
7689 if(!sps->frame_mbs_only_flag)
7690 sps->mb_aff= get_bits1(&s->gb);
7694 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7696 #ifndef ALLOW_INTERLACE
7698 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n");
7700 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7701 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7703 sps->crop= get_bits1(&s->gb);
7705 sps->crop_left = get_ue_golomb(&s->gb);
7706 sps->crop_right = get_ue_golomb(&s->gb);
7707 sps->crop_top = get_ue_golomb(&s->gb);
7708 sps->crop_bottom= get_ue_golomb(&s->gb);
// cropping at the left or top edge is only reported, see the message
7709 if(sps->crop_left || sps->crop_top){
7710 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7716 sps->crop_bottom= 0;
7719 sps->vui_parameters_present_flag= get_bits1(&s->gb);
// the result of decode_vui_parameters() is not checked
7720 if( sps->vui_parameters_present_flag )
7721 decode_vui_parameters(h, sps);
// one line summary of the parsed SPS when FF_DEBUG_PICT_INFO is set
7723 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7724 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7725 sps_id, sps->profile_idc, sps->level_idc,
7727 sps->ref_frame_count,
7728 sps->mb_width, sps->mb_height,
7729 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7730 sps->direct_8x8_inference_flag ? "8B8" : "",
7731 sps->crop_left, sps->crop_right,
7732 sps->crop_top, sps->crop_bottom,
7733 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a picture parameter set from h->s.gb into h->pps_buffer[pps_id].
 * @param h          decoder context
 * @param bit_length number of valid bits in the NAL payload; the optional
 *                   trailing fields are parsed only while the read position is
 *                   below this value
 * @return int status. NOTE(review): confirm the values; the call in
 *         decode_nal_units() does not look at the result.
 */
7739 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7740 MpegEncContext * const s = &h->s;
// NOTE(review): pps_id is used as an index without a comparison against
// MAX_PPS_COUNT, and pps->sps_id later indexes h->sps_buffer the same way -- confirm.
7741 int pps_id= get_ue_golomb(&s->gb);
7742 PPS *pps= &h->pps_buffer[pps_id];
7744 pps->sps_id= get_ue_golomb(&s->gb);
7745 pps->cabac= get_bits1(&s->gb);
7746 pps->pic_order_present= get_bits1(&s->gb);
7747 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// more than one slice group means FMO, which is only reported as unsupported
7748 if(pps->slice_group_count > 1 ){
7749 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7750 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7751 switch(pps->mb_slice_group_map_type){
// NOTE(review): the rows below look like reference notes (syntax element,
// category, descriptor) for the slice group fields -- confirm they are not code.
7754 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7755 | run_length[ i ] |1 |ue(v) |
7760 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7762 | top_left_mb[ i ] |1 |ue(v) |
7763 | bottom_right_mb[ i ] |1 |ue(v) |
7771 | slice_group_change_direction_flag |1 |u(1) |
7772 | slice_group_change_rate_minus1 |1 |ue(v) |
7777 | slice_group_id_cnt_minus1 |1 |ue(v) |
7778 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7780 | slice_group_id[ i ] |1 |u(v) |
// default reference counts for list 0 and list 1 (coded value + 1), at most 32
7785 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7786 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7787 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7788 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7792 pps->weighted_pred= get_bits1(&s->gb);
7793 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// the initial QP and QS are coded as offsets from 26
7794 pps->init_qp= get_se_golomb(&s->gb) + 26;
7795 pps->init_qs= get_se_golomb(&s->gb) + 26;
7796 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7797 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7798 pps->constrained_intra_pred= get_bits1(&s->gb);
7799 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
// defaults used when the optional trailing fields are absent:
// no 8x8 transform and every scaling factor set to 16
7801 pps->transform_8x8_mode= 0;
7802 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7803 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7804 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// optional trailing fields, present only if payload bits remain
7806 if(get_bits_count(&s->gb) < bit_length){
7807 pps->transform_8x8_mode= get_bits1(&s->gb);
7808 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7809 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// one line summary of the parsed PPS when FF_DEBUG_PICT_INFO is set
7812 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7813 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7814 pps_id, pps->sps_id,
7815 pps->cabac ? "CABAC" : "CAVLC",
7816 pps->slice_group_count,
7817 pps->ref_count[0], pps->ref_count[1],
7818 pps->weighted_pred ? "weighted" : "",
7819 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7820 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7821 pps->constrained_intra_pred ? "CONSTR" : "",
7822 pps->redundant_pic_cnt_present ? "REDU" : "",
7823 pps->transform_8x8_mode ? "8x8DCT" : ""
7831 * finds the end of the current frame in the bitstream.
7832 * @return the position of the first byte of the next frame, or -1
// Scans buf for the start of the next frame.
// state holds the most recent bytes; the mask 0xFFFFFF1F compares the three
// start code bytes 00 00 01 plus the low 5 bits of the byte after them (the
// NAL unit type) and ignores that byte's upper 3 bits.
// pc->frame_start_found remembers across calls that a slice was already seen.
// Returns END_NOT_FOUND when no boundary lies in this buffer.
// NOTE(review): confirm the position value returned on success.
7834 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7837 ParseContext *pc = &(h->s.parse_context);
7838 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7839 // mb_addr= pc->mb_addr - 1;
// NOTE(review): the loop includes i == buf_size; confirm that the buf[i] read
// which updates state is guarded on that last iteration.
7841 for(i=0; i<=buf_size; i++){
// NAL unit types 1, 2 and 5 (slice, slice data partition A, IDR slice in the
// H.264 NAL unit type table)
7842 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7843 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7844 if(pc->frame_start_found){
7845 // If there isn't one more byte in the buffer
7846 // the test on first_mb_in_slice cannot be done yet
7847 // do it at next call.
7848 if (i >= buf_size) break;
// the top bit of the first slice header byte being set means first_mb_in_slice is 0
7849 if (buf[i] & 0x80) {
7850 // first_mb_in_slice is 0, probably the first nal of a new
7852 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7854 pc->frame_start_found= 0;
7858 pc->frame_start_found = 1;
// NAL unit types 7, 8 and 9 (SPS, PPS, access unit delimiter) also end a frame
// that has already started
7860 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7861 if(pc->frame_start_found){
7863 pc->frame_start_found= 0;
// shift the next byte into the rolling state
7868 state= (state<<8) | buf[i];
7872 return END_NOT_FOUND;
/**
 * Parser callback: locates the end of the current frame in buf and lets
 * ff_combine_frame() assemble a complete frame from the buffered input.
 * @param s            parser context; its priv_data is the H264Context
 * @param avctx        codec context (not referenced by the statements below)
 * @param poutbuf      receives the pointer to the assembled frame
 * @param poutbuf_size receives the size of the assembled frame
 * @param buf          input data
 * @param buf_size     size of the input data
 * @return int. NOTE(review): confirm the value (presumably the frame end
 *         position); the outputs written when ff_combine_frame() reports a
 *         negative result should be confirmed too.
 */
7875 static int h264_parse(AVCodecParserContext *s,
7876 AVCodecContext *avctx,
7877 uint8_t **poutbuf, int *poutbuf_size,
7878 const uint8_t *buf, int buf_size)
7880 H264Context *h = s->priv_data;
7881 ParseContext *pc = &h->s.parse_context;
7884 next= find_frame_end(h, buf, buf_size);
// ff_combine_frame() may replace buf and buf_size with its own buffer
7886 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7892 *poutbuf = (uint8_t *)buf;
7893 *poutbuf_size = buf_size;
// Scans buf with a rolling start code state, watching for NAL unit type 7 and
// then for the first start code whose NAL unit type is none of 7, 8 or 9.
// The zero bytes directly in front of that start code are stepped over backwards.
// NOTE(review): presumably the returned value is the length of the leading
// parameter set data (0 if none) -- confirm.
// NOTE(review): the loop includes i == buf_size; confirm that the buf[i] read
// which updates state is guarded on that last iteration.
7897 static int h264_split(AVCodecContext *avctx,
7898 const uint8_t *buf, int buf_size)
7901 uint32_t state = -1;
7904 for(i=0; i<=buf_size; i++){
7905 if((state&0xFFFFFF1F) == 0x107)
7907 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7909 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7911 while(i>4 && buf[i-5]==0) i--;
7916 state= (state<<8) | buf[i];
// Splits buf into NAL units and dispatches each one by h->nal_unit_type.
// In AVC mode (h->is_avc) every unit is preceded by a big endian length of
// h->nal_length_size bytes, otherwise units are located by their 00 00 01 start code.
// Returns buf_index when no picture was started.
// NOTE(review): confirm the remaining return values.
7922 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7923 MpegEncContext * const s = &h->s;
7924 AVCodecContext * const avctx= s->avctx;
// hex dump of the first 50 bytes (NOTE(review): presumably debug-only code -- confirm)
7928 for(i=0; i<50; i++){
7929 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7933 s->current_picture_ptr= NULL;
7942 if(buf_index >= buf_size) break;
// AVC mode: assemble the big endian NAL size
7944 for(i = 0; i < h->nal_length_size; i++)
7945 nalsize = (nalsize << 8) | buf[buf_index++];
7951 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7956 // start code prefix search
7957 for(; buf_index + 3 < buf_size; buf_index++){
7958 // this should always succeed in the first iteration
7959 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7963 if(buf_index+3 >= buf_size) break;
// the unit is limited to nalsize in AVC mode, otherwise to the rest of the buffer
7968 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
// NOTE(review): ptr[dst_length - 1] is read before dst_length > 1 is tested and
// ptr is not checked against NULL here; confirm decode_nal() can neither fail
// nor yield dst_length == 0.
7969 while(ptr[dst_length - 1] == 0 && dst_length > 1)
// payload bits: byte count minus what decode_rbsp_trailing() reports for the last byte
7971 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7973 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7974 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7977 if (h->is_avc && (nalsize != consumed))
7978 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7980 buf_index += consumed;
// units with nal_ref_idc == 0 are dropped when hurrying or when non-reference
// frames are to be skipped
7982 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7983 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7986 switch(h->nal_unit_type){
7988 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// unpartitioned slice: the inter data is read through the main bit reader
7990 init_get_bits(&s->gb, ptr, bit_length);
7992 h->inter_gb_ptr= &s->gb;
7993 s->data_partitioning = 0;
7995 if(decode_slice_header(h) < 0){
7996 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7999 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
// the skip_frame and hurry_up settings decide whether the slice is considered
8000 if(h->redundant_pic_count==0 && s->hurry_up < 5
8001 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8002 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8003 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8004 && avctx->skip_frame < AVDISCARD_ALL)
// data partitioning: this unit carries the slice header, the intra and inter
// data arrive in their own units and get their own bit readers below
8008 init_get_bits(&s->gb, ptr, bit_length);
8010 h->inter_gb_ptr= NULL;
8011 s->data_partitioning = 1;
8013 if(decode_slice_header(h) < 0){
8014 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8018 init_get_bits(&h->intra_gb, ptr, bit_length);
8019 h->intra_gb_ptr= &h->intra_gb;
8022 init_get_bits(&h->inter_gb, ptr, bit_length);
8023 h->inter_gb_ptr= &h->inter_gb;
8025 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8027 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8028 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8029 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8030 && avctx->skip_frame < AVDISCARD_ALL)
8034 init_get_bits(&s->gb, ptr, bit_length);
// sequence parameter set; the result of the parser is not checked
8038 init_get_bits(&s->gb, ptr, bit_length);
8039 decode_seq_parameter_set(h);
8041 if(s->flags& CODEC_FLAG_LOW_DELAY)
8044 if(avctx->has_b_frames < 2)
8045 avctx->has_b_frames= !s->low_delay;
// picture parameter set; the result of the parser is not checked
8048 init_get_bits(&s->gb, ptr, bit_length);
8050 decode_picture_parameter_set(h, bit_length);
8054 case NAL_END_SEQUENCE:
8055 case NAL_END_STREAM:
8056 case NAL_FILLER_DATA:
8058 case NAL_AUXILIARY_SLICE:
8061 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8065 if(!s->current_picture_ptr) return buf_index; //no frame
8067 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8068 s->current_picture_ptr->pict_type= s->pict_type;
// remember the frame_num state for the next picture; the POC state is only
// carried over from reference pictures
8070 h->prev_frame_num_offset= h->frame_num_offset;
8071 h->prev_frame_num= h->frame_num;
8072 if(s->current_picture_ptr->reference){
8073 h->prev_poc_msb= h->poc_msb;
8074 h->prev_poc_lsb= h->poc_lsb;
// apply the collected memory management operations for reference pictures
8076 if(s->current_picture_ptr->reference)
8077 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8087 * returns the number of bytes consumed for building the current frame
// Turns the read position pos into the byte count reported to the caller.
// With CODEC_FLAG_TRUNCATED the parse context's last_index is subtracted first
// and a negative result is clamped to 0.
// A position of 0 becomes 1, and a position less than 10 bytes before the end
// of the buffer is rounded up to buf_size.
// NOTE(review): confirm which of the two adjustments below belong to the
// non-truncated path only.
8089 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8090 if(s->flags&CODEC_FLAG_TRUNCATED){
8091 pos -= s->parse_context.last_index;
8092 if(pos<0) pos=0; // FIXME remove (unneeded?)
8096 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8097 if(pos+10>buf_size) pos=buf_size; // oops ;)
// Decoder entry point for one input buffer.
// avctx:     codec context; its priv_data is the H264Context
// data:      AVFrame that receives the output picture
// data_size: set to sizeof(AVFrame) when a picture is output
// buf:       input data of buf_size bytes
// The normal path returns get_consumed_bytes() for the position reached by
// decode_nal_units(). NOTE(review): confirm the values returned on the early
// exit and error paths.
8103 static int decode_frame(AVCodecContext *avctx,
8104 void *data, int *data_size,
8105 uint8_t *buf, int buf_size)
8107 H264Context *h = avctx->priv_data;
8108 MpegEncContext *s = &h->s;
8109 AVFrame *pict = data;
// the flags are refreshed from the codec context on every call
8112 s->flags= avctx->flags;
8113 s->flags2= avctx->flags2;
8115 /* no supplementary picture */
8116 if (buf_size == 0) {
// truncated input: collect data until find_frame_end() sees a complete frame
8120 if(s->flags&CODEC_FLAG_TRUNCATED){
8121 int next= find_frame_end(h, buf, buf_size);
8123 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8125 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// one-time parse of the avcC record stored in extradata
8128 if(h->is_avc && !h->got_avcC) {
8129 int i, cnt, nalsize;
8130 unsigned char *p = avctx->extradata;
8131 if(avctx->extradata_size < 7) {
8132 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8136 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8139 /* sps and pps in the avcC always have length coded with 2 bytes,
8140 so put a fake nal_length_size = 2 while parsing them */
8141 h->nal_length_size = 2;
8142 // Decode sps from avcC
8143 cnt = *(p+5) & 0x1f; // Number of sps
// NOTE(review): apart from the 7 byte minimum, nothing here checks that p and
// p + nalsize stay inside extradata_size while walking the sets -- confirm.
// NOTE(review): this loop treats only a negative result as failure while the
// pps loop below requires the result to equal nalsize -- confirm the
// difference is intended.
8145 for (i = 0; i < cnt; i++) {
8146 nalsize = BE_16(p) + 2;
8147 if(decode_nal_units(h, p, nalsize) < 0) {
8148 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8153 // Decode pps from avcC
8154 cnt = *(p++); // Number of pps
8155 for (i = 0; i < cnt; i++) {
8156 nalsize = BE_16(p) + 2;
8157 if(decode_nal_units(h, p, nalsize) != nalsize) {
8158 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8163 // Now store right nal length size, that will be use to parse all other nals
// low 2 bits of byte 4 of the record, plus 1
8164 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8165 // Do not reparse avcC
// non-AVC streams: the extradata is fed through the NAL parser while
// picture_number is still 0
8169 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8170 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8174 buf_index=decode_nal_units(h, buf, buf_size);
8178 //FIXME do something with unavailable reference frames
8180 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8181 if(!s->current_picture_ptr){
8182 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8187 Picture *out = s->current_picture_ptr;
8188 #if 0 //decode order
8189 *data_size = sizeof(AVFrame);
8191 /* Sort B-frames into display order */
8192 Picture *cur = s->current_picture_ptr;
8193 Picture *prev = h->delayed_output_pic;
8194 int i, pics, cross_idr, out_of_order, out_idx;
// a signalled num_reorder_frames raises the output delay if it is larger
8196 if(h->sps.bitstream_restriction_flag
8197 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8198 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// append the current picture to the NULL terminated delayed_pic list
8203 while(h->delayed_pic[pics]) pics++;
8204 h->delayed_pic[pics++] = cur;
8205 if(cur->reference == 0)
// look for a key frame or a picture with poc 0 in the list
// (NOTE(review): presumably this sets cross_idr -- confirm)
8209 for(i=0; h->delayed_pic[i]; i++)
8210 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// output candidate: the lowest poc among the entries in front of the next key frame
8213 out = h->delayed_pic[0];
8215 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8216 if(h->delayed_pic[i]->poc < out->poc){
8217 out = h->delayed_pic[i];
// the candidate would be displayed before the picture that was output last
8221 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8222 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8224 else if(prev && pics <= s->avctx->has_b_frames)
8226 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8228 ((!cross_idr && prev && out->poc > prev->poc + 2)
8229 || cur->pict_type == B_TYPE)))
8232 s->avctx->has_b_frames++;
8235 else if(out_of_order)
// remove the chosen picture from the list by shifting the following entries down
8238 if(out_of_order || pics > s->avctx->has_b_frames){
8239 for(i=out_idx; h->delayed_pic[i]; i++)
8240 h->delayed_pic[i] = h->delayed_pic[i+1];
8246 *data_size = sizeof(AVFrame);
// the previously output picture has its reference value reset from 1 to 0
8247 if(prev && prev != out && prev->reference == 1)
8248 prev->reference = 0;
8249 h->delayed_output_pic = out;
// the Picture is copied out through a cast to AVFrame
8253 *pict= *(AVFrame*)out;
8255 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8258 assert(pict->data[0] || !*data_size);
8259 ff_print_debug_info(s, pict);
8260 //printf("out %d\n", (int)pict->data[0]);
8263 /* Return the Picture timestamp as the frame number */
8264 /* we subtract 1 because it is added on utils.c */
8265 avctx->frame_number = s->picture_number - 1;
8267 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * A neighbour counts as available when its h->slice_table entry equals
 * h->slice_num. Indices: 0 top left, 1 top, 2 top right, 3 left.
 * Entries 4 and 5 are the constants 1 and 0.
 * @param h decoder context
 */
8270 static inline void fill_mb_avail(H264Context *h){
8271 MpegEncContext * const s = &h->s;
8272 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// the row above is reached by subtracting mb_stride; the left and right
// neighbours additionally require a column inside the picture
8275 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8276 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8277 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8283 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8284 h->mb_avail[4]= 1; //FIXME move out
8285 h->mb_avail[5]= 0; //FIXME move out
// Self-test code: Exp-Golomb write/read round trips, the 4x4 (I)DCT, the
// quantizer and the NAL layer, each announced by a printf.
// SIZE is the byte size of the scratch bit buffer temp.
8291 #define SIZE (COUNT*40)
8297 // int int_temp[10000];
8299 AVCodecContext avctx;
8301 dsputil_init(&dsp, &avctx);
// write the values 0..COUNT-1 as unsigned Exp-Golomb codes ...
8303 init_put_bits(&pb, temp, SIZE);
8304 printf("testing unsigned exp golomb\n");
8305 for(i=0; i<COUNT; i++){
8307 set_ue_golomb(&pb, i);
8308 STOP_TIMER("set_ue_golomb");
8310 flush_put_bits(&pb);
// ... and read them back; s keeps the next 24 bits for the mismatch message
8312 init_get_bits(&gb, temp, 8*SIZE);
8313 for(i=0; i<COUNT; i++){
8316 s= show_bits(&gb, 24);
8319 j= get_ue_golomb(&gb);
8321 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8324 STOP_TIMER("get_ue_golomb");
// the same round trip for signed codes, with values centred on 0
8328 init_put_bits(&pb, temp, SIZE);
8329 printf("testing signed exp golomb\n");
8330 for(i=0; i<COUNT; i++){
8332 set_se_golomb(&pb, i - COUNT/2);
8333 STOP_TIMER("set_se_golomb");
8335 flush_put_bits(&pb);
8337 init_get_bits(&gb, temp, 8*SIZE);
8338 for(i=0; i<COUNT; i++){
8341 s= show_bits(&gb, 24);
8344 j= get_se_golomb(&gb);
// NOTE(review): the message prints i as the expected value although the
// comparison uses i - COUNT/2 -- confirm.
8345 if(j != i - COUNT/2){
8346 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8349 STOP_TIMER("get_se_golomb");
// forward transform of a random difference block followed by the inverse
// transform; the absolute difference to src is tracked
8352 printf("testing 4x4 (I)DCT\n");
8355 uint8_t src[16], ref[16];
8356 uint64_t error= 0, max_error=0;
8358 for(i=0; i<COUNT; i++){
8360 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
// random()%255 yields the values 0..254
8361 for(j=0; j<16; j++){
8362 ref[j]= random()%255;
8363 src[j]= random()%255;
8366 h264_diff_dct_c(block, src, ref, 4);
// rescale the coefficients before the inverse transform: times 4, then roughly
// times 4/5 for odd columns (j&1) and again for odd rows (j&4)
8369 for(j=0; j<16; j++){
8370 // printf("%d ", block[j]);
8371 block[j]= block[j]*4;
8372 if(j&1) block[j]= (block[j]*4 + 2)/5;
8373 if(j&4) block[j]= (block[j]*4 + 2)/5;
8377 s->dsp.h264_idct_add(ref, block, 4);
8378 /* for(j=0; j<16; j++){
8379 printf("%d ", ref[j]);
8383 for(j=0; j<16; j++){
8384 int diff= ABS(src[j] - ref[j]);
8387 max_error= FFMAX(max_error, diff);
// the printed error is the accumulated error divided by COUNT and by 16
8390 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8392 printf("testing quantizer\n");
8393 for(qp=0; qp<52; qp++){
8395 src1_block[i]= src2_block[i]= random()%255;
// NAL layer: encode a random payload and check that decoding restores it
8399 printf("Testing NAL layer\n");
8401 uint8_t bitstream[COUNT];
8402 uint8_t nal[COUNT*2];
8404 memset(&h, 0, sizeof(H264Context));
8406 for(i=0; i<COUNT; i++){
// the payload starts out without zero bytes (values 1..255) ...
8414 for(j=0; j<COUNT; j++){
8415 bitstream[j]= (random() % 255) + 1;
// ... then zeros are placed at random positions
8418 for(j=0; j<zeros; j++){
8419 int pos= random() % COUNT;
8420 while(bitstream[pos] == 0){
8429 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8431 printf("encoding failed\n");
8435 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
// the decoded length, the consumed length and the content must all match
8439 if(out_length != COUNT){
8440 printf("incorrect length %d %d\n", out_length, COUNT);
8444 if(consumed != nal_length){
8445 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8449 if(memcmp(bitstream, out, COUNT)){
8450 printf("missmatch\n");
8455 printf("Testing RBSP\n");
// Decoder close callback: releases the RBSP buffer and the tables owned by the
// H264Context stored in avctx->priv_data.
// NOTE(review): confirm the return value and any further cleanup that uses s.
8463 static int decode_end(AVCodecContext *avctx)
8465 H264Context *h = avctx->priv_data;
8466 MpegEncContext *s = &h->s;
// av_freep() is given the address of the pointer
8468 av_freep(&h->rbsp_buffer);
8469 free_tables(h); //FIXME cleanup init stuff perhaps
8472 // memset(h, 0, sizeof(H264Context));
// Codec table entry for the H.264 decoder.
// sizeof(H264Context) is presumably the private context size -- confirm
// against the AVCodec field order.
// Capabilities advertised: DR1, TRUNCATED and DELAY; DRAW_HORIZ_BAND is commented out.
8478 AVCodec h264_decoder = {
8482 sizeof(H264Context),
8487 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8491 AVCodecParser h264_parser = {
8493 sizeof(H264Context),