2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
57 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE
61 #ifdef ALLOW_INTERLACE
62 #define MB_MBAFF h->mb_mbaff
63 #define MB_FIELD h->mb_field_decoding_flag
64 #define FRAME_MBAFF h->mb_aff_frame
70 #define IS_INTERLACED(mb_type) 0
74 * Sequence parameter set
80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
82 int poc_type; ///< pic_order_cnt_type
83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag;
85 int offset_for_non_ref_pic;
86 int offset_for_top_to_bottom_field;
87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag;
90 int mb_width; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag;
93 int mb_aff; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag;
95 int crop; ///< frame_cropping_flag
96 int crop_left; ///< frame_cropping_rect_left_offset
97 int crop_right; ///< frame_cropping_rect_right_offset
98 int crop_top; ///< frame_cropping_rect_top_offset
99 int crop_bottom; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag;
102 int timing_info_present_flag;
103 uint32_t num_units_in_tick;
105 int fixed_frame_rate_flag;
106 short offset_for_ref_frame[256]; //FIXME dyn aloc?
107 int bitstream_restriction_flag;
108 int num_reorder_frames;
109 int scaling_matrix_present;
110 uint8_t scaling_matrix4[6][16];
111 uint8_t scaling_matrix8[2][64];
115 * Picture parameter set
119 int cabac; ///< entropy_coding_mode_flag
120 int pic_order_present; ///< pic_order_present_flag
121 int slice_group_count; ///< num_slice_groups_minus1 + 1
122 int mb_slice_group_map_type;
123 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
124 int weighted_pred; ///< weighted_pred_flag
125 int weighted_bipred_idc;
126 int init_qp; ///< pic_init_qp_minus26 + 26
127 int init_qs; ///< pic_init_qs_minus26 + 26
128 int chroma_qp_index_offset;
129 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
130 int constrained_intra_pred; ///< constrained_intra_pred_flag
131 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
132 int transform_8x8_mode; ///< transform_8x8_mode_flag
133 uint8_t scaling_matrix4[6][16];
134 uint8_t scaling_matrix8[2][64];
138 * Memory management control operation opcode.
140 typedef enum MMCOOpcode{
151 * Memory management control operation.
162 typedef struct H264Context{
170 #define NAL_IDR_SLICE 5
175 #define NAL_END_SEQUENCE 10
176 #define NAL_END_STREAM 11
177 #define NAL_FILLER_DATA 12
178 #define NAL_SPS_EXT 13
179 #define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer;
181 unsigned int rbsp_buffer_size;
184 * Used to parse AVC variant of h264
186 int is_avc; ///< this flag is != 0 if codec is avc1
187 int got_avcC; ///< flag used to parse avcC data only once
188 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
196 int chroma_pred_mode;
197 int intra16x16_pred_mode;
202 int8_t intra4x4_pred_mode_cache[5*8];
203 int8_t (*intra4x4_pred_mode)[8];
204 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
205 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
206 void (*pred8x8 [4+3])(uint8_t *src, int stride);
207 void (*pred16x16[4+3])(uint8_t *src, int stride);
208 unsigned int topleft_samples_available;
209 unsigned int top_samples_available;
210 unsigned int topright_samples_available;
211 unsigned int left_samples_available;
212 uint8_t (*top_borders[2])[16+2*8];
213 uint8_t left_border[2*(17+2*9)];
216 * non zero coeff count cache.
217 * is 64 if not available.
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
220 uint8_t (*non_zero_count)[16];
223 * Motion vector cache.
225 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
227 #define LIST_NOT_USED -1 //FIXME rename?
228 #define PART_NOT_AVAILABLE -2
231 * is 1 if the specific list MV&references are set to 0,0,-2.
233 int mv_cache_clean[2];
236 * number of neighbors (top and/or left) that used 8x8 dct
238 int neighbor_transform_size;
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
244 int block_offset[2*(16+8)];
246 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
248 int b_stride; //FIXME use s->b4_stride
251 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
260 int unknown_svq3_flag;
261 int next_slice_index;
263 SPS sps_buffer[MAX_SPS_COUNT];
264 SPS sps; ///< current sps
266 PPS pps_buffer[MAX_PPS_COUNT];
270 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
272 uint32_t dequant4_buffer[6][52][16];
273 uint32_t dequant8_buffer[2][52][64];
274 uint32_t (*dequant4_coeff[6])[16];
275 uint32_t (*dequant8_coeff[2])[64];
276 int dequant_coeff_pps; ///< reinit tables when pps changes
279 uint8_t *slice_table_base;
280 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
282 int slice_type_fixed;
284 //interlacing specific flags
286 int mb_field_decoding_flag;
287 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
294 int delta_poc_bottom;
297 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset; ///< for POC type 2
300 int prev_frame_num_offset; ///< for POC type 2
301 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
304 * frame_num for frames or 2*frame_num for field pics.
309 * max_frame_num or 2*max_frame_num for field pics.
313 //Weighted pred stuff
315 int use_weight_chroma;
316 int luma_log2_weight_denom;
317 int chroma_log2_weight_denom;
318 int luma_weight[2][48];
319 int luma_offset[2][48];
320 int chroma_weight[2][48][2];
321 int chroma_offset[2][48][2];
322 int implicit_weight[48][48];
325 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset;
327 int slice_beta_offset;
329 int redundant_pic_count;
331 int direct_spatial_mv_pred;
332 int dist_scale_factor[16];
333 int dist_scale_factor_field[32];
334 int map_col_to_list0[2][16];
335 int map_col_to_list0_field[2][32];
338 * num_ref_idx_l0/1_active_minus1 + 1
340 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
341 Picture *short_ref[32];
342 Picture *long_ref[32];
343 Picture default_ref_list[2][32];
344 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture *delayed_pic[16]; //FIXME size?
346 Picture *delayed_output_pic;
349 * memory management control operations buffer.
351 MMCO mmco[MAX_MMCO_COUNT];
354 int long_ref_count; ///< number of actual long term references
355 int short_ref_count; ///< number of actual short term references
358 GetBitContext intra_gb;
359 GetBitContext inter_gb;
360 GetBitContext *intra_gb_ptr;
361 GetBitContext *inter_gb_ptr;
363 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
369 uint8_t cabac_state[460];
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
377 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
378 uint8_t *chroma_pred_mode_table;
379 int last_qscale_diff;
380 int16_t (*mvd_table[2])[2];
381 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
382 uint8_t *direct_table;
383 uint8_t direct_cache[5*8];
385 uint8_t zigzag_scan[16];
386 uint8_t zigzag_scan8x8[64];
387 uint8_t zigzag_scan8x8_cavlc[64];
388 uint8_t field_scan[16];
389 uint8_t field_scan8x8[64];
390 uint8_t field_scan8x8_cavlc[64];
391 const uint8_t *zigzag_scan_q0;
392 const uint8_t *zigzag_scan8x8_q0;
393 const uint8_t *zigzag_scan8x8_cavlc_q0;
394 const uint8_t *field_scan_q0;
395 const uint8_t *field_scan8x8_q0;
396 const uint8_t *field_scan8x8_cavlc_q0;
401 static VLC coeff_token_vlc[4];
402 static VLC chroma_dc_coeff_token_vlc;
404 static VLC total_zeros_vlc[15];
405 static VLC chroma_dc_total_zeros_vlc[3];
407 static VLC run_vlc[6];
410 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
411 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
412 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
413 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one 32-bit word; on big-endian hosts a goes
 * into the high half, on little-endian hosts into the low half, so that the
 * in-memory byte layout is the same on both.
 * NOTE(review): the #else/#endif and closing brace of this function are
 * missing from this view of the file; both returns appear without their
 * preprocessor guards.
 */
415 static always_inline uint32_t pack16to32(int a, int b){
416 #ifdef WORDS_BIGENDIAN
417 return (b&0xFFFF) + (a<<16);
419 return (a&0xFFFF) + (b<<16);
/**
 * Fills a w x h rectangle with the value val.
 * @param vp pointer to the top-left corner of the destination rectangle
 * @param h height of the rectangle, should be a constant
 * @param w width of the rectangle, should be a constant
 * @param stride distance in bytes between consecutive rows
 * @param val the fill value; replicated into every byte when size==1
 * @param size the size of val (1 or 4), should be a constant
 *
 * NOTE(review): the if/else branch headers selecting on w (and the
 * h-dependent guards between the unrolled rows) are missing from this view
 * of the file; the store groups below appear without their guards.
 */
429 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
430 uint8_t *p= (uint8_t*)vp;
431 assert(size==1 || size==4);
/* destination must be aligned to min(w, STRIDE_ALIGN) and stride a multiple of w */
437 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
438 assert((stride&(w-1))==0);
/* 2-byte-wide rows: 16-bit stores, byte value replicated when size==1 */
440 const uint16_t v= size==4 ? val : val*0x0101;
441 *(uint16_t*)(p + 0*stride)= v;
443 *(uint16_t*)(p + 1*stride)= v;
445 *(uint16_t*)(p + 2*stride)=
446 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte-wide rows: 32-bit stores, byte value replicated when size==1 */
448 const uint32_t v= size==4 ? val : val*0x01010101;
449 *(uint32_t*)(p + 0*stride)= v;
451 *(uint32_t*)(p + 1*stride)= v;
453 *(uint32_t*)(p + 2*stride)=
454 *(uint32_t*)(p + 3*stride)= v;
456 //gcc can't optimize 64bit math on x86_32
457 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
/* 64-bit-capable hosts: 8-byte-wide rows with one 64-bit store per row
   (two copies of the 32-bit value packed via *0x0100000001ULL) */
458 const uint64_t v= val*0x0100000001ULL;
459 *(uint64_t*)(p + 0*stride)= v;
461 *(uint64_t*)(p + 1*stride)= v;
463 *(uint64_t*)(p + 2*stride)=
464 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte-wide rows with two 64-bit stores per row */
466 const uint64_t v= val*0x0100000001ULL;
467 *(uint64_t*)(p + 0+0*stride)=
468 *(uint64_t*)(p + 8+0*stride)=
469 *(uint64_t*)(p + 0+1*stride)=
470 *(uint64_t*)(p + 8+1*stride)= v;
472 *(uint64_t*)(p + 0+2*stride)=
473 *(uint64_t*)(p + 8+2*stride)=
474 *(uint64_t*)(p + 0+3*stride)=
475 *(uint64_t*)(p + 8+3*stride)= v;
/* fallback without 64-bit stores: 8-byte-wide rows via paired 32-bit stores */
477 *(uint32_t*)(p + 0+0*stride)=
478 *(uint32_t*)(p + 4+0*stride)= val;
480 *(uint32_t*)(p + 0+1*stride)=
481 *(uint32_t*)(p + 4+1*stride)= val;
483 *(uint32_t*)(p + 0+2*stride)=
484 *(uint32_t*)(p + 4+2*stride)=
485 *(uint32_t*)(p + 0+3*stride)=
486 *(uint32_t*)(p + 4+3*stride)= val;
/* 16-byte-wide rows via four 32-bit stores per row */
488 *(uint32_t*)(p + 0+0*stride)=
489 *(uint32_t*)(p + 4+0*stride)=
490 *(uint32_t*)(p + 8+0*stride)=
491 *(uint32_t*)(p +12+0*stride)=
492 *(uint32_t*)(p + 0+1*stride)=
493 *(uint32_t*)(p + 4+1*stride)=
494 *(uint32_t*)(p + 8+1*stride)=
495 *(uint32_t*)(p +12+1*stride)= val;
497 *(uint32_t*)(p + 0+2*stride)=
498 *(uint32_t*)(p + 4+2*stride)=
499 *(uint32_t*)(p + 8+2*stride)=
500 *(uint32_t*)(p +12+2*stride)=
501 *(uint32_t*)(p + 0+3*stride)=
502 *(uint32_t*)(p + 4+3*stride)=
503 *(uint32_t*)(p + 8+3*stride)=
504 *(uint32_t*)(p +12+3*stride)= val;
511 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
512 MpegEncContext * const s = &h->s;
513 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
514 int topleft_xy, top_xy, topright_xy, left_xy[2];
515 int topleft_type, top_type, topright_type, left_type[2];
519 //FIXME deblocking could skip the intra and nnz parts.
520 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
523 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
525 top_xy = mb_xy - s->mb_stride;
526 topleft_xy = top_xy - 1;
527 topright_xy= top_xy + 1;
528 left_xy[1] = left_xy[0] = mb_xy-1;
538 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
539 const int top_pair_xy = pair_xy - s->mb_stride;
540 const int topleft_pair_xy = top_pair_xy - 1;
541 const int topright_pair_xy = top_pair_xy + 1;
542 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
543 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
544 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
545 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
546 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
547 const int bottom = (s->mb_y & 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
553 top_xy -= s->mb_stride;
556 ? !curr_mb_frame_flag // bottom macroblock
557 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
559 topleft_xy -= s->mb_stride;
562 ? !curr_mb_frame_flag // bottom macroblock
563 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
565 topright_xy -= s->mb_stride;
567 if (left_mb_frame_flag != curr_mb_frame_flag) {
568 left_xy[1] = left_xy[0] = pair_xy - 1;
569 if (curr_mb_frame_flag) {
590 left_xy[1] += s->mb_stride;
603 h->top_mb_xy = top_xy;
604 h->left_mb_xy[0] = left_xy[0];
605 h->left_mb_xy[1] = left_xy[1];
609 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
610 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
611 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
613 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
615 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
617 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
618 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619 if(USES_LIST(mb_type,list)){
620 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
629 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
632 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
635 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
641 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
642 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
643 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
644 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
645 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
648 if(IS_INTRA(mb_type)){
649 h->topleft_samples_available=
650 h->top_samples_available=
651 h->left_samples_available= 0xFFFF;
652 h->topright_samples_available= 0xEEEA;
654 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
655 h->topleft_samples_available= 0xB3FF;
656 h->top_samples_available= 0x33FF;
657 h->topright_samples_available= 0x26EA;
660 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
661 h->topleft_samples_available&= 0xDF5F;
662 h->left_samples_available&= 0x5F5F;
666 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
667 h->topleft_samples_available&= 0x7FFF;
669 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
670 h->topright_samples_available&= 0xFBFF;
672 if(IS_INTRA4x4(mb_type)){
673 if(IS_INTRA4x4(top_type)){
674 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
675 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
676 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
677 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
680 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
685 h->intra4x4_pred_mode_cache[4+8*0]=
686 h->intra4x4_pred_mode_cache[5+8*0]=
687 h->intra4x4_pred_mode_cache[6+8*0]=
688 h->intra4x4_pred_mode_cache[7+8*0]= pred;
691 if(IS_INTRA4x4(left_type[i])){
692 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
693 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
696 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
701 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
702 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
717 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
719 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
720 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
721 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
722 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
724 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
725 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
727 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
728 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
731 h->non_zero_count_cache[4+8*0]=
732 h->non_zero_count_cache[5+8*0]=
733 h->non_zero_count_cache[6+8*0]=
734 h->non_zero_count_cache[7+8*0]=
736 h->non_zero_count_cache[1+8*0]=
737 h->non_zero_count_cache[2+8*0]=
739 h->non_zero_count_cache[1+8*3]=
740 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
744 for (i=0; i<2; i++) {
746 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
747 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
748 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
749 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
751 h->non_zero_count_cache[3+8*1 + 2*8*i]=
752 h->non_zero_count_cache[3+8*2 + 2*8*i]=
753 h->non_zero_count_cache[0+8*1 + 8*i]=
754 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
761 h->top_cbp = h->cbp_table[top_xy];
762 } else if(IS_INTRA(mb_type)) {
769 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type)) {
776 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
779 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
784 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
786 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
787 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
788 /*if(!h->mv_cache_clean[list]){
789 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
790 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
791 h->mv_cache_clean[list]= 1;
795 h->mv_cache_clean[list]= 0;
797 if(USES_LIST(top_type, list)){
798 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
799 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
800 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
804 h->ref_cache[list][scan8[0] + 0 - 1*8]=
805 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
806 h->ref_cache[list][scan8[0] + 2 - 1*8]=
807 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
809 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
813 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
816 //FIXME unify cleanup or sth
817 if(USES_LIST(left_type[0], list)){
818 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
819 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
820 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
821 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
822 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
823 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
825 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
826 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
827 h->ref_cache[list][scan8[0] - 1 + 0*8]=
828 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
831 if(USES_LIST(left_type[1], list)){
832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
833 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
834 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
835 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
836 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
837 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
839 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
840 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
841 h->ref_cache[list][scan8[0] - 1 + 2*8]=
842 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
843 assert((!left_type[0]) == (!left_type[1]));
846 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
849 if(USES_LIST(topleft_type, list)){
850 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
851 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
852 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
853 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
855 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
856 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
859 if(USES_LIST(topright_type, list)){
860 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
861 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
862 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
863 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
865 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
866 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
869 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
872 h->ref_cache[list][scan8[5 ]+1] =
873 h->ref_cache[list][scan8[7 ]+1] =
874 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
875 h->ref_cache[list][scan8[4 ]] =
876 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
877 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
878 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
879 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
880 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
881 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
884 /* XXX beurk, Load mvd */
885 if(USES_LIST(top_type, list)){
886 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
897 if(USES_LIST(left_type[0], list)){
898 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
899 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
900 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
902 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
903 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
905 if(USES_LIST(left_type[1], list)){
906 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
907 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
908 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
910 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
911 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
913 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
914 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
915 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
916 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
917 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
919 if(h->slice_type == B_TYPE){
920 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
922 if(IS_DIRECT(top_type)){
923 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
924 }else if(IS_8X8(top_type)){
925 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
926 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
927 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
929 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
932 if(IS_DIRECT(left_type[0]))
933 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
934 else if(IS_8X8(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
937 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
939 if(IS_DIRECT(left_type[1]))
940 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
941 else if(IS_8X8(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
944 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
950 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
951 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
956 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
958 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
959 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
961 #define MAP_F2F(idx, mb_type)\
962 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
963 h->ref_cache[list][idx] <<= 1;\
964 h->mv_cache[list][idx][1] /= 2;\
965 h->mvd_cache[list][idx][1] /= 2;\
970 #define MAP_F2F(idx, mb_type)\
971 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
972 h->ref_cache[list][idx] >>= 1;\
973 h->mv_cache[list][idx][1] <<= 1;\
974 h->mvd_cache[list][idx][1] <<= 1;\
984 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the intra4x4 prediction modes of the current macroblock's right
 * column (cache column 7, rows 1..4) and bottom row (cache row 4) from the
 * prediction cache back into the per-MB intra4x4_pred_mode array, where
 * subsequent macroblocks read them as their left/top neighbour modes.
 * NOTE(review): the closing brace of this function is missing from this
 * view of the file.
 */
987 static inline void write_back_intra_pred_mode(H264Context *h){
988 MpegEncContext * const s = &h->s;
989 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* [0..3]: right column of the MB */
991 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
992 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
993 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
994 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
/* [4..6]: bottom row of the MB (the corner is already stored as [3]) */
995 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
996 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
997 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Checks whether the top & left neighbouring blocks are available for the
 * requested intra4x4 prediction modes and remaps each mode so it only uses
 * available samples; logs an error for modes that cannot be substituted.
 * @return 0 on success, negative on an unusable mode (return statements are
 *         outside this view — TODO confirm against the full file)
 * NOTE(review): the loop headers, status checks, error returns and closing
 * braces of both remap loops are missing from this view of the file.
 */
1003 static inline int check_intra4x4_pred_mode(H264Context *h){
1004 MpegEncContext * const s = &h->s;
/* remap tables indexed by predicted mode: non-negative entries appear to be
   the substitute mode when the neighbour is unavailable, negative entries
   mark modes that trigger the error path below — TODO confirm */
1005 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1006 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* top row of 4x4 blocks needs samples from the MB above */
1009 if(!(h->top_samples_available&0x8000)){
1011 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1013 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1016 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* left column of 4x4 blocks needs samples from the MB to the left */
1021 if(!(h->left_samples_available&0x8000)){
1023 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1025 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1028 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1034 } //FIXME cleanup like next
/**
 * Checks whether the top & left neighbouring blocks are available for the
 * requested intra (chroma/16x16) prediction mode and changes a DC mode so
 * it only uses the available blocks; rejects out-of-range modes.
 * @param mode requested prediction mode, must be in [0,6]
 * @return the possibly remapped mode, or negative on error (return
 *         statements are outside this view — TODO confirm)
 * NOTE(review): the remap statements, error returns and closing braces are
 * missing from this view of the file.
 */
1039 static inline int check_intra_pred_mode(H264Context *h, int mode){
1040 MpegEncContext * const s = &h->s;
/* remap tables indexed by mode, used when the top/left neighbour is
   unavailable — TODO confirm entry semantics against the full file */
1041 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1042 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1044 if(mode < 0 || mode > 6) {
1045 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1049 if(!(h->top_samples_available&0x8000)){
1052 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1057 if(!(h->left_samples_available&0x8000)){
1060 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * gets the predicted intra4x4 prediction mode.
 * @param n 4x4 block index
 * Prediction is the minimum of the left and top neighbours' cached modes;
 * if either neighbour is unavailable (negative cache entry) DC_PRED is used.
 * NOTE(review): the final "return min" line is elided from this chunk.
 */
1071 static inline int pred_intra_mode(H264Context *h, int n){
1072 const int index8= scan8[n];
/* cache is laid out 8 entries per row: -1 = left neighbour, -8 = top */
1073 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1074 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1075 const int min= FFMIN(left, top);
1077 tprintf("mode:%d %d min:%d\n", left ,top, min);
1079 if(min<0) return DC_PRED;
/**
 * Copies the per-block non-zero coefficient counts from the decode cache
 * back into the per-macroblock non_zero_count[] array, in the layout the
 * neighbour-fetch code expects (right column and bottom row for luma,
 * then the chroma entries).
 * NOTE(review): braces/loop header lines are elided in this chunk.
 */
1083 static inline void write_back_non_zero_count(H264Context *h){
1084 MpegEncContext * const s = &h->s;
1085 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: right column (cache col 7) and bottom row (cache row 4) */
1087 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1088 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1089 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1090 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1091 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1092 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1093 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma U entries */
1095 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1096 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1097 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* chroma V entries */
1099 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1100 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1101 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1104 // store all luma nnzs, for deblocking
/* one bit per 4x4 luma block, packed into a 16-bit mask (MBAFF path) */
1107 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1108 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 * Averages the left and top neighbours' counts from the cache; the
 * averaging/return tail of the function is elided in this chunk.
 */
1116 static inline int pred_non_zero_count(H264Context *h, int n){
1117 const int index8= scan8[n];
/* cache layout: -1 = left neighbour, -8 = top neighbour */
1118 const int left= h->non_zero_count_cache[index8 - 1];
1119 const int top = h->non_zero_count_cache[index8 - 8];
/* i holds left+top here; values >=64 mark unavailable neighbours */
1122 if(i<64) i= (i+1)>>1;
1124 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right ("diagonal") motion vector candidate C for MV
 * prediction, falling back to the top-left neighbour when top-right is
 * unavailable. The MBAFF paths rescale vectors between field and frame
 * coordinates via SET_DIAG_MV.
 * @param C receives a pointer to the chosen MV
 * @return the reference index belonging to *C
 * NOTE(review): several guard/brace lines are elided from this chunk.
 */
1129 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1130 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1132 /* there is no consistent mapping of mvs to neighboring locations that will
1133 * make mbaff happy, so we can't move all this logic to fill_caches */
1135 MpegEncContext *s = &h->s;
1136 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch slot scan8[0]-2 holds the rescaled MV when crossing field/frame */
1138 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1139 *C = h->mv_cache[list][scan8[0]-2];
1142 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1143 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1144 if(IS_INTERLACED(mb_types[topright_xy])){
1145 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1146 const int x4 = X4, y4 = Y4;\
1147 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1148 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1149 return LIST_NOT_USED;\
1150 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1151 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1152 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1153 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* field MB reading from a frame MB above: double vertical MV, halve ref */
1155 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1158 if(topright_ref == PART_NOT_AVAILABLE
1159 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1160 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1162 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1163 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1166 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1167 && i >= scan8[0]+8){
1168 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1169 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF / same-parity path: use the cached top-right directly */
1175 if(topright_ref != PART_NOT_AVAILABLE){
1176 *C= h->mv_cache[list][ i - 8 + part_width ];
1177 return topright_ref;
1179 tprintf("topright MV not available\n");
/* fall back to the top-left neighbour */
1181 *C= h->mv_cache[list][ i - 8 - 1 ];
1182 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV (median predictor per H.264 8.4.1.3).
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * Uses neighbours A (left), B (top) and C (diagonal, via fetch_diagonal_mv);
 * when exactly one neighbour shares the target ref its MV is used directly,
 * otherwise the component-wise median. Some branch bodies are elided here.
 */
1193 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1194 const int index8= scan8[n];
1195 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1196 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1197 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1198 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1200 int diagonal_ref, match_count;
1202 assert(part_width==1 || part_width==2 || part_width==4);
1212 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
1213 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1214 tprintf("pred_motion match_count=%d\n", match_count);
1215 if(match_count > 1){ //most common
1216 *mx= mid_pred(A[0], B[0], C[0]);
1217 *my= mid_pred(A[1], B[1], C[1]);
1218 }else if(match_count==1){
1222 }else if(top_ref==ref){
/* no neighbour matches: median unless only the left one is available */
1230 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1234 *mx= mid_pred(A[0], B[0], C[0]);
1235 *my= mid_pred(A[1], B[1], C[1]);
1239 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * Top 16x8 half prefers the top neighbour B, bottom half prefers the left
 * neighbour A (when their ref matches); otherwise falls back to the
 * generic median predictor. Branch bodies are partly elided here.
 */
1248 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1250 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1251 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1253 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom half: use the left neighbour of block 8 */
1261 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1262 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1264 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1266 if(left_ref == ref){
/* fallback: standard median prediction over the full width */
1274 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * Left 8x16 half prefers the left neighbour A, right half prefers the
 * diagonal neighbour C; otherwise falls back to the median predictor.
 * Branch bodies are partly elided in this chunk.
 */
1283 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1285 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1286 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1288 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1290 if(left_ref == ref){
/* right half: diagonal candidate of block 4 (part_width 2) */
1299 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1301 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1303 if(diagonal_ref == ref){
/* fallback: standard median prediction */
1311 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV of a P-skip macroblock: zero MV when either the top or
 * left neighbour is unavailable or is a zero-MV ref-0 block, otherwise the
 * ordinary 16x16 median prediction with ref 0 (H.264 8.4.1.1).
 * The zero-MV assignment lines are elided in this chunk.
 */
1314 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1315 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1316 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1318 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1320 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1321 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1322 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* otherwise: normal median prediction for a 16x16 block, ref 0, list 0 */
1328 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct-mode distance scale factors
 * (H.264 8.4.1.2.3): for each list-0 reference,
 * dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) with
 * tx = (16384 + |td|/2) / td, where tb/td are clipped POC distances.
 * The MBAFF field-pair duplication loop follows at the end.
 */
1333 static inline void direct_dist_scale_factor(H264Context * const h){
1334 const int poc = h->s.current_picture_ptr->poc;
1335 const int poc1 = h->ref_list[1][0].poc;
1337 for(i=0; i<h->ref_count[0]; i++){
1338 int poc0 = h->ref_list[0][i].poc;
1339 int td = clip(poc1 - poc0, -128, 127);
1340 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* co-located distance is zero: neutral scale (== 1.0 in Q8) */
1341 h->dist_scale_factor[i] = 256;
1343 int tb = clip(poc - poc0, -128, 127);
1344 int tx = (16384 + (ABS(td) >> 1)) / td;
1345 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* MBAFF: both fields of a pair share the frame's scale factor */
1349 for(i=0; i<h->ref_count[0]; i++){
1350 h->dist_scale_factor_field[2*i] =
1351 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's reference counts/POCs and, for temporal
 * direct mode, builds map_col_to_list0[]: for each reference of the
 * co-located picture (ref_list[1][0]), the index of the same picture
 * (matched by POC) in our list 0. Missing frames map to 0 as a fallback.
 * Some loop braces are elided in this chunk.
 */
1355 static inline void direct_ref_list_init(H264Context * const h){
1356 MpegEncContext * const s = &h->s;
1357 Picture * const ref1 = &h->ref_list[1][0];
1358 Picture * const cur = s->current_picture_ptr;
1360 if(cur->pict_type == I_TYPE)
1361 cur->ref_count[0] = 0;
1362 if(cur->pict_type != B_TYPE)
1363 cur->ref_count[1] = 0;
1364 for(list=0; list<2; list++){
1365 cur->ref_count[list] = h->ref_count[list];
1366 for(j=0; j<h->ref_count[list]; j++)
1367 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the column map is only needed for temporal direct prediction */
1369 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1371 for(list=0; list<2; list++){
1372 for(i=0; i<ref1->ref_count[list]; i++){
1373 const int poc = ref1->ref_poc[list][i];
1374 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1375 for(j=0; j<h->ref_count[list]; j++)
1376 if(h->ref_list[list][j].poc == poc){
1377 h->map_col_to_list0[list][i] = j;
/* MBAFF: expand the frame map to per-field indices */
1383 for(list=0; list<2; list++){
1384 for(i=0; i<ref1->ref_count[list]; i++){
1385 j = h->map_col_to_list0[list][i];
1386 h->map_col_to_list0_field[list][2*i] = 2*j;
1387 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives the motion vectors and reference indices of a B-direct
 * macroblock (or its direct 8x8 sub-blocks), per H.264 8.4.1.2.
 * Two modes: spatial direct (median-predicted MVs, zeroed where the
 * co-located block is static) and temporal direct (co-located list-1 MVs
 * scaled by dist_scale_factor). May rewrite *mb_type to reflect the
 * partitioning actually used.
 * NOTE(review): many brace/declaration lines are elided from this chunk;
 * comments annotate visible code only.
 */
1393 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1394 MpegEncContext * const s = &h->s;
1395 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1396 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1397 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1398 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* co-located picture's MVs and reference indices, both lists */
1399 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1400 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1401 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1402 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1403 const int is_b8x8 = IS_8X8(*mb_type);
1407 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* pick the partition granularity from the co-located MB's type */
1408 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1409 /* FIXME save sub mb types from previous frames (or derive from MVs)
1410 * so we know exactly what block size to use */
1411 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1412 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1413 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1414 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1415 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1417 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1418 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1421 *mb_type |= MB_TYPE_DIRECT2;
1423 *mb_type |= MB_TYPE_INTERLACED;
1425 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1427 if(h->direct_spatial_mv_pred){
1432 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1434 /* ref = min(neighbors) */
1435 for(list=0; list<2; list++){
1436 int refa = h->ref_cache[list][scan8[0] - 1];
1437 int refb = h->ref_cache[list][scan8[0] - 8];
1438 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1440 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1442 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1444 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour ref in either list: default to ref 0, zero MV */
1450 if(ref[0] < 0 && ref[1] < 0){
1451 ref[0] = ref[1] = 0;
1452 mv[0][0] = mv[0][1] =
1453 mv[1][0] = mv[1][1] = 0;
1455 for(list=0; list<2; list++){
1457 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1459 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the MB/sub-MB types */
1464 *mb_type &= ~MB_TYPE_P0L1;
1465 sub_mb_type &= ~MB_TYPE_P0L1;
1466 }else if(ref[0] < 0){
1467 *mb_type &= ~MB_TYPE_P0L0;
1468 sub_mb_type &= ~MB_TYPE_P0L0;
1471 if(IS_16X16(*mb_type)){
1472 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1473 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* zero the MV where the co-located block is (nearly) static */
1474 if(!IS_INTRA(mb_type_col)
1475 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1476 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1477 && (h->x264_build>33 || !h->x264_build)))){
1479 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1483 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1487 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1488 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* 8x8 partitioned spatial direct: per sub-block handling */
1491 for(i8=0; i8<4; i8++){
1492 const int x8 = i8&1;
1493 const int y8 = i8>>1;
1495 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1497 h->sub_mb_type[i8] = sub_mb_type;
1499 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1500 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1501 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1502 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1505 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1506 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1507 && (h->x264_build>33 || !h->x264_build)))){
1508 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1509 if(IS_SUB_8X8(sub_mb_type)){
1510 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1511 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1513 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1515 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1518 for(i4=0; i4<4; i4++){
1519 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1520 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1522 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1524 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1530 }else{ /* direct temporal mv pred */
1531 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1532 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF: switch to the per-field maps and scale factors */
1535 if(IS_INTERLACED(*mb_type)){
1536 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1537 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1538 dist_scale_factor = h->dist_scale_factor_field;
/* mixed field/frame pairing with the co-located MB needs rescaling */
1540 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1541 /* FIXME assumes direct_8x8_inference == 1 */
1542 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1543 int mb_types_col[2];
1546 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1547 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1548 | (*mb_type & MB_TYPE_INTERLACED);
1549 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1551 if(IS_INTERLACED(*mb_type)){
1552 /* frame to field scaling */
1553 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1554 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1556 l1ref0 -= 2*h->b8_stride;
1557 l1ref1 -= 2*h->b8_stride;
1558 l1mv0 -= 4*h->b_stride;
1559 l1mv1 -= 4*h->b_stride;
1563 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1564 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1566 *mb_type |= MB_TYPE_16x8;
1568 *mb_type |= MB_TYPE_8x8;
1570 /* field to frame scaling */
1571 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1572 * but in MBAFF, top and bottom POC are equal */
1573 int dy = (s->mb_y&1) ? 1 : 2;
1575 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1576 l1ref0 += dy*h->b8_stride;
1577 l1ref1 += dy*h->b8_stride;
1578 l1mv0 += 2*dy*h->b_stride;
1579 l1mv1 += 2*dy*h->b_stride;
1582 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1584 *mb_type |= MB_TYPE_16x16;
1586 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal scaling with field/frame y adjustment (y_shift) */
1589 for(i8=0; i8<4; i8++){
1590 const int x8 = i8&1;
1591 const int y8 = i8>>1;
1593 const int16_t (*l1mv)[2]= l1mv0;
1595 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1597 h->sub_mb_type[i8] = sub_mb_type;
1599 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1600 if(IS_INTRA(mb_types_col[y8])){
1601 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1602 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1603 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1607 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1609 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1611 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1614 scale = dist_scale_factor[ref0];
1615 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1618 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1619 int my_col = (mv_col[1]<<y_shift)/2;
1620 int mx = (scale * mv_col[0] + 128) >> 8;
1621 int my = (scale * my_col + 128) >> 8;
1622 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1623 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1630 /* one-to-one mv scaling */
1632 if(IS_16X16(*mb_type)){
1633 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1634 if(IS_INTRA(mb_type_col)){
1635 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1636 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1637 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1639 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1640 : map_col_to_list0[1][l1ref1[0]];
1641 const int scale = dist_scale_factor[ref0];
1642 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* L0 MV = scaled co-located MV; L1 MV = L0 MV - co-located MV */
1644 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1645 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1646 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1647 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1648 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1651 for(i8=0; i8<4; i8++){
1652 const int x8 = i8&1;
1653 const int y8 = i8>>1;
1655 const int16_t (*l1mv)[2]= l1mv0;
1657 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1659 h->sub_mb_type[i8] = sub_mb_type;
1660 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1661 if(IS_INTRA(mb_type_col)){
1662 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1664 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1668 ref0 = l1ref0[x8 + y8*h->b8_stride];
1670 ref0 = map_col_to_list0[0][ref0];
1672 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1675 scale = dist_scale_factor[ref0];
1677 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1678 if(IS_SUB_8X8(sub_mb_type)){
1679 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1680 int mx = (scale * mv_col[0] + 128) >> 8;
1681 int my = (scale * mv_col[1] + 128) >> 8;
1682 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1683 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1685 for(i4=0; i4<4; i4++){
1686 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1687 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1688 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1689 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1690 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1691 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the decoded MVs, reference indices, MV deltas (CABAC) and direct
 * flags for the current macroblock from the caches back into the picture-
 * wide arrays. Lists the current MB does not use get LIST_NOT_USED refs.
 * Some loop headers/braces are elided in this chunk.
 */
1698 static inline void write_back_motion(H264Context *h, int mb_type){
1699 MpegEncContext * const s = &h->s;
1700 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1701 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1704 if(!USES_LIST(mb_type, 0))
1705 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1707 for(list=0; list<2; list++){
1709 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (8 bytes) per row from the cache */
1713 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1714 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1716 if( h->pps.cabac ) {
1717 if(IS_SKIP(mb_type))
1718 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1721 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1722 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1727 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1728 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1729 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1730 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1731 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags per 8x8 block, needed for CABAC context modelling */
1735 if(h->slice_type == B_TYPE && h->pps.cabac){
1736 if(IS_8X8(mb_type)){
1737 uint8_t *direct_table = &h->direct_table[b8_xy];
1738 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1739 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1740 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 * Strips the one-byte NAL header and removes 0x000003 emulation-prevention
 * sequences; when no escapes are present the input is returned in place.
 * NOTE(review): av_fast_realloc's result is not checked for NULL here.
 */
1752 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1756 // src[0]&0x80; //forbidden bit
1757 h->nal_ref_idc= src[0]>>5;
1758 h->nal_unit_type= src[0]&0x1F;
1762 for(i=0; i<length; i++)
1763 printf("%2X ", src[i]);
/* scan every second byte for zeros; an escape needs two zero bytes */
1765 for(i=0; i+1<length; i+=2){
1766 if(src[i]) continue;
1767 if(i>0 && src[i-1]==0) i--;
1768 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1770 /* startcode, so we must be past the end */
1777 if(i>=length-1){ //no escaped 0
1778 *dst_length= length;
1779 *consumed= length+1; //+1 for the header
1783 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1784 dst= h->rbsp_buffer;
1786 //printf("decoding esc\n");
1789 //remove escapes (very rare 1:2^22)
1790 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1791 if(src[si+2]==3){ //escape
1796 }else //next start code
1800 dst[di++]= src[si++];
1804 *consumed= si + 1;//+1 for the header
1805 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
/**
 * Escapes RBSP data into a NAL unit.
 * @param src the data which should be escaped
 * @param dst the target buffer, dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @returns length of escaped data in bytes or -1 if an error occured
 * Writes the NAL header byte, then inserts 0x03 emulation-prevention bytes
 * before any 0x0000[00-03] sequence. Escape-free data is memcpy'd directly.
 */
1817 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1818 int i, escape_count, si, di;
1822 assert(dst_length>0);
/* NAL header: nal_ref_idc (3 bits) + nal_unit_type (5 bits) */
1824 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1826 if(length==0) return 1;
/* first pass: count required escapes (every second byte suffices) */
1829 for(i=0; i<length; i+=2){
1830 if(src[i]) continue;
1831 if(i>0 && src[i-1]==0)
1833 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1839 if(escape_count==0){
1841 memcpy(dst+1, src, length);
1845 if(length + escape_count + 1> dst_length)
1848 //this should be damn rare (hopefully)
1850 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1851 temp= h->rbsp_buffer;
1852 //printf("encoding esc\n");
/* second pass: copy with 0x03 inserted after each 00 00 pair */
1857 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1858 temp[di++]= 0; si++;
1859 temp[di++]= 0; si++;
1861 temp[di++]= src[si++];
1864 temp[di++]= src[si++];
1866 memcpy(dst+1, temp, length+escape_count);
1868 assert(di == length+escape_count);
/**
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
 * (rbsp_stop_one_bit followed by zero alignment bits).
 */
1876 static void encode_rbsp_trailing(PutBitContext *pb){
/* pad with zeros up to the next byte boundary */
1879 length= (-put_bits_count(pb))&7;
1880 if(length) put_bits(pb, length, 0);
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 * (scans the last byte for the rbsp_stop_one_bit; body elided in this chunk)
 */
1888 static int decode_rbsp_trailing(uint8_t *src){
1892 tprintf("rbsp trailing %X\n", v);
/**
 * idct tranforms the 16 dc values and dequantize them.
 * @param qp quantization parameter
 * 2-D 4x4 Hadamard inverse transform over the luma DC coefficients stored
 * at strided positions inside the 16x16 block array, then dequantization
 * by qmul with rounding. Loop headers and the column-pass temp stores are
 * elided in this chunk.
 */
1905 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1908 int temp[16]; //FIXME check if this is a good idea
/* positions of the 16 DC terms within the strided block layout */
1909 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1910 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1912 //memset(block, 64, 2*256);
/* horizontal butterfly pass into temp[] */
1915 const int offset= y_offset[i];
1916 const int z0= block[offset+stride*0] + block[offset+stride*4];
1917 const int z1= block[offset+stride*0] - block[offset+stride*4];
1918 const int z2= block[offset+stride*1] - block[offset+stride*5];
1919 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass + dequant (Q8 rounding via +128, >>8) */
1928 const int offset= x_offset[i];
1929 const int z0= temp[4*0+i] + temp[4*2+i];
1930 const int z1= temp[4*0+i] - temp[4*2+i];
1931 const int z2= temp[4*1+i] - temp[4*3+i];
1932 const int z3= temp[4*1+i] + temp[4*3+i];
1934 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1935 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1936 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1937 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
/**
 * dct tranforms the 16 dc values (forward Hadamard, encoder side).
 * @param qp quantization parameter ??? FIXME
 * Same butterfly structure as the inverse, with a final >>1 instead of
 * dequantization. Loop headers / temp stores are elided in this chunk.
 */
1946 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1947 // const int qmul= dequant_coeff[qp][0];
1949 int temp[16]; //FIXME check if this is a good idea
1950 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1951 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterfly pass into temp[] */
1954 const int offset= y_offset[i];
1955 const int z0= block[offset+stride*0] + block[offset+stride*4];
1956 const int z1= block[offset+stride*0] - block[offset+stride*4];
1957 const int z2= block[offset+stride*1] - block[offset+stride*5];
1958 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass, halved to keep the range in check */
1967 const int offset= x_offset[i];
1968 const int z0= temp[4*0+i] + temp[4*2+i];
1969 const int z1= temp[4*0+i] - temp[4*2+i];
1970 const int z2= temp[4*1+i] - temp[4*3+i];
1971 const int z3= temp[4*1+i] + temp[4*3+i];
1973 block[stride*0 +offset]= (z0 + z3)>>1;
1974 block[stride*2 +offset]= (z1 + z2)>>1;
1975 block[stride*8 +offset]= (z1 - z2)>>1;
1976 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC terms.
 * The four DCs live at strided positions in the chroma block array.
 * NOTE(review): the declarations and butterfly lines between load and
 * store are elided in this chunk.
 */
1984 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1985 const int stride= 16*2;
1986 const int xStride= 16;
/* load the 2x2 DC coefficients */
1989 a= block[stride*0 + xStride*0];
1990 b= block[stride*0 + xStride*1];
1991 c= block[stride*1 + xStride*0];
1992 d= block[stride*1 + xStride*1];
/* store transformed + dequantized values (>>7 per the chroma DC scale) */
1999 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
2000 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
2001 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
2002 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward Hadamard transform of the chroma DC terms (encoder side);
 * same layout as chroma_dc_dequant_idct_c but without scaling.
 * NOTE(review): the declarations and butterfly lines are elided here.
 */
2006 static void chroma_dc_dct_c(DCTELEM *block){
2007 const int stride= 16*2;
2008 const int xStride= 16;
/* load the 2x2 DC coefficients */
2011 a= block[stride*0 + xStride*0];
2012 b= block[stride*0 + xStride*1];
2013 c= block[stride*1 + xStride*0];
2014 d= block[stride*1 + xStride*1];
/* store the transformed values */
2021 block[stride*0 + xStride*0]= (a+c);
2022 block[stride*0 + xStride*1]= (e+b);
2023 block[stride*1 + xStride*0]= (a-c);
2024 block[stride*1 + xStride*1]= (e-b);
/**
 * gets the chroma qp: the luma qp plus the PPS offset, clipped to [0,51]
 * and mapped through the chroma_qp[] table.
 */
2031 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2033 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Forward 4x4 H.264 integer transform of the difference src1 - src2
 * (encoder residual path): row pass directly on the pixel differences,
 * then column pass in place. Loop headers/braces are elided in this chunk.
 */
2038 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2040 //FIXME try int temp instead of block
/* row pass: butterflies on the residual of row i */
2043 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2044 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2045 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2046 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2047 const int z0= d0 + d3;
2048 const int z3= d0 - d3;
2049 const int z1= d1 + d2;
2050 const int z2= d1 - d2;
2052 block[0 + 4*i]= z0 + z1;
2053 block[1 + 4*i]= 2*z3 + z2;
2054 block[2 + 4*i]= z0 - z1;
2055 block[3 + 4*i]= z3 - 2*z2;
/* column pass: same butterflies down each column */
2059 const int z0= block[0*4 + i] + block[3*4 + i];
2060 const int z3= block[0*4 + i] - block[3*4 + i];
2061 const int z1= block[1*4 + i] + block[2*4 + i];
2062 const int z2= block[1*4 + i] - block[2*4 + i];
2064 block[0*4 + i]= z0 + z1;
2065 block[1*4 + i]= 2*z3 + z2;
2066 block[2*4 + i]= z0 - z1;
2067 block[3*4 + i]= z3 - 2*z2;
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 coefficient block in scan order.
 * @param intra selects the larger (1/3) rounding bias vs inter (1/6)
 * @param seperate_dc handle block[0] with a DC-specific shift: chroma DC
 *        uses QUANT_SHIFT-2, luma DC uses QUANT_SHIFT+1
 * @return index of the last non-zero coefficient
 * The threshold1/2 trick zero-tests |level| with a single unsigned compare.
 * Several loop/brace lines are elided in this chunk.
 */
2074 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2076 const int * const quant_table= quant_coeff[qscale];
2077 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
2078 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2079 const unsigned int threshold2= (threshold1<<1);
/* DC with reduced shift (chroma DC path) */
2085 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2086 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2087 const unsigned int dc_threshold2= (dc_threshold1<<1);
2089 int level= block[0]*quant_coeff[qscale+18][0];
2090 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2092 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2095 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2098 // last_non_zero = i;
/* DC with increased shift (luma DC path) */
2103 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2104 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2105 const unsigned int dc_threshold2= (dc_threshold1<<1);
2107 int level= block[0]*quant_table[0];
2108 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2110 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2113 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2116 // last_non_zero = i;
/* AC coefficients in zigzag order */
2129 const int j= scantable[i];
2130 int level= block[j]*quant_table[j];
2132 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2133 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2134 if(((unsigned)(level+threshold1))>threshold2){
2136 level= (bias + level)>>QUANT_SHIFT;
2139 level= (bias - level)>>QUANT_SHIFT;
2148 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicate the row of 4 pixels directly
 * above the block into all four rows.
 * @param src top-left pixel of the 4x4 block; src[-stride..3-stride] must
 *            be valid decoded neighbours
 * @param topright unused by this mode (kept for the shared pred4x4
 *                 function-pointer signature)
 * @param stride distance in bytes between rows
 * Fix: the block was truncated in SOURCE (missing closing brace); restored.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    /* 32-bit copy moves the whole 4-pixel row at once; assumes the 4-byte
     * alignment this file's prediction code relies on throughout */
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    ((uint32_t*)(src+0*stride))[0]= a;
    ((uint32_t*)(src+1*stride))[0]= a;
    ((uint32_t*)(src+2*stride))[0]= a;
    ((uint32_t*)(src+3*stride))[0]= a;
}
/**
 * 4x4 horizontal intra prediction: replicate each row's left neighbour
 * pixel across the whole row.
 * @param src top-left pixel of the 4x4 block; src[-1 + r*stride] must be
 *            valid for r = 0..3
 * @param topright unused by this mode (shared pred4x4 signature)
 * @param stride distance in bytes between rows
 * Fix: the block was truncated in SOURCE (missing closing brace); restored.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* multiplying the byte by 0x01010101 broadcasts it into all 4 lanes */
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
}
/**
 * 4x4 DC intra prediction: fill the block with the rounded average of the
 * four top and four left neighbour pixels.
 * @param src top-left pixel of the 4x4 block; the row above and the column
 *            to the left must be valid
 * @param topright unused by this mode (shared pred4x4 signature)
 * @param stride distance in bytes between rows
 * Fix: the block was truncated in SOURCE (missing closing brace); restored.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* (sum of 8 neighbours + 4) >> 3 per the H.264 DC mode rounding */
    const int dc= (   src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    /* broadcast the DC value to all 16 pixels, one 32-bit store per row */
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 left-DC intra prediction: average of the 4 left neighbours only
 * ((sum + 2) >> 2); used when the top row is unavailable.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    int sum= 2;                         /* rounding term */
    uint32_t fill;
    for(y=0; y<4; y++)
        sum += src[-1+y*stride];
    fill= (uint32_t)(sum>>2) * 0x01010101;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 top-DC intra prediction: average of the 4 top neighbours only
 * ((sum + 2) >> 2); used when the left column is unavailable.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    int sum= 2;                         /* rounding term */
    uint32_t fill;
    for(y=0; y<4; y++)
        sum += src[y-stride];
    fill= (uint32_t)(sum>>2) * 0x01010101;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 128-DC intra prediction: no neighbours available, fill the block
 * with the mid-grey value 128.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= 128U*0x01010101U;
}
2202 #define LOAD_TOP_RIGHT_EDGE\
2203 const int t4= topright[0];\
2204 const int t5= topright[1];\
2205 const int t6= topright[2];\
2206 const int t7= topright[3];\
2208 #define LOAD_LEFT_EDGE\
2209 const int l0= src[-1+0*stride];\
2210 const int l1= src[-1+1*stride];\
2211 const int l2= src[-1+2*stride];\
2212 const int l3= src[-1+3*stride];\
2214 #define LOAD_TOP_EDGE\
2215 const int t0= src[ 0-1*stride];\
2216 const int t1= src[ 1-1*stride];\
2217 const int t2= src[ 2-1*stride];\
2218 const int t3= src[ 3-1*stride];\
/* NOTE(review): directional 4x4 intra predictors (H.264 modes 3-8).
 * This chunk is a partial listing with original line numbers embedded;
 * the chained "dst= dst= ..." assignment lines and the LOAD_*_EDGE macro
 * invocations are elided between the visible lines.  Text kept byte-identical;
 * only comments added. */
/* Diagonal down-right (mode 4): fills anti-diagonals from the left column,
 * top-left corner and top row with 1-2-1 filtered neighbours. */
2220 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2221 const int lt= src[-1-1*stride];
2225 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2227 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2230 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2234 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2237 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2239 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2240 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* Diagonal down-left (mode 3): diagonals built from the top and
 * top-right neighbours (t4..t7 come from topright). */
2243 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2248 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2250 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2253 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2257 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2260 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2262 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2263 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* Vertical-right (mode 5): mixes 2-tap averages and 1-2-1 filtered values. */
2266 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2267 const int lt= src[-1-1*stride];
2270 const __attribute__((unused)) int unu= l3;
2273 src[1+2*stride]=(lt + t0 + 1)>>1;
2275 src[2+2*stride]=(t0 + t1 + 1)>>1;
2277 src[3+2*stride]=(t1 + t2 + 1)>>1;
2278 src[3+0*stride]=(t2 + t3 + 1)>>1;
2280 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2282 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2284 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2285 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2286 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2287 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* Vertical-left (mode 7): top/top-right neighbours only (t7 is loaded but
 * unused, hence the "unu" suppression). */
2290 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2293 const __attribute__((unused)) int unu= t7;
2295 src[0+0*stride]=(t0 + t1 + 1)>>1;
2297 src[0+2*stride]=(t1 + t2 + 1)>>1;
2299 src[1+2*stride]=(t2 + t3 + 1)>>1;
2301 src[2+2*stride]=(t3 + t4+ 1)>>1;
2302 src[3+2*stride]=(t4 + t5+ 1)>>1;
2303 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2305 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2307 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2309 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2310 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* Horizontal-up (mode 8): left column only; lower-right region saturates
 * to l3 (elided lines). */
2313 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2316 src[0+0*stride]=(l0 + l1 + 1)>>1;
2317 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2319 src[0+1*stride]=(l1 + l2 + 1)>>1;
2321 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2323 src[0+2*stride]=(l2 + l3 + 1)>>1;
2325 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* Horizontal-down (mode 6): left column, corner and top row (t3 loaded but
 * unused). */
2334 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2335 const int lt= src[-1-1*stride];
2338 const __attribute__((unused)) int unu= t3;
2341 src[2+1*stride]=(lt + l0 + 1)>>1;
2343 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2344 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2345 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2347 src[2+2*stride]=(l0 + l1 + 1)>>1;
2349 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2351 src[2+3*stride]=(l1 + l2+ 1)>>1;
2353 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2354 src[0+3*stride]=(l2 + l3 + 1)>>1;
2355 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: replicate the 16-pixel row above the
 * macroblock into all 16 rows, copied as four 32-bit words per row.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int y, x;
    const uint32_t *top= (const uint32_t*)(src-stride);
    for(y=0; y<16; y++){
        uint32_t *row= (uint32_t*)(src+y*stride);
        for(x=0; x<4; x++)
            row[x]= top[x];
    }
}
/**
 * 16x16 horizontal intra prediction: fill each of the 16 rows with its
 * left-neighbour pixel, replicated via a 0x01010101 splat.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y, x;
    for(y=0; y<16; y++){
        const uint32_t fill= src[-1+y*stride]*0x01010101;
        uint32_t *row= (uint32_t*)(src+y*stride);
        for(x=0; x<4; x++)
            row[x]= fill;
    }
}
/**
 * 16x16 DC intra prediction: average of the 16 left and 16 top neighbours
 * ((sum + 16) >> 5), splatted over the whole macroblock.
 * NOTE(review): the listing elides the top-row summation loop; this follows
 * the standard H.264 16x16 DC definition (left + top, 32 samples).
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc= 0;

    for(i=0; i<16; i++)
        dc += src[-1+i*stride];          /* left column */
    for(i=0; i<16; i++)
        dc += src[i-stride];             /* top row */
    dc= 0x01010101*((dc + 16)>>5);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= dc;
    }
}
/**
 * 16x16 left-DC intra prediction: average of the 16 left neighbours only
 * ((sum + 8) >> 4); used when the row above is unavailable.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc= 0;

    for(i=0; i<16; i++)
        dc += src[-1+i*stride];
    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= dc;
    }
}
/**
 * 16x16 top-DC intra prediction: average of the 16 top neighbours only
 * ((sum + 8) >> 4); used when the left column is unavailable.
 * NOTE(review): the summation loop is elided from the listing; this follows
 * the standard H.264 definition (top row, 16 samples).
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc= 0;

    for(i=0; i<16; i++)
        dc += src[i-stride];
    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= dc;
    }
}
/**
 * 16x16 128-DC intra prediction: no neighbours available, fill the whole
 * macroblock with the mid-grey value 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int y, x;
    for(y=0; y<16; y++){
        uint32_t *row= (uint32_t*)(src+y*stride);
        for(x=0; x<4; x++)
            row[x]= 0x01010101U*128U;
    }
}
/* NOTE(review): partial listing, original line numbers embedded; interior
 * lines (declarations of i/j/k/a/b, SVQ3 branch, loop closings) are elided.
 * Text kept byte-identical; only comments added. */
/* 16x16 plane prediction: fits a linear gradient (H horizontal, V vertical
 * slope) through the border pixels, then clips each predicted sample via the
 * cm/cropTbl lookup.  svq3 selects the SVQ3-compatible slope scaling. */
2449 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2452 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2453 const uint8_t * const src0 = src+7-stride;
2454 const uint8_t *src1 = src+8*stride-1;
2455 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2456 int H = src0[1] - src0[-1];
2457 int V = src1[0] - src2[ 0];
// weighted sums of symmetric border differences (weights k = 2..8)
2458 for(k=2; k<=8; ++k) {
2459 src1 += stride; src2 -= stride;
2460 H += k*(src0[k] - src0[-k]);
2461 V += k*(src1[0] - src2[ 0]);
// SVQ3 path: different (integer-truncating) slope scaling, with H/V swapped
2464 H = ( 5*(H/4) ) / 16;
2465 V = ( 5*(V/4) ) / 16;
2467 /* required for 100% accuracy */
2468 i = H; H = V; V = i;
// standard H.264 path: (5*x + 32) >> 6 slope scaling
2470 H = ( 5*H+32 ) >> 6;
2471 V = ( 5*V+32 ) >> 6;
2474 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2475 for(j=16; j>0; --j) {
2478 for(i=-16; i<0; i+=4) {
2479 src[16+i] = cm[ (b ) >> 5 ];
2480 src[17+i] = cm[ (b+ H) >> 5 ];
2481 src[18+i] = cm[ (b+2*H) >> 5 ];
2482 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Plain H.264 entry point: plane prediction with svq3 compatibility off. */
2489 static void pred16x16_plane_c(uint8_t *src, int stride){
2490 pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 vertical intra prediction (chroma): replicate the 8-pixel row above
 * the block into all 8 rows, copied as two 32-bit words per row.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    int y;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    for(y=0; y<8; y++){
        ((uint32_t*)(src+y*stride))[0]= a;
        ((uint32_t*)(src+y*stride))[1]= b;
    }
}
/**
 * 8x8 horizontal intra prediction (chroma): fill each of the 8 rows with
 * its left-neighbour pixel, replicated via a 0x01010101 splat.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int y;
    for(y=0; y<8; y++){
        const uint32_t fill= src[-1+y*stride]*0x01010101;
        ((uint32_t*)(src+y*stride))[0]= fill;
        ((uint32_t*)(src+y*stride))[1]= fill;
    }
}
/**
 * 8x8 128-DC intra prediction (chroma): no neighbours available, fill the
 * block with the mid-grey value 128.
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int y;
    for(y=0; y<8; y++){
        ((uint32_t*)(src+y*stride))[0]= 0x01010101U*128U;
        ((uint32_t*)(src+y*stride))[1]= 0x01010101U*128U;
    }
}
/**
 * 8x8 left-DC intra prediction (chroma): upper half uses the average of
 * the top 4 left neighbours, lower half the average of the bottom 4
 * (each (sum + 2) >> 2); the top row is unavailable.
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int y;
    uint32_t dc0= 0, dc2= 0;

    for(y=0; y<4; y++){
        dc0 += src[-1+y*stride];         /* left, rows 0..3 */
        dc2 += src[-1+(y+4)*stride];     /* left, rows 4..7 */
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(y=0; y<4; y++){
        ((uint32_t*)(src+y*stride))[0]= ((uint32_t*)(src+y*stride))[1]= dc0;
        ((uint32_t*)(src+(y+4)*stride))[0]= ((uint32_t*)(src+(y+4)*stride))[1]= dc2;
    }
}
/**
 * 8x8 top-DC intra prediction (chroma): left half uses the average of the
 * first 4 top neighbours, right half the average of the last 4
 * (each (sum + 2) >> 2); the left column is unavailable.
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int y;
    uint32_t dc0= 0, dc1= 0;

    for(y=0; y<4; y++){
        dc0 += src[y-stride];            /* top, columns 0..3 */
        dc1 += src[4+y-stride];          /* top, columns 4..7 */
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc1= 0x01010101*((dc1 + 2)>>2);

    for(y=0; y<8; y++){
        ((uint32_t*)(src+y*stride))[0]= dc0;
        ((uint32_t*)(src+y*stride))[1]= dc1;
    }
}
/**
 * 8x8 DC intra prediction (chroma), four 4x4 quadrants:
 *   top-left:     avg of top[0..3] + left[0..3]   ((sum + 4) >> 3)
 *   top-right:    avg of top[4..7]                ((sum + 2) >> 2)
 *   bottom-left:  avg of left[4..7]               ((sum + 2) >> 2)
 *   bottom-right: avg of top[4..7] + left[4..7]   ((sum + 4) >> 3)
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int y;
    uint32_t sum_tl= 0, sum_tr= 0, sum_bl= 0;
    uint32_t dc0, dc1, dc2, dc3;

    for(y=0; y<4; y++){
        sum_tl += src[-1+y*stride] + src[y-stride];  /* left top + top left */
        sum_tr += src[4+y-stride];                   /* top right */
        sum_bl += src[-1+(y+4)*stride];              /* left bottom */
    }
    dc3= 0x01010101*((sum_tr + sum_bl + 4)>>3);      /* from raw sums */
    dc0= 0x01010101*((sum_tl + 4)>>3);
    dc1= 0x01010101*((sum_tr + 2)>>2);
    dc2= 0x01010101*((sum_bl + 2)>>2);

    for(y=0; y<4; y++){
        ((uint32_t*)(src+y*stride))[0]= dc0;
        ((uint32_t*)(src+y*stride))[1]= dc1;
        ((uint32_t*)(src+(y+4)*stride))[0]= dc2;
        ((uint32_t*)(src+(y+4)*stride))[1]= dc3;
    }
}
/* NOTE(review): partial listing, original line numbers embedded; interior
 * lines (declarations of j/k/a/b, loop closings, "a += V") are elided.
 * Text kept byte-identical; only comments added. */
/* 8x8 plane prediction (chroma): fits a linear gradient through the border
 * pixels (H horizontal slope, V vertical slope) and clips each predicted
 * sample via the cm/cropTbl lookup. */
2592 static void pred8x8_plane_c(uint8_t *src, int stride){
2595 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2596 const uint8_t * const src0 = src+3-stride;
2597 const uint8_t *src1 = src+4*stride-1;
2598 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2599 int H = src0[1] - src0[-1];
2600 int V = src1[0] - src2[ 0];
// weighted sums of symmetric border differences (weights k = 2..4)
2601 for(k=2; k<=4; ++k) {
2602 src1 += stride; src2 -= stride;
2603 H += k*(src0[k] - src0[-k]);
2604 V += k*(src1[0] - src2[ 0]);
// slope scaling per the H.264 chroma plane formula
2606 H = ( 17*H+16 ) >> 5;
2607 V = ( 17*V+16 ) >> 5;
2609 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2610 for(j=8; j>0; --j) {
2613 src[0] = cm[ (b ) >> 5 ];
2614 src[1] = cm[ (b+ H) >> 5 ];
2615 src[2] = cm[ (b+2*H) >> 5 ];
2616 src[3] = cm[ (b+3*H) >> 5 ];
2617 src[4] = cm[ (b+4*H) >> 5 ];
2618 src[5] = cm[ (b+5*H) >> 5 ];
2619 src[6] = cm[ (b+6*H) >> 5 ];
2620 src[7] = cm[ (b+7*H) >> 5 ];
2625 #define SRC(x,y) src[(x)+(y)*stride]
2627 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2628 #define PREDICT_8x8_LOAD_LEFT \
2629 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2630 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2631 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2632 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2635 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2636 #define PREDICT_8x8_LOAD_TOP \
2637 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2638 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2639 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2640 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2641 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2644 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2645 #define PREDICT_8x8_LOAD_TOPRIGHT \
2646 int t8, t9, t10, t11, t12, t13, t14, t15; \
2647 if(has_topright) { \
2648 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2649 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2650 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2652 #define PREDICT_8x8_LOAD_TOPLEFT \
2653 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2655 #define PREDICT_8x8_DC(v) \
2657 for( y = 0; y < 8; y++ ) { \
2658 ((uint32_t*)src)[0] = \
2659 ((uint32_t*)src)[1] = v; \
/* NOTE(review): partial listing (braces and the PREDICT_8x8_DC(dc) calls of
 * the DC variants are elided).  These 8x8 luma predictors rely on the
 * PREDICT_8x8_LOAD_* / PREDICT_8x8_DC macros defined above; l0..l7 / t0..t7
 * are the 1-2-1 filtered left/top neighbours those macros declare.
 * Text kept byte-identical; only comments added. */
/* DC = 128 (no neighbours available). */
2663 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2665 PREDICT_8x8_DC(0x80808080);
/* DC from the 8 filtered left neighbours, (sum+4)>>3. */
2667 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2669 PREDICT_8x8_LOAD_LEFT;
2670 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* DC from the 8 filtered top neighbours, (sum+4)>>3. */
2673 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2675 PREDICT_8x8_LOAD_TOP;
2676 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* DC from all 16 filtered neighbours, (sum+8)>>4. */
2679 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2681 PREDICT_8x8_LOAD_LEFT;
2682 PREDICT_8x8_LOAD_TOP;
2683 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2684 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* Horizontal: each row filled with its filtered left neighbour l<y>. */
2687 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2689 PREDICT_8x8_LOAD_LEFT;
2690 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2691 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2692 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* Vertical: row 0 is written from the filtered top row (elided lines),
 * then copied to rows 1..7 as one 64-bit store per row. */
2695 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2698 PREDICT_8x8_LOAD_TOP;
2707 for( y = 1; y < 8; y++ )
2708 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left: each anti-diagonal gets a 1-2-1 filtered
 * value from the filtered top (t0..t7) and top-right (t8..t15) neighbours.
 * NOTE(review): partial listing, line numbers embedded, braces elided;
 * text kept byte-identical. */
2710 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2712 PREDICT_8x8_LOAD_TOP;
2713 PREDICT_8x8_LOAD_TOPRIGHT;
2714 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2715 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2716 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2717 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2718 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2719 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2720 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2721 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2722 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2723 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2724 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2725 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2726 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2727 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2728 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right: diagonals run from the filtered left column
 * (l0..l7) through the filtered top-left corner (lt) into the top row
 * (t0..t7).  NOTE(review): partial listing; text kept byte-identical. */
2730 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2732 PREDICT_8x8_LOAD_TOP;
2733 PREDICT_8x8_LOAD_LEFT;
2734 PREDICT_8x8_LOAD_TOPLEFT;
2735 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2736 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2737 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2738 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2739 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2740 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2741 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2742 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2743 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2744 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2745 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2746 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2747 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2748 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2749 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right: alternates 2-tap averages ((a+b+1)>>1) and
 * 1-2-1 filtered values along half-pel diagonals built from the left
 * column, top-left corner and top row.
 * NOTE(review): partial listing; text kept byte-identical. */
2752 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2754 PREDICT_8x8_LOAD_TOP;
2755 PREDICT_8x8_LOAD_LEFT;
2756 PREDICT_8x8_LOAD_TOPLEFT;
2757 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2758 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2759 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2760 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2761 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2762 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2763 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2764 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2765 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2766 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2767 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2768 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2769 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2770 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2771 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2772 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2773 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2774 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2775 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2776 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2777 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2778 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down: mirror of vertical-right — half-pel averages
 * and 1-2-1 filtered values along diagonals dominated by the left column.
 * NOTE(review): partial listing; text kept byte-identical. */
2780 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2782 PREDICT_8x8_LOAD_TOP;
2783 PREDICT_8x8_LOAD_LEFT;
2784 PREDICT_8x8_LOAD_TOPLEFT;
2785 SRC(0,7)= (l6 + l7 + 1) >> 1;
2786 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2787 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2788 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2789 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2790 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2791 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2792 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2793 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2794 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2795 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2796 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2797 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2798 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2799 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2800 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2801 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2802 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2803 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2804 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2805 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2806 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left: half-pel averages and 1-2-1 filtered values from
 * the filtered top (t0..t7) and top-right (t8..t12) neighbours.
 * NOTE(review): partial listing; text kept byte-identical. */
2808 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2810 PREDICT_8x8_LOAD_TOP;
2811 PREDICT_8x8_LOAD_TOPRIGHT;
2812 SRC(0,0)= (t0 + t1 + 1) >> 1;
2813 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2814 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2815 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2816 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2817 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2818 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2819 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2820 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2821 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2822 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2823 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2824 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2825 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2826 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2827 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2828 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2829 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2830 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2831 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2832 SRC(7,6)= (t10 + t11 + 1) >> 1;
2833 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up: built from the filtered left column only; the
 * lower-right triangle saturates to the bottom-most neighbour l7.
 * NOTE(review): partial listing; text kept byte-identical. */
2835 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2837 PREDICT_8x8_LOAD_LEFT;
2838 SRC(0,0)= (l0 + l1 + 1) >> 1;
2839 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2840 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2841 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2842 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2843 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2844 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2845 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2846 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2847 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2848 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2849 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2850 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2851 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2852 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2853 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2854 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2855 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2857 #undef PREDICT_8x8_LOAD_LEFT
2858 #undef PREDICT_8x8_LOAD_TOP
2859 #undef PREDICT_8x8_LOAD_TOPLEFT
2860 #undef PREDICT_8x8_LOAD_TOPRIGHT
2861 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in one direction (list 0 or 1):
 * computes the quarter-pel luma source position, falls back to
 * ff_emulated_edge_mc when the reference block extends outside the picture,
 * runs the qpel luma filter and (unless CODEC_FLAG_GRAY) the eighth-pel
 * chroma filters.
 * NOTE(review): partial listing — the "emu" declaration, the second-part
 * branch header for non-square partitions, and several braces are elided;
 * text kept byte-identical, only comments added. */
2867 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2868 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2869 int src_x_offset, int src_y_offset,
2870 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2871 MpegEncContext * const s = &h->s;
// mv_cache holds quarter-pel vectors; offsets are in 8-pel units *8 = qpel
2872 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2873 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2874 const int luma_xy= (mx&3) + ((my&3)<<2);
2875 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2876 uint8_t * src_cb, * src_cr;
2877 int extra_width= h->emu_edge_width;
2878 int extra_height= h->emu_edge_height;
2880 const int full_mx= mx>>2;
2881 const int full_my= my>>2;
2882 const int pic_width = 16*s->mb_width;
2883 const int pic_height = 16*s->mb_height >> MB_MBAFF;
// sub-pel interpolation reads 3 extra pixels on each side
2888 if(mx&7) extra_width -= 3;
2889 if(my&7) extra_height -= 3;
2891 if( full_mx < 0-extra_width
2892 || full_my < 0-extra_height
2893 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2894 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2895 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2896 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2900 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2902 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2905 if(s->flags&CODEC_FLAG_GRAY) return;
2908 // chroma offset when predicting from a field of opposite parity
2909 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2910 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2912 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2913 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2916 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2917 src_cb= s->edge_emu_buffer;
2919 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2922 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2923 src_cr= s->edge_emu_buffer;
2925 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted MC for one partition: runs list 0 with the "put" functions,
 * then (if bi-predicted) list 1 with the "avg" functions so the second
 * prediction is averaged onto the first.
 * NOTE(review): partial listing — the if(list0)/if(list1) headers, the
 * qpix_op=qpix_avg switch and braces are elided; text kept byte-identical. */
2928 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2929 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2930 int x_offset, int y_offset,
2931 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2932 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2933 int list0, int list1){
2934 MpegEncContext * const s = &h->s;
2935 qpel_mc_func *qpix_op= qpix_put;
2936 h264_chroma_mc_func chroma_op= chroma_put;
// offsets are in chroma-pel units: luma uses 2x
2938 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2939 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2940 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2941 x_offset += 8*s->mb_x;
2942 y_offset += 8*(s->mb_y >> MB_MBAFF);
2945 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2946 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2947 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2948 qpix_op, chroma_op);
// second direction averages onto the first prediction
2951 chroma_op= chroma_avg;
2955 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2956 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2957 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2958 qpix_op, chroma_op);
/* Weighted MC for one partition.  Bi-predicted case: both references are
 * predicted into separate buffers (dest + obmc_scratchpad) and combined with
 * biweight functions — either implicit weights (use_weight==2, weight1 =
 * 64-weight0) or explicit per-reference weights/offsets.  Uni-predicted
 * case: predict then apply the single-direction weight op.
 * NOTE(review): partial listing — the if(list0 && list1)/else branch
 * headers, use_weight_chroma check and braces are elided; text kept
 * byte-identical, only comments added. */
2962 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2963 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2964 int x_offset, int y_offset,
2965 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2966 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2967 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2968 int list0, int list1){
2969 MpegEncContext * const s = &h->s;
2971 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2972 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2973 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2974 x_offset += 8*s->mb_x;
2975 y_offset += 8*(s->mb_y >> MB_MBAFF);
2978 /* don't optimize for luma-only case, since B-frames usually
2979 * use implicit weights => chroma too. */
2980 uint8_t *tmp_cb = s->obmc_scratchpad;
2981 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2982 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2983 int refn0 = h->ref_cache[0][ scan8[n] ];
2984 int refn1 = h->ref_cache[1][ scan8[n] ];
2986 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2987 dest_y, dest_cb, dest_cr,
2988 x_offset, y_offset, qpix_put, chroma_put);
2989 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2990 tmp_y, tmp_cb, tmp_cr,
2991 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: log2_denom fixed at 5, offsets 0
2993 if(h->use_weight == 2){
2994 int weight0 = h->implicit_weight[refn0][refn1];
2995 int weight1 = 64 - weight0;
2996 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2997 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2998 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit weighting: per-list/per-reference weights and summed offsets
3000 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3001 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
3002 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
3003 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3004 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3005 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3006 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3007 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3008 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: predict in place, then weight
3011 int list = list1 ? 1 : 0;
3012 int refn = h->ref_cache[list][ scan8[n] ];
3013 Picture *ref= &h->ref_list[list][refn];
3014 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3015 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3016 qpix_put, chroma_put);
3018 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3019 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3020 if(h->use_weight_chroma){
3021 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3022 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3023 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3024 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatcher for one partition: chooses weighted MC when explicit weighting
 * is on (use_weight==1) or when implicit bi-prediction has a non-trivial
 * weight (!= 32, i.e. not a plain average); otherwise plain put/avg MC.
 * NOTE(review): partial listing — the "else" line before the mc_part_std
 * call is elided; text kept byte-identical. */
3029 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3030 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3031 int x_offset, int y_offset,
3032 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3033 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3034 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3035 int list0, int list1){
3036 if((h->use_weight==2 && list0 && list1
3037 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3038 || h->use_weight==1)
3039 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3040 x_offset, y_offset, qpix_put, chroma_put,
3041 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3043 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3044 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Prefetch reference pixels for the estimated motion of upcoming
 * macroblocks so they are cache-resident when actually needed.
 * NOTE(review): partial listing — the refn>=0 guard line is elided;
 * text kept byte-identical, only comments added. */
3047 static inline void prefetch_motion(H264Context *h, int list){
3048 /* fetch pixels for estimated mv 4 macroblocks ahead
3049 * optimized for 64byte cache lines */
3050 MpegEncContext * const s = &h->s;
3051 const int refn = h->ref_cache[list][scan8[0]];
// full-pel position of the current MB's first block, +8/+64 look-ahead bias
3053 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3054 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3055 uint8_t **src= h->ref_list[list][refn].data;
3056 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3057 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma planes are contiguous: src[2]-src[1] is the cb→cr plane gap
3058 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3059 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Top-level inter prediction for one macroblock: dispatches by partition
 * shape (16x16 / 16x8 / 8x16 / 8x8 with 8x8, 8x4, 4x8, 4x4 sub-partitions)
 * to mc_part with the matching qpel/chroma function and weight-table rows.
 * NOTE(review): partial listing — the 8x8 loop header ("for(i=0;i<4;i++)"),
 * the n=4*i assignment, the inner 4x4 loop header and closing braces are
 * elided; text kept byte-identical, only comments added. */
3063 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3064 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3065 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3066 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3067 MpegEncContext * const s = &h->s;
3068 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3069 const int mb_type= s->current_picture.mb_type[mb_xy];
3071 assert(IS_INTER(mb_type));
3073 prefetch_motion(h, 0);
3075 if(IS_16X16(mb_type)){
3076 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3077 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3078 &weight_op[0], &weight_avg[0],
3079 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3080 }else if(IS_16X8(mb_type)){
3081 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3082 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3083 &weight_op[1], &weight_avg[1],
3084 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3085 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3086 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3087 &weight_op[1], &weight_avg[1],
3088 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3089 }else if(IS_8X16(mb_type)){
3090 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3091 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3092 &weight_op[2], &weight_avg[2],
3093 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3094 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3095 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3096 &weight_op[2], &weight_avg[2],
3097 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8: loop over the four sub-macroblocks (loop header elided)
3101 assert(IS_8X8(mb_type));
3104 const int sub_mb_type= h->sub_mb_type[i];
3106 int x_offset= (i&1)<<2;
3107 int y_offset= (i&2)<<1;
3109 if(IS_SUB_8X8(sub_mb_type)){
3110 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3111 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3112 &weight_op[3], &weight_avg[3],
3113 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3114 }else if(IS_SUB_8X4(sub_mb_type)){
3115 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3116 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3117 &weight_op[4], &weight_avg[4],
3118 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3119 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3120 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3121 &weight_op[4], &weight_avg[4],
3122 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3123 }else if(IS_SUB_4X8(sub_mb_type)){
3124 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3125 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3126 &weight_op[5], &weight_avg[5],
3127 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3128 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3129 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3130 &weight_op[5], &weight_avg[5],
3131 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
// 4x4: inner loop over j (header elided)
3134 assert(IS_SUB_4X4(sub_mb_type));
3136 int sub_x_offset= x_offset + 2*(j&1);
3137 int sub_y_offset= y_offset + (j&2);
3138 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3139 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3140 &weight_op[6], &weight_avg[6],
3141 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3147 prefetch_motion(h, 1);
/* Initialize the static CAVLC VLC decoding tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) used by the residual decoder.
 * The static 'done' flag suggests one-time process-wide initialization.
 * NOTE(review): the 'if(!done)' guard, loop headers and closing braces appear
 * elided in this copy of the file — confirm against the full source. */
3150 static void decode_init_vlc(H264Context *h){
3151     static int done = 0;
/* chroma DC coeff_token table: 4*5 code entries */
3157         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3158                  &chroma_dc_coeff_token_len [0], 1, 1,
3159                  &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* luma coeff_token tables, one per nC context: 4*17 entries each */
3162             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3163                      &coeff_token_len [i][0], 1, 1,
3164                      &coeff_token_bits[i][0], 1, 1, 1);
/* chroma DC total_zeros tables */
3168             init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3169                      &chroma_dc_total_zeros_len [i][0], 1, 1,
3170                      &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* luma total_zeros tables, one per (total_coeff-1) in 1..15 */
3172         for(i=0; i<15; i++){
3173             init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3174                      &total_zeros_len [i][0], 1, 1,
3175                      &total_zeros_bits[i][0], 1, 1, 1);
/* run_before tables for zerosLeft 1..6 */
3179             init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3180                      &run_len [i][0], 1, 1,
3181                      &run_bits[i][0], 1, 1, 1);
/* dedicated run table for zerosLeft > 6 (16 entries) */
3183         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3184                  &run_len [6][0], 1, 1,
3185                  &run_bits[6][0], 1, 1, 1);
3190  * Sets the intra prediction function pointers.
/* Fills the four intra-prediction dispatch tables on the context with the
 * plain-C implementations: 4x4 luma, 8x8 luma (high profile), 8x8 chroma
 * and 16x16 luma prediction modes. */
3192 static void init_pred_ptrs(H264Context *h){
3193 //    MpegEncContext * const s = &h->s;
/* 4x4 luma intra prediction modes (9 directional + DC fallbacks) */
3195     h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
3196     h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
3197     h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
3198     h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3199     h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3200     h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
3201     h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
3202     h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
3203     h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
/* DC variants used when left/top neighbours are unavailable */
3204     h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
3205     h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
3206     h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
/* 8x8 luma intra prediction modes (8x8 transform / high profile) */
3208     h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
3209     h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
3210     h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
3211     h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3212     h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3213     h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
3214     h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
3215     h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
3216     h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
3217     h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
3218     h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
3219     h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
/* 8x8 chroma intra prediction modes */
3221     h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
3222     h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
3223     h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
3224     h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
3225     h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3226     h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3227     h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 luma intra prediction modes (indexed with the 8x8 mode constants) */
3229     h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
3230     h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
3231     h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
3232     h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
3233     h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3234     h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3235     h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/* Frees all per-stream tables allocated in alloc_tables() and resets the
 * derived slice_table pointer.  av_freep() NULLs each pointer, so calling
 * this on an already-freed context is safe. */
3238 static void free_tables(H264Context *h){
3239     av_freep(&h->intra4x4_pred_mode);
3240     av_freep(&h->chroma_pred_mode_table);
3241     av_freep(&h->cbp_table);
3242     av_freep(&h->mvd_table[0]);
3243     av_freep(&h->mvd_table[1]);
3244     av_freep(&h->direct_table);
3245     av_freep(&h->non_zero_count);
3246     av_freep(&h->slice_table_base);
3247     av_freep(&h->top_borders[1]);
3248     av_freep(&h->top_borders[0]);
/* slice_table points into slice_table_base (see alloc_tables), so it must
 * not be freed itself — just cleared */
3249     h->slice_table= NULL;
3251     av_freep(&h->mb2b_xy);
3252     av_freep(&h->mb2b8_xy);
3254     av_freep(&h->s.obmc_scratchpad);
/* Builds the 8x8 dequantization coefficient tables (one per matrix, 52 QP
 * entries each) from the PPS scaling matrices.  When both 8x8 scaling
 * matrices are identical the second table aliases the first buffer.
 * Coefficients are stored transposed when a non-C IDCT is in use.
 * NOTE(review): the 'idx' computation, the inner x loop and several braces
 * appear elided in this copy — confirm against the full source. */
3257 static void init_dequant8_coeff_table(H264Context *h){
3259     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3260     h->dequant8_coeff[0] = h->dequant8_buffer[0];
3261     h->dequant8_coeff[1] = h->dequant8_buffer[1];
3263     for(i=0; i<2; i++ ){
/* identical matrices -> share one buffer instead of recomputing */
3264         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3265             h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* one entry per QP value 0..51; div6[q] gives the left-shift for qp/6 */
3269         for(q=0; q<52; q++){
3270             int shift = div6[q];
3273                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3274                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3275                     h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the 4x4 dequantization coefficient tables (six matrices, 52 QP
 * entries each) from the PPS scaling matrices, sharing buffers between
 * matrices that compare equal.  Stored transposed for non-C IDCTs.
 * NOTE(review): the inner j/x loops, 'idx' computation and braces appear
 * elided in this copy — confirm against the full source. */
3280 static void init_dequant4_coeff_table(H264Context *h){
3282     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3283     for(i=0; i<6; i++ ){
3284         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier buffer when matrix j equals matrix i */
3286             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3287                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
/* one entry per QP value 0..51; +2 accounts for the 4x4 transform scale */
3294         for(q=0; q<52; q++){
3295             int shift = div6[q] + 2;
3298                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3299                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3300                     h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)builds all dequant tables for the current PPS: always the 4x4 tables,
 * the 8x8 tables only when the PPS enables the 8x8 transform.  With
 * lossless transform bypass, QP 0 entries are forced to the identity
 * scale (1<<6) so dequantization becomes a no-op.
 * NOTE(review): loop headers over i/x appear elided in this copy. */
3305 static void init_dequant_tables(H264Context *h){
3307     init_dequant4_coeff_table(h);
3308     if(h->pps.transform_8x8_mode)
3309         init_dequant8_coeff_table(h);
3310     if(h->sps.transform_bypass){
3313             h->dequant4_coeff[i][0][x] = 1<<6;
3314         if(h->pps.transform_8x8_mode)
3317                 h->dequant8_coeff[i][0][x] = 1<<6;
3324  * needs width/height
/* Allocates all per-stream decoding tables sized from the macroblock grid
 * (mb_stride x (mb_height+1), i.e. one extra row for out-of-frame access).
 * CABAC-only tables are allocated only when the PPS enables CABAC.
 * Also precomputes the mb_xy -> b_xy / b8_xy index maps.
 * Returns 0 on success; CHECKED_ALLOCZ presumably jumps to an error path
 * on allocation failure (elided in this copy) — confirm. */
3326 static int alloc_tables(H264Context *h){
3327     MpegEncContext * const s = &h->s;
3328     const int big_mb_num= s->mb_stride * (s->mb_height+1);
3331     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
3333     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
/* extra mb_stride entries so slice_table can be offset below */
3334     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
/* one row of luma(16)+cb(8)+cr(8) border samples per MB column, x2 for MBAFF */
3335     CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3336     CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3337     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3339     if( h->pps.cabac ) {
3340         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3341         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3342         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3343         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so MBs above/left of the frame
 * have valid (out-of-slice) entries */
3346     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
3347     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
3349     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3350     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* precompute MB index -> 4x4-block / 8x8-block index conversions */
3351     for(y=0; y<s->mb_height; y++){
3352         for(x=0; x<s->mb_width; x++){
3353             const int mb_xy= x + y*s->mb_stride;
3354             const int b_xy = 4*x + 4*y*h->b_stride;
3355             const int b8_xy= 2*x + 2*y*h->b8_stride;
3357             h->mb2b_xy [mb_xy]= b_xy;
3358             h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() because linesize is unknown here */
3362     s->obmc_scratchpad = NULL;
3364     if(!h->dequant4_coeff[0])
3365         init_dequant_tables(h);
/* Shared decoder/encoder context initialization: copies dimensions and
 * codec id from the AVCodecContext and seeds both PPS scaling matrices
 * with the flat default (all 16, i.e. no scaling). */
3373 static void common_init(H264Context *h){
3374     MpegEncContext * const s = &h->s;
3376     s->width = s->avctx->width;
3377     s->height = s->avctx->height;
3378     s->codec_id= s->avctx->codec->id;
/* -1 = no dequant tables built for any PPS yet */
3382     h->dequant_coeff_pps= -1;
3383     s->unrestricted_mv=1;
3384     s->decode=1; //FIXME
/* default flat scaling lists (value 16 == unity) until a PPS overrides them */
3386     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3387     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects AVC-style (length-prefixed,
 * extradata starting with 1) bitstreams as opposed to Annex-B.
 * NOTE(review): the handling after the extradata check and the return are
 * elided in this copy — confirm against the full source. */
3390 static int decode_init(AVCodecContext *avctx){
3391     H264Context *h= avctx->priv_data;
3392     MpegEncContext * const s = &h->s;
3394     MPV_decode_defaults(s);
3399     s->out_format = FMT_H264;
3400     s->workaround_bugs= avctx->workaround_bugs;
3403 //    s->decode_mb= ff_h263_decode_mb;
3405     avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with a 1 byte indicates an avcC (MP4-style) stream */
3409     if(avctx->extradata_size > 0 && avctx->extradata &&
3410        *(char *)avctx->extradata == 1){
/* Per-frame setup: starts the MPV frame and error resilience, precomputes
 * the block_offset[] pixel offsets for the 16 luma and 2x4 chroma blocks
 * (both frame [0..23] and field/MBAFF [24..47] variants), lazily allocates
 * the bipred scratchpad, and clears the slice table. */
3420 static int frame_start(H264Context *h){
3421     MpegEncContext * const s = &h->s;
3424     if(MPV_frame_start(s, s->avctx) < 0)
3426     ff_er_frame_start(s);
3428     assert(s->linesize && s->uvlinesize);
/* luma block offsets: scan8 gives the cache position, from which the pixel
 * x/y inside the MB is derived; [24+i] is the field (doubled-stride) copy */
3430     for(i=0; i<16; i++){
3431         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3432         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets, cb and cr share the same geometry */
3435         h->block_offset[16+i]=
3436         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3437         h->block_offset[24+16+i]=
3438         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3441     /* can't be in alloc_tables because linesize isn't known there.
3442      * FIXME: redo bipred weight to not require extra buffer? */
3443     if(!s->obmc_scratchpad)
3444         s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3446     /* some macroblocks will be accessed before they're available */
3448     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3450 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves the right column (left_border for the next MB) and bottom row
 * (top_borders for the next MB row) of a just-decoded macroblock, so intra
 * prediction / deblocking of neighbours can use pre-filter samples.
 * Chroma borders are skipped in grayscale (CODEC_FLAG_GRAY) mode. */
3454 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3455     MpegEncContext * const s = &h->s;
/* step back one row so index 0 addresses the row above the MB */
3459     src_cb -= uvlinesize;
3460     src_cr -= uvlinesize;
3462     // There are two lines saved, the line above the top macroblock of a pair,
3463     // and the line above the bottom macroblock
/* corner sample: last pixel of the saved top border becomes left_border[0] */
3464     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3465     for(i=1; i<17; i++){
3466         h->left_border[i]= src_y[15+i* linesize];
/* save the MB's bottom luma row (16 bytes, two 64-bit copies) */
3469     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3470     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3472     if(!(s->flags&CODEC_FLAG_GRAY)){
3473         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3474         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3476             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3477             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
/* chroma bottom rows at offsets 16 (cb) and 24 (cr) in top_borders */
3479         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3480         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swaps (xchg!=0 on entry, reversed on the matching xchg==0 call) the saved
 * neighbour border samples with the picture edges around the current MB, so
 * intra prediction sees pre-deblocking neighbour pixels.  Columns/rows are
 * only exchanged where a neighbour exists (deblock_left / deblock_top).
 * NOTE(review): the XCHG macro body and several guards are elided here. */
3484 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3485     MpegEncContext * const s = &h->s;
3488     int deblock_left = (s->mb_x > 0);
3489     int deblock_top  = (s->mb_y > 0);
/* step back to the top-left neighbour sample */
3491     src_y  -=   linesize + 1;
3492     src_cb -= uvlinesize + 1;
3493     src_cr -= uvlinesize + 1;
3495 #define XCHG(a,b,t,xchg)\
3502         for(i = !deblock_top; i<17; i++){
3503             XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
/* top luma row: 8 bytes conditionally, the next 8 always exchanged */
3508         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3509         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right samples from the next MB, if it exists */
3510         if(s->mb_x+1 < s->mb_width){
3511             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3515     if(!(s->flags&CODEC_FLAG_GRAY)){
3517             for(i = !deblock_top; i<9; i++){
3518                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3519                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3523             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3524             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border: saves borders for a macroblock PAIR
 * (32 luma rows, 16 chroma rows), writing the bottom two rows into
 * top_borders[0] and top_borders[1] respectively. */
3529 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3530     MpegEncContext * const s = &h->s;
/* step back two rows so indices 0/1 address the two rows above the pair */
3533     src_y  -= 2 *   linesize;
3534     src_cb -= 2 * uvlinesize;
3535     src_cr -= 2 * uvlinesize;
3537     // There are two lines saved, the line above the top macroblock of a pair,
3538     // and the line above the bottom macroblock
3539     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3540     h->left_border[1]= h->top_borders[1][s->mb_x][15];
3541     for(i=2; i<34; i++){
3542         h->left_border[i]= src_y[15+i*  linesize];
/* bottom two luma rows of the pair (rows 32 and 33 relative to src_y) */
3545     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3546     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3547     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3548     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3550     if(!(s->flags&CODEC_FLAG_GRAY)){
3551         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3552         h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
3553         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
3554         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3555         for(i=2; i<18; i++){
3556             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3557             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
/* bottom two chroma rows at offsets 16 (cb) and 24 (cr) */
3559         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3560         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3561         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3562         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border: swaps the saved borders of a macroblock
 * PAIR with the picture edges (two top rows via top_borders[0]/[1], a
 * 34-sample left column).  deblock_top requires mb_y > 1 because the pair
 * above occupies two MB rows.
 * NOTE(review): the XCHG macro body and several guards are elided here. */
3566 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3567     MpegEncContext * const s = &h->s;
3570     int deblock_left = (s->mb_x > 0);
3571     int deblock_top  = (s->mb_y > 1);
3573     tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* step back to the sample left of the two rows above the pair */
3575     src_y  -= 2 *   linesize + 1;
3576     src_cb -= 2 * uvlinesize + 1;
3577     src_cr -= 2 * uvlinesize + 1;
3579 #define XCHG(a,b,t,xchg)\
3586         for(i = (!deblock_top)<<1; i<34; i++){
3587             XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
/* two top luma rows; top_borders[1] holds the row directly above the pair */
3592         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3593         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3594         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3595         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3596         if(s->mb_x+1 < s->mb_width){
3597             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3598             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3602     if(!(s->flags&CODEC_FLAG_GRAY)){
3604             for(i = (!deblock_top) << 1; i<18; i++){
3605                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3606                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3610             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3611             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3612             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3613             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* High-level decode of one macroblock: computes destination pointers,
 * handles MBAFF field addressing, then does PCM copy / intra prediction /
 * inter motion compensation, adds the residual IDCT for luma and chroma,
 * and finally runs (or defers, for MBAFF pairs) the deblocking filter.
 * NOTE(review): numerous lines (branch headers, closing braces) are elided
 * in this copy; comments below describe only what is visible. */
3618 static void hl_decode_mb(H264Context *h){
3619     MpegEncContext * const s = &h->s;
3620     const int mb_x= s->mb_x;
3621     const int mb_y= s->mb_y;
3622     const int mb_xy= mb_x + mb_y*s->mb_stride;
3623     const int mb_type= s->current_picture.mb_type[mb_xy];
3624     uint8_t  *dest_y, *dest_cb, *dest_cr;
3625     int linesize, uvlinesize /*dct_offset*/;
3627     int *block_offset = &h->block_offset[0];
3628     const unsigned int bottom = mb_y & 1;
/* lossless mode: QP 0 with SPS transform_bypass flag */
3629     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3630     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3631     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3636     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3637     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3638     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* field macroblock: double the strides and use the field block offsets */
3641         linesize   = h->mb_linesize   = s->linesize * 2;
3642         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3643         block_offset = &h->block_offset[24];
3644         if(mb_y&1){ //FIXME move out of this func?
3645             dest_y -= s->linesize*15;
3646             dest_cb-= s->uvlinesize*7;
3647             dest_cr-= s->uvlinesize*7;
/* remap ref indices to field references (parity-dependent) */
3651         for(list=0; list<2; list++){
3652             if(!USES_LIST(mb_type, list))
3654             if(IS_16X16(mb_type)){
3655                 int8_t *ref = &h->ref_cache[list][scan8[0]];
3656                 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3658                 for(i=0; i<16; i+=4){
3659                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3660                     int ref = h->ref_cache[list][scan8[i]];
3662                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
/* frame macroblock: normal strides */
3668         linesize   = h->mb_linesize   = s->linesize;
3669         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3670 //        dct_offset = s->linesize * 16;
/* choose IDCT-add implementations: bypass adds raw residual, otherwise
 * 8x8 or 4x4 transform variants depending on the MB's DCT size */
3673     if(transform_bypass){
3675         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3676     }else if(IS_8x8DCT(mb_type)){
3677         idct_dc_add = s->dsp.h264_idct8_dc_add;
3678         idct_add = s->dsp.h264_idct8_add;
3680         idct_dc_add = s->dsp.h264_idct_dc_add;
3681         idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in the unfiltered borders of the MB pair above */
3684     if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3685        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3686         int mbt_y = mb_y&~1;
3687         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3688         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3689         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3690         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: copy raw samples from h->mb into the picture */
3693     if (IS_INTRA_PCM(mb_type)) {
3696         // The pixels are stored in h->mb array in the same order as levels,
3697         // copy them in output in the correct order.
3698         for(i=0; i<16; i++) {
3699             for (y=0; y<4; y++) {
3700                 for (x=0; x<4; x++) {
3701                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3705         for(i=16; i<16+4; i++) {
3706             for (y=0; y<4; y++) {
3707                 for (x=0; x<4; x++) {
3708                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3712         for(i=20; i<20+4; i++) {
3713             for (y=0; y<4; y++) {
3714                 for (x=0; x<4; x++) {
3715                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra MB: predict (with border exchange in non-MBAFF), then add residual */
3720         if(IS_INTRA(mb_type)){
3721             if(h->deblocking_filter && !FRAME_MBAFF)
3722                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3724             if(!(s->flags&CODEC_FLAG_GRAY)){
3725                 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3726                 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3729             if(IS_INTRA4x4(mb_type)){
3731                 if(IS_8x8DCT(mb_type)){
3732                     for(i=0; i<16; i+=4){
3733                         uint8_t * const ptr= dest_y + block_offset[i];
3734                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3735                         const int nnz = h->non_zero_count_cache[ scan8[i] ];
3736                         h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3737                                             (h->topright_samples_available<<(i+1))&0x8000, linesize);
/* nnz==1 with only a DC coeff uses the cheaper DC-only add */
3739                             if(nnz == 1 && h->mb[i*16])
3740                                 idct_dc_add(ptr, h->mb + i*16, linesize);
3742                                 idct_add(ptr, h->mb + i*16, linesize);
3746                 for(i=0; i<16; i++){
3747                     uint8_t * const ptr= dest_y + block_offset[i];
3749                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* modes needing top-right samples fall back to replicating the last
 * available top sample when the top-right block is unavailable */
3752                     if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3753                         const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3754                         assert(mb_y || linesize <= block_offset[i]);
3755                         if(!topright_avail){
3756                             tr= ptr[3 - linesize]*0x01010101;
3757                             topright= (uint8_t*) &tr;
3759                             topright= ptr + 4 - linesize;
3763                     h->pred4x4[ dir ](ptr, topright, linesize);
3764                     nnz = h->non_zero_count_cache[ scan8[i] ];
3766                         if(s->codec_id == CODEC_ID_H264){
3767                             if(nnz == 1 && h->mb[i*16])
3768                                 idct_dc_add(ptr, h->mb + i*16, linesize);
3770                                 idct_add(ptr, h->mb + i*16, linesize);
/* SVQ3 shares this decoder but uses its own IDCT */
3772                             svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: whole-MB prediction plus separate luma DC transform */
3777                 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3778                 if(s->codec_id == CODEC_ID_H264){
3779                     if(!transform_bypass)
3780                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3782                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3784             if(h->deblocking_filter && !FRAME_MBAFF)
3785                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* inter MB: motion compensation via hl_motion */
3786         }else if(s->codec_id == CODEC_ID_H264){
3787             hl_motion(h, dest_y, dest_cb, dest_cr,
3788                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3789                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3790                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residual for non-intra4x4 MBs (intra4x4 added it above) */
3794         if(!IS_INTRA4x4(mb_type)){
3795             if(s->codec_id == CODEC_ID_H264){
3796                 if(IS_INTRA16x16(mb_type)){
3797                     for(i=0; i<16; i++){
3798                         if(h->non_zero_count_cache[ scan8[i] ])
3799                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3800                         else if(h->mb[i*16])
3801                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* step 4 blocks at a time for 8x8 DCT, 1 for 4x4 */
3804                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3805                     for(i=0; i<16; i+=di){
3806                         int nnz = h->non_zero_count_cache[ scan8[i] ];
3808                             if(nnz==1 && h->mb[i*16])
3809                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3811                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3816                 for(i=0; i<16; i++){
3817                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3818                         uint8_t * const ptr= dest_y + block_offset[i];
3819                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: DC dequant/IDCT first, then per-block add */
3825         if(!(s->flags&CODEC_FLAG_GRAY)){
3826             uint8_t *dest[2] = {dest_cb, dest_cr};
3827             if(transform_bypass){
3828                 idct_add = idct_dc_add = s->dsp.add_pixels4;
3830                 idct_add = s->dsp.h264_idct_add;
3831                 idct_dc_add = s->dsp.h264_idct_dc_add;
3832                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3833                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3835             if(s->codec_id == CODEC_ID_H264){
3836                 for(i=16; i<16+8; i++){
3837                     if(h->non_zero_count_cache[ scan8[i] ])
3838                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3839                     else if(h->mb[i*16])
3840                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3843                 for(i=16; i<16+8; i++){
3844                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3845                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3846                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: for MBAFF, filter the whole pair once the bottom MB is done */
3852     if(h->deblocking_filter) {
3854             //FIXME try deblocking one mb at a time?
3855             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3856             const int mb_y= s->mb_y - 1;
3857             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3858             const int mb_xy= mb_x + mb_y*s->mb_stride;
3859             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3860             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3861             if (!bottom) return;
3862             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3863             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3864             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3866             if(IS_INTRA(mb_type_top | mb_type_bottom))
3867                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3869             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* filter top then bottom MB of the pair, each with its own chroma QP */
3873             tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3874             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3875             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3876             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3879             tprintf("call mbaff filter_mb\n");
3880             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3881             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3882             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* progressive path: backup borders then run the fast filter */
3884             tprintf("call filter_mb\n");
3885             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3886             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3887             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3893  * fills the default_ref_list.
/* Builds the default reference picture lists (L0, and L1 for B slices).
 * For B slices the short-term references are first sorted by POC, then L0
 * is built forward and L1 backward from the first POC >= current; for P
 * slices, short-term refs in decoding order followed by long-term refs.
 * NOTE(review): several loop/branch lines are elided in this copy. */
3895 static int fill_default_ref_list(H264Context *h){
3896     MpegEncContext * const s = &h->s;
3898     int smallest_poc_greater_than_current = -1;
3899     Picture sorted_short_ref[32];
3901     if(h->slice_type==B_TYPE){
3905         /* sort frame according to poc in B slice */
/* selection sort: repeatedly pick the smallest POC above the last one */
3906         for(out_i=0; out_i<h->short_ref_count; out_i++){
3908             int best_poc=INT_MAX;
3910             for(i=0; i<h->short_ref_count; i++){
3911                 const int poc= h->short_ref[i]->poc;
3912                 if(poc > limit && poc < best_poc){
3918             assert(best_i != INT_MIN);
3921             sorted_short_ref[out_i]= *h->short_ref[best_i];
3922             tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember where the "future" references (POC >= current) begin */
3923             if (-1 == smallest_poc_greater_than_current) {
3924                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3925                     smallest_poc_greater_than_current = out_i;
3931     if(s->picture_structure == PICT_FRAME){
3932         if(h->slice_type==B_TYPE){
3934             tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3936             // find the largest poc
/* L0 walks past refs forward then future; L1 the reverse (step = ±1) */
3937             for(list=0; list<2; list++){
3940                 int step= list ? -1 : 1;
3942                 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3943                     while(j<0 || j>= h->short_ref_count){
3944                         if(j != -99 && step == (list ? -1 : 1))
3947                         j= smallest_poc_greater_than_current + (step>>1);
/* reference==3 means both fields are usable as reference */
3949                     if(sorted_short_ref[j].reference != 3) continue;
3950                     h->default_ref_list[list][index  ]= sorted_short_ref[j];
3951                     h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* long-term references appended after the short-term ones */
3954                 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3955                     if(h->long_ref[i] == NULL) continue;
3956                     if(h->long_ref[i]->reference != 3) continue;
3958                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
3959                     h->default_ref_list[ list ][index++].pic_id= i;;
3962                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3963                     // swap the two first elements of L1 when
3964                     // L0 and L1 are identical
3965                     Picture temp= h->default_ref_list[1][0];
3966                     h->default_ref_list[1][0] = h->default_ref_list[1][1];
3967                     h->default_ref_list[1][1] = temp;
3970                 if(index < h->ref_count[ list ])
3971                     memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P slice: short-term refs in decoding order, then long-term refs */
3975             for(i=0; i<h->short_ref_count; i++){
3976                 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3977                 h->default_ref_list[0][index  ]= *h->short_ref[i];
3978                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3980             for(i = 0; i < 16; i++){
3981                 if(h->long_ref[i] == NULL) continue;
3982                 if(h->long_ref[i]->reference != 3) continue;
3983                 h->default_ref_list[0][index  ]= *h->long_ref[i];
3984                 h->default_ref_list[0][index++].pic_id= i;;
3986             if(index < h->ref_count[0])
3987                 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3990         if(h->slice_type==B_TYPE){
3992             //FIXME second field handling
3996     for (i=0; i<h->ref_count[0]; i++) {
3997         tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3999     if(h->slice_type==B_TYPE){
4000         for (i=0; i<h->ref_count[1]; i++) {
4001             tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4008 static void print_short_term(H264Context *h);
4009 static void print_long_term(H264Context *h);
/* Parses ref_pic_list_reordering from the slice header and applies it:
 * starts from the default lists, then for each reordering command moves
 * the named short-term (idc 0/1, by pic-num difference) or long-term
 * (idc 2, by index) picture to the front of the remaining list.  Missing
 * pictures are zero-filled with an error message.  Finally pads unused
 * entries with the current picture and initializes direct-mode state.
 * Returns 0 on success, -1 on bitstream errors (returns partly elided). */
4011 static int decode_ref_pic_list_reordering(H264Context *h){
4012     MpegEncContext * const s = &h->s;
4015     print_short_term(h);
4017     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4019     for(list=0; list<2; list++){
4020         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
4022         if(get_bits1(&s->gb)){
4023             int pred= h->curr_pic_num;
4025             for(index=0; ; index++){
4026                 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4029                 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
4031                 if(reordering_of_pic_nums_idc==3)
4034                 if(index >= h->ref_count[list]){
4035                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4039                 if(reordering_of_pic_nums_idc<3){
4040                     if(reordering_of_pic_nums_idc<2){
4041                         const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4043                         if(abs_diff_pic_num >= h->max_pic_num){
4044                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num */
4048                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4049                         else                                pred+= abs_diff_pic_num;
4050                         pred &= h->max_pic_num - 1;
4052                         for(i= h->short_ref_count-1; i>=0; i--){
4053                             ref = h->short_ref[i];
4054                             assert(ref->reference == 3);
4055                             assert(!ref->long_ref);
4056                             if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4060                             ref->pic_id= ref->frame_num;
4062                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4063                         ref = h->long_ref[pic_id];
4064                         ref->pic_id= pic_id;
4065                         assert(ref->reference == 3);
4066                         assert(ref->long_ref);
4071                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4072                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift entries down to insert the selected picture at 'index' */
4074                         for(i=index; i+1<h->ref_count[list]; i++){
4075                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4078                         for(; i > index; i--){
4079                             h->ref_list[list][i]= h->ref_list[list][i-1];
4081                         h->ref_list[list][index]= *ref;
4084                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4090         if(h->slice_type!=B_TYPE) break;
/* replace still-empty entries with the current picture (error resilience) */
4092     for(list=0; list<2; list++){
4093         for(index= 0; index < h->ref_count[list]; index++){
4094             if(!h->ref_list[list][index].data[0])
4095                 h->ref_list[list][index]= s->current_picture;
4097         if(h->slice_type!=B_TYPE) break;
4100     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4101         direct_dist_scale_factor(h);
4102     direct_ref_list_init(h);
/* For MBAFF, derives per-field reference entries from each frame reference:
 * ref_list[list][16+2*i] / [16+2*i+1] become the top/bottom field views
 * (doubled linesize, bottom field offset by one line), and the weighted-
 * prediction tables are duplicated for the field entries.
 * NOTE(review): the inner j loop headers are elided in this copy. */
4106 static void fill_mbaff_ref_list(H264Context *h){
4108     for(list=0; list<2; list++){
4109         for(i=0; i<h->ref_count[list]; i++){
4110             Picture *frame = &h->ref_list[list][i];
4111             Picture *field = &h->ref_list[list][16+2*i];
4114                 field[0].linesize[j] <<= 1;
4115             field[1] = field[0];
/* bottom field starts one frame line below the top field */
4117                 field[1].data[j] += frame->linesize[j];
4119             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4120             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4122                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4123                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* duplicate implicit weights for the field reference indices too */
4127     for(j=0; j<h->ref_count[1]; j++){
4128         for(i=0; i<h->ref_count[0]; i++)
4129             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4130         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4131         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parses the explicit weighted-prediction table (pred_weight_table) from
 * the slice header: log2 denominators, then per-reference luma and chroma
 * weight/offset pairs (defaults 1<<denom and 0 when the flags are absent).
 * Sets h->use_weight / h->use_weight_chroma when any entry differs from
 * the defaults.  For B slices both lists are parsed.
 * NOTE(review): some flag-setting lines and braces are elided here. */
4135 static int pred_weight_table(H264Context *h){
4136     MpegEncContext * const s = &h->s;
4138     int luma_def, chroma_def;
4141     h->use_weight_chroma= 0;
4142     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4143     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4144     luma_def = 1<<h->luma_log2_weight_denom;
4145     chroma_def = 1<<h->chroma_log2_weight_denom;
4147     for(list=0; list<2; list++){
4148         for(i=0; i<h->ref_count[list]; i++){
4149             int luma_weight_flag, chroma_weight_flag;
4151             luma_weight_flag= get_bits1(&s->gb);
4152             if(luma_weight_flag){
4153                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4154                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4155                 if(   h->luma_weight[list][i] != luma_def
4156                    || h->luma_offset[list][i] != 0)
/* absent flag -> identity weight, zero offset */
4159                 h->luma_weight[list][i]= luma_def;
4160                 h->luma_offset[list][i]= 0;
4163             chroma_weight_flag= get_bits1(&s->gb);
4164             if(chroma_weight_flag){
4167                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4168                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4169                     if(   h->chroma_weight[list][i][j] != chroma_def
4170                        || h->chroma_offset[list][i][j] != 0)
4171                         h->use_weight_chroma= 1;
4176                     h->chroma_weight[list][i][j]= chroma_def;
4177                     h->chroma_offset[list][i][j]= 0;
4181         if(h->slice_type != B_TYPE) break;
4183     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derives the implicit bi-prediction weight table for B slices
 * (weighted_bipred_idc == 2) from POC distances, per H.264 8.4.2.3.2:
 * weight = 64 - dist_scale_factor, clamped to 32/32 when the scale
 * factor leaves [-64, 128]. The single-ref symmetric case short-circuits
 * to unweighted prediction.
 * NOTE(review): elided chunk — some original lines are missing here.
 */
4187 static void implicit_weight_table(H264Context *h){
4188 MpegEncContext * const s = &h->s;
4190 int cur_poc = s->current_picture_ptr->poc;
// one ref each side, equidistant from current POC -> weights are 32/32,
// i.e. plain averaging; skip building the table
4192 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4193 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4195 h->use_weight_chroma= 0;
4200 h->use_weight_chroma= 2;
4201 h->luma_log2_weight_denom= 5;
4202 h->chroma_log2_weight_denom= 5;
4204 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4205 int poc0 = h->ref_list[0][ref0].poc;
4206 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4207 int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipping and tx/dist_scale_factor follow the spec's fixed-point
// temporal-distance scaling
4208 int td = clip(poc1 - poc0, -128, 127);
4210 int tb = clip(cur_poc - poc0, -128, 127);
4211 int tx = (16384 + (ABS(td) >> 1)) / td;
4212 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4213 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4214 h->implicit_weight[ref0][ref1] = 32;
4216 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4218 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the decoder's reference claim on a picture; the visible portion
 * scans the delayed-output queue for the picture being released.
 * NOTE(review): tail of this function is elided from this chunk.
 */
4223 static inline void unreference_pic(H264Context *h, Picture *pic){
4226 if(pic == h->delayed_output_pic)
4229 for(i = 0; h->delayed_pic[i]; i++)
4230 if(pic == h->delayed_pic[i]){
4238 * instantaneous decoder refresh.
/**
 * Handles an IDR (instantaneous decoder refresh): unreferences and clears
 * every long-term and short-term reference picture, resetting both counts
 * to zero so decoding restarts from a clean DPB reference state.
 */
4240 static void idr(H264Context *h){
4243 for(i=0; i<16; i++){
4244 if (h->long_ref[i] != NULL) {
4245 unreference_pic(h, h->long_ref[i]);
4246 h->long_ref[i]= NULL;
4249 h->long_ref_count=0;
4251 for(i=0; i<h->short_ref_count; i++){
4252 unreference_pic(h, h->short_ref[i]);
4253 h->short_ref[i]= NULL;
4255 h->short_ref_count=0;
4258 /* forget old pics after a seek */
4258 /* forget old pics after a seek */
/**
 * AVCodecContext.flush callback: clears the reference flag on all delayed
 * pictures, the delayed-output picture and the current picture, and drops
 * the decoder's pointers to them, so a seek starts from an empty DPB.
 */
4259 static void flush_dpb(AVCodecContext *avctx){
4260 H264Context *h= avctx->priv_data;
4262 for(i=0; i<16; i++) {
4263 if(h->delayed_pic[i])
4264 h->delayed_pic[i]->reference= 0;
4265 h->delayed_pic[i]= NULL;
4267 if(h->delayed_output_pic)
4268 h->delayed_output_pic->reference= 0;
4269 h->delayed_output_pic= NULL;
4271 if(h->s.current_picture_ptr)
4272 h->s.current_picture_ptr->reference= 0;
4277 * @return the removed picture or NULL if an error occurs
/**
 * Removes the short-term reference picture with the given frame_num from
 * h->short_ref, compacting the array.
 * Note: the caller still owns the reference; the picture is returned,
 * not unreferenced here.
 * @return the removed picture or NULL if an error occurs
 */
4279 static Picture * remove_short(H264Context *h, int frame_num){
4280 MpegEncContext * const s = &h->s;
4283 if(s->avctx->debug&FF_DEBUG_MMCO)
4284 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4286 for(i=0; i<h->short_ref_count; i++){
4287 Picture *pic= h->short_ref[i];
4288 if(s->avctx->debug&FF_DEBUG_MMCO)
4289 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4290 if(pic->frame_num == frame_num){
4291 h->short_ref[i]= NULL;
// close the gap left by the removed entry
4292 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4293 h->short_ref_count--;
4302 * @return the removed picture or NULL if an error occurs
/**
 * Removes the long-term reference at index i (slot simply NULLed; the
 * long-term array is indexed, not compacted). Count is only decremented
 * when the slot was occupied.
 * @return the removed picture or NULL if the slot was empty
 */
4304 static Picture * remove_long(H264Context *h, int i){
4307 pic= h->long_ref[i];
4308 h->long_ref[i]= NULL;
4309 if(pic) h->long_ref_count--;
4315 * print short term list
/**
 * Debug helper: dumps the short-term reference list (index, frame_num,
 * poc, data pointer) when FF_DEBUG_MMCO is enabled; otherwise a no-op.
 */
4317 static void print_short_term(H264Context *h) {
4319 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4320 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4321 for(i=0; i<h->short_ref_count; i++){
4322 Picture *pic= h->short_ref[i];
4323 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4329 * print long term list
/**
 * Debug helper: dumps all 16 long-term reference slots (index, frame_num,
 * poc, data pointer) when FF_DEBUG_MMCO is enabled; otherwise a no-op.
 */
4331 static void print_long_term(H264Context *h) {
4333 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4334 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4335 for(i = 0; i < 16; i++){
4336 Picture *pic= h->long_ref[i];
4338 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4345 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the reference picture marking (memory management control
 * operations, H.264 8.2.5): applies each decoded MMCO opcode to the
 * short/long reference lists, then (visible at the tail) inserts the
 * current picture at the head of the short-term list when it was not
 * marked long-term.
 * NOTE(review): elided chunk — case labels, breaks and some statements
 * are missing; comments cover only the visible lines.
 */
4347 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4348 MpegEncContext * const s = &h->s;
4350 int current_is_long=0;
4353 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4354 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4356 for(i=0; i<mmco_count; i++){
4357 if(s->avctx->debug&FF_DEBUG_MMCO)
4358 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4360 switch(mmco[i].opcode){
// mark a short-term picture unused for reference
4361 case MMCO_SHORT2UNUSED:
4362 pic= remove_short(h, mmco[i].short_frame_num);
4364 unreference_pic(h, pic);
4365 else if(s->avctx->debug&FF_DEBUG_MMCO)
4366 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// move a short-term picture into a long-term slot (evicting any occupant)
4368 case MMCO_SHORT2LONG:
4369 pic= remove_long(h, mmco[i].long_index);
4370 if(pic) unreference_pic(h, pic);
4372 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4373 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4374 h->long_ref_count++;
// mark a long-term slot unused for reference
4376 case MMCO_LONG2UNUSED:
4377 pic= remove_long(h, mmco[i].long_index);
4379 unreference_pic(h, pic);
4380 else if(s->avctx->debug&FF_DEBUG_MMCO)
4381 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (MMCO_LONG, label elided) store the current picture long-term
4384 pic= remove_long(h, mmco[i].long_index);
4385 if(pic) unreference_pic(h, pic);
4387 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4388 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4389 h->long_ref_count++;
4393 case MMCO_SET_MAX_LONG:
4394 assert(mmco[i].long_index <= 16);
4395 // just remove the long term which index is greater than new max
4396 for(j = mmco[i].long_index; j<16; j++){
4397 pic = remove_long(h, j);
4398 if (pic) unreference_pic(h, pic);
// (MMCO_RESET, label elided) empty both reference lists
4402 while(h->short_ref_count){
4403 pic= remove_short(h, h->short_ref[0]->frame_num);
4404 unreference_pic(h, pic);
4406 for(j = 0; j < 16; j++) {
4407 pic= remove_long(h, j);
4408 if(pic) unreference_pic(h, pic);
// sliding-window default: the current picture becomes the newest
// short-term reference unless it was marked long-term above
4415 if(!current_is_long){
4416 pic= remove_short(h, s->current_picture_ptr->frame_num);
4418 unreference_pic(h, pic);
4419 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4422 if(h->short_ref_count)
4423 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4425 h->short_ref[0]= s->current_picture_ptr;
4426 h->short_ref[0]->long_ref=0;
4427 h->short_ref_count++;
4430 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header: IDR flags
 * (no_output_of_prior_pics / long_term_reference), or the adaptive MMCO
 * opcode list; otherwise synthesizes a sliding-window MMCO_SHORT2UNUSED
 * when the reference buffer is full.
 * NOTE(review): elided chunk — several original lines are missing.
 */
4435 static int decode_ref_pic_marking(H264Context *h){
4436 MpegEncContext * const s = &h->s;
4439 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4440 s->broken_link= get_bits1(&s->gb) -1;
4441 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4442 if(h->mmco[0].long_index == -1)
4445 h->mmco[0].opcode= MMCO_LONG;
4449 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4450 for(i= 0; i<MAX_MMCO_COUNT; i++) {
// NOTE(review): stray double semicolon below — harmless but should be cleaned up
4451 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4453 h->mmco[i].opcode= opcode;
4454 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped to the
// frame_num range
4455 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4456 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4457 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4461 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4462 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4463 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4464 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4469 if(opcode > MMCO_LONG){
4470 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4473 if(opcode == MMCO_END)
// sliding window: evict the oldest short-term ref when the buffer is full
4478 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4480 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4481 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4482 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count for the current picture (H.264 8.2.1),
 * handling all three poc_type modes: 0 (explicit lsb + inferred msb),
 * 1 (frame_num-based with per-cycle offsets), and 2 (derived directly
 * from frame_num). Stores the per-field POCs and the frame POC
 * (min of the two fields) in the current picture.
 * NOTE(review): elided chunk — several original lines are missing.
 */
4492 static int init_poc(H264Context *h){
4493 MpegEncContext * const s = &h->s;
4494 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4497 if(h->nal_unit_type == NAL_IDR_SLICE){
4498 h->frame_num_offset= 0;
// frame_num wrapped -> bump the offset by one full frame_num period
4500 if(h->frame_num < h->prev_frame_num)
4501 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4503 h->frame_num_offset= h->prev_frame_num_offset;
4506 if(h->sps.poc_type==0){
4507 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4509 if(h->nal_unit_type == NAL_IDR_SLICE){
// infer poc_msb from the direction of the lsb wrap (spec 8-3)
4514 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4515 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4516 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4517 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4519 h->poc_msb = h->prev_poc_msb;
4520 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4522 field_poc[1] = h->poc_msb + h->poc_lsb;
4523 if(s->picture_structure == PICT_FRAME)
4524 field_poc[1] += h->delta_poc_bottom;
4525 }else if(h->sps.poc_type==1){
4526 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4529 if(h->sps.poc_cycle_length != 0)
4530 abs_frame_num = h->frame_num_offset + h->frame_num;
4534 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4537 expected_delta_per_poc_cycle = 0;
4538 for(i=0; i < h->sps.poc_cycle_length; i++)
4539 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4541 if(abs_frame_num > 0){
4542 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4543 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4545 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4546 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4547 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4551 if(h->nal_ref_idc == 0)
4552 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4554 field_poc[0] = expectedpoc + h->delta_poc[0];
4555 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4557 if(s->picture_structure == PICT_FRAME)
4558 field_poc[1] += h->delta_poc[1];
// (poc_type==2, condition elided): POC follows frame_num directly;
// non-reference pictures sit one below the surrounding references
4561 if(h->nal_unit_type == NAL_IDR_SLICE){
4564 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4565 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4571 if(s->picture_structure != PICT_BOTTOM_FIELD)
4572 s->current_picture_ptr->field_poc[0]= field_poc[0];
4573 if(s->picture_structure != PICT_TOP_FIELD)
4574 s->current_picture_ptr->field_poc[1]= field_poc[1];
4575 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4576 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4582 * decodes a slice header.
4583 * this will allso call MPV_common_init() and frame_start() as needed
/**
 * decodes a slice header.
 * this will allso call MPV_common_init() and frame_start() as needed
 *
 * Parses: first_mb_in_slice, slice_type, pps_id (and resolves PPS/SPS),
 * picture geometry (reinitializing MPV and the scan tables on first use
 * or size change), frame_num, field/MBAFF flags, POC syntax, reference
 * counts and list reordering, weighted prediction tables, ref pic
 * marking, qp, SP/SI extras, and deblocking-filter parameters.
 * NOTE(review): elided chunk — many original lines (returns, braces,
 * some statements) are missing; comments describe only what is visible.
 */
4585 static int decode_slice_header(H264Context *h){
4586 MpegEncContext * const s = &h->s;
4587 int first_mb_in_slice, pps_id;
4588 int num_ref_idx_active_override_flag;
4589 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4591 int default_ref_list_done = 0;
// nal_ref_idc==0 means this picture is never used as a reference
4593 s->current_picture.reference= h->nal_ref_idc != 0;
4594 s->dropable= h->nal_ref_idc == 0;
4596 first_mb_in_slice= get_ue_golomb(&s->gb);
4598 slice_type= get_ue_golomb(&s->gb);
4600 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type values 5..9 mean "fixed for the whole picture"
4605 h->slice_type_fixed=1;
4607 h->slice_type_fixed=0;
4609 slice_type= slice_type_map[ slice_type ];
// the default ref list only needs rebuilding when the type changes
4610 if (slice_type == I_TYPE
4611 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4612 default_ref_list_done = 1;
4614 h->slice_type= slice_type;
4616 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4618 pps_id= get_ue_golomb(&s->gb);
4620 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4623 h->pps= h->pps_buffer[pps_id];
4624 if(h->pps.slice_group_count == 0){
4625 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4629 h->sps= h->sps_buffer[ h->pps.sps_id ];
4630 if(h->sps.log2_max_frame_num == 0){
4631 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// dequant tables depend on the PPS; rebuild when it changes
4635 if(h->dequant_coeff_pps != pps_id){
4636 h->dequant_coeff_pps = pps_id;
4637 init_dequant_tables(h);
4640 s->mb_width= h->sps.mb_width;
4641 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4643 h->b_stride= s->mb_width*4;
4644 h->b8_stride= s->mb_width*2;
4646 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4647 if(h->sps.frame_mbs_only_flag)
4648 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4650 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4652 if (s->context_initialized
4653 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4657 if (!s->context_initialized) {
4658 if (MPV_common_init(s) < 0)
// scan tables must match the idct implementation's coefficient order
4661 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4662 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4663 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4666 for(i=0; i<16; i++){
4667 #define T(x) (x>>2) | ((x<<2) & 0xF)
4668 h->zigzag_scan[i] = T(zigzag_scan[i]);
4669 h-> field_scan[i] = T( field_scan[i]);
4673 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4674 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4675 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4676 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4677 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4680 for(i=0; i<64; i++){
4681 #define T(x) (x>>3) | ((x&7)<<3)
4682 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4683 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4684 h->field_scan8x8[i] = T(field_scan8x8[i]);
4685 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// lossless (transform-bypass) blocks use the untransposed reference scans
4689 if(h->sps.transform_bypass){ //FIXME same ugly
4690 h->zigzag_scan_q0 = zigzag_scan;
4691 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4692 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4693 h->field_scan_q0 = field_scan;
4694 h->field_scan8x8_q0 = field_scan8x8;
4695 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4697 h->zigzag_scan_q0 = h->zigzag_scan;
4698 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4699 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4700 h->field_scan_q0 = h->field_scan;
4701 h->field_scan8x8_q0 = h->field_scan8x8;
4702 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4707 s->avctx->width = s->width;
4708 s->avctx->height = s->height;
4709 s->avctx->sample_aspect_ratio= h->sps.sar;
4710 if(!s->avctx->sample_aspect_ratio.den)
4711 s->avctx->sample_aspect_ratio.den = 1;
4713 if(h->sps.timing_info_present_flag){
4714 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// old x264 builds wrote timescale without the factor-of-2 field convention
4715 if(h->x264_build > 0 && h->x264_build < 44)
4716 s->avctx->time_base.den *= 2;
4717 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4718 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4722 if(h->slice_num == 0){
4723 if(frame_start(h) < 0)
4727 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4728 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4731 h->mb_aff_frame = 0;
4732 if(h->sps.frame_mbs_only_flag){
4733 s->picture_structure= PICT_FRAME;
4735 if(get_bits1(&s->gb)) { //field_pic_flag
4736 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4737 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4739 s->picture_structure= PICT_FRAME;
4740 h->mb_aff_frame = h->sps.mb_aff;
// MBAFF addresses macroblock pairs, hence the <<1 on the row
4744 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4745 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4746 if(s->mb_y >= s->mb_height){
4750 if(s->picture_structure==PICT_FRAME){
4751 h->curr_pic_num= h->frame_num;
4752 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4754 h->curr_pic_num= 2*h->frame_num;
4755 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4758 if(h->nal_unit_type == NAL_IDR_SLICE){
4759 get_ue_golomb(&s->gb); /* idr_pic_id */
4762 if(h->sps.poc_type==0){
4763 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4765 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4766 h->delta_poc_bottom= get_se_golomb(&s->gb);
4770 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4771 h->delta_poc[0]= get_se_golomb(&s->gb);
4773 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4774 h->delta_poc[1]= get_se_golomb(&s->gb);
4779 if(h->pps.redundant_pic_cnt_present){
4780 h->redundant_pic_count= get_ue_golomb(&s->gb);
4783 //set defaults, might be overriden a few line later
4784 h->ref_count[0]= h->pps.ref_count[0];
4785 h->ref_count[1]= h->pps.ref_count[1];
4787 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4788 if(h->slice_type == B_TYPE){
4789 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4790 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4791 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4793 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4795 if(num_ref_idx_active_override_flag){
4796 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4797 if(h->slice_type==B_TYPE)
4798 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4800 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4801 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4807 if(!default_ref_list_done){
4808 fill_default_ref_list(h);
4811 if(decode_ref_pic_list_reordering(h) < 0)
4814 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4815 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4816 pred_weight_table(h);
4817 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4818 implicit_weight_table(h);
4822 if(s->current_picture.reference)
4823 decode_ref_pic_marking(h);
4826 fill_mbaff_ref_list(h);
4828 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4829 h->cabac_init_idc = get_ue_golomb(&s->gb);
4831 h->last_qscale_diff = 0;
4832 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4833 if(s->qscale<0 || s->qscale>51){
4834 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4837 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4838 //FIXME qscale / qp ... stuff
4839 if(h->slice_type == SP_TYPE){
4840 get_bits1(&s->gb); /* sp_for_switch_flag */
4842 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4843 get_se_golomb(&s->gb); /* slice_qs_delta */
4846 h->deblocking_filter = 1;
4847 h->slice_alpha_c0_offset = 0;
4848 h->slice_beta_offset = 0;
4849 if( h->pps.deblocking_filter_parameters_present ) {
4850 h->deblocking_filter= get_ue_golomb(&s->gb);
// bitstream uses 1=off/0=on; internal flag uses the opposite sense
4851 if(h->deblocking_filter < 2)
4852 h->deblocking_filter^= 1; // 1<->0
4854 if( h->deblocking_filter ) {
4855 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4856 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4859 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4860 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4861 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4862 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4863 h->deblocking_filter= 0;
// NOTE(review): the '?' below is a placeholder in the original source
// (FMO slice_group_change_cycle bit-length is unimplemented) — TODO fix
4866 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4867 slice_group_change_cycle= get_bits(&s->gb, ?);
4872 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4873 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4875 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4876 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4878 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4880 av_get_pict_type_char(h->slice_type),
4881 pps_id, h->frame_num,
4882 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4883 h->ref_count[0], h->ref_count[1],
4885 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4887 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// non-reference frames may use the fast 2-tap qpel when CODEC_FLAG2_FAST is set
4891 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4892 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4893 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4895 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4896 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit using the bitstream-reader cache (log = position of the
 * terminating one-bit from the MSB side).
 * NOTE(review): the return statement is elided from this chunk.
 */
4905 static inline int get_level_prefix(GetBitContext *gb){
4909 OPEN_READER(re, gb);
4910 UPDATE_CACHE(re, gb);
4911 buf=GET_CACHE(re, gb);
4913 log= 32 - av_log2(buf);
4915 print_bin(buf>>(32-log), log);
4916 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4919 LAST_SKIP_BITS(re, gb, log);
4920 CLOSE_READER(re, gb);
/**
 * Checks whether an 8x8 transform may be used for the current macroblock:
 * disallowed if any sub-macroblock partition is smaller than 8x8, or is
 * DIRECT-predicted without direct_8x8_inference_flag.
 * NOTE(review): surrounding loop/return lines are elided from this chunk.
 */
4925 static inline int get_dct8x8_allowed(H264Context *h){
4928 if(!IS_SUB_8X8(h->sub_mb_type[i])
4929 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4936 * decodes a residual block.
4937 * @param n block index
4938 * @param scantable scantable
4939 * @param max_coeff number of coefficients in the block
4940 * @return <0 if an error occured
/**
 * decodes a residual block (CAVLC, H.264 9.2): coeff_token ->
 * total_coeff/trailing_ones, sign bits for trailing ones, level codes
 * with adaptive suffix length, total_zeros, then run_before values to
 * place levels into the block via the scantable. qmul!=identity path
 * also dequantizes ((level*qmul[j]+32)>>6).
 * @param n block index (CHROMA_DC/LUMA_DC special-cased)
 * @param scantable scantable
 * @param qmul dequant multipliers, or the pass-through path when elided condition selects it
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occured
 * NOTE(review): elided chunk — if/else and brace lines are missing;
 * comments cover only the visible statements.
 */
4942 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4943 MpegEncContext * const s = &h->s;
4944 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4946 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4948 //FIXME put trailing_onex into the context
// chroma DC uses its own small VLC; luma blocks pick a coeff_token table
// from the predicted non-zero count of neighbours
4950 if(n == CHROMA_DC_BLOCK_INDEX){
4951 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4952 total_coeff= coeff_token>>2;
4954 if(n == LUMA_DC_BLOCK_INDEX){
4955 total_coeff= pred_non_zero_count(h, 0);
4956 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4957 total_coeff= coeff_token>>2;
4959 total_coeff= pred_non_zero_count(h, n);
4960 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4961 total_coeff= coeff_token>>2;
4962 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4966 //FIXME set last_non_zero?
// low two bits of coeff_token are the trailing-ones count
4971 trailing_ones= coeff_token&3;
4972 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4973 assert(total_coeff<=16);
// trailing ones carry only a sign bit each (+1/-1)
4975 for(i=0; i<trailing_ones; i++){
4976 level[i]= 1 - 2*get_bits1(gb);
4980 int level_code, mask;
4981 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4982 int prefix= get_level_prefix(gb);
4984 //first coefficient has suffix_length equal to 0 or 1
4985 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4987 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4989 level_code= (prefix<<suffix_length); //part
4990 }else if(prefix==14){
4992 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4994 level_code= prefix + get_bits(gb, 4); //part
4995 }else if(prefix==15){
4996 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4997 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4999 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// fewer than 3 trailing ones -> levels +-1 were not coded, shift the range
5003 if(trailing_ones < 3) level_code += 2;
// map unsigned level_code to signed level: even->positive, odd->negative
5008 mask= -(level_code&1);
5009 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5012 //remaining coefficients have suffix_length > 0
5013 for(;i<total_coeff;i++) {
5014 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5015 prefix = get_level_prefix(gb);
5017 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5018 }else if(prefix==15){
5019 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5021 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5024 mask= -(level_code&1);
5025 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// adaptively grow the suffix length as level magnitudes increase
5026 if(level_code > suffix_limit[suffix_length])
// all positions used -> no total_zeros syntax element
5031 if(total_coeff == max_coeff)
5034 if(n == CHROMA_DC_BLOCK_INDEX)
5035 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5037 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5040 coeff_num = zeros_left + total_coeff - 1;
5041 j = scantable[coeff_num];
// pass-through path: store raw levels (no dequantization)
5043 block[j] = level[0];
5044 for(i=1;i<total_coeff;i++) {
5047 else if(zeros_left < 7){
5048 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5050 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5052 zeros_left -= run_before;
5053 coeff_num -= 1 + run_before;
5054 j= scantable[ coeff_num ];
// dequantizing path: same placement loop, levels scaled by qmul
5059 block[j] = (level[0] * qmul[j] + 32)>>6;
5060 for(i=1;i<total_coeff;i++) {
5063 else if(zeros_left < 7){
5064 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5066 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5068 zeros_left -= run_before;
5069 coeff_num -= 1 + run_before;
5070 j= scantable[ coeff_num ];
5072 block[j]= (level[i] * qmul[j] + 32)>>6;
// overshooting zeros_left means the bitstream was inconsistent
5077 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * For a skipped MBAFF pair, infers mb_field_decoding_flag from the left
 * neighbour if it belongs to this slice, else the top neighbour, else
 * (elided fallback) a default; copies the result to mb_mbaff as well.
 */
5084 static void predict_field_decoding_flag(H264Context *h){
5085 MpegEncContext * const s = &h->s;
5086 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// neighbour is only usable if it is in the same slice
5087 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5088 ? s->current_picture.mb_type[mb_xy-1]
5089 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5090 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5092 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5096 * decodes a P_SKIP or B_SKIP macroblock
/**
 * decodes a P_SKIP or B_SKIP macroblock: zeroes the coefficient caches,
 * fills motion caches (direct prediction for B skip, pskip motion
 * prediction with ref 0 for P skip), then writes back motion, mb_type,
 * qscale and slice table entries.
 * NOTE(review): elided chunk — some lines (mb_type init, braces) are
 * missing from this body.
 */
5098 static void decode_mb_skip(H264Context *h){
5099 MpegEncContext * const s = &h->s;
5100 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// skipped MBs have no residual: clear all non-zero-count state
5103 memset(h->non_zero_count[mb_xy], 0, 16);
5104 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5107 mb_type|= MB_TYPE_INTERLACED;
5109 if( h->slice_type == B_TYPE )
5111 // just for fill_caches. pred_direct_motion will set the real mb_type
5112 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5114 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5115 pred_direct_motion(h, &mb_type);
5116 mb_type|= MB_TYPE_SKIP;
// P skip: single 16x16 partition, list-0 reference 0, predicted MV
5121 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5123 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5124 pred_pskip_motion(h, &mx, &my);
5125 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5126 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5129 write_back_motion(h, mb_type);
5130 s->current_picture.mb_type[mb_xy]= mb_type;
5131 s->current_picture.qscale_table[mb_xy]= s->qscale;
5132 h->slice_table[ mb_xy ]= h->slice_num;
5133 h->prev_mb_skipped= 1;
5137 * decodes a macroblock
5138 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5140 static int decode_mb_cavlc(H264Context *h){
5141 MpegEncContext * const s = &h->s;
5142 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5143 int mb_type, partition_count, cbp;
5144 int dct8x8_allowed= h->pps.transform_8x8_mode;
5146 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5148 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5149 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5151 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5152 if(s->mb_skip_run==-1)
5153 s->mb_skip_run= get_ue_golomb(&s->gb);
5155 if (s->mb_skip_run--) {
5156 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5157 if(s->mb_skip_run==0)
5158 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5160 predict_field_decoding_flag(h);
5167 if( (s->mb_y&1) == 0 )
5168 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5170 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5172 h->prev_mb_skipped= 0;
5174 mb_type= get_ue_golomb(&s->gb);
5175 if(h->slice_type == B_TYPE){
5177 partition_count= b_mb_type_info[mb_type].partition_count;
5178 mb_type= b_mb_type_info[mb_type].type;
5181 goto decode_intra_mb;
5183 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5185 partition_count= p_mb_type_info[mb_type].partition_count;
5186 mb_type= p_mb_type_info[mb_type].type;
5189 goto decode_intra_mb;
5192 assert(h->slice_type == I_TYPE);
5195 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5199 cbp= i_mb_type_info[mb_type].cbp;
5200 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5201 mb_type= i_mb_type_info[mb_type].type;
5205 mb_type |= MB_TYPE_INTERLACED;
5207 h->slice_table[ mb_xy ]= h->slice_num;
5209 if(IS_INTRA_PCM(mb_type)){
5212 // we assume these blocks are very rare so we dont optimize it
5213 align_get_bits(&s->gb);
5215 // The pixels are stored in the same order as levels in h->mb array.
5216 for(y=0; y<16; y++){
5217 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5218 for(x=0; x<16; x++){
5219 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5220 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5224 const int index= 256 + 4*(y&3) + 32*(y>>2);
5226 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5227 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5231 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5233 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5234 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5238 // In deblocking, the quantizer is 0
5239 s->current_picture.qscale_table[mb_xy]= 0;
5240 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5241 // All coeffs are present
5242 memset(h->non_zero_count[mb_xy], 16, 16);
5244 s->current_picture.mb_type[mb_xy]= mb_type;
5249 h->ref_count[0] <<= 1;
5250 h->ref_count[1] <<= 1;
5253 fill_caches(h, mb_type, 0);
5256 if(IS_INTRA(mb_type)){
5257 // init_top_left_availability(h);
5258 if(IS_INTRA4x4(mb_type)){
5261 if(dct8x8_allowed && get_bits1(&s->gb)){
5262 mb_type |= MB_TYPE_8x8DCT;
5266 // fill_intra4x4_pred_table(h);
5267 for(i=0; i<16; i+=di){
5268 int mode= pred_intra_mode(h, i);
5270 if(!get_bits1(&s->gb)){
5271 const int rem_mode= get_bits(&s->gb, 3);
5272 mode = rem_mode + (rem_mode >= mode);
5276 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5278 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5280 write_back_intra_pred_mode(h);
5281 if( check_intra4x4_pred_mode(h) < 0)
5284 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5285 if(h->intra16x16_pred_mode < 0)
5288 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5290 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5291 if(h->chroma_pred_mode < 0)
5293 }else if(partition_count==4){
5294 int i, j, sub_partition_count[4], list, ref[2][4];
5296 if(h->slice_type == B_TYPE){
5298 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5299 if(h->sub_mb_type[i] >=13){
5300 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5303 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5304 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5306 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5307 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5308 pred_direct_motion(h, &mb_type);
5309 h->ref_cache[0][scan8[4]] =
5310 h->ref_cache[1][scan8[4]] =
5311 h->ref_cache[0][scan8[12]] =
5312 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5315 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5317 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5318 if(h->sub_mb_type[i] >=4){
5319 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5322 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5323 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5327 for(list=0; list<2; list++){
5328 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5329 if(ref_count == 0) continue;
5331 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5332 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5333 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5342 dct8x8_allowed = get_dct8x8_allowed(h);
5344 for(list=0; list<2; list++){
5345 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5346 if(ref_count == 0) continue;
5349 if(IS_DIRECT(h->sub_mb_type[i])) {
5350 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5353 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5354 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5356 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5357 const int sub_mb_type= h->sub_mb_type[i];
5358 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5359 for(j=0; j<sub_partition_count[i]; j++){
5361 const int index= 4*i + block_width*j;
5362 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5363 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5364 mx += get_se_golomb(&s->gb);
5365 my += get_se_golomb(&s->gb);
5366 tprintf("final mv:%d %d\n", mx, my);
5368 if(IS_SUB_8X8(sub_mb_type)){
5369 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5370 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5371 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5372 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5373 }else if(IS_SUB_8X4(sub_mb_type)){
5374 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5375 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5376 }else if(IS_SUB_4X8(sub_mb_type)){
5377 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5378 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5380 assert(IS_SUB_4X4(sub_mb_type));
5381 mv_cache[ 0 ][0]= mx;
5382 mv_cache[ 0 ][1]= my;
5386 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5392 }else if(IS_DIRECT(mb_type)){
5393 pred_direct_motion(h, &mb_type);
5394 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5396 int list, mx, my, i;
5397 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5398 if(IS_16X16(mb_type)){
5399 for(list=0; list<2; list++){
5400 if(h->ref_count[list]>0){
5401 if(IS_DIR(mb_type, 0, list)){
5402 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5403 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5405 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5408 for(list=0; list<2; list++){
5409 if(IS_DIR(mb_type, 0, list)){
5410 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5411 mx += get_se_golomb(&s->gb);
5412 my += get_se_golomb(&s->gb);
5413 tprintf("final mv:%d %d\n", mx, my);
5415 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5417 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5420 else if(IS_16X8(mb_type)){
5421 for(list=0; list<2; list++){
5422 if(h->ref_count[list]>0){
5424 if(IS_DIR(mb_type, i, list)){
5425 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5426 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5428 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5432 for(list=0; list<2; list++){
5434 if(IS_DIR(mb_type, i, list)){
5435 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5436 mx += get_se_golomb(&s->gb);
5437 my += get_se_golomb(&s->gb);
5438 tprintf("final mv:%d %d\n", mx, my);
5440 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5442 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5446 assert(IS_8X16(mb_type));
5447 for(list=0; list<2; list++){
5448 if(h->ref_count[list]>0){
5450 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5451 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5452 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5454 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5458 for(list=0; list<2; list++){
5460 if(IS_DIR(mb_type, i, list)){
5461 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5462 mx += get_se_golomb(&s->gb);
5463 my += get_se_golomb(&s->gb);
5464 tprintf("final mv:%d %d\n", mx, my);
5466 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5468 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5474 if(IS_INTER(mb_type))
5475 write_back_motion(h, mb_type);
5477 if(!IS_INTRA16x16(mb_type)){
5478 cbp= get_ue_golomb(&s->gb);
5480 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5484 if(IS_INTRA4x4(mb_type))
5485 cbp= golomb_to_intra4x4_cbp[cbp];
5487 cbp= golomb_to_inter_cbp[cbp];
5491 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5492 if(get_bits1(&s->gb))
5493 mb_type |= MB_TYPE_8x8DCT;
5495 s->current_picture.mb_type[mb_xy]= mb_type;
5497 if(cbp || IS_INTRA16x16(mb_type)){
5498 int i8x8, i4x4, chroma_idx;
5499 int chroma_qp, dquant;
5500 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5501 const uint8_t *scan, *scan8x8, *dc_scan;
5503 // fill_non_zero_count_cache(h);
5505 if(IS_INTERLACED(mb_type)){
5506 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5507 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5508 dc_scan= luma_dc_field_scan;
5510 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5511 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5512 dc_scan= luma_dc_zigzag_scan;
5515 dquant= get_se_golomb(&s->gb);
5517 if( dquant > 25 || dquant < -26 ){
5518 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5522 s->qscale += dquant;
5523 if(((unsigned)s->qscale) > 51){
5524 if(s->qscale<0) s->qscale+= 52;
5525 else s->qscale-= 52;
5528 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5529 if(IS_INTRA16x16(mb_type)){
5530 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5531 return -1; //FIXME continue if partitioned and other return -1 too
5534 assert((cbp&15) == 0 || (cbp&15) == 15);
5537 for(i8x8=0; i8x8<4; i8x8++){
5538 for(i4x4=0; i4x4<4; i4x4++){
5539 const int index= i4x4 + 4*i8x8;
5540 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5546 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5549 for(i8x8=0; i8x8<4; i8x8++){
5550 if(cbp & (1<<i8x8)){
5551 if(IS_8x8DCT(mb_type)){
5552 DCTELEM *buf = &h->mb[64*i8x8];
5554 for(i4x4=0; i4x4<4; i4x4++){
5555 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5556 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5559 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5560 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5562 for(i4x4=0; i4x4<4; i4x4++){
5563 const int index= i4x4 + 4*i8x8;
5565 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5571 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5572 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5578 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5579 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5585 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5586 for(i4x4=0; i4x4<4; i4x4++){
5587 const int index= 16 + 4*chroma_idx + i4x4;
5588 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5594 uint8_t * const nnz= &h->non_zero_count_cache[0];
5595 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5596 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5599 uint8_t * const nnz= &h->non_zero_count_cache[0];
5600 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5601 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5602 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5604 s->current_picture.qscale_table[mb_xy]= s->qscale;
5605 write_back_non_zero_count(h);
5608 h->ref_count[0] >>= 1;
5609 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (MBAFF) with CABAC.
 * Context (0..2) is built from whether the left and top macroblock pairs,
 * belonging to the same slice, are themselves field-coded.
 * NOTE(review): this excerpt elides some original lines (the ctx increments
 * and closing braces of the two ifs are not visible here).
 */
5615 static int decode_cabac_field_decoding_flag(H264Context *h) {
5616 MpegEncContext * const s = &h->s;
5617 const int mb_x = s->mb_x;
/* top MB of the current macroblock pair */
5618 const int mb_y = s->mb_y & ~1;
5619 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
/* address of the MB pair directly above */
5620 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5622 unsigned int ctx = 0;
/* left neighbour in same slice and interlaced -> contributes to ctx */
5624 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
/* top neighbour in same slice and interlaced -> contributes to ctx */
5627 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
/* cabac_state[70..72] are the mb_field_decoding_flag contexts */
5631 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base base index into cabac_state for this slice type's intra tree
 * @param intra_slice nonzero in I slices (neighbour-dependent first context)
 * @return 0 for I_4x4, 25 for I_PCM, 1..24 for the I_16x16 variants
 * NOTE(review): excerpt — some lines (ctx increments, final return) are elided.
 */
5634 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5635 uint8_t *state= &h->cabac_state[ctx_base];
5639 MpegEncContext * const s = &h->s;
5640 const int mba_xy = h->left_mb_xy[0];
5641 const int mbb_xy = h->top_mb_xy;
/* in I slices, the first bin's context depends on whether the
 * left/top neighbours are non-I4x4 intra macroblocks */
5643 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5645 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5647 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5648 return 0; /* I4x4 */
/* non-intra slices use a fixed context for the first bin */
5651 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5652 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM from I_16x16 */
5655 if( get_cabac_terminate( &h->cabac ) )
5656 return 25; /* PCM */
/* remaining bins compose the I_16x16 type index:
 * +12 if luma cbp != 0, +4/+8 for chroma cbp, +0..3 pred mode */
5658 mb_type = 1; /* I16x16 */
5659 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5660 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5661 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5662 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5663 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * I slices delegate straight to the intra decoder; P/B slices decode
 * the inter type tree first and fall back to intra with an offset.
 * NOTE(review): excerpt — several lines (closing braces, SI/SP return
 * path) are elided from this view.
 */
5667 static int decode_cabac_mb_type( H264Context *h ) {
5668 MpegEncContext * const s = &h->s;
5670 if( h->slice_type == I_TYPE ) {
5671 return decode_cabac_intra_mb_type(h, 3, 1);
5672 } else if( h->slice_type == P_TYPE ) {
/* cabac_state[14]: inter(0) vs intra(1) selector for P slices */
5673 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5675 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5676 /* P_L0_D16x16, P_8x8 */
5677 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5679 /* P_L0_D8x16, P_L0_D16x8 */
5680 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
/* intra types in P slices start at index 5 */
5683 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5685 } else if( h->slice_type == B_TYPE ) {
5686 const int mba_xy = h->left_mb_xy[0];
5687 const int mbb_xy = h->top_mb_xy;
/* ctx for the first bin: neighbours that are not B_Direct */
5691 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5693 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5696 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5697 return 0; /* B_Direct_16x16 */
5699 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5700 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit fixed-length suffix selects among the remaining B types */
5703 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5704 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5705 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5706 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5708 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5709 else if( bits == 13 ) {
/* intra types in B slices start at index 23 */
5710 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5711 } else if( bits == 14 )
5712 return 11; /* B_L1_L0_8x16 */
5713 else if( bits == 15 )
5714 return 22; /* B_8x8 */
/* one extra bin refines the 8..12 range */
5716 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5717 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5719 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * Context (0..2) counts left/top neighbours in the same slice that are
 * NOT skipped; B slices use a separate context group (offset +13 via
 * the ctx adjustment, states 11+ctx / 24+ctx).
 * NOTE(review): excerpt — variable declarations and some brace lines
 * are elided; MBAFF neighbour selection below picks the field/frame
 * neighbour that matches the current MB's coding mode.
 */
5724 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5725 MpegEncContext * const s = &h->s;
5729 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5730 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5733 && h->slice_table[mba_xy] == h->slice_num
5734 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5735 mba_xy += s->mb_stride;
5737 mbb_xy = mb_xy - s->mb_stride;
5739 && h->slice_table[mbb_xy] == h->slice_num
5740 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5741 mbb_xy -= s->mb_stride;
5743 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5745 int mb_xy = mb_x + mb_y*s->mb_stride;
5747 mbb_xy = mb_xy - s->mb_stride;
/* each non-skipped same-slice neighbour bumps the context */
5750 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5752 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5755 if( h->slice_type == B_TYPE )
5757 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one intra4x4 prediction mode with CABAC.
 * First bin (state 68) selects "use predicted mode"; otherwise a 3-bit
 * remaining-mode value is read (state 69) and remapped so it skips the
 * predicted mode, matching rem_intra4x4_pred_mode semantics.
 * NOTE(review): excerpt — the early-return and final return lines are elided.
 */
5760 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5763 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* rem mode: 3 bins, LSB first */
5766 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5767 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5768 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes stay reachable */
5770 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC (truncated unary, max 3).
 * Context of the first bin depends on the neighbours' stored chroma
 * prediction modes; subsequent bins use a fixed context (64+3).
 * NOTE(review): excerpt — ctx increments and the return statements after
 * each bin are elided from this view.
 */
5776 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5777 const int mba_xy = h->left_mb_xy[0];
5778 const int mbb_xy = h->top_mb_xy;
5782 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5783 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5786 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5789 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5792 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5794 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4-block scan index (0..15) to its x/y position
 * within the macroblock, and back (x,y -> scan index).
 * NOTE(review): excerpt — the row initializers of block_idx_xy and the
 * closing braces are elided from this view. */
5800 static const uint8_t block_idx_x[16] = {
5801 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5803 static const uint8_t block_idx_y[16] = {
5804 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5806 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC.
 * For each 8x8 block the context is derived from the cbp bits of the
 * spatially adjacent 8x8 blocks (in this MB or in the left/top MB).
 * NOTE(review): excerpt — declarations, several brace lines and the
 * cbp accumulation/return are elided from this view.
 */
5813 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5818 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5820 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5823 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* position of this 8x8 block's top-left 4x4 block */
5828 x = block_idx_x[4*i8x8];
5829 y = block_idx_y[4*i8x8];
5833 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5834 cbp_a = h->left_cbp;
5835 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5841 /* No need to test for skip as we put 0 for skip block */
5842 /* No need to test for IPCM as we put 1 for IPCM block */
/* left neighbour 8x8 block coded? (wraps into left MB via &0x03) */
5844 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5845 if( ((cbp_a >> i8x8a)&0x01) == 0 )
/* top neighbour 8x8 block coded? */
5850 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5851 if( ((cbp_b >> i8x8b)&0x01) == 0 )
/* cabac_state[73..76]: cbp_luma contexts */
5855 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma part of coded_block_pattern with CABAC.
 * Returns 0 (no chroma coeffs), 1 (DC only) or 2 (DC+AC); two bins,
 * each with a context based on the neighbours' chroma cbp.
 * NOTE(review): excerpt — declarations/ctx reset lines are elided.
 */
5861 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
/* neighbours' chroma cbp, bits 4..5 of their stored cbp */
5865 cbp_a = (h->left_cbp>>4)&0x03;
5866 cbp_b = (h-> top_cbp>>4)&0x03;
5869 if( cbp_a > 0 ) ctx++;
5870 if( cbp_b > 0 ) ctx += 2;
5871 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: DC-only vs DC+AC, ctx from neighbours having AC (==2) */
5875 if( cbp_a == 2 ) ctx++;
5876 if( cbp_b == 2 ) ctx += 2;
5877 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC (unary binarization, states 60..63).
 * The first bin's context depends on whether the previous MB had a
 * nonzero qp delta; the unary value is then mapped to a signed delta
 * (even -> positive, odd -> negative).
 * NOTE(review): excerpt — declarations, ctx updates inside the loop and
 * the positive-return path are elided from this view.
 */
5879 static int decode_cabac_mb_dqp( H264Context *h) {
5880 MpegEncContext * const s = &h->s;
/* previous MB in decode order (wraps to end of previous row) */
5886 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5888 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5890 if( h->last_qscale_diff != 0 )
5893 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5899 if(val > 102) //prevent infinite loop
/* odd unary values map to negative deltas */
5906 return -(val + 1)/2;
/**
 * Decodes sub_mb_type for a P-slice 8x8 partition with CABAC
 * (states 21..23). Returns an index into p_sub_mb_type_info.
 * NOTE(review): excerpt — the return statements after each bin are
 * elided from this view.
 */
5908 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5909 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5911 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5913 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes sub_mb_type for a B-slice 8x8 partition with CABAC
 * (states 36..39). Returns an index into b_sub_mb_type_info
 * (0 = B_Direct_8x8, then the L0/L1/Bi sub-partition shapes).
 * NOTE(review): excerpt — the 'type' base assignments and final return
 * are elided from this view.
 */
5917 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5919 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5920 return 0; /* B_Direct_8x8 */
5921 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5922 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5924 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5925 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5926 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two suffix bins refine the remaining types */
5929 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5930 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context (states
 * 399..401) is selected by how many neighbours use the 8x8 transform,
 * precomputed in h->neighbor_transform_size.
 */
5934 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5935 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of reference list 'list' with CABAC
 * (unary binarization, states 54+). The context comes from the cached
 * reference indices of the left/top neighbouring blocks; in B slices,
 * direct-predicted neighbours are excluded from the context.
 * NOTE(review): excerpt — ctx derivation lines, the loop body and the
 * final return are elided from this view.
 */
5938 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5939 int refa = h->ref_cache[list][scan8[n] - 1];
5940 int refb = h->ref_cache[list][scan8[n] - 8];
5944 if( h->slice_type == B_TYPE) {
/* neighbours coded as direct don't count toward the context */
5945 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5947 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5956 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one motion-vector-difference component (l: 0=x, 1=y) with
 * CABAC: UEG3 binarization — unary prefix with adaptive contexts,
 * Exp-Golomb (k=3) bypass suffix, then a bypass sign bit.
 * The first-bin context is chosen from the summed |mvd| of the left
 * and top neighbours (thresholds at 2 and 32).
 * NOTE(review): excerpt — ctx assignments, suffix assembly and the
 * positive-return path are elided from this view.
 */
5966 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5967 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5968 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5969 int ctxbase = (l == 0) ? 40 : 47;
5974 else if( amvd > 32 )
5979 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
5984 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* Exp-Golomb escape: count suffix length in bypass mode */
5992 while( get_cabac_bypass( &h->cabac ) ) {
5997 if( get_cabac_bypass( &h->cabac ) )
/* sign bit (bypass) */
6001 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Computes the coded_block_flag context for block category 'cat'
 * (0: luma DC, 1/2: luma AC/4x4, 3: chroma DC, 4: chroma AC) and block
 * index 'idx'. The left/top non-zero indicators (nza/nzb) come either
 * from the cbp_table-derived left/top cbp bits or from the
 * non_zero_count cache, and are folded into ctx (0..3), then offset by
 * 4*cat into the cabac_state[85+...] context group.
 * NOTE(review): excerpt — the nza/nzb -> ctx folding lines are elided.
 */
6005 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC: bit 0x100 of the stored cbp tracks the DC coded flag */
6010 nza = h->left_cbp&0x100;
6011 nzb = h-> top_cbp&0x100;
6012 } else if( cat == 1 || cat == 2 ) {
6013 nza = h->non_zero_count_cache[scan8[idx] - 1];
6014 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6015 } else if( cat == 3 ) {
/* chroma DC flags live in cbp bits 6..7 */
6016 nza = (h->left_cbp>>(6+idx))&0x01;
6017 nzb = (h-> top_cbp>>(6+idx))&0x01;
6020 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6021 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6030 return ctx + 4 * cat;
/**
 * Decodes one residual block with CABAC: coded_block_flag, the
 * significance/last-coefficient maps, then the coefficient levels
 * (largest scan position first) with optional dequantization via qmul.
 * @param block  output coefficient buffer (scantable order applied)
 * @param cat    block category, see table below
 * @param max_coeff 4 (chroma DC), 15 (AC), 16 (4x4/8x8 groups)
 * @return 0 on success (per the visible early-return path; the tail of
 *         the function is elided from this excerpt)
 * NOTE(review): excerpt — numerous lines (array closers, declarations,
 * loop/brace closers) are elided throughout this view.
 */
6033 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6034 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context offsets per [MB_FIELD][cat]; last entry (402/436 etc.) is
 * the 8x8 transform category */
6035 static const int significant_coeff_flag_offset[2][6] = {
6036 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6037 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6039 static const int last_coeff_flag_offset[2][6] = {
6040 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6041 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6043 static const int coeff_abs_level_m1_offset[6] = {
6044 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position context maps for the 8x8 significance flags
 * (frame vs field variant) */
6046 static const int significant_coeff_flag_offset_8x8[2][63] = {
6047 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6048 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6049 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6050 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6051 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6052 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6053 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6054 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6056 static const int last_coeff_flag_offset_8x8[63] = {
6057 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6058 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6059 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6060 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6066 int coeff_count = 0;
6069 int abslevelgt1 = 0;
6071 uint8_t *significant_coeff_ctx_base;
6072 uint8_t *last_coeff_ctx_base;
6073 uint8_t *abs_level_m1_ctx_base;
6075 /* cat: 0-> DC 16x16 n = 0
6076 * 1-> AC 16x16 n = luma4x4idx
6077 * 2-> Luma4x4 n = luma4x4idx
6078 * 3-> DC Chroma n = iCbCr
6079 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6080 * 5-> Luma8x8 n = 4 * luma8x8idx
6083 /* read coded block flag */
6085 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* block not coded: record a zero in the nnz cache and bail out */
6086 if( cat == 1 || cat == 2 )
6087 h->non_zero_count_cache[scan8[n]] = 0;
6089 h->non_zero_count_cache[scan8[16+n]] = 0;
6095 significant_coeff_ctx_base = h->cabac_state
6096 + significant_coeff_flag_offset[MB_FIELD][cat];
6097 last_coeff_ctx_base = h->cabac_state
6098 + last_coeff_flag_offset[MB_FIELD][cat];
6099 abs_level_m1_ctx_base = h->cabac_state
6100 + coeff_abs_level_m1_offset[cat];
/* shared significance/last-flag scan loop; records the scan positions
 * of nonzero coefficients into index[] */
6103 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6104 for(last= 0; last < coefs; last++) { \
6105 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6106 if( get_cabac( &h->cabac, sig_ctx )) { \
6107 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6108 index[coeff_count++] = last; \
6109 if( get_cabac( &h->cabac, last_ctx ) ) { \
/* 8x8 blocks use per-position context maps; others use the position */
6115 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6116 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6118 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* last scan position has no significance flag — implicitly coded */
6120 if( last == max_coeff -1 ) {
6121 index[coeff_count++] = last;
6123 assert(coeff_count > 0);
/* record the coded-block info for neighbour context derivation */
6126 h->cbp_table[mb_xy] |= 0x100;
6127 else if( cat == 1 || cat == 2 )
6128 h->non_zero_count_cache[scan8[n]] = coeff_count;
6130 h->cbp_table[mb_xy] |= 0x40 << n;
6132 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6135 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels are decoded from the highest scan position downwards */
6138 for( i = coeff_count - 1; i >= 0; i-- ) {
/* ctx 1..4 while no level >1 seen yet, else ctx 0 */
6139 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6140 int j= scantable[index[i]];
6142 if( get_cabac( &h->cabac, ctx ) == 0 ) {
/* |level| == 1; qmul==NULL means no dequant (e.g. chroma DC) */
6144 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6147 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6148 else block[j] = ( qmul[j] + 32) >> 6;
/* |level| > 1: unary up to 14, then Exp-Golomb(k=0) bypass escape */
6154 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6155 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6159 if( coeff_abs >= 15 ) {
6161 while( get_cabac_bypass( &h->cabac ) ) {
6162 coeff_abs += 1 << j;
6167 if( get_cabac_bypass( &h->cabac ) )
6168 coeff_abs += 1 << j ;
/* sign bit, then store (dequantized if qmul given) */
6173 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6174 else block[j] = coeff_abs;
6176 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6177 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/**
 * Fills h->top_mb_xy and h->left_mb_xy[0] with the addresses of the
 * top and left neighbouring macroblocks. In MBAFF frames the neighbour
 * address is adjusted when the current and neighbouring MB pairs differ
 * in frame/field coding mode.
 * NOTE(review): excerpt — the MBAFF guard (FRAME_MBAFF branch opener)
 * and some brace/adjustment lines are elided from this view.
 */
6186 static void inline compute_mb_neighbors(H264Context *h)
6188 MpegEncContext * const s = &h->s;
6189 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6190 h->top_mb_xy = mb_xy - s->mb_stride;
6191 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: work at macroblock-pair granularity */
6193 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6194 const int top_pair_xy = pair_xy - s->mb_stride;
6195 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6196 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6197 const int curr_mb_frame_flag = !MB_FIELD;
6198 const int bottom = (s->mb_y & 1);
6200 ? !curr_mb_frame_flag // bottom macroblock
6201 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6203 h->top_mb_xy -= s->mb_stride;
/* frame/field mismatch: left neighbour is the top MB of the left pair */
6205 if (left_mb_frame_flag != curr_mb_frame_flag) {
6206 h->left_mb_xy[0] = pair_xy - 1;
6213 * decodes a macroblock
6214 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6216 static int decode_mb_cabac(H264Context *h) {
6217 MpegEncContext * const s = &h->s;
6218 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6219 int mb_type, partition_count, cbp = 0;
6220 int dct8x8_allowed= h->pps.transform_8x8_mode;
6222 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6224 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6225 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6227 /* a skipped mb needs the aff flag from the following mb */
6228 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6229 predict_field_decoding_flag(h);
6230 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6231 skip = h->next_mb_skipped;
6233 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6234 /* read skip flags */
6236 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6237 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6238 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6239 if(h->next_mb_skipped)
6240 predict_field_decoding_flag(h);
6242 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6247 h->cbp_table[mb_xy] = 0;
6248 h->chroma_pred_mode_table[mb_xy] = 0;
6249 h->last_qscale_diff = 0;
6256 if( (s->mb_y&1) == 0 )
6258 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6260 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6262 h->prev_mb_skipped = 0;
6264 compute_mb_neighbors(h);
6265 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6266 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6270 if( h->slice_type == B_TYPE ) {
6272 partition_count= b_mb_type_info[mb_type].partition_count;
6273 mb_type= b_mb_type_info[mb_type].type;
6276 goto decode_intra_mb;
6278 } else if( h->slice_type == P_TYPE ) {
6280 partition_count= p_mb_type_info[mb_type].partition_count;
6281 mb_type= p_mb_type_info[mb_type].type;
6284 goto decode_intra_mb;
6287 assert(h->slice_type == I_TYPE);
6289 partition_count = 0;
6290 cbp= i_mb_type_info[mb_type].cbp;
6291 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6292 mb_type= i_mb_type_info[mb_type].type;
6295 mb_type |= MB_TYPE_INTERLACED;
6297 h->slice_table[ mb_xy ]= h->slice_num;
6299 if(IS_INTRA_PCM(mb_type)) {
6303 // We assume these blocks are very rare so we dont optimize it.
6304 // FIXME The two following lines get the bitstream position in the cabac
6305 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6306 ptr= h->cabac.bytestream;
6307 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6309 // The pixels are stored in the same order as levels in h->mb array.
6310 for(y=0; y<16; y++){
6311 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6312 for(x=0; x<16; x++){
6313 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6314 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6318 const int index= 256 + 4*(y&3) + 32*(y>>2);
6320 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6321 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6325 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6327 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6328 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6332 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6334 // All blocks are present
6335 h->cbp_table[mb_xy] = 0x1ef;
6336 h->chroma_pred_mode_table[mb_xy] = 0;
6337 // In deblocking, the quantizer is 0
6338 s->current_picture.qscale_table[mb_xy]= 0;
6339 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6340 // All coeffs are present
6341 memset(h->non_zero_count[mb_xy], 16, 16);
6342 s->current_picture.mb_type[mb_xy]= mb_type;
6347 h->ref_count[0] <<= 1;
6348 h->ref_count[1] <<= 1;
6351 fill_caches(h, mb_type, 0);
6353 if( IS_INTRA( mb_type ) ) {
6355 if( IS_INTRA4x4( mb_type ) ) {
6356 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6357 mb_type |= MB_TYPE_8x8DCT;
6358 for( i = 0; i < 16; i+=4 ) {
6359 int pred = pred_intra_mode( h, i );
6360 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6361 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6364 for( i = 0; i < 16; i++ ) {
6365 int pred = pred_intra_mode( h, i );
6366 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6368 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6371 write_back_intra_pred_mode(h);
6372 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6374 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6375 if( h->intra16x16_pred_mode < 0 ) return -1;
6377 h->chroma_pred_mode_table[mb_xy] =
6378 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6380 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6381 if( h->chroma_pred_mode < 0 ) return -1;
6382 } else if( partition_count == 4 ) {
6383 int i, j, sub_partition_count[4], list, ref[2][4];
6385 if( h->slice_type == B_TYPE ) {
6386 for( i = 0; i < 4; i++ ) {
6387 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6388 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6389 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6391 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6392 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6393 pred_direct_motion(h, &mb_type);
6394 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6395 for( i = 0; i < 4; i++ )
6396 if( IS_DIRECT(h->sub_mb_type[i]) )
6397 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6401 for( i = 0; i < 4; i++ ) {
6402 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6403 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6404 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6408 for( list = 0; list < 2; list++ ) {
6409 if( h->ref_count[list] > 0 ) {
6410 for( i = 0; i < 4; i++ ) {
6411 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6412 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6413 if( h->ref_count[list] > 1 )
6414 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6420 h->ref_cache[list][ scan8[4*i]+1 ]=
6421 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6427 dct8x8_allowed = get_dct8x8_allowed(h);
6429 for(list=0; list<2; list++){
6431 if(IS_DIRECT(h->sub_mb_type[i])){
6432 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6435 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6437 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6438 const int sub_mb_type= h->sub_mb_type[i];
6439 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6440 for(j=0; j<sub_partition_count[i]; j++){
6443 const int index= 4*i + block_width*j;
6444 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6445 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6446 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6448 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6449 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6450 tprintf("final mv:%d %d\n", mx, my);
6452 if(IS_SUB_8X8(sub_mb_type)){
6453 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6454 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6455 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6456 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6458 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6459 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6460 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6461 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6462 }else if(IS_SUB_8X4(sub_mb_type)){
6463 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6464 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6466 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6467 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6468 }else if(IS_SUB_4X8(sub_mb_type)){
6469 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6470 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6472 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6473 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6475 assert(IS_SUB_4X4(sub_mb_type));
6476 mv_cache[ 0 ][0]= mx;
6477 mv_cache[ 0 ][1]= my;
6479 mvd_cache[ 0 ][0]= mx - mpx;
6480 mvd_cache[ 0 ][1]= my - mpy;
6484 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6485 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6486 p[0] = p[1] = p[8] = p[9] = 0;
6487 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6491 } else if( IS_DIRECT(mb_type) ) {
6492 pred_direct_motion(h, &mb_type);
6493 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6494 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6495 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6497 int list, mx, my, i, mpx, mpy;
6498 if(IS_16X16(mb_type)){
6499 for(list=0; list<2; list++){
6500 if(IS_DIR(mb_type, 0, list)){
6501 if(h->ref_count[list] > 0 ){
6502 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6503 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6506 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6508 for(list=0; list<2; list++){
6509 if(IS_DIR(mb_type, 0, list)){
6510 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6512 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6513 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6514 tprintf("final mv:%d %d\n", mx, my);
6516 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6517 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6522 else if(IS_16X8(mb_type)){
6523 for(list=0; list<2; list++){
6524 if(h->ref_count[list]>0){
6526 if(IS_DIR(mb_type, i, list)){
6527 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6528 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6530 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6534 for(list=0; list<2; list++){
6536 if(IS_DIR(mb_type, i, list)){
6537 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6538 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6539 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6540 tprintf("final mv:%d %d\n", mx, my);
6542 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6543 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6545 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6546 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6551 assert(IS_8X16(mb_type));
6552 for(list=0; list<2; list++){
6553 if(h->ref_count[list]>0){
6555 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6556 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6557 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6559 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6563 for(list=0; list<2; list++){
6565 if(IS_DIR(mb_type, i, list)){
6566 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6567 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6568 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6570 tprintf("final mv:%d %d\n", mx, my);
6571 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6572 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6574 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6575 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6582 if( IS_INTER( mb_type ) ) {
6583 h->chroma_pred_mode_table[mb_xy] = 0;
6584 write_back_motion( h, mb_type );
6587 if( !IS_INTRA16x16( mb_type ) ) {
6588 cbp = decode_cabac_mb_cbp_luma( h );
6589 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6592 h->cbp_table[mb_xy] = h->cbp = cbp;
6594 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6595 if( decode_cabac_mb_transform_size( h ) )
6596 mb_type |= MB_TYPE_8x8DCT;
6598 s->current_picture.mb_type[mb_xy]= mb_type;
6600 if( cbp || IS_INTRA16x16( mb_type ) ) {
6601 const uint8_t *scan, *scan8x8, *dc_scan;
6604 if(IS_INTERLACED(mb_type)){
6605 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6606 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6607 dc_scan= luma_dc_field_scan;
6609 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6610 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6611 dc_scan= luma_dc_zigzag_scan;
6614 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6615 if( dqp == INT_MIN ){
6616 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6620 if(((unsigned)s->qscale) > 51){
6621 if(s->qscale<0) s->qscale+= 52;
6622 else s->qscale-= 52;
6624 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6626 if( IS_INTRA16x16( mb_type ) ) {
6628 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6629 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6632 for( i = 0; i < 16; i++ ) {
6633 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6634 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6638 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6642 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6643 if( cbp & (1<<i8x8) ) {
6644 if( IS_8x8DCT(mb_type) ) {
6645 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6646 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6649 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6650 const int index = 4*i8x8 + i4x4;
6651 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6652 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6656 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6657 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6664 for( c = 0; c < 2; c++ ) {
6665 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6666 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6673 for( c = 0; c < 2; c++ ) {
6674 for( i = 0; i < 4; i++ ) {
6675 const int index = 16 + 4 * c + i;
6676 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6677 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6682 uint8_t * const nnz= &h->non_zero_count_cache[0];
6683 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6684 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6687 uint8_t * const nnz= &h->non_zero_count_cache[0];
6688 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6689 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6690 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6691 h->last_qscale_diff = 0;
6694 s->current_picture.qscale_table[mb_xy]= s->qscale;
6695 write_back_non_zero_count(h);
6698 h->ref_count[0] >>= 1;
6699 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge. `pix` points at the first pixel to the
 * right of the edge; the edge is 16 rows tall (advance by `stride` per row).
 * bS[4] holds the boundary strength of each 4-row segment and `qp` the
 * averaged luma QP of the two sides. bS < 4 uses the normal filter via the
 * dsp hook; the inline code below is the strong (bS == 4, intra-edge) filter.
 * NOTE(review): this excerpt appears to be missing lines (declarations of
 * d/i/tc[], the bS<4 / bS==4 branch scaffolding, closing braces) — comments
 * describe only the visible logic. */
6706 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* alpha/beta thresholds come from QP + slice offsets, clipped to the table range 0..51 */
6708 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6709 const int alpha = alpha_table[index_a];
6710 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* normal filter: per-segment clipping value; -1 marks "do not filter this segment" */
6715 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6716 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6718 /* 16px edge length, because bS=4 is triggered by being at
6719 * the edge of an intra MB, so all 4 bS are the same */
6720 for( d = 0; d < 16; d++ ) {
/* p0..p2 lie left of the edge, q0..q2 right of it */
6721 const int p0 = pix[-1];
6722 const int p1 = pix[-2];
6723 const int p2 = pix[-3];
6725 const int q0 = pix[0];
6726 const int q1 = pix[1];
6727 const int q2 = pix[2];
/* filter only small steps across the edge (blocking artifacts),
 * not large ones that are likely real image content */
6729 if( ABS( p0 - q0 ) < alpha &&
6730 ABS( p1 - p0 ) < beta &&
6731 ABS( q1 - q0 ) < beta ) {
/* extra smoothness test selects the strong 4/5-tap filters
 * (presumably per H.264 spec 8.7 — verify against the standard) */
6733 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6734 if( ABS( p2 - p0 ) < beta)
6736 const int p3 = pix[-4];
/* strong filtering of the p side: rewrite p0, p1, p2 */
6738 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6739 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6740 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side not smooth enough: only 3-tap smooth p0 */
6743 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6745 if( ABS( q2 - q0 ) < beta)
6747 const int q3 = pix[3];
/* strong filtering of the q side: rewrite q0, q1, q2 */
6749 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6750 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6751 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side not smooth enough: only 3-tap smooth q0 */
6754 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge not smooth enough for strong mode: 3-tap smooth p0 and q0 only */
6758 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6759 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6761 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge. Chroma filtering is entirely delegated
 * to dsp hooks: the normal filter when bS < 4, the intra-strong variant
 * otherwise. Note the tc bias differs from luma: here tc is offset by +1
 * with 0 meaning "skip this segment" (luma uses -1 as the skip marker).
 * NOTE(review): loop/branch scaffolding around the tc[] assignment is not
 * visible in this excerpt. */
6767 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* threshold lookups on QP + slice offsets, clipped to the 0..51 table range */
6769 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6770 const int alpha = alpha_table[index_a];
6771 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* +1 bias so that 0 disables a segment */
6776 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6777 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4 (intra edge): strong chroma filter */
6779 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the vertical luma edge between an MBAFF macroblock pair and its
 * left neighbour pair. Because the two sides can have different field
 * parity, there are 8 boundary strengths (bS[8]) and two QPs (qp[2]), and
 * each of the 16 pixel rows is filtered individually with its own bS/QP
 * selection instead of using the 4-row-segment dsp hooks.
 * NOTE(review): this excerpt is missing some lines (variable declarations,
 * MB_FIELD branch scaffolding, closing braces) — comments cover only the
 * visible logic. */
6783 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6785 for( i = 0; i < 16; i++, pix += stride) {
/* map the pixel row to one of the 8 bS entries; exact mapping depends on
 * the field/frame mode of the pair (scaffolding not fully visible here) */
6791 int bS_index = (i >> 1);
6794 bS_index |= (i & 1);
/* bS == 0: this row is not filtered at all */
6797 if( bS[bS_index] == 0 ) {
/* pick which of the two QPs applies to this row: by half (field pair)
 * or by row parity (frame MB against a field pair) */
6801 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6802 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6803 alpha = alpha_table[index_a];
6804 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal (bS < 4) filter: clipped delta applied to p0/q0, optional p1/q1 */
6806 if( bS[bS_index] < 4 ) {
6807 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6808 const int p0 = pix[-1];
6809 const int p1 = pix[-2];
6810 const int p2 = pix[-3];
6811 const int q0 = pix[0];
6812 const int q1 = pix[1];
6813 const int q2 = pix[2];
/* only filter small steps (blocking artifacts), not real edges */
6815 if( ABS( p0 - q0 ) < alpha &&
6816 ABS( p1 - p0 ) < beta &&
6817 ABS( q1 - q0 ) < beta ) {
/* adjust p1/q1 too when the respective side is smooth enough */
6821 if( ABS( p2 - p0 ) < beta ) {
6822 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6825 if( ABS( q2 - q0 ) < beta ) {
6826 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6830 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6831 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6832 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6833 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS == 4) filter path, same structure as filter_mb_edgev */
6836 const int p0 = pix[-1];
6837 const int p1 = pix[-2];
6838 const int p2 = pix[-3];
6840 const int q0 = pix[0];
6841 const int q1 = pix[1];
6842 const int q2 = pix[2];
6844 if( ABS( p0 - q0 ) < alpha &&
6845 ABS( p1 - p0 ) < beta &&
6846 ABS( q1 - q0 ) < beta ) {
6848 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6849 if( ABS( p2 - p0 ) < beta)
6851 const int p3 = pix[-4];
/* strong filtering of p0..p2 */
6853 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6854 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6855 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6858 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6860 if( ABS( q2 - q0 ) < beta)
6862 const int q3 = pix[3];
/* strong filtering of q0..q2 */
6864 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6865 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6866 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6869 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* not smooth enough for strong mode: 3-tap smooth p0/q0 only */
6873 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6874 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6876 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: deblock the vertical chroma
 * edge of an MBAFF pair, one pixel row at a time (8 rows for chroma).
 * bS[8] and qp[2] again come from the two possibly different-parity
 * neighbours. Chroma never touches p1/q1 — only p0/q0 are rewritten.
 * NOTE(review): some scaffolding lines (bS_index computation, declarations,
 * closing braces) are missing from this excerpt. */
6881 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6883 for( i = 0; i < 8; i++, pix += stride) {
/* bS == 0: row not filtered */
6891 if( bS[bS_index] == 0 ) {
/* select one of the two QPs per row; chroma halves at i>>2 in field mode */
6895 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6896 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6897 alpha = alpha_table[index_a];
6898 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal filter: note the chroma +1 tc bias, as in filter_mb_edgecv */
6900 if( bS[bS_index] < 4 ) {
6901 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6902 const int p0 = pix[-1];
6903 const int p1 = pix[-2];
6904 const int q0 = pix[0];
6905 const int q1 = pix[1];
/* only filter plausible blocking artifacts */
6907 if( ABS( p0 - q0 ) < alpha &&
6908 ABS( p1 - p0 ) < beta &&
6909 ABS( q1 - q0 ) < beta ) {
6910 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6912 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6913 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6914 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS == 4) chroma filter: 3-tap smoothing of p0 and q0 */
6917 const int p0 = pix[-1];
6918 const int p1 = pix[-2];
6919 const int q0 = pix[0];
6920 const int q1 = pix[1];
6922 if( ABS( p0 - q0 ) < alpha &&
6923 ABS( p1 - p0 ) < beta &&
6924 ABS( q1 - q0 ) < beta ) {
6926 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6927 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6928 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: same algorithm as filter_mb_edgev but
 * the edge runs horizontally, so samples across the edge are addressed in
 * multiples of `pix_next` (= stride) instead of +/-1. bS[4] are the
 * per-4-column boundary strengths, qp the averaged luma QP.
 * NOTE(review): some lines (d/i/tc[] declarations, branch scaffolding,
 * closing braces) are missing from this excerpt. */
6934 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6936 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6937 const int alpha = alpha_table[index_a];
6938 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* vertical step between the rows on either side of the horizontal edge */
6939 const int pix_next = stride;
/* normal filter via dsp hook; -1 marks an unfiltered segment */
6944 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6945 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6947 /* 16px edge length, see filter_mb_edgev */
6948 for( d = 0; d < 16; d++ ) {
/* p side is above the edge, q side below */
6949 const int p0 = pix[-1*pix_next];
6950 const int p1 = pix[-2*pix_next];
6951 const int p2 = pix[-3*pix_next];
6952 const int q0 = pix[0];
6953 const int q1 = pix[1*pix_next];
6954 const int q2 = pix[2*pix_next];
/* filter only plausible blocking artifacts */
6956 if( ABS( p0 - q0 ) < alpha &&
6957 ABS( p1 - p0 ) < beta &&
6958 ABS( q1 - q0 ) < beta ) {
6960 const int p3 = pix[-4*pix_next];
6961 const int q3 = pix[ 3*pix_next];
/* strong-mode smoothness test (bS == 4 path) */
6963 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6964 if( ABS( p2 - p0 ) < beta) {
/* strong filtering of p0..p2 */
6966 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6967 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6968 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6971 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6973 if( ABS( q2 - q0 ) < beta) {
/* strong filtering of q0..q2 */
6975 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6976 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6977 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6980 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* not smooth enough for strong mode: 3-tap smooth p0/q0 only */
6984 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6985 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6987 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: chroma twin of filter_mb_edgeh,
 * fully delegated to the vertical-direction chroma dsp hooks. As in
 * filter_mb_edgecv, tc carries a +1 bias with 0 meaning "skip segment".
 * NOTE(review): loop/branch scaffolding around the tc[] assignment is not
 * visible in this excerpt. */
6994 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6996 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6997 const int alpha = alpha_table[index_a];
6998 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* +1 bias so that 0 disables a segment */
7003 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7004 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4 (intra edge): strong chroma filter */
7006 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock. Requires a non-border MB and the
 * optimized h264_loop_filter_strength dsp hook; otherwise it falls back to
 * the generic filter_mb(). Intra MBs use fixed bS patterns (4 on the MB
 * border, 3 inside); inter MBs get bS computed by the dsp hook from nnz,
 * refs and motion vectors, then edges are filtered via the FILTER macro.
 * NOTE(review): the tail of this function (after the IS_8x8DCT branch) is
 * missing from this excerpt. */
7010 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7011 MpegEncContext * const s = &h->s;
7013 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* fallback: picture-border MBs or no optimized strength function */
7015 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7016 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
/* the fast path does not handle MBAFF */
7019 assert(!FRAME_MBAFF);
7021 mb_xy = mb_x + mb_y*s->mb_stride;
7022 mb_type = s->current_picture.mb_type[mb_xy];
/* gather luma/chroma QPs of this MB and its left/top neighbours,
 * then average across each edge as the filter QP */
7023 qp = s->current_picture.qscale_table[mb_xy];
7024 qp0 = s->current_picture.qscale_table[mb_xy-1];
7025 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7026 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7027 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7028 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7029 qp0 = (qp + qp0 + 1) >> 1;
7030 qp1 = (qp + qp1 + 1) >> 1;
7031 qpc0 = (qpc + qpc0 + 1) >> 1;
7032 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP threshold the filter cannot change any pixel — skip MB */
7033 qp_thresh = 15 - h->slice_alpha_c0_offset;
7034 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7035 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed strengths, bS=4 on the MB boundary, bS=3 inside */
7038 if( IS_INTRA(mb_type) ) {
7039 int16_t bS4[4] = {4,4,4,4};
7040 int16_t bS3[4] = {3,3,3,3};
/* 8x8 transform: only every other internal luma edge exists */
7041 if( IS_8x8DCT(mb_type) ) {
7042 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7043 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7044 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7045 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7047 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7048 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7049 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7050 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7051 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7052 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7053 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7054 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: MB-border edge plus the single internal edge */
7056 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7057 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7058 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7059 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7060 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7061 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7062 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7063 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge; bSv aliases bS so a whole 4-entry
 * edge can be set/tested as one 64-bit value */
7066 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7067 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with all luma 8x8 blocks coded: internal edges are all bS=2 */
7069 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7071 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the dsp hook which edges cannot have mv-based bS */
7073 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7074 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7075 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7076 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7078 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7079 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7080 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7081 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the corresponding MB-border edge */
7083 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7084 bSv[0][0] = 0x0004000400040004ULL;
7085 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7086 bSv[1][0] = 0x0004000400040004ULL;
/* filter one edge in direction dir (0=vertical, 1=horizontal) if any bS != 0;
 * chroma only exists on even luma edges */
7088 #define FILTER(hv,dir,edge)\
7089 if(bSv[dir][edge]) {\
7090 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7092 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7093 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7099 } else if( IS_8x8DCT(mb_type) ) {
/* Generic (slow-path) deblocking of one macroblock: derives the boundary
 * strength bS for every vertical and horizontal 4x4/8x8 edge from intra
 * status, coded coefficients (nnz), reference frames and motion vector
 * differences, then calls the edge filters. Handles the MBAFF special
 * cases the fast path cannot.
 * NOTE(review): this excerpt has gaps (several declarations, braces and a
 * few statements are missing) — comments annotate only the visible lines. */
7118 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7119 MpegEncContext * const s = &h->s;
7120 const int mb_xy= mb_x + mb_y*s->mb_stride;
7121 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs use a tighter vertical-mv threshold (2 instead of 4 quarter-pels) */
7122 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7123 int first_vertical_edge_done = 0;
7125 /* FIXME: A given frame may occupy more than one position in
7126 * the reference list. So ref2frm should be populated with
7127 * frame numbers, not indices. */
/* maps ref_cache values (offset by +2 so -2/-1 specials index 0/1) to frames */
7128 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7129 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7131 //for sufficiently low qp, filtering wouldn't do anything
7132 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7134 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7135 int qp = s->current_picture.qscale_table[mb_xy];
/* skip the whole MB if this and both edge-averaged neighbour QPs are below threshold */
7137 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7138 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special handling of the left edge when the two MB pairs differ in
 * field/frame coding: */
7144 // left mb is in picture
7145 && h->slice_table[mb_xy-1] != 255
7146 // and current and left pair do not have the same interlaced type
7147 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7148 // and left mb is in the same slice if deblocking_filter == 2
7149 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7150 /* First vertical edge is different in MBAFF frames
7151 * There are 8 different bS to compute and 2 different Qp
7153 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7154 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7158 int mb_qp, mbn0_qp, mbn1_qp;
7160 first_vertical_edge_done = 1;
/* intra MB: all 8 strengths are 4 */
7162 if( IS_INTRA(mb_type) )
7163 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7165 for( i = 0; i < 8; i++ ) {
/* pick which left-pair MB faces row i, depending on our field mode */
7166 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7168 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
/* coded coefficients on either side raise bS (to 2) */
7170 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7171 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7172 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP averages, one per left-pair MB, for luma and chroma */
7179 mb_qp = s->current_picture.qscale_table[mb_xy];
7180 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7181 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7182 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7183 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7184 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7185 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7186 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7187 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7190 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7191 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* filter the left edge with the per-row MBAFF variants */
7192 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7193 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7194 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7196 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7197 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbour MB across edge 0 (left for dir=0, top for dir=1) */
7200 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7201 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 means the neighbour is outside the picture: skip edge 0 */
7202 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* skipped 16x16 MBs have no internal edges to filter */
7204 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7205 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7206 // how often to recheck mv-based bS when iterating between edges
7207 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7208 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7209 // how often to recheck mv-based bS when iterating along each edge
7210 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* left edge already filtered by the MBAFF special case above */
7212 if (first_vertical_edge_done) {
7214 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
7217 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* frame MB above a field pair: the top edge must be filtered twice,
 * once per field, with doubled line strides */
7220 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7221 && !IS_INTERLACED(mb_type)
7222 && IS_INTERLACED(mbm_type)
7224 // This is a special case in the norm where the filtering must
7225 // be done twice (one each of the field) even if we are in a
7226 // frame macroblock.
7228 static const int nnz_idx[4] = {4,5,6,3};
7229 unsigned int tmp_linesize = 2 * linesize;
7230 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7231 int mbn_xy = mb_xy - 2 * s->mb_stride;
7236 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7237 if( IS_INTRA(mb_type) ||
7238 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7239 bS[0] = bS[1] = bS[2] = bS[3] = 3;
/* otherwise bS depends on coded coefficients on either side */
7241 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7242 for( i = 0; i < 4; i++ ) {
7243 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7244 mbn_nnz[nnz_idx[i]] != 0 )
7250 // Do not use s->qscale as luma quantizer because it has not the same
7251 // value in IPCM macroblocks.
7252 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7253 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7254 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7255 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7256 chroma_qp = ( h->chroma_qp +
7257 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7258 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7259 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* main edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal */
7266 for( edge = start; edge < edges; edge++ ) {
7267 /* mbn_xy: neighbor macroblock */
7268 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7269 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist */
7273 if( (edge&1) && IS_8x8DCT(mb_type) )
/* either side intra -> strong bS (4 on an MB border in frame coding, else 3) */
7276 if( IS_INTRA(mb_type) ||
7277 IS_INTRA(mbn_type) ) {
7280 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7281 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7290 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter/inter edge: mask_edge short-circuits edges that cannot differ in mv */
7295 if( edge & mask_edge ) {
7296 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mixed field/frame neighbours in MBAFF always get bS=1 */
7299 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7300 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition on both sides: compute bS once */
7303 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7304 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7305 int bn_idx= b_idx - (dir ? 8:1);
/* bS=1 if refs differ or any mv component differs by >= 1 luma pel */
7307 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7308 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7309 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7310 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7312 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4-pixel segment */
7318 for( i = 0; i < 4; i++ ) {
7319 int x = dir == 0 ? edge : i;
7320 int y = dir == 0 ? i : edge;
7321 int b_idx= 8 + 4 + x + 8*y;
7322 int bn_idx= b_idx - (dir ? 8:1);
/* coded coefficients on either side -> bS=2 */
7324 if( h->non_zero_count_cache[b_idx] != 0 ||
7325 h->non_zero_count_cache[bn_idx] != 0 ) {
/* else ref/mv differences -> bS=1 */
7331 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7332 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7333 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7334 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* nothing to filter on this edge */
7342 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7347 // Do not use s->qscale as luma quantizer because it has not the same
7348 // value in IPCM macroblocks.
7349 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7350 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7351 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7352 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* dir==0: vertical edges; chroma only on even luma edges */
7354 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7355 if( (edge&1) == 0 ) {
7356 int chroma_qp = ( h->chroma_qp +
7357 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7358 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7359 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* dir==1: horizontal edges */
7362 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7363 if( (edge&1) == 0 ) {
7364 int chroma_qp = ( h->chroma_qp +
7365 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7366 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7367 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7374 static int decode_slice(H264Context *h){
7375 MpegEncContext * const s = &h->s;
7376 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7380 if( h->pps.cabac ) {
7384 align_get_bits( &s->gb );
7387 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7388 ff_init_cabac_decoder( &h->cabac,
7389 s->gb.buffer + get_bits_count(&s->gb)/8,
7390 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7391 /* calculate pre-state */
7392 for( i= 0; i < 460; i++ ) {
7394 if( h->slice_type == I_TYPE )
7395 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7397 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7400 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7402 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7406 int ret = decode_mb_cabac(h);
7409 if(ret>=0) hl_decode_mb(h);
7411 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7414 if(ret>=0) ret = decode_mb_cabac(h);
7416 if(ret>=0) hl_decode_mb(h);
7419 eos = get_cabac_terminate( &h->cabac );
7421 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
7422 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7423 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7427 if( ++s->mb_x >= s->mb_width ) {
7429 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7436 if( eos || s->mb_y >= s->mb_height ) {
7437 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7438 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7445 int ret = decode_mb_cavlc(h);
7447 if(ret>=0) hl_decode_mb(h);
7449 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7451 ret = decode_mb_cavlc(h);
7453 if(ret>=0) hl_decode_mb(h);
7458 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7459 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7464 if(++s->mb_x >= s->mb_width){
7466 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7471 if(s->mb_y >= s->mb_height){
7472 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7474 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7475 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7479 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7486 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7487 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7488 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7489 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7493 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7502 for(;s->mb_y < s->mb_height; s->mb_y++){
7503 for(;s->mb_x < s->mb_width; s->mb_x++){
7504 int ret= decode_mb(h);
7509 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7510 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7515 if(++s->mb_x >= s->mb_width){
7517 if(++s->mb_y >= s->mb_height){
7518 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7519 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7523 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7530 if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
7531 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7536 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7543 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7546 return -1; //not reached
/**
 * Parses an SEI "user data unregistered" payload of the given size (bytes).
 * Detects the x264 encoder version string and stores its build number in
 * h->x264_build (used elsewhere to key encoder-specific bug workarounds).
 * NOTE(review): the excerpt is missing interior lines (gaps in the embedded
 * numbering) — e.g. local declarations and the tail of the byte loop.
 */
7549 static int decode_unregistered_user_data(H264Context *h, int size){
7550 MpegEncContext * const s = &h->s;
/* first 16 bytes are presumably the payload's 16-byte UUID (hence the +16
   offsets below), followed by up to 256 bytes of user text — TODO confirm */
7551 uint8_t user_data[16+256];
7557 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7558 user_data[i]= get_bits(&s->gb, 8);
/* scan past the UUID for the x264 banner; only the build number is kept */
7562 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7563 if(e==1 && build>=0)
7564 h->x264_build= build;
7566 if(s->avctx->debug & FF_DEBUG_BUGS)
7567 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond what fit in user_data */
7570 skip_bits(&s->gb, 8);
/**
 * Decodes an SEI NAL unit: iterates over SEI messages until fewer than ~16
 * bits remain. Payload type and size are each coded as a run of 0xFF bytes
 * plus one terminating byte (values accumulate while bytes read 255).
 * NOTE(review): interior lines (do{ openers, type/size init, switch on type)
 * are elided from this excerpt.
 */
7575 static int decode_sei(H264Context *h){
7576 MpegEncContext * const s = &h->s;
7578 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payload type: add 255 per 0xFF byte, then the final byte */
7583 type+= show_bits(&s->gb, 8);
7584 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payload size the same way */
7588 size+= show_bits(&s->gb, 8);
7589 }while(get_bits(&s->gb, 8) == 255);
7593 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload types: skip the whole payload */
7597 skip_bits(&s->gb, 8*size);
7600 //FIXME check bits here
7601 align_get_bits(&s->gb);
/**
 * Parses hrd_parameters() from the VUI. All fields are read to keep the
 * bitstream position correct but their values are discarded here.
 */
7607 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7608 MpegEncContext * const s = &h->s;
7610 cpb_count = get_ue_golomb(&s->gb) + 1;
7611 get_bits(&s->gb, 4); /* bit_rate_scale */
7612 get_bits(&s->gb, 4); /* cpb_size_scale */
7613 for(i=0; i<cpb_count; i++){
7614 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7615 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7616 get_bits1(&s->gb); /* cbr_flag */
7618 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7619 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7620 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7621 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses vui_parameters() of an SPS. Stores sample aspect ratio, timing
 * info (num_units_in_tick/time_scale), and the bitstream-restriction
 * num_reorder_frames into *sps; other fields are read and discarded.
 */
7624 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7625 MpegEncContext * const s = &h->s;
7626 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7627 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7629 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7631 if( aspect_ratio_info_present_flag ) {
7632 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7633 if( aspect_ratio_idc == EXTENDED_SAR ) {
7634 sps->sar.num= get_bits(&s->gb, 16);
7635 sps->sar.den= get_bits(&s->gb, 16);
/* idc 1..13 index the predefined pixel_aspect table */
7636 }else if(aspect_ratio_idc < 14){
7637 sps->sar= pixel_aspect[aspect_ratio_idc];
7639 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7646 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7648 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7649 get_bits1(&s->gb); /* overscan_appropriate_flag */
7652 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7653 get_bits(&s->gb, 3); /* video_format */
7654 get_bits1(&s->gb); /* video_full_range_flag */
7655 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7656 get_bits(&s->gb, 8); /* colour_primaries */
7657 get_bits(&s->gb, 8); /* transfer_characteristics */
7658 get_bits(&s->gb, 8); /* matrix_coefficients */
7662 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7663 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7664 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* timing info is kept: it drives the stream's frame-rate derivation */
7667 sps->timing_info_present_flag = get_bits1(&s->gb);
7668 if(sps->timing_info_present_flag){
7669 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7670 sps->time_scale = get_bits_long(&s->gb, 32);
7671 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7674 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7675 if(nal_hrd_parameters_present_flag)
7676 decode_hrd_parameters(h, sps);
7677 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7678 if(vcl_hrd_parameters_present_flag)
7679 decode_hrd_parameters(h, sps);
7680 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7681 get_bits1(&s->gb); /* low_delay_hrd_flag */
7682 get_bits1(&s->gb); /* pic_struct_present_flag */
7684 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7685 if(sps->bitstream_restriction_flag){
7686 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7687 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7688 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7689 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7690 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* kept: upper bound on reordering depth, used for output-delay sizing */
7691 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7692 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Parses one scaling list of 16 or 64 entries.
 * - flag absent: copy fallback_list (inter-list prediction).
 * - first delta makes the first coefficient 0: copy jvt_list (spec default).
 * - otherwise: delta-decode in zigzag order; once a decoded value hits 0,
 *   the last value repeats for the remaining positions.
 * NOTE(review): some interior lines (else branch, loop tail) are elided
 * from this excerpt.
 */
7698 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7699 const uint8_t *jvt_list, const uint8_t *fallback_list){
7700 MpegEncContext * const s = &h->s;
7701 int i, last = 8, next = 8;
/* 16-entry lists use the 4x4 zigzag, 64-entry lists the 8x8 zigzag */
7702 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7703 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7704 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7706 for(i=0;i<size;i++){
/* coefficients are signed deltas from the previous value, mod 256 */
7708 next = (last + get_se_golomb(&s->gb)) & 0xff;
7709 if(!i && !next){ /* matrix not written, we use the preset one */
7710 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7713 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the full set of scaling matrices for an SPS (is_sps=1) or PPS
 * (is_sps=0). Fallback rule: a PPS falls back to the SPS matrices when the
 * SPS carried any, otherwise to the flat/JVT defaults; within one set, each
 * list falls back to the previously decoded list of the same class.
 */
7717 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7718 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7719 MpegEncContext * const s = &h->s;
7720 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* fallbacks for the first list of each class: Intra4, Inter4, Intra8, Inter8 */
7721 const uint8_t *fallback[4] = {
7722 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7723 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7724 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7725 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7727 if(get_bits1(&s->gb)){
/* remember (SPS only) that explicit matrices exist, for later PPS fallback */
7728 sps->scaling_matrix_present |= is_sps;
7729 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7730 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7731 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7732 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7733 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7734 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists are only present in the SPS, or in a PPS with 8x8 transform */
7735 if(is_sps || pps->transform_8x8_mode){
7736 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7737 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7739 } else if(fallback_sps) {
/* no explicit matrices in this PPS: inherit the SPS set wholesale */
7740 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7741 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a seq_parameter_set_rbsp() NAL into h->sps_buffer[sps_id].
 * Covers profile/level, POC configuration, reference-frame count,
 * picture dimensions (in macroblocks), interlace flags, cropping and VUI.
 * NOTE(review): several interior lines (error returns, else branches,
 * closing braces) are elided from this excerpt.
 */
7745 static inline int decode_seq_parameter_set(H264Context *h){
7746 MpegEncContext * const s = &h->s;
7747 int profile_idc, level_idc;
7751 profile_idc= get_bits(&s->gb, 8);
7752 get_bits1(&s->gb); //constraint_set0_flag
7753 get_bits1(&s->gb); //constraint_set1_flag
7754 get_bits1(&s->gb); //constraint_set2_flag
7755 get_bits1(&s->gb); //constraint_set3_flag
7756 get_bits(&s->gb, 4); // reserved
7757 level_idc= get_bits(&s->gb, 8);
7758 sps_id= get_ue_golomb(&s->gb);
7760 sps= &h->sps_buffer[ sps_id ];
7761 sps->profile_idc= profile_idc;
7762 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, transform bypass and
   optional explicit scaling matrices */
7764 if(sps->profile_idc >= 100){ //high profile
7765 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7766 get_bits1(&s->gb); //residual_color_transform_flag
7767 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7768 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7769 sps->transform_bypass = get_bits1(&s->gb);
7770 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7772 sps->scaling_matrix_present = 0;
7774 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7775 sps->poc_type= get_ue_golomb(&s->gb);
/* POC type 0: LSB-based; type 1: delta/cycle based; type 2 needs no
   extra fields; anything above 2 is invalid */
7777 if(sps->poc_type == 0){ //FIXME #define
7778 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7779 } else if(sps->poc_type == 1){//FIXME #define
7780 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7781 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7782 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7783 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7785 for(i=0; i<sps->poc_cycle_length; i++)
7786 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7788 if(sps->poc_type > 2){
7789 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7793 sps->ref_frame_count= get_ue_golomb(&s->gb);
7794 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7795 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7797 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* dimensions are coded minus one, in macroblock units */
7798 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7799 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject sizes that would overflow 16*mb computations or fail the
   generic dimension sanity check */
7800 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7801 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7804 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7805 if(!sps->frame_mbs_only_flag)
7806 sps->mb_aff= get_bits1(&s->gb);
7810 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
/* MBAFF streams cannot be decoded when interlace support is compiled out */
7812 #ifndef ALLOW_INTERLACE
7814 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n");
7816 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7817 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7819 sps->crop= get_bits1(&s->gb);
7821 sps->crop_left = get_ue_golomb(&s->gb);
7822 sps->crop_right = get_ue_golomb(&s->gb);
7823 sps->crop_top = get_ue_golomb(&s->gb);
7824 sps->crop_bottom= get_ue_golomb(&s->gb);
/* only right/bottom cropping is fully supported here */
7825 if(sps->crop_left || sps->crop_top){
7826 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7832 sps->crop_bottom= 0;
7835 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7836 if( sps->vui_parameters_present_flag )
7837 decode_vui_parameters(h, sps);
7839 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7840 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7841 sps_id, sps->profile_idc, sps->level_idc,
7843 sps->ref_frame_count,
7844 sps->mb_width, sps->mb_height,
7845 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7846 sps->direct_8x8_inference_flag ? "8B8" : "",
7847 sps->crop_left, sps->crop_right,
7848 sps->crop_top, sps->crop_bottom,
7849 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a pic_parameter_set_rbsp() NAL into h->pps_buffer[pps_id].
 * bit_length is the RBSP length in bits; it is used to detect the optional
 * High-profile trailer (8x8 transform flag, PPS scaling matrices,
 * second chroma QP offset). FMO (slice_group_count > 1) is not supported.
 * NOTE(review): interior lines are elided from this excerpt; the spec-table
 * rows below are excerpted H.264 syntax tables (comment text in the full
 * source), not live code.
 */
7855 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7856 MpegEncContext * const s = &h->s;
7857 int pps_id= get_ue_golomb(&s->gb);
7858 PPS *pps= &h->pps_buffer[pps_id];
7860 pps->sps_id= get_ue_golomb(&s->gb);
7861 pps->cabac= get_bits1(&s->gb);
7862 pps->pic_order_present= get_bits1(&s->gb);
7863 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7864 if(pps->slice_group_count > 1 ){
7865 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7866 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7867 switch(pps->mb_slice_group_map_type){
7870 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7871 | run_length[ i ] |1 |ue(v) |
7876 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7878 | top_left_mb[ i ] |1 |ue(v) |
7879 | bottom_right_mb[ i ] |1 |ue(v) |
7887 | slice_group_change_direction_flag |1 |u(1) |
7888 | slice_group_change_rate_minus1 |1 |ue(v) |
7893 | slice_group_id_cnt_minus1 |1 |ue(v) |
7894 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7896 | slice_group_id[ i ] |1 |u(v) |
7901 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7902 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7903 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7904 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7908 pps->weighted_pred= get_bits1(&s->gb);
7909 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* QP/QS are coded as signed offsets from 26 */
7910 pps->init_qp= get_se_golomb(&s->gb) + 26;
7911 pps->init_qs= get_se_golomb(&s->gb) + 26;
7912 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7913 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7914 pps->constrained_intra_pred= get_bits1(&s->gb);
7915 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7917 pps->transform_8x8_mode= 0;
7918 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default to flat (all 16) scaling matrices before any explicit parse */
7919 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7920 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* more data left => High-profile PPS extension is present */
7922 if(get_bits_count(&s->gb) < bit_length){
7923 pps->transform_8x8_mode= get_bits1(&s->gb);
7924 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7925 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7928 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7929 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7930 pps_id, pps->sps_id,
7931 pps->cabac ? "CABAC" : "CAVLC",
7932 pps->slice_group_count,
7933 pps->ref_count[0], pps->ref_count[1],
7934 pps->weighted_pred ? "weighted" : "",
7935 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7936 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7937 pps->constrained_intra_pred ? "CONSTR" : "",
7938 pps->redundant_pic_cnt_present ? "REDU" : "",
7939 pps->transform_8x8_mode ? "8x8DCT" : ""
7947 * finds the end of the current frame in the bitstream.
7948 * @return the position of the first byte of the next frame, or -1
7950 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7953 ParseContext *pc = &(h->s.parse_context);
7954 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7955 // mb_addr= pc->mb_addr - 1;
/* byte-wise scan keeping the last 4 bytes in 'state'; masking with
   0xFFFFFF1F matches a 00 00 01 start code plus the 5-bit nal_unit_type */
7957 for(i=0; i<=buf_size; i++){
/* NAL types 1 (non-IDR slice), 2 (partition A), 5 (IDR slice) */
7958 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7959 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7960 if(pc->frame_start_found){
7961 // If there isn't one more byte in the buffer
7962 // the test on first_mb_in_slice cannot be done yet
7963 // do it at next call.
7964 if (i >= buf_size) break;
/* MSB set => ue(v) first_mb_in_slice is 0 => new picture starts here */
7965 if (buf[i] & 0x80) {
7966 // first_mb_in_slice is 0, probably the first nal of a new
7968 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7970 pc->frame_start_found= 0;
7974 pc->frame_start_found = 1;
/* NAL types 7 (SPS), 8 (PPS), 9 (AUD) also terminate a pending frame */
7976 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7977 if(pc->frame_start_found){
7979 pc->frame_start_found= 0;
7984 state= (state<<8) | buf[i];
7988 return END_NOT_FOUND;
#ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: accumulates input until find_frame_end() locates
 * a complete access unit, then returns that unit via *poutbuf.
 * NOTE(review): interior lines (incomplete-frame early return, consumed-size
 * return) are elided from this excerpt.
 */
7992 static int h264_parse(AVCodecParserContext *s,
7993 AVCodecContext *avctx,
7994 uint8_t **poutbuf, int *poutbuf_size,
7995 const uint8_t *buf, int buf_size)
7997 H264Context *h = s->priv_data;
7998 ParseContext *pc = &h->s.parse_context;
8001 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame buffers partial frames across calls; <0 means
   "need more data" */
8003 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8009 *poutbuf = (uint8_t *)buf;
8010 *poutbuf_size = buf_size;
/**
 * AVCodecParser split callback: returns the offset where in-band extradata
 * (SPS/PPS/AUD NALs) ends and the first other NAL begins, so the header
 * bytes can be split off the packet.
 */
8014 static int h264_split(AVCodecContext *avctx,
8015 const uint8_t *buf, int buf_size)
8018 uint32_t state = -1;
8021 for(i=0; i<=buf_size; i++){
/* note an SPS (type 7) was seen before deciding where to split */
8022 if((state&0xFFFFFF1F) == 0x107)
8024 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* first start code that is NOT SPS/PPS/AUD ends the header section */
8026 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes preceding the start code */
8028 while(i>4 && buf[i-5]==0) i--;
8033 state= (state<<8) | buf[i];
8037 #endif /* CONFIG_H264_PARSER */
/**
 * Walks all NAL units in buf and dispatches them: slices, data partitions
 * A/B/C, SEI, SPS, PPS. Handles both Annex-B start-code streams and AVC
 * ("avcC") length-prefixed streams (h->is_avc / h->nal_length_size).
 * Returns the number of bytes consumed (or an elided error path).
 * NOTE(review): many interior lines (loop header, case labels, error
 * returns, else branches) are elided from this excerpt.
 */
8039 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8040 MpegEncContext * const s = &h->s;
8041 AVCodecContext * const avctx= s->avctx;
8045 for(i=0; i<50; i++){
8046 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8050 s->current_picture_ptr= NULL;
8059 if(buf_index >= buf_size) break;
/* AVC mode: NAL size is a big-endian nal_length_size-byte prefix */
8061 for(i = 0; i < h->nal_length_size; i++)
8062 nalsize = (nalsize << 8) | buf[buf_index++];
8068 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8073 // start code prefix search
8074 for(; buf_index + 3 < buf_size; buf_index++){
8075 // this should allways succeed in the first iteration
8076 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8080 if(buf_index+3 >= buf_size) break;
/* unescape the RBSP (removes emulation-prevention bytes) */
8085 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8086 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8088 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8090 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8091 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8094 if (h->is_avc && (nalsize != consumed))
8095 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8097 buf_index += consumed;
/* skip non-reference NALs when hurry_up / skip_frame ask for it */
8099 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8100 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8103 switch(h->nal_unit_type){
8105 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* ordinary slice: single bitstream reader, no data partitioning */
8107 init_get_bits(&s->gb, ptr, bit_length);
8109 h->inter_gb_ptr= &s->gb;
8110 s->data_partitioning = 0;
8112 if(decode_slice_header(h) < 0){
8113 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8116 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* decode the slice only if none of the skip conditions apply */
8117 if(h->redundant_pic_count==0 && s->hurry_up < 5
8118 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8119 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8120 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8121 && avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + partition bookkeeping */
8125 init_get_bits(&s->gb, ptr, bit_length);
8127 h->inter_gb_ptr= NULL;
8128 s->data_partitioning = 1;
8130 if(decode_slice_header(h) < 0){
8131 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* partition B carries intra residual, partition C inter residual */
8135 init_get_bits(&h->intra_gb, ptr, bit_length);
8136 h->intra_gb_ptr= &h->intra_gb;
8139 init_get_bits(&h->inter_gb, ptr, bit_length);
8140 h->inter_gb_ptr= &h->inter_gb;
8142 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8144 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8145 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8146 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8147 && avctx->skip_frame < AVDISCARD_ALL)
8151 init_get_bits(&s->gb, ptr, bit_length);
8155 init_get_bits(&s->gb, ptr, bit_length);
8156 decode_seq_parameter_set(h);
8158 if(s->flags& CODEC_FLAG_LOW_DELAY)
8161 if(avctx->has_b_frames < 2)
8162 avctx->has_b_frames= !s->low_delay;
8165 init_get_bits(&s->gb, ptr, bit_length);
8167 decode_picture_parameter_set(h, bit_length);
8171 case NAL_END_SEQUENCE:
8172 case NAL_END_STREAM:
8173 case NAL_FILLER_DATA:
8175 case NAL_AUXILIARY_SLICE:
8178 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8182 if(!s->current_picture_ptr) return buf_index; //no frame
/* end-of-picture bookkeeping: POC/frame_num history and ref marking */
8184 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8185 s->current_picture_ptr->pict_type= s->pict_type;
8187 h->prev_frame_num_offset= h->frame_num_offset;
8188 h->prev_frame_num= h->frame_num;
8189 if(s->current_picture_ptr->reference){
8190 h->prev_poc_msb= h->poc_msb;
8191 h->prev_poc_lsb= h->poc_lsb;
8193 if(s->current_picture_ptr->reference)
8194 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8204 * returns the number of bytes consumed for building the current frame
8206 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* in truncated mode part of the data was consumed on a previous call;
   subtract the parse context's carried-over index */
8207 if(s->flags&CODEC_FLAG_TRUNCATED){
8208 pos -= s->parse_context.last_index;
8209 if(pos<0) pos=0; // FIXME remove (unneeded?)
8213 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8214 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback. Steps: optional truncated-stream
 * reassembly, one-time avcC extradata parsing (AVC/MP4 mode), NAL decoding,
 * then B-frame reordering of decoded pictures into display order via
 * h->delayed_pic[] before returning one frame in *pict.
 * NOTE(review): many interior lines (returns, else branches, closing
 * braces) are elided from this excerpt.
 */
8220 static int decode_frame(AVCodecContext *avctx,
8221 void *data, int *data_size,
8222 uint8_t *buf, int buf_size)
8224 H264Context *h = avctx->priv_data;
8225 MpegEncContext *s = &h->s;
8226 AVFrame *pict = data;
8229 s->flags= avctx->flags;
8230 s->flags2= avctx->flags2;
8232 /* no supplementary picture */
8233 if (buf_size == 0) {
/* truncated mode: buffer input until a full access unit is available */
8237 if(s->flags&CODEC_FLAG_TRUNCATED){
8238 int next= find_frame_end(h, buf, buf_size);
8240 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8242 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* one-time parse of the avcC extradata box (ISO/MP4 packaging) */
8245 if(h->is_avc && !h->got_avcC) {
8246 int i, cnt, nalsize;
8247 unsigned char *p = avctx->extradata;
8248 if(avctx->extradata_size < 7) {
8249 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8253 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8256 /* sps and pps in the avcC always have length coded with 2 bytes,
8257 so put a fake nal_length_size = 2 while parsing them */
8258 h->nal_length_size = 2;
8259 // Decode sps from avcC
8260 cnt = *(p+5) & 0x1f; // Number of sps
8262 for (i = 0; i < cnt; i++) {
8263 nalsize = BE_16(p) + 2;
8264 if(decode_nal_units(h, p, nalsize) < 0) {
8265 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8270 // Decode pps from avcC
8271 cnt = *(p++); // Number of pps
8272 for (i = 0; i < cnt; i++) {
8273 nalsize = BE_16(p) + 2;
8274 if(decode_nal_units(h, p, nalsize) != nalsize) {
8275 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8280 // Now store right nal length size, that will be use to parse all other nals
8281 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8282 // Do not reparse avcC
/* Annex-B extradata (if any) is decoded once before the first picture */
8286 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8287 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8291 buf_index=decode_nal_units(h, buf, buf_size);
8295 //FIXME do something with unavailable reference frames
8297 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8298 if(!s->current_picture_ptr){
8299 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8304 Picture *out = s->current_picture_ptr;
8305 #if 0 //decode order
8306 *data_size = sizeof(AVFrame);
8308 /* Sort B-frames into display order */
8309 Picture *cur = s->current_picture_ptr;
8310 Picture *prev = h->delayed_output_pic;
8311 int i, pics, cross_idr, out_of_order, out_idx;
/* grow the reorder delay to the stream-declared num_reorder_frames */
8313 if(h->sps.bitstream_restriction_flag
8314 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8315 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8320 while(h->delayed_pic[pics]) pics++;
8321 h->delayed_pic[pics++] = cur;
8322 if(cur->reference == 0)
8326 for(i=0; h->delayed_pic[i]; i++)
8327 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the delayed picture with the smallest POC as the output candidate */
8330 out = h->delayed_pic[0];
8332 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8333 if(h->delayed_pic[i]->poc < out->poc){
8334 out = h->delayed_pic[i];
8338 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8339 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8341 else if(prev && pics <= s->avctx->has_b_frames)
8343 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8345 ((!cross_idr && prev && out->poc > prev->poc + 2)
8346 || cur->pict_type == B_TYPE)))
/* heuristically grow the delay when output order violations are seen */
8349 s->avctx->has_b_frames++;
8352 else if(out_of_order)
/* remove the emitted picture from the delayed queue */
8355 if(out_of_order || pics > s->avctx->has_b_frames){
8356 for(i=out_idx; h->delayed_pic[i]; i++)
8357 h->delayed_pic[i] = h->delayed_pic[i+1];
8363 *data_size = sizeof(AVFrame);
8364 if(prev && prev != out && prev->reference == 1)
8365 prev->reference = 0;
8366 h->delayed_output_pic = out;
8370 *pict= *(AVFrame*)out;
8372 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8375 assert(pict->data[0] || !*data_size);
8376 ff_print_debug_info(s, pict);
8377 //printf("out %d\n", (int)pict->data[0]);
8380 /* Return the Picture timestamp as the frame number */
8381 /* we substract 1 because it is added on utils.c */
8382 avctx->frame_number = s->picture_number - 1;
8384 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor is available only if it exists inside the picture
 * and belongs to the same slice (slice_table match).
 * Index layout used here: 0=top-left, 1=top, 2=top-right, 3=left, 4/5 fixed.
 * NOTE(review): the else branch for the first macroblock row is elided
 * from this excerpt.
 */
8387 static inline void fill_mb_avail(H264Context *h){
8388 MpegEncContext * const s = &h->s;
8389 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8392 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8393 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8394 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8400 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8401 h->mb_avail[4]= 1; //FIXME move out
8402 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self-test harness (the enclosing main()/#ifdef TEST opening is
 * elided from this excerpt). Exercises, in order: unsigned and signed
 * Exp-Golomb write/read round-trips, the 4x4 (I)DCT with a crude
 * quantize/dequantize step, the quantizer over all QPs, and NAL
 * escape/unescape round-trips.
 */
8408 #define SIZE (COUNT*40)
8414 // int int_temp[10000];
8416 AVCodecContext avctx;
8418 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb round-trip: write i, read back, compare --- */
8420 init_put_bits(&pb, temp, SIZE);
8421 printf("testing unsigned exp golomb\n");
8422 for(i=0; i<COUNT; i++){
8424 set_ue_golomb(&pb, i);
8425 STOP_TIMER("set_ue_golomb");
8427 flush_put_bits(&pb);
8429 init_get_bits(&gb, temp, 8*SIZE);
8430 for(i=0; i<COUNT; i++){
8433 s= show_bits(&gb, 24);
8436 j= get_ue_golomb(&gb);
8438 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8441 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb round-trip over [-COUNT/2, COUNT/2) --- */
8445 init_put_bits(&pb, temp, SIZE);
8446 printf("testing signed exp golomb\n");
8447 for(i=0; i<COUNT; i++){
8449 set_se_golomb(&pb, i - COUNT/2);
8450 STOP_TIMER("set_se_golomb");
8452 flush_put_bits(&pb);
8454 init_get_bits(&gb, temp, 8*SIZE);
8455 for(i=0; i<COUNT; i++){
8458 s= show_bits(&gb, 24);
8461 j= get_se_golomb(&gb);
8462 if(j != i - COUNT/2){
8463 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8466 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip on random blocks, accumulating error --- */
8469 printf("testing 4x4 (I)DCT\n");
8472 uint8_t src[16], ref[16];
8473 uint64_t error= 0, max_error=0;
8475 for(i=0; i<COUNT; i++){
8477 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8478 for(j=0; j<16; j++){
8479 ref[j]= random()%255;
8480 src[j]= random()%255;
8483 h264_diff_dct_c(block, src, ref, 4);
/* crude scale/rescale emulating quantization rounding */
8486 for(j=0; j<16; j++){
8487 // printf("%d ", block[j]);
8488 block[j]= block[j]*4;
8489 if(j&1) block[j]= (block[j]*4 + 2)/5;
8490 if(j&4) block[j]= (block[j]*4 + 2)/5;
8494 s->dsp.h264_idct_add(ref, block, 4);
8495 /* for(j=0; j<16; j++){
8496 printf("%d ", ref[j]);
8500 for(j=0; j<16; j++){
8501 int diff= ABS(src[j] - ref[j]);
8504 max_error= FFMAX(max_error, diff);
8507 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
8509 printf("testing quantizer\n");
8510 for(qp=0; qp<52; qp++){
8512 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: encode_nal/decode_nal must round-trip exactly --- */
8516 printf("Testing NAL layer\n");
8518 uint8_t bitstream[COUNT];
8519 uint8_t nal[COUNT*2];
8521 memset(&h, 0, sizeof(H264Context));
8523 for(i=0; i<COUNT; i++){
8531 for(j=0; j<COUNT; j++){
8532 bitstream[j]= (random() % 255) + 1;
/* sprinkle zero bytes to force emulation-prevention insertion */
8535 for(j=0; j<zeros; j++){
8536 int pos= random() % COUNT;
8537 while(bitstream[pos] == 0){
8546 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8548 printf("encoding failed\n");
8552 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8556 if(out_length != COUNT){
8557 printf("incorrect length %d %d\n", out_length, COUNT);
8561 if(consumed != nal_length){
8562 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8566 if(memcmp(bitstream, out, COUNT)){
8567 printf("missmatch\n");
8572 printf("Testing RBSP\n");
/**
 * AVCodec close callback: releases the RBSP unescape buffer and the
 * per-stream decoder tables. (Return statement is elided in this excerpt.)
 */
8580 static int decode_end(AVCodecContext *avctx)
8582 H264Context *h = avctx->priv_data;
8583 MpegEncContext *s = &h->s;
8585 av_freep(&h->rbsp_buffer);
8586 free_tables(h); //FIXME cleanup init stuff perhaps
8589 // memset(h, 0, sizeof(H264Context));
8595 AVCodec h264_decoder = {
8599 sizeof(H264Context),
8604 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8608 #ifdef CONFIG_H264_PARSER
8609 AVCodecParser h264_parser = {
8611 sizeof(H264Context),