2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
57 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE
61 #ifdef ALLOW_INTERLACE
62 #define MB_MBAFF h->mb_mbaff
63 #define MB_FIELD h->mb_field_decoding_flag
64 #define FRAME_MBAFF h->mb_aff_frame
70 #define IS_INTERLACED(mb_type) 0
74 * Sequence parameter set
80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
82 int poc_type; ///< pic_order_cnt_type
83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag;
85 int offset_for_non_ref_pic;
86 int offset_for_top_to_bottom_field;
87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag;
90 int mb_width; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag;
93 int mb_aff; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag;
95 int crop; ///< frame_cropping_flag
96 int crop_left; ///< frame_cropping_rect_left_offset
97 int crop_right; ///< frame_cropping_rect_right_offset
98 int crop_top; ///< frame_cropping_rect_top_offset
99 int crop_bottom; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag;
102 int timing_info_present_flag;
103 uint32_t num_units_in_tick;
105 int fixed_frame_rate_flag;
106 short offset_for_ref_frame[256]; //FIXME dyn aloc?
107 int bitstream_restriction_flag;
108 int num_reorder_frames;
109 int scaling_matrix_present;
110 uint8_t scaling_matrix4[6][16];
111 uint8_t scaling_matrix8[2][64];
115 * Picture parameter set
119 int cabac; ///< entropy_coding_mode_flag
120 int pic_order_present; ///< pic_order_present_flag
121 int slice_group_count; ///< num_slice_groups_minus1 + 1
122 int mb_slice_group_map_type;
123 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
124 int weighted_pred; ///< weighted_pred_flag
125 int weighted_bipred_idc;
126 int init_qp; ///< pic_init_qp_minus26 + 26
127 int init_qs; ///< pic_init_qs_minus26 + 26
128 int chroma_qp_index_offset;
129 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
130 int constrained_intra_pred; ///< constrained_intra_pred_flag
131 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
132 int transform_8x8_mode; ///< transform_8x8_mode_flag
133 uint8_t scaling_matrix4[6][16];
134 uint8_t scaling_matrix8[2][64];
138 * Memory management control operation opcode.
140 typedef enum MMCOOpcode{
151 * Memory management control operation.
162 typedef struct H264Context{
170 #define NAL_IDR_SLICE 5
175 #define NAL_END_SEQUENCE 10
176 #define NAL_END_STREAM 11
177 #define NAL_FILLER_DATA 12
178 #define NAL_SPS_EXT 13
179 #define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer;
181 unsigned int rbsp_buffer_size;
184 * Used to parse AVC variant of h264
186 int is_avc; ///< this flag is != 0 if codec is avc1
187 int got_avcC; ///< flag used to parse avcC data only once
188 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
196 int chroma_pred_mode;
197 int intra16x16_pred_mode;
202 int8_t intra4x4_pred_mode_cache[5*8];
203 int8_t (*intra4x4_pred_mode)[8];
204 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
205 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
206 void (*pred8x8 [4+3])(uint8_t *src, int stride);
207 void (*pred16x16[4+3])(uint8_t *src, int stride);
208 unsigned int topleft_samples_available;
209 unsigned int top_samples_available;
210 unsigned int topright_samples_available;
211 unsigned int left_samples_available;
212 uint8_t (*top_borders[2])[16+2*8];
213 uint8_t left_border[2*(17+2*9)];
216 * non zero coeff count cache.
217 * is 64 if not available.
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
220 uint8_t (*non_zero_count)[16];
223 * Motion vector cache.
225 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
227 #define LIST_NOT_USED -1 //FIXME rename?
228 #define PART_NOT_AVAILABLE -2
231 * is 1 if the specific list MV&references are set to 0,0,-2.
233 int mv_cache_clean[2];
236 * number of neighbors (top and/or left) that used 8x8 dct
238 int neighbor_transform_size;
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
244 int block_offset[2*(16+8)];
246 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
248 int b_stride; //FIXME use s->b4_stride
251 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
260 int unknown_svq3_flag;
261 int next_slice_index;
263 SPS sps_buffer[MAX_SPS_COUNT];
264 SPS sps; ///< current sps
266 PPS pps_buffer[MAX_PPS_COUNT];
270 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
272 uint32_t dequant4_buffer[6][52][16];
273 uint32_t dequant8_buffer[2][52][64];
274 uint32_t (*dequant4_coeff[6])[16];
275 uint32_t (*dequant8_coeff[2])[64];
276 int dequant_coeff_pps; ///< reinit tables when pps changes
279 uint8_t *slice_table_base;
280 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
282 int slice_type_fixed;
284 //interlacing specific flags
286 int mb_field_decoding_flag;
287 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
294 int delta_poc_bottom;
297 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset; ///< for POC type 2
300 int prev_frame_num_offset; ///< for POC type 2
301 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
304 * frame_num for frames or 2*frame_num for field pics.
309 * max_frame_num or 2*max_frame_num for field pics.
313 //Weighted pred stuff
315 int use_weight_chroma;
316 int luma_log2_weight_denom;
317 int chroma_log2_weight_denom;
318 int luma_weight[2][48];
319 int luma_offset[2][48];
320 int chroma_weight[2][48][2];
321 int chroma_offset[2][48][2];
322 int implicit_weight[48][48];
325 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset;
327 int slice_beta_offset;
329 int redundant_pic_count;
331 int direct_spatial_mv_pred;
332 int dist_scale_factor[16];
333 int dist_scale_factor_field[32];
334 int map_col_to_list0[2][16];
335 int map_col_to_list0_field[2][32];
338 * num_ref_idx_l0/1_active_minus1 + 1
340 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
341 Picture *short_ref[32];
342 Picture *long_ref[32];
343 Picture default_ref_list[2][32];
344 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture *delayed_pic[16]; //FIXME size?
346 Picture *delayed_output_pic;
349 * memory management control operations buffer.
351 MMCO mmco[MAX_MMCO_COUNT];
354 int long_ref_count; ///< number of actual long term references
355 int short_ref_count; ///< number of actual short term references
358 GetBitContext intra_gb;
359 GetBitContext inter_gb;
360 GetBitContext *intra_gb_ptr;
361 GetBitContext *inter_gb_ptr;
363 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
369 uint8_t cabac_state[460];
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
377 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
378 uint8_t *chroma_pred_mode_table;
379 int last_qscale_diff;
380 int16_t (*mvd_table[2])[2];
381 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
382 uint8_t *direct_table;
383 uint8_t direct_cache[5*8];
385 uint8_t zigzag_scan[16];
386 uint8_t zigzag_scan8x8[64];
387 uint8_t zigzag_scan8x8_cavlc[64];
388 uint8_t field_scan[16];
389 uint8_t field_scan8x8[64];
390 uint8_t field_scan8x8_cavlc[64];
391 const uint8_t *zigzag_scan_q0;
392 const uint8_t *zigzag_scan8x8_q0;
393 const uint8_t *zigzag_scan8x8_cavlc_q0;
394 const uint8_t *field_scan_q0;
395 const uint8_t *field_scan8x8_q0;
396 const uint8_t *field_scan8x8_cavlc_q0;
401 static VLC coeff_token_vlc[4];
402 static VLC chroma_dc_coeff_token_vlc;
404 static VLC total_zeros_vlc[15];
405 static VLC chroma_dc_total_zeros_vlc[3];
407 static VLC run_vlc[6];
410 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
411 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
412 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
413 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one 32-bit word, honouring host endianness
 * so the pair can later be stored/compared as a single uint32_t.
 * NOTE(review): this listing is elided — the #else/#endif and closing brace
 * of this function are not visible here; confirm against the full file.
 */
415 static always_inline uint32_t pack16to32(int a, int b){
416 #ifdef WORDS_BIGENDIAN
417 return (b&0xFFFF) + (a<<16);
419 return (a&0xFFFF) + (b<<16);
/**
 * Fills a w x h rectangle of 8-bit (size==1) or 32-bit (size==4) elements
 * at *vp with val, using the widest aligned stores the platform allows.
 * NOTE(review): this listing is elided — the if(w==2)/if(w==4)/... guards
 * that select between the store groups below are not visible here.
 */
425 * @param h height of the rectangle, should be a constant
426 * @param w width of the rectangle, should be a constant
427 * @param size the size of val (1 or 4), should be a constant
429 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
430 uint8_t *p= (uint8_t*)vp;
431 assert(size==1 || size==4);
// Destination must be aligned to min(w, STRIDE_ALIGN) and stride a multiple of w.
437 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
438 assert((stride&(w-1))==0);
// Two bytes per row: replicate the byte value into a uint16_t and store per row.
440 const uint16_t v= size==4 ? val : val*0x0101;
441 *(uint16_t*)(p + 0*stride)= v;
443 *(uint16_t*)(p + 1*stride)= v;
445 *(uint16_t*)(p + 2*stride)=
446 *(uint16_t*)(p + 3*stride)= v;
// Four bytes per row: replicate into a uint32_t.
448 const uint32_t v= size==4 ? val : val*0x01010101;
449 *(uint32_t*)(p + 0*stride)= v;
451 *(uint32_t*)(p + 1*stride)= v;
453 *(uint32_t*)(p + 2*stride)=
454 *(uint32_t*)(p + 3*stride)= v;
456 //gcc can't optimize 64bit math on x86_32
457 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// 0x0100000001ULL duplicates a 32-bit value into both halves of a uint64_t.
458 const uint64_t v= val*0x0100000001ULL;
459 *(uint64_t*)(p + 0*stride)= v;
461 *(uint64_t*)(p + 1*stride)= v;
463 *(uint64_t*)(p + 2*stride)=
464 *(uint64_t*)(p + 3*stride)= v;
// Wider row: pairs of 64-bit stores at offsets 0 and 8 per row.
466 const uint64_t v= val*0x0100000001ULL;
467 *(uint64_t*)(p + 0+0*stride)=
468 *(uint64_t*)(p + 8+0*stride)=
469 *(uint64_t*)(p + 0+1*stride)=
470 *(uint64_t*)(p + 8+1*stride)= v;
472 *(uint64_t*)(p + 0+2*stride)=
473 *(uint64_t*)(p + 8+2*stride)=
474 *(uint64_t*)(p + 0+3*stride)=
475 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit fallback paths: same rectangles written as uint32_t stores.
477 *(uint32_t*)(p + 0+0*stride)=
478 *(uint32_t*)(p + 4+0*stride)= val;
480 *(uint32_t*)(p + 0+1*stride)=
481 *(uint32_t*)(p + 4+1*stride)= val;
483 *(uint32_t*)(p + 0+2*stride)=
484 *(uint32_t*)(p + 4+2*stride)=
485 *(uint32_t*)(p + 0+3*stride)=
486 *(uint32_t*)(p + 4+3*stride)= val;
488 *(uint32_t*)(p + 0+0*stride)=
489 *(uint32_t*)(p + 4+0*stride)=
490 *(uint32_t*)(p + 8+0*stride)=
491 *(uint32_t*)(p +12+0*stride)=
492 *(uint32_t*)(p + 0+1*stride)=
493 *(uint32_t*)(p + 4+1*stride)=
494 *(uint32_t*)(p + 8+1*stride)=
495 *(uint32_t*)(p +12+1*stride)= val;
497 *(uint32_t*)(p + 0+2*stride)=
498 *(uint32_t*)(p + 4+2*stride)=
499 *(uint32_t*)(p + 8+2*stride)=
500 *(uint32_t*)(p +12+2*stride)=
501 *(uint32_t*)(p + 0+3*stride)=
502 *(uint32_t*)(p + 4+3*stride)=
503 *(uint32_t*)(p + 8+3*stride)=
504 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Loads the per-macroblock prediction caches (intra4x4 modes, non-zero
 * coefficient counts, motion vectors, reference indices, CABAC mvd values
 * and B-direct flags) from the already-decoded neighbouring macroblocks
 * (top-left, top, top-right, left) of the current MB.
 * @param h decoder context
 * @param mb_type macroblock type of the current MB
 * @param for_deblock nonzero when filling only what the deblocking filter needs
 * NOTE(review): this listing is elided — many control-flow lines (if/for
 * headers, braces) between the visible statements are missing; confirm the
 * exact nesting against the full file before relying on it.
 */
511 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
512 MpegEncContext * const s = &h->s;
513 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
514 int topleft_xy, top_xy, topright_xy, left_xy[2];
515 int topleft_type, top_type, topright_type, left_type[2];
519 //FIXME deblocking could skip the intra and nnz parts.
520 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
523 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default (progressive) neighbour addresses: raster-order offsets.
525 top_xy = mb_xy - s->mb_stride;
526 topleft_xy = top_xy - 1;
527 topright_xy= top_xy + 1;
528 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: neighbours are derived per MB *pair*; each neighbour address may be
// adjusted by one stride depending on the field/frame coding of both MBs.
538 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
539 const int top_pair_xy = pair_xy - s->mb_stride;
540 const int topleft_pair_xy = top_pair_xy - 1;
541 const int topright_pair_xy = top_pair_xy + 1;
542 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
543 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
544 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
545 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
546 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
547 const int bottom = (s->mb_y & 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
553 top_xy -= s->mb_stride;
556 ? !curr_mb_frame_flag // bottom macroblock
557 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
559 topleft_xy -= s->mb_stride;
562 ? !curr_mb_frame_flag // bottom macroblock
563 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
565 topright_xy -= s->mb_stride;
567 if (left_mb_frame_flag != curr_mb_frame_flag) {
568 left_xy[1] = left_xy[0] = pair_xy - 1;
569 if (curr_mb_frame_flag) {
590 left_xy[1] += s->mb_stride;
// Remember resolved neighbour addresses for later use (e.g. deblocking).
603 h->top_mb_xy = top_xy;
604 h->left_mb_xy[0] = left_xy[0];
605 h->left_mb_xy[1] = left_xy[1];
// Deblocking path: slice_table < 255 means the MB was decoded (any slice).
609 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
610 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
611 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
613 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Unpack the packed per-block luma nnz bits stored by write_back_non_zero_count().
615 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
617 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
618 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619 if(USES_LIST(mb_type,list)){
620 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
629 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
632 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
635 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Decoding path: neighbours count only when they belong to the same slice.
641 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
642 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
643 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
644 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
645 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction sample availability: bitmasks, one bit per 4x4 block,
// narrowed for each unavailable (or constrained-intra inter) neighbour.
648 if(IS_INTRA(mb_type)){
649 h->topleft_samples_available=
650 h->top_samples_available=
651 h->left_samples_available= 0xFFFF;
652 h->topright_samples_available= 0xEEEA;
654 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
655 h->topleft_samples_available= 0xB3FF;
656 h->top_samples_available= 0x33FF;
657 h->topright_samples_available= 0x26EA;
660 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
661 h->topleft_samples_available&= 0xDF5F;
662 h->left_samples_available&= 0x5F5F;
666 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
667 h->topleft_samples_available&= 0x7FFF;
669 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
670 h->topright_samples_available&= 0xFBFF;
// Intra4x4 prediction mode cache: copy the bottom row of the top neighbour
// and the right column of the left neighbours; 'pred' is the fallback mode.
672 if(IS_INTRA4x4(mb_type)){
673 if(IS_INTRA4x4(top_type)){
674 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
675 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
676 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
677 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
680 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
685 h->intra4x4_pred_mode_cache[4+8*0]=
686 h->intra4x4_pred_mode_cache[5+8*0]=
687 h->intra4x4_pred_mode_cache[6+8*0]=
688 h->intra4x4_pred_mode_cache[7+8*0]= pred;
691 if(IS_INTRA4x4(left_type[i])){
692 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
693 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
696 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
701 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
702 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero-coefficient count cache from the top and left neighbours;
// 64 marks "unavailable" (see the cache's own documentation).
717 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
719 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
720 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
721 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
722 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
724 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
725 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
727 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
728 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
731 h->non_zero_count_cache[4+8*0]=
732 h->non_zero_count_cache[5+8*0]=
733 h->non_zero_count_cache[6+8*0]=
734 h->non_zero_count_cache[7+8*0]=
736 h->non_zero_count_cache[1+8*0]=
737 h->non_zero_count_cache[2+8*0]=
739 h->non_zero_count_cache[1+8*3]=
740 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
744 for (i=0; i<2; i++) {
746 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
747 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
748 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
749 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
751 h->non_zero_count_cache[3+8*1 + 2*8*i]=
752 h->non_zero_count_cache[3+8*2 + 2*8*i]=
753 h->non_zero_count_cache[0+8*1 + 8*i]=
754 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Neighbouring coded-block-pattern for CABAC context derivation.
761 h->top_cbp = h->cbp_table[top_xy];
762 } else if(IS_INTRA(mb_type)) {
769 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type)) {
776 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
779 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion vector / reference index caches for inter and direct MBs.
784 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
786 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
787 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
788 /*if(!h->mv_cache_clean[list]){
789 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
790 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
791 h->mv_cache_clean[list]= 1;
795 h->mv_cache_clean[list]= 0;
// Top neighbour: its bottom b-row feeds the row above scan8[0].
797 if(USES_LIST(top_type, list)){
798 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
799 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
800 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
804 h->ref_cache[list][scan8[0] + 0 - 1*8]=
805 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
806 h->ref_cache[list][scan8[0] + 2 - 1*8]=
807 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
809 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
813 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
816 //FIXME unify cleanup or sth
// Left neighbours: right b-column of left_xy[0] (rows 0-1) and left_xy[1] (rows 2-3).
817 if(USES_LIST(left_type[0], list)){
818 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
819 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
820 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
821 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
822 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
823 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
825 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
826 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
827 h->ref_cache[list][scan8[0] - 1 + 0*8]=
828 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
831 if(USES_LIST(left_type[1], list)){
832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
833 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
834 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
835 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
836 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
837 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
839 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
840 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
841 h->ref_cache[list][scan8[0] - 1 + 2*8]=
842 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
843 assert((!left_type[0]) == (!left_type[1]));
846 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
// Corner neighbours (single b-block each).
849 if(USES_LIST(topleft_type, list)){
850 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
851 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
852 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
853 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
855 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
856 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
859 if(USES_LIST(topright_type, list)){
860 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
861 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
862 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
863 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
865 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
866 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
869 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark the cache slots that have no real neighbour as unavailable.
872 h->ref_cache[list][scan8[5 ]+1] =
873 h->ref_cache[list][scan8[7 ]+1] =
874 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
875 h->ref_cache[list][scan8[4 ]] =
876 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
877 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
878 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
879 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
880 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
881 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC only: motion vector difference cache, same layout as mv_cache.
884 /* XXX beurk, Load mvd */
885 if(USES_LIST(top_type, list)){
886 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
897 if(USES_LIST(left_type[0], list)){
898 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
899 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
900 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
902 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
903 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
905 if(USES_LIST(left_type[1], list)){
906 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
907 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
908 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
910 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
911 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
913 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
914 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
915 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
916 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
917 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: per-8x8-block direct-mode flags from the neighbours.
919 if(h->slice_type == B_TYPE){
920 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
922 if(IS_DIRECT(top_type)){
923 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
924 }else if(IS_8X8(top_type)){
925 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
926 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
927 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
929 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
932 if(IS_DIRECT(left_type[0]))
933 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
934 else if(IS_8X8(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
937 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
939 if(IS_DIRECT(left_type[1]))
940 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
941 else if(IS_8X8(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
944 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF: rescale cached refs/MVs of neighbours coded in the other
// frame/field mode (MAP_F2F is applied to every neighbour cache slot).
950 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
951 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
956 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
958 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
959 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// frame -> field: double the ref index, halve the vertical MV component.
961 #define MAP_F2F(idx, mb_type)\
962 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
963 h->ref_cache[list][idx] <<= 1;\
964 h->mv_cache[list][idx][1] /= 2;\
965 h->mvd_cache[list][idx][1] /= 2;\
// field -> frame: halve the ref index, double the vertical MV component.
970 #define MAP_F2F(idx, mb_type)\
971 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
972 h->ref_cache[list][idx] >>= 1;\
973 h->mv_cache[list][idx][1] <<= 1;\
974 h->mvd_cache[list][idx][1] <<= 1;\
// Count of neighbours (top, left) that used the 8x8 transform, for CABAC.
984 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Stores the intra4x4 prediction modes of the current macroblock back from
 * the cache into the per-MB array; cache slots 7+8*1..7+8*4 are the right
 * column and 4+8*4..6+8*4 the bottom row, i.e. the edges a later neighbour
 * will read. NOTE(review): listing elided — the closing brace is not visible.
 */
987 static inline void write_back_intra_pred_mode(H264Context *h){
988 MpegEncContext * const s = &h->s;
989 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
991 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
992 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
993 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
994 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
995 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
996 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
997 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1001 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* The top[]/left[] tables remap each intra4x4 mode when the corresponding
 * neighbour is unavailable: -1 keeps the mode, other values substitute a
 * DC variant; a negative 'status' below (not remappable) is a bitstream
 * error. NOTE(review): listing elided — the for-loops, error returns and
 * final return are not visible here. */
1003 static inline int check_intra4x4_pred_mode(H264Context *h){
1004 MpegEncContext * const s = &h->s;
1005 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1006 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1009 if(!(h->top_samples_available&0x8000)){
1011 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1013 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1016 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1021 if(!(h->left_samples_available&0x8000)){
1023 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1025 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1028 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1034 } //FIXME cleanup like next
1037 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for the 16x16/chroma (8x8)
 * prediction mode passed in as 'mode'; returns a (possibly remapped) mode
 * or an error for out-of-range/unavailable modes. NOTE(review): listing
 * elided — the remapping assignments, error returns and final return are
 * not visible here. */
1039 static inline int check_intra_pred_mode(H264Context *h, int mode){
1040 MpegEncContext * const s = &h->s;
1041 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1042 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1044 if(mode < 0 || mode > 6) {
1045 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1049 if(!(h->top_samples_available&0x8000)){
1052 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1057 if(!(h->left_samples_available&0x8000)){
1060 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1069 * gets the predicted intra4x4 prediction mode.
/**
 * Returns the predicted intra4x4 mode for block n: the minimum of the
 * left and top neighbours' cached modes, or DC_PRED when either
 * neighbour is unavailable (encoded as a negative cache value).
 * @param n 4x4 block index (scan8 order)
 */
1071 static inline int pred_intra_mode(H264Context *h, int n){
1072 const int index8= scan8[n];
1073 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1074 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1075 const int min= FFMIN(left, top);
1077 tprintf("mode:%d %d min:%d\n", left ,top, min);
// negative means a neighbour is missing -> fall back to DC prediction
1079 if(min<0) return DC_PRED;
/**
 * Copies the per-macroblock non-zero coefficient counts from the local
 * decode cache back into the frame-wide non_zero_count[] array, in the
 * layout expected by the neighbour-cache fill of the next macroblocks.
 */
1083 static inline void write_back_non_zero_count(H264Context *h){
1084 MpegEncContext * const s = &h->s;
1085 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// right column and bottom row of the luma cache (needed as top/left
// neighbours by subsequent macroblocks)
1087 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1088 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1089 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1090 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1091 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1092 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1093 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// chroma blocks
1095 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1096 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1097 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1099 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1100 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1101 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1104 // store all luma nnzs, for deblocking
// v packs one nonzero-flag bit per 4x4 luma block into a 16-bit mask
1107 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1108 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1113 * gets the predicted number of non zero coefficients.
1114 * @param n block index
/**
 * Predicts the number of non-zero coefficients for block n from the
 * left and top neighbours' cached counts (used by CAVLC coeff_token).
 * @param n block index
 * NOTE(review): the line computing i from left/top is elided in this
 * listing; the visible (i+1)>>1 rounds the left+top average.
 */
1116 static inline int pred_non_zero_count(H264Context *h, int n){
1117 const int index8= scan8[n];
1118 const int left= h->non_zero_count_cache[index8 - 1];
1119 const int top = h->non_zero_count_cache[index8 - 8];
1122 if(i<64) i= (i+1)>>1;
1124 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "C" (top-right, falling back to top-left) motion vector and
 * reference used by median MV prediction, handling the MBAFF cases where
 * the neighbour macroblock has a different field/frame coding mode and
 * its MVs/refs must be scaled (SET_DIAG_MV).
 * @param C        receives a pointer to the chosen neighbour MV
 * @param i        scan8 index of the current block
 * @param part_width partition width in 4x4 units
 * @return the reference index of the chosen neighbour
 */
1129 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1130 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1132 /* there is no consistent mapping of mvs to neighboring locations that will
1133 * make mbaff happy, so we can't move all this logic to fill_caches */
1135 MpegEncContext *s = &h->s;
1136 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// scratch cache slot scan8[0]-2 holds the synthesised MV
1138 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1139 *C = h->mv_cache[list][scan8[0]-2];
1142 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1143 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1144 if(IS_INTERLACED(mb_types[topright_xy])){
1145 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1146 const int x4 = X4, y4 = Y4;\
1147 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1148 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1149 return LIST_NOT_USED;\
1150 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1151 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1152 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1153 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
// frame MB reading a field neighbour: double the y MV, halve the ref
1155 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1158 if(topright_ref == PART_NOT_AVAILABLE
1159 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1160 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1162 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1163 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1166 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1167 && i >= scan8[0]+8){
1168 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1169 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// non-MBAFF path: use top-right if available, else fall back to top-left
1175 if(topright_ref != PART_NOT_AVAILABLE){
1176 *C= h->mv_cache[list][ i - 8 + part_width ];
1177 return topright_ref;
1179 tprintf("topright MV not available\n");
1181 *C= h->mv_cache[list][ i - 8 - 1 ];
1182 return h->ref_cache[list][ i - 8 - 1 ];
1187 * gets the predicted MV.
1188 * @param n the block index
1189 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1190 * @param mx the x component of the predicted motion vector
1191 * @param my the y component of the predicted motion vector
/**
 * Median motion vector prediction: predicts (mx,my) for block n from the
 * left (A), top (B) and diagonal (C) neighbours. With exactly one
 * neighbour matching the target reference, that neighbour's MV is used
 * directly; otherwise the componentwise median of A/B/C is taken.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
1193 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1194 const int index8= scan8[n];
1195 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1196 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1197 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1198 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1200 int diagonal_ref, match_count;
1202 assert(part_width==1 || part_width==2 || part_width==4);
1212 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1213 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1214 tprintf("pred_motion match_count=%d\n", match_count);
1215 if(match_count > 1){ //most common
1216 *mx= mid_pred(A[0], B[0], C[0]);
1217 *my= mid_pred(A[1], B[1], C[1]);
1218 }else if(match_count==1){
1222 }else if(top_ref==ref){
// no matching neighbour: left-only fallback, else median
1230 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1234 *mx= mid_pred(A[0], B[0], C[0]);
1235 *my= mid_pred(A[1], B[1], C[1]);
1239 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1243 * gets the directionally predicted 16x8 MV.
1244 * @param n the block index
1245 * @param mx the x component of the predicted motion vector
1246 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 16x8 partitions: the top partition
 * prefers the top neighbour's MV, the bottom partition prefers the left
 * neighbour's MV (when the neighbour uses the same reference); otherwise
 * falls back to the generic median predictor.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
1248 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1250 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1251 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1253 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1261 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1262 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1264 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1266 if(left_ref == ref){
// no directional match -> median prediction over the full 16-wide part
1274 pred_motion(h, n, 4, list, ref, mx, my);
1278 * gets the directionally predicted 8x16 MV.
1279 * @param n the block index
1280 * @param mx the x component of the predicted motion vector
1281 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 8x16 partitions: the left partition
 * prefers the left neighbour's MV, the right partition prefers the
 * diagonal (top-right) neighbour's MV (when the reference matches);
 * otherwise falls back to the generic median predictor.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
1283 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1285 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1286 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1288 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1290 if(left_ref == ref){
1299 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1301 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1303 if(diagonal_ref == ref){
1311 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P-skip macroblocks: the MV is forced to (0,0) when
 * either neighbour is unavailable or a neighbour uses ref 0 with a zero
 * MV; otherwise the normal 16x16 median prediction with ref 0 is used.
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
1314 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1315 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1316 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1318 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1320 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1321 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1322 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1328 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct-mode distance scale factors for every
 * list-0 reference: dist_scale_factor[i] = clip((tb*tx+32)>>6) with
 * tb/td the clipped POC distances, per the temporal direct derivation.
 * Also mirrors the factors into the field-pair table for MBAFF.
 */
1333 static inline void direct_dist_scale_factor(H264Context * const h){
1334 const int poc = h->s.current_picture_ptr->poc;
1335 const int poc1 = h->ref_list[1][0].poc;
1337 for(i=0; i<h->ref_count[0]; i++){
1338 int poc0 = h->ref_list[0][i].poc;
1339 int td = clip(poc1 - poc0, -128, 127);
1340 if(td == 0 /* FIXME || pic0 is a long-term ref */){
// equal POCs (or long-term, unhandled): unity scale (256 == 1.0 in Q8)
1341 h->dist_scale_factor[i] = 256;
1343 int tb = clip(poc - poc0, -128, 127);
1344 int tx = (16384 + (ABS(td) >> 1)) / td;
1345 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
// duplicate each entry for the two fields of a field pair (MBAFF)
1349 for(i=0; i<h->ref_count[0]; i++){
1350 h->dist_scale_factor_field[2*i] =
1351 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's reference counts/POCs and, for temporal
 * direct mode in B slices, builds map_col_to_list0[]: for each reference
 * of the co-located picture (ref_list[1][0]) find the list-0 index with
 * the same POC (0 when no match, as a stand-in for missing frames).
 * The field variant of the map is derived by doubling the indices.
 */
1356 static inline void direct_ref_list_init(H264Context * const h){
1357 MpegEncContext * const s = &h->s;
1358 Picture * const ref1 = &h->ref_list[1][0];
1359 Picture * const cur = s->current_picture_ptr;
1361 if(cur->pict_type == I_TYPE)
1362 cur->ref_count[0] = 0;
1363 if(cur->pict_type != B_TYPE)
1364 cur->ref_count[1] = 0;
1365 for(list=0; list<2; list++){
1366 cur->ref_count[list] = h->ref_count[list];
1367 for(j=0; j<h->ref_count[list]; j++)
1368 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// the colocated map is only needed for temporal direct prediction
1370 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1372 for(list=0; list<2; list++){
1373 for(i=0; i<ref1->ref_count[list]; i++){
1374 const int poc = ref1->ref_poc[list][i];
1375 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1376 for(j=0; j<h->ref_count[list]; j++)
1377 if(h->ref_list[list][j].poc == poc){
1378 h->map_col_to_list0[list][i] = j;
// field map: each frame index j expands to field indices 2j and 2j+1
1384 for(list=0; list<2; list++){
1385 for(i=0; i<ref1->ref_count[list]; i++){
1386 j = h->map_col_to_list0[list][i];
1387 h->map_col_to_list0_field[list][2*i] = 2*j;
1388 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion for B-slice direct-mode macroblocks (or direct 8x8
 * sub-blocks when is_b8x8): fills mv_cache/ref_cache for both lists and
 * adjusts *mb_type / sub_mb_type. Two derivations are implemented:
 *  - spatial direct: refs = min of the spatial neighbours, MVs from
 *    median prediction, zeroed where the co-located block is static;
 *  - temporal direct: list-0 MV scaled from the co-located picture's MV
 *    by dist_scale_factor, list-1 MV = l0 MV - colocated MV, with extra
 *    frame<->field scaling paths for MBAFF.
 * NOTE(review): many interior lines are elided in this listing; the
 * comments below describe only the visible structure.
 */
1393 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1394 MpegEncContext * const s = &h->s;
1395 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1396 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1397 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1398 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
// co-located picture's MVs and reference indices, for both its lists
1399 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1400 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1401 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1402 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1403 const int is_b8x8 = IS_8X8(*mb_type);
1407 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// pick partitioning: follow the co-located MB's granularity
1408 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1409 /* FIXME save sub mb types from previous frames (or derive from MVs)
1410 * so we know exactly what block size to use */
1411 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1412 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1413 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1414 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1415 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1417 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1418 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1421 *mb_type |= MB_TYPE_DIRECT2;
1423 *mb_type |= MB_TYPE_INTERLACED;
1425 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1427 if(h->direct_spatial_mv_pred){
1432 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1434 /* ref = min(neighbors) */
1435 for(list=0; list<2; list++){
1436 int refa = h->ref_cache[list][scan8[0] - 1];
1437 int refb = h->ref_cache[list][scan8[0] - 8];
1438 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1440 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1442 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1444 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// no valid neighbour ref in either list: both refs 0, both MVs zero
1450 if(ref[0] < 0 && ref[1] < 0){
1451 ref[0] = ref[1] = 0;
1452 mv[0][0] = mv[0][1] =
1453 mv[1][0] = mv[1][1] = 0;
1455 for(list=0; list<2; list++){
1457 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1459 mv[list][0] = mv[list][1] = 0;
// drop the prediction direction whose ref is invalid
1464 *mb_type &= ~MB_TYPE_P0L1;
1465 sub_mb_type &= ~MB_TYPE_P0L1;
1466 }else if(ref[0] < 0){
1467 *mb_type &= ~MB_TYPE_P0L0;
1468 sub_mb_type &= ~MB_TYPE_P0L0;
1471 if(IS_16X16(*mb_type)){
1472 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1473 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
// co-located block static (|mv| <= 1 with ref 0): zero the direct MVs
// (x264_build check works around an old x264 deviation)
1474 if(!IS_INTRA(mb_type_col)
1475 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1476 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1477 && (h->x264_build>33 || !h->x264_build)))){
1479 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1483 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1487 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1488 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
// 8x8 spatial direct: per-8x8 fill, skipping non-direct sub-blocks
1491 for(i8=0; i8<4; i8++){
1492 const int x8 = i8&1;
1493 const int y8 = i8>>1;
1495 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1497 h->sub_mb_type[i8] = sub_mb_type;
1499 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1500 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1501 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1502 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1505 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1506 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1507 && (h->x264_build>33 || !h->x264_build)))){
1508 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1509 if(IS_SUB_8X8(sub_mb_type)){
1510 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1511 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1513 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1515 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1518 for(i4=0; i4<4; i4++){
1519 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1520 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1522 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1524 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1530 }else{ /* direct temporal mv pred */
1531 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1532 const int *dist_scale_factor = h->dist_scale_factor;
// field macroblocks use the field-pair variants of the tables
1535 if(IS_INTERLACED(*mb_type)){
1536 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1537 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1538 dist_scale_factor = h->dist_scale_factor_field;
// current and co-located MB differ in frame/field coding: rescale
1540 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1541 /* FIXME assumes direct_8x8_inference == 1 */
1542 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1543 int mb_types_col[2];
1546 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1547 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1548 | (*mb_type & MB_TYPE_INTERLACED);
1549 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1551 if(IS_INTERLACED(*mb_type)){
1552 /* frame to field scaling */
1553 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1554 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
// rebase the colocated pointers to the top of the MB pair
1556 l1ref0 -= 2*h->b8_stride;
1557 l1ref1 -= 2*h->b8_stride;
1558 l1mv0 -= 4*h->b_stride;
1559 l1mv1 -= 4*h->b_stride;
1563 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1564 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1566 *mb_type |= MB_TYPE_16x8;
1568 *mb_type |= MB_TYPE_8x8;
1570 /* field to frame scaling */
1571 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1572 * but in MBAFF, top and bottom POC are equal */
1573 int dy = (s->mb_y&1) ? 1 : 2;
1575 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1576 l1ref0 += dy*h->b8_stride;
1577 l1ref1 += dy*h->b8_stride;
1578 l1mv0 += 2*dy*h->b_stride;
1579 l1mv1 += 2*dy*h->b_stride;
1582 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1584 *mb_type |= MB_TYPE_16x16;
1586 *mb_type |= MB_TYPE_8x8;
1589 for(i8=0; i8<4; i8++){
1590 const int x8 = i8&1;
1591 const int y8 = i8>>1;
1593 const int16_t (*l1mv)[2]= l1mv0;
1595 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1597 h->sub_mb_type[i8] = sub_mb_type;
// temporal direct always predicts from list-1 ref 0
1599 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1600 if(IS_INTRA(mb_types_col[y8])){
1601 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1602 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1603 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1607 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1609 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1611 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1614 scale = dist_scale_factor[ref0];
1615 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1618 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1619 int my_col = (mv_col[1]<<y_shift)/2;
// scaled MV in Q8: mv = (scale * mv_col + 128) >> 8
1620 int mx = (scale * mv_col[0] + 128) >> 8;
1621 int my = (scale * my_col + 128) >> 8;
1622 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1623 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1630 /* one-to-one mv scaling */
1632 if(IS_16X16(*mb_type)){
1633 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1634 if(IS_INTRA(mb_type_col)){
1635 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1636 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1637 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1639 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1640 : map_col_to_list0[1][l1ref1[0]];
1641 const int scale = dist_scale_factor[ref0];
1642 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1644 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1645 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1646 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1647 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1648 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1651 for(i8=0; i8<4; i8++){
1652 const int x8 = i8&1;
1653 const int y8 = i8>>1;
1655 const int16_t (*l1mv)[2]= l1mv0;
1657 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1659 h->sub_mb_type[i8] = sub_mb_type;
1660 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1661 if(IS_INTRA(mb_type_col)){
1662 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1664 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1668 ref0 = l1ref0[x8 + y8*h->b8_stride];
1670 ref0 = map_col_to_list0[0][ref0];
1672 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1675 scale = dist_scale_factor[ref0];
1677 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1678 if(IS_SUB_8X8(sub_mb_type)){
1679 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1680 int mx = (scale * mv_col[0] + 128) >> 8;
1681 int my = (scale * mv_col[1] + 128) >> 8;
1682 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1683 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1685 for(i4=0; i4<4; i4++){
1686 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1687 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1688 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1689 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1690 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1691 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data (MVs, MVDs when CABAC, reference
 * indices, and the B-slice direct flags) from the decode caches back
 * into the frame-wide arrays.
 * @param mb_type the macroblock type, used to skip unused lists
 */
1698 static inline void write_back_motion(H264Context *h, int mb_type){
1699 MpegEncContext * const s = &h->s;
1700 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1701 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1704 if(!USES_LIST(mb_type, 0))
1705 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1707 for(list=0; list<2; list++){
1709 if(!USES_LIST(mb_type, list))
// copy MVs two at a time (64 bits covers two int16 pairs)
1713 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1714 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1716 if( h->pps.cabac ) {
1718 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1719 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1724 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1725 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1726 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1727 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1728 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// direct flags are only needed by the CABAC context of B slices
1732 if(h->slice_type == B_TYPE && h->pps.cabac){
1733 if(IS_8X8(mb_type)){
1734 uint8_t *direct_table = &h->direct_table[b8_xy];
1735 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1736 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1737 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1743 * Decodes a network abstraction layer unit.
1744 * @param consumed is the number of bytes used as input
1745 * @param length is the length of the array
1746 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1747 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a network abstraction layer unit: parses the NAL header byte
 * (ref idc / unit type), then strips the 00 00 03 emulation-prevention
 * bytes from the RBSP. When no escape bytes are present the input is
 * returned directly; otherwise the unescaped copy lives in rbsp_buffer.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes
 * @returns decoded bytes, might be src+1 if no escapes
 */
1749 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1753 // src[0]&0x80; //forbidden bit
1754 h->nal_ref_idc= src[0]>>5;
1755 h->nal_unit_type= src[0]&0x1F;
1759 for(i=0; i<length; i++)
1760 printf("%2X ", src[i]);
// scan for the first 00 00 {00,01,02,03} sequence
1762 for(i=0; i+1<length; i+=2){
1763 if(src[i]) continue;
1764 if(i>0 && src[i-1]==0) i--;
1765 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1767 /* startcode, so we must be past the end */
1774 if(i>=length-1){ //no escaped 0
1775 *dst_length= length;
1776 *consumed= length+1; //+1 for the header
1780 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1781 dst= h->rbsp_buffer;
1783 //printf("decoding esc\n");
1786 //remove escapes (very rare 1:2^22)
1787 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1788 if(src[si+2]==3){ //escape
1793 }else //next start code
1797 dst[di++]= src[si++];
1801 *consumed= si + 1;//+1 for the header
1802 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1808 * @param src the data which should be escaped
1809 * @param dst the target buffer, dst+1 == src is allowed as a special case
1810 * @param length the length of the src data
1811 * @param dst_length the length of the dst array
1812 * @returns length of escaped data in bytes or -1 if an error occured
/**
 * Escapes RBSP data into a NAL unit: writes the NAL header byte, counts
 * the 00 00 {00..03} sequences needing emulation prevention, and when
 * any exist inserts a 0x03 escape byte via a temp buffer.
 * @param src the data which should be escaped
 * @param dst the target buffer, dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @returns length of escaped data in bytes or -1 if an error occured
 */
1814 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1815 int i, escape_count, si, di;
1819 assert(dst_length>0);
// NAL header: 2-bit ref idc + 5-bit unit type
1821 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1823 if(length==0) return 1;
// first pass: count sequences that need an emulation-prevention byte
1826 for(i=0; i<length; i+=2){
1827 if(src[i]) continue;
1828 if(i>0 && src[i-1]==0)
1830 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1836 if(escape_count==0){
1838 memcpy(dst+1, src, length);
1842 if(length + escape_count + 1> dst_length)
1845 //this should be damn rare (hopefully)
1847 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1848 temp= h->rbsp_buffer;
1849 //printf("encoding esc\n");
1854 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1855 temp[di++]= 0; si++;
1856 temp[di++]= 0; si++;
1858 temp[di++]= src[si++];
1861 temp[di++]= src[si++];
1863 memcpy(dst+1, temp, length+escape_count);
1865 assert(di == length+escape_count);
1871 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
/**
 * Writes the rbsp_trailing_bits: pads the bitstream to a byte boundary
 * with zero bits (the stop bit itself is written on a line elided from
 * this listing; pattern is 1,10,100,1000,... -- inverse of MPEG-4).
 */
1873 static void encode_rbsp_trailing(PutBitContext *pb){
1876 length= (-put_bits_count(pb))&7;
1877 if(length) put_bits(pb, length, 0);
1882 * identifies the exact end of the bitstream
1883 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream by locating the
 * rbsp_stop_one_bit in the final byte.
 * @return the length of the trailing, or 0 if damaged
 * NOTE(review): the bit-scanning body is elided from this listing.
 */
1885 static int decode_rbsp_trailing(uint8_t *src){
1889 tprintf("rbsp trailing %X\n", v);
1899 * idct tranforms the 16 dc values and dequantize them.
1900 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantisation of the 16 luma DC
 * values (Intra16x16). The DC values are scattered through block[] at
 * the x_offset/y_offset positions; the result is written back in place
 * with (x*qmul + 128) >> 8 scaling.
 * @param qp quantization parameter (unused here; scaling uses qmul)
 */
1902 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1905 int temp[16]; //FIXME check if this is a good idea
1906 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1907 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1909 //memset(block, 64, 2*256);
// horizontal butterflies into temp[]
1912 const int offset= y_offset[i];
1913 const int z0= block[offset+stride*0] + block[offset+stride*4];
1914 const int z1= block[offset+stride*0] - block[offset+stride*4];
1915 const int z2= block[offset+stride*1] - block[offset+stride*5];
1916 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical butterflies + dequant back into block[]
1925 const int offset= x_offset[i];
1926 const int z0= temp[4*0+i] + temp[4*2+i];
1927 const int z1= temp[4*0+i] - temp[4*2+i];
1928 const int z2= temp[4*1+i] - temp[4*3+i];
1929 const int z3= temp[4*1+i] + temp[4*3+i];
1931 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1932 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1933 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1934 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1940 * dct tranforms the 16 dc values.
1941 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder
 * side of h264_luma_dc_dequant_idct_c); result is halved (>>1) and
 * written back in place at the scattered DC positions.
 */
1943 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1944 // const int qmul= dequant_coeff[qp][0];
1946 int temp[16]; //FIXME check if this is a good idea
1947 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1948 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1951 const int offset= y_offset[i];
1952 const int z0= block[offset+stride*0] + block[offset+stride*4];
1953 const int z1= block[offset+stride*0] - block[offset+stride*4];
1954 const int z2= block[offset+stride*1] - block[offset+stride*5];
1955 const int z3= block[offset+stride*1] + block[offset+stride*5];
1964 const int offset= x_offset[i];
1965 const int z0= temp[4*0+i] + temp[4*2+i];
1966 const int z1= temp[4*0+i] - temp[4*2+i];
1967 const int z2= temp[4*1+i] - temp[4*3+i];
1968 const int z3= temp[4*1+i] + temp[4*3+i];
1970 block[stride*0 +offset]= (z0 + z3)>>1;
1971 block[stride*2 +offset]= (z1 + z2)>>1;
1972 block[stride*8 +offset]= (z1 - z2)>>1;
1973 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 Hadamard inverse transform + dequantisation of the 4 chroma DC
 * values, written back in place with ((x)*qmul) >> 7 scaling.
 * @param qp quantization parameter (unused here; scaling uses qmul)
 */
1981 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1982 const int stride= 16*2;
1983 const int xStride= 16;
1986 a= block[stride*0 + xStride*0];
1987 b= block[stride*0 + xStride*1];
1988 c= block[stride*1 + xStride*0];
1989 d= block[stride*1 + xStride*1];
// 2x2 butterfly (e/f computed on lines elided from this listing)
1996 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1997 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1998 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1999 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values
 * (encoder side of chroma_dc_dequant_idct_c), in place, unscaled.
 */
2003 static void chroma_dc_dct_c(DCTELEM *block){
2004 const int stride= 16*2;
2005 const int xStride= 16;
2008 a= block[stride*0 + xStride*0];
2009 b= block[stride*0 + xStride*1];
2010 c= block[stride*1 + xStride*0];
2011 d= block[stride*1 + xStride*1];
// 2x2 butterfly (e/f computed on lines elided from this listing)
2018 block[stride*0 + xStride*0]= (a+c);
2019 block[stride*0 + xStride*1]= (e+b);
2020 block[stride*1 + xStride*0]= (a-c);
2021 block[stride*1 + xStride*1]= (e-b);
2026 * gets the chroma qp.
/**
 * Maps the luma qscale (plus the PPS chroma offset) to the chroma QP
 * via the chroma_qp[] table, clipping the index to [0,51].
 */
2028 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2030 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Computes the 4x4 residual src1-src2 and applies the H.264 forward
 * integer transform (row pass then column pass) into block[].
 */
2035 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2037 //FIXME try int temp instead of block
// row pass: butterflies on the pixel differences of each row
2040 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2041 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2042 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2043 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2044 const int z0= d0 + d3;
2045 const int z3= d0 - d3;
2046 const int z1= d1 + d2;
2047 const int z2= d1 - d2;
2049 block[0 + 4*i]= z0 + z1;
2050 block[1 + 4*i]= 2*z3 + z2;
2051 block[2 + 4*i]= z0 - z1;
2052 block[3 + 4*i]= z3 - 2*z2;
// column pass: same butterflies down each column of block[]
2056 const int z0= block[0*4 + i] + block[3*4 + i];
2057 const int z3= block[0*4 + i] - block[3*4 + i];
2058 const int z1= block[1*4 + i] + block[2*4 + i];
2059 const int z2= block[1*4 + i] - block[2*4 + i];
2061 block[0*4 + i]= z0 + z1;
2062 block[1*4 + i]= 2*z3 + z2;
2063 block[2*4 + i]= z0 - z1;
2064 block[3*4 + i]= z3 - 2*z2;
2069 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2070 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a transformed block in scan order: dead-zone thresholding
 * with an intra bias of 1/3 and inter bias of 1/6 of the quant step.
 * The DC coefficient optionally gets special handling (seperate_dc):
 * either a coarser (QUANT_SHIFT-2) luma-DC path or a finer
 * (QUANT_SHIFT+1) path. Coefficients inside the dead zone are zeroed.
 * @return index of the last non-zero coefficient
 */
2071 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2073 const int * const quant_table= quant_coeff[qscale];
2074 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2 implement the dead zone via one unsigned compare
2075 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2076 const unsigned int threshold2= (threshold1<<1);
2082 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2083 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2084 const unsigned int dc_threshold2= (dc_threshold1<<1);
// luma DC uses its own quant table row (qscale+18)
2086 int level= block[0]*quant_coeff[qscale+18][0];
2087 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2089 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2092 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2095 // last_non_zero = i;
2100 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2101 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2102 const unsigned int dc_threshold2= (dc_threshold1<<1);
2104 int level= block[0]*quant_table[0];
2105 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2107 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2110 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2113 // last_non_zero = i;
// AC coefficients, visited in scantable order
2126 const int j= scantable[i];
2127 int level= block[j]*quant_table[j];
2129 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2130 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2131 if(((unsigned)(level+threshold1))>threshold2){
2133 level= (bias + level)>>QUANT_SHIFT;
2136 level= (bias - level)>>QUANT_SHIFT;
2145 return last_non_zero;
/**
 * Intra 4x4 vertical prediction: replicates the 4 pixels above the
 * block (read as one 32-bit word) into all 4 rows.
 */
2148 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2149 const uint32_t a= ((uint32_t*)(src-stride))[0];
2150 ((uint32_t*)(src+0*stride))[0]= a;
2151 ((uint32_t*)(src+1*stride))[0]= a;
2152 ((uint32_t*)(src+2*stride))[0]= a;
2153 ((uint32_t*)(src+3*stride))[0]= a;
/**
 * Intra 4x4 horizontal prediction: fills each row with the pixel to its
 * left (0x01010101 smears one byte across a 32-bit word).
 */
2156 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2157 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2158 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2159 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2160 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/**
 * Intra 4x4 DC prediction: fills the block with the rounded average of
 * the 4 top and 4 left neighbour pixels.
 */
2163 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2164 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2165 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2167 ((uint32_t*)(src+0*stride))[0]=
2168 ((uint32_t*)(src+1*stride))[0]=
2169 ((uint32_t*)(src+2*stride))[0]=
2170 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
// Intra 4x4 DC prediction using only the left edge (top neighbours unavailable):
// average of 4 left samples with +2 rounding, >>2.
2173 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
2174 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
2176 ((uint32_t*)(src+0*stride))[0]=
2177 ((uint32_t*)(src+1*stride))[0]=
2178 ((uint32_t*)(src+2*stride))[0]=
2179 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
// Intra 4x4 DC prediction using only the top edge (left neighbours unavailable).
2182 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
2183 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
2185 ((uint32_t*)(src+0*stride))[0]=
2186 ((uint32_t*)(src+1*stride))[0]=
2187 ((uint32_t*)(src+2*stride))[0]=
2188 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
// Intra 4x4 DC prediction when no neighbours are available: fill with the
// mid-range value 128 (0x80 in every byte).
2191 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
2192 ((uint32_t*)(src+0*stride))[0]=
2193 ((uint32_t*)(src+1*stride))[0]=
2194 ((uint32_t*)(src+2*stride))[0]=
2195 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros for the directional 4x4 predictors below: each one declares
 * local const ints holding the neighbouring edge samples.
 * t4..t7 = top-right edge, l0..l3 = left edge, t0..t3 = top edge. */
2199 #define LOAD_TOP_RIGHT_EDGE\
2200 const int t4= topright[0];\
2201 const int t5= topright[1];\
2202 const int t6= topright[2];\
2203 const int t7= topright[3];\
2205 #define LOAD_LEFT_EDGE\
2206 const int l0= src[-1+0*stride];\
2207 const int l1= src[-1+1*stride];\
2208 const int l2= src[-1+2*stride];\
2209 const int l3= src[-1+3*stride];\
2211 #define LOAD_TOP_EDGE\
2212 const int t0= src[ 0-1*stride];\
2213 const int t1= src[ 1-1*stride];\
2214 const int t2= src[ 2-1*stride];\
2215 const int t3= src[ 3-1*stride];\
// Intra 4x4 diagonal-down-right prediction: 3-tap [1 2 1] smoothed values
// propagated along the down-right diagonals. Edge loads (LOAD_TOP_EDGE /
// LOAD_LEFT_EDGE) occur in lines not shown here — presumably between the
// lt load and the first store; confirm against the full file.
2217 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2218 const int lt= src[-1-1*stride];
2222 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2224 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2227 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2231 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2234 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2236 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2237 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
// Intra 4x4 diagonal-down-left prediction: [1 2 1] filtered top / top-right
// samples propagated along down-left diagonals; the last sample uses (t6+3*t7).
2240 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2245 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2247 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2250 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2254 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2257 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2259 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2260 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
// Intra 4x4 vertical-right prediction: mixes 2-tap half-pel averages (>>1)
// for the vertical-ish samples with [1 2 1] filtered values (>>2) elsewhere.
2263 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2264 const int lt= src[-1-1*stride];
// l3 is loaded by the edge macro but unused in this mode; the dummy read
// silences the unused-variable warning.
2267 const __attribute__((unused)) int unu= l3;
2270 src[1+2*stride]=(lt + t0 + 1)>>1;
2272 src[2+2*stride]=(t0 + t1 + 1)>>1;
2274 src[3+2*stride]=(t1 + t2 + 1)>>1;
2275 src[3+0*stride]=(t2 + t3 + 1)>>1;
2277 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2279 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2281 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2282 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2283 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2284 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
// Intra 4x4 vertical-left prediction: alternates 2-tap averages and
// [1 2 1] filtered values of the top / top-right edge.
2287 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
// t7 comes from the top-right edge macro but is not needed in this mode.
2290 const __attribute__((unused)) int unu= t7;
2292 src[0+0*stride]=(t0 + t1 + 1)>>1;
2294 src[0+2*stride]=(t1 + t2 + 1)>>1;
2296 src[1+2*stride]=(t2 + t3 + 1)>>1;
2298 src[2+2*stride]=(t3 + t4+ 1)>>1;
2299 src[3+2*stride]=(t4 + t5+ 1)>>1;
2300 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2302 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2304 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2306 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2307 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
// Intra 4x4 horizontal-up prediction from the left edge only; lower-right
// samples (not visible here) are filled with l3 per the spec.
2310 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2313 src[0+0*stride]=(l0 + l1 + 1)>>1;
2314 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2316 src[0+1*stride]=(l1 + l2 + 1)>>1;
2318 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2320 src[0+2*stride]=(l2 + l3 + 1)>>1;
// (l2 + 2*l3 + l3) == (l2 + 3*l3): spec's zHU==5 case written as a [1 2 1] tap.
2322 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
// Intra 4x4 horizontal-down prediction: mixes 2-tap averages along the
// left edge with [1 2 1] filtered corner/top samples.
2331 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2332 const int lt= src[-1-1*stride];
// t3 is loaded by the top-edge macro but unused in this mode.
2335 const __attribute__((unused)) int unu= t3;
2338 src[2+1*stride]=(lt + l0 + 1)>>1;
2340 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2341 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2342 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2344 src[2+2*stride]=(l0 + l1 + 1)>>1;
2346 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2348 src[2+3*stride]=(l1 + l2+ 1)>>1;
2350 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2351 src[0+3*stride]=(l2 + l3 + 1)>>1;
2352 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
// Intra 16x16 vertical prediction: copy the 16-pixel row above the macroblock
// into all 16 rows, 4 bytes at a time.
2355 static void pred16x16_vertical_c(uint8_t *src, int stride){
2357 const uint32_t a= ((uint32_t*)(src-stride))[0];
2358 const uint32_t b= ((uint32_t*)(src-stride))[1];
2359 const uint32_t c= ((uint32_t*)(src-stride))[2];
2360 const uint32_t d= ((uint32_t*)(src-stride))[3];
2362 for(i=0; i<16; i++){
2363 ((uint32_t*)(src+i*stride))[0]= a;
2364 ((uint32_t*)(src+i*stride))[1]= b;
2365 ((uint32_t*)(src+i*stride))[2]= c;
2366 ((uint32_t*)(src+i*stride))[3]= d;
// Intra 16x16 horizontal prediction: each row is filled with its left neighbour.
2370 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2373 for(i=0; i<16; i++){
2374 ((uint32_t*)(src+i*stride))[0]=
2375 ((uint32_t*)(src+i*stride))[1]=
2376 ((uint32_t*)(src+i*stride))[2]=
2377 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
// Intra 16x16 DC prediction: average of the 16 left + 16 top neighbours
// (32 samples, +16 rounding, >>5). The top-edge accumulation loop is in
// lines not shown here — confirm against the full file.
2381 static void pred16x16_dc_c(uint8_t *src, int stride){
2385 dc+= src[-1+i*stride];
2392 dc= 0x01010101*((dc + 16)>>5);
2394 for(i=0; i<16; i++){
2395 ((uint32_t*)(src+i*stride))[0]=
2396 ((uint32_t*)(src+i*stride))[1]=
2397 ((uint32_t*)(src+i*stride))[2]=
2398 ((uint32_t*)(src+i*stride))[3]= dc;
// Intra 16x16 DC prediction from the left edge only (top unavailable):
// 16 samples, +8 rounding, >>4.
2402 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2406 dc+= src[-1+i*stride];
2409 dc= 0x01010101*((dc + 8)>>4);
2411 for(i=0; i<16; i++){
2412 ((uint32_t*)(src+i*stride))[0]=
2413 ((uint32_t*)(src+i*stride))[1]=
2414 ((uint32_t*)(src+i*stride))[2]=
2415 ((uint32_t*)(src+i*stride))[3]= dc;
// Intra 16x16 DC prediction from the top edge only (left unavailable);
// the top-edge accumulation loop is in lines not shown here.
2419 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2425 dc= 0x01010101*((dc + 8)>>4);
2427 for(i=0; i<16; i++){
2428 ((uint32_t*)(src+i*stride))[0]=
2429 ((uint32_t*)(src+i*stride))[1]=
2430 ((uint32_t*)(src+i*stride))[2]=
2431 ((uint32_t*)(src+i*stride))[3]= dc;
// Intra 16x16 DC prediction with no neighbours available: fill with 128.
2435 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2438 for(i=0; i<16; i++){
2439 ((uint32_t*)(src+i*stride))[0]=
2440 ((uint32_t*)(src+i*stride))[1]=
2441 ((uint32_t*)(src+i*stride))[2]=
2442 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
// Intra 16x16 plane prediction, shared between H.264 and SVQ3 (svq3 flag
// selects the alternative gradient scaling below). Fits a linear plane
// a + H*x + V*y through the top/left edge samples, then clips via the
// cropTbl lookup.
2446 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2449 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2450 const uint8_t * const src0 = src+7-stride;
2451 const uint8_t *src1 = src+8*stride-1;
2452 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
// H/V: weighted sums of horizontal/vertical edge differences (weights 1..8).
2453 int H = src0[1] - src0[-1];
2454 int V = src1[0] - src2[ 0];
2455 for(k=2; k<=8; ++k) {
2456 src1 += stride; src2 -= stride;
2457 H += k*(src0[k] - src0[-k]);
2458 V += k*(src1[0] - src2[ 0]);
// SVQ3 branch: different gradient scale and an H/V swap — presumably this
// block is guarded by if(svq3) in lines not shown; confirm against full file.
2461 H = ( 5*(H/4) ) / 16;
2462 V = ( 5*(V/4) ) / 16;
2464 /* required for 100% accuracy */
2465 i = H; H = V; V = i;
// Standard H.264 plane gradient scaling: (5*H+32)>>6 per the spec.
2467 H = ( 5*H+32 ) >> 6;
2468 V = ( 5*V+32 ) >> 6;
2471 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2472 for(j=16; j>0; --j) {
2475 for(i=-16; i<0; i+=4) {
// Unrolled x4: the >>5 (divide by 32) recovers pixel range; cm[] clips to 0..255.
2476 src[16+i] = cm[ (b ) >> 5 ];
2477 src[17+i] = cm[ (b+ H) >> 5 ];
2478 src[18+i] = cm[ (b+2*H) >> 5 ];
2479 src[19+i] = cm[ (b+3*H) >> 5 ];
// H.264 entry point for 16x16 plane prediction (svq3 compatibility off).
2486 static void pred16x16_plane_c(uint8_t *src, int stride){
2487 pred16x16_plane_compat_c(src, stride, 0);
// Chroma 8x8 vertical prediction: copy the 8-pixel row above into every row.
2490 static void pred8x8_vertical_c(uint8_t *src, int stride){
2492 const uint32_t a= ((uint32_t*)(src-stride))[0];
2493 const uint32_t b= ((uint32_t*)(src-stride))[1];
2496 ((uint32_t*)(src+i*stride))[0]= a;
2497 ((uint32_t*)(src+i*stride))[1]= b;
// Chroma 8x8 horizontal prediction: fill each row with its left neighbour.
2501 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2505 ((uint32_t*)(src+i*stride))[0]=
2506 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
// Chroma 8x8 DC prediction with no neighbours: fill with 128.
2510 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2514 ((uint32_t*)(src+i*stride))[0]=
2515 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
// Chroma 8x8 DC from the left edge only: separate DC for the top 4 rows (dc0,
// from left samples 0..3) and the bottom 4 rows (dc2, from samples 4..7).
2519 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2525 dc0+= src[-1+i*stride];
2526 dc2+= src[-1+(i+4)*stride];
2528 dc0= 0x01010101*((dc0 + 2)>>2);
2529 dc2= 0x01010101*((dc2 + 2)>>2);
2532 ((uint32_t*)(src+i*stride))[0]=
2533 ((uint32_t*)(src+i*stride))[1]= dc0;
2536 ((uint32_t*)(src+i*stride))[0]=
2537 ((uint32_t*)(src+i*stride))[1]= dc2;
// Chroma 8x8 DC from the top edge only: dc0 covers the left 4 columns,
// dc1 the right 4 columns; both halves of every row get their column DC.
2541 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2547 dc0+= src[i-stride];
2548 dc1+= src[4+i-stride];
2550 dc0= 0x01010101*((dc0 + 2)>>2);
2551 dc1= 0x01010101*((dc1 + 2)>>2);
2554 ((uint32_t*)(src+i*stride))[0]= dc0;
2555 ((uint32_t*)(src+i*stride))[1]= dc1;
2558 ((uint32_t*)(src+i*stride))[0]= dc0;
2559 ((uint32_t*)(src+i*stride))[1]= dc1;
// Chroma 8x8 DC prediction per the spec's four-quadrant rule:
// top-left quadrant averages left+top (dc0), top-right uses top only (dc1),
// bottom-left uses left only (dc2), bottom-right averages dc1+dc2 sums (dc3).
2564 static void pred8x8_dc_c(uint8_t *src, int stride){
2566 int dc0, dc1, dc2, dc3;
2570 dc0+= src[-1+i*stride] + src[i-stride];
2571 dc1+= src[4+i-stride];
2572 dc2+= src[-1+(i+4)*stride];
2574 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2575 dc0= 0x01010101*((dc0 + 4)>>3);
2576 dc1= 0x01010101*((dc1 + 2)>>2);
2577 dc2= 0x01010101*((dc2 + 2)>>2);
2580 ((uint32_t*)(src+i*stride))[0]= dc0;
2581 ((uint32_t*)(src+i*stride))[1]= dc1;
2584 ((uint32_t*)(src+i*stride))[0]= dc2;
2585 ((uint32_t*)(src+i*stride))[1]= dc3;
// Chroma 8x8 plane prediction: same linear-plane scheme as the 16x16 version
// but with 4-sample gradients and (17*x+16)>>5 scaling per the spec.
2589 static void pred8x8_plane_c(uint8_t *src, int stride){
2592 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2593 const uint8_t * const src0 = src+3-stride;
2594 const uint8_t *src1 = src+4*stride-1;
2595 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2596 int H = src0[1] - src0[-1];
2597 int V = src1[0] - src2[ 0];
2598 for(k=2; k<=4; ++k) {
2599 src1 += stride; src2 -= stride;
2600 H += k*(src0[k] - src0[-k]);
2601 V += k*(src1[0] - src2[ 0]);
2603 H = ( 17*H+16 ) >> 5;
2604 V = ( 17*V+16 ) >> 5;
2606 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2607 for(j=8; j>0; --j) {
// One fully-unrolled row; cm[] clips each (plane value)>>5 to 0..255.
2610 src[0] = cm[ (b ) >> 5 ];
2611 src[1] = cm[ (b+ H) >> 5 ];
2612 src[2] = cm[ (b+2*H) >> 5 ];
2613 src[3] = cm[ (b+3*H) >> 5 ];
2614 src[4] = cm[ (b+4*H) >> 5 ];
2615 src[5] = cm[ (b+5*H) >> 5 ];
2616 src[6] = cm[ (b+6*H) >> 5 ];
2617 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (high-profile) predictors: SRC addresses a
 * pixel relative to the block origin; the LOAD macros declare [1 2 1]-filtered
 * edge samples (l0..l7 left, t0..t15 top/top-right, lt top-left), falling back
 * to edge replication when a neighbour is unavailable. */
2622 #define SRC(x,y) src[(x)+(y)*stride]
2624 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2625 #define PREDICT_8x8_LOAD_LEFT \
2626 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2627 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2628 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2629 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2632 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2633 #define PREDICT_8x8_LOAD_TOP \
2634 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2635 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2636 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2637 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2638 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2641 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Top-right samples t8..t15 are plain ints (not const) so the no-topright
 * fallback can assign them all to the replicated corner pixel. */
2642 #define PREDICT_8x8_LOAD_TOPRIGHT \
2643 int t8, t9, t10, t11, t12, t13, t14, t15; \
2644 if(has_topright) { \
2645 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2646 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2647 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2649 #define PREDICT_8x8_LOAD_TOPLEFT \
2650 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill all 8 rows of the block with the 32-bit-splatted value v. */
2652 #define PREDICT_8x8_DC(v) \
2654 for( y = 0; y < 8; y++ ) { \
2655 ((uint32_t*)src)[0] = \
2656 ((uint32_t*)src)[1] = v; \
// 8x8 luma DC prediction with no neighbours: fill with 128 in every byte.
2660 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2662 PREDICT_8x8_DC(0x80808080);
// 8x8 luma DC prediction from the (filtered) left edge only.
2664 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2666 PREDICT_8x8_LOAD_LEFT;
2667 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction from the (filtered) top edge only.
2670 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2672 PREDICT_8x8_LOAD_TOP;
2673 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction: average of all 16 filtered left + top edge samples.
2676 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2678 PREDICT_8x8_LOAD_LEFT;
2679 PREDICT_8x8_LOAD_TOP;
2680 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2681 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
// 8x8 luma horizontal prediction: fill row y with filtered left sample l_y.
2684 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2686 PREDICT_8x8_LOAD_LEFT;
2687 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2688 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2689 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
// 8x8 luma vertical prediction: write filtered top samples into row 0
// (in lines not shown here), then copy row 0 into rows 1..7 as one 64-bit word.
2692 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2695 PREDICT_8x8_LOAD_TOP;
2704 for( y = 1; y < 8; y++ )
2705 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
// 8x8 luma diagonal-down-left prediction: each anti-diagonal gets one
// [1 2 1] filtered value from the top / top-right samples t0..t15.
2707 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2709 PREDICT_8x8_LOAD_TOP;
2710 PREDICT_8x8_LOAD_TOPRIGHT;
2711 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2712 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2713 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2714 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2715 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2716 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2717 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2718 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2719 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2720 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2721 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2722 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2723 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2724 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2725 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
// 8x8 luma diagonal-down-right prediction: each down-right diagonal gets one
// [1 2 1] filtered value from left (l*), corner (lt), and top (t*) samples.
2727 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2729 PREDICT_8x8_LOAD_TOP;
2730 PREDICT_8x8_LOAD_LEFT;
2731 PREDICT_8x8_LOAD_TOPLEFT;
2732 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2733 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2734 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2735 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2736 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2737 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2738 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2739 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2740 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2741 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2742 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2743 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2744 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2745 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2746 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
// 8x8 luma vertical-right prediction: even diagonals use 2-tap half-pel
// averages (>>1), odd diagonals use [1 2 1] filtered values (>>2).
2749 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2751 PREDICT_8x8_LOAD_TOP;
2752 PREDICT_8x8_LOAD_LEFT;
2753 PREDICT_8x8_LOAD_TOPLEFT;
2754 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2755 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2756 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2757 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2758 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2759 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2760 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2761 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2762 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2763 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2764 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2765 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2766 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2767 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2768 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2769 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2770 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2771 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2772 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2773 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2774 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2775 SRC(7,0)= (t6 + t7 + 1) >> 1;
// 8x8 luma horizontal-down prediction: alternating 2-tap averages (>>1)
// and [1 2 1] filtered values (>>2) along down-right half-diagonals.
2777 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2779 PREDICT_8x8_LOAD_TOP;
2780 PREDICT_8x8_LOAD_LEFT;
2781 PREDICT_8x8_LOAD_TOPLEFT;
2782 SRC(0,7)= (l6 + l7 + 1) >> 1;
2783 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2784 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2785 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2786 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2787 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2788 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2789 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2790 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2791 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2792 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2793 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2794 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2795 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2796 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2797 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2798 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2799 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2800 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2801 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2802 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2803 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
// 8x8 luma vertical-left prediction from filtered top / top-right samples:
// even rows use 2-tap averages, odd rows use [1 2 1] filtered values.
2805 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2807 PREDICT_8x8_LOAD_TOP;
2808 PREDICT_8x8_LOAD_TOPRIGHT;
2809 SRC(0,0)= (t0 + t1 + 1) >> 1;
2810 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2811 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2812 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2813 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2814 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2815 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2816 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2817 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2818 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2819 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2820 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2821 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2822 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2823 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2824 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2825 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2826 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2827 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2828 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2829 SRC(7,6)= (t10 + t11 + 1) >> 1;
2830 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
// 8x8 luma horizontal-up prediction from the filtered left edge; all samples
// past the edge (lower-right region) are replicated from l7 per the spec.
2832 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2834 PREDICT_8x8_LOAD_LEFT;
2835 SRC(0,0)= (l0 + l1 + 1) >> 1;
2836 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2837 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2838 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2839 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2840 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2841 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2842 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2843 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2844 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2845 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2846 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2847 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2848 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2849 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2850 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2851 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2852 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
// Scope the PREDICT_8x8 helper macros to the predictors above.
2854 #undef PREDICT_8x8_LOAD_LEFT
2855 #undef PREDICT_8x8_LOAD_TOP
2856 #undef PREDICT_8x8_LOAD_TOPLEFT
2857 #undef PREDICT_8x8_LOAD_TOPRIGHT
2858 #undef PREDICT_8x8_DC
// Motion compensation for one partition in one direction (list 0 or 1):
// quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
// edge emulation when the motion vector reaches outside the picture.
2864 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2865 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2866 int src_x_offset, int src_y_offset,
2867 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2868 MpegEncContext * const s = &h->s;
// mv_cache holds quarter-pel vectors; offsets arrive in full pels (*8 = 1/8-pel
// units shared by luma(1/4) and chroma(1/8)).
2869 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2870 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2871 const int luma_xy= (mx&3) + ((my&3)<<2);
2872 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2873 uint8_t * src_cb, * src_cr;
2874 int extra_width= h->emu_edge_width;
2875 int extra_height= h->emu_edge_height;
2877 const int full_mx= mx>>2;
2878 const int full_my= my>>2;
2879 const int pic_width = 16*s->mb_width;
// In MBAFF frames each field picture is half-height.
2880 const int pic_height = 16*s->mb_height >> MB_MBAFF;
// Sub-pel interpolation needs 2 extra pixels on each side; shrink the
// allowed margin accordingly.
2885 if(mx&7) extra_width -= 3;
2886 if(my&7) extra_height -= 3;
2888 if( full_mx < 0-extra_width
2889 || full_my < 0-extra_height
2890 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2891 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2892 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2893 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2897 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// Non-square partitions do a second call shifted by delta (pixels or rows).
2899 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2902 if(s->flags&CODEC_FLAG_GRAY) return;
2905 // chroma offset when predicting from a field of opposite parity
2906 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
// NOTE(review): emu is presumably declared/set by edge checks in lines not
// shown here; confirm against the full file.
2907 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2909 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2910 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2913 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2914 src_cb= s->edge_emu_buffer;
2916 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2919 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2920 src_cr= s->edge_emu_buffer;
2922 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
// Standard (non-weighted) MC for one partition: predict from list 0 with the
// put functions, then — if also list 1 — blend in the list-1 prediction with
// the avg functions (chroma_op/qpix_op are presumably switched to the avg
// variants between the two calls in lines not shown; confirm).
2925 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2926 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2927 int x_offset, int y_offset,
2928 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2929 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2930 int list0, int list1){
2931 MpegEncContext * const s = &h->s;
2932 qpel_mc_func *qpix_op= qpix_put;
2933 h264_chroma_mc_func chroma_op= chroma_put;
// x/y offsets are in chroma pixels; luma uses double (2*).
2935 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2936 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2937 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2938 x_offset += 8*s->mb_x;
2939 y_offset += 8*(s->mb_y >> MB_MBAFF);
2942 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2943 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2944 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2945 qpix_op, chroma_op);
2948 chroma_op= chroma_avg;
2952 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2953 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2954 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2955 qpix_op, chroma_op);
// Weighted-prediction MC for one partition. Bi-directional case: predict both
// lists into separate buffers and combine with biweight ops (implicit weights
// when use_weight==2, explicit otherwise). Uni-directional case: predict, then
// apply the single-list weight/offset in place.
2959 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2960 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2961 int x_offset, int y_offset,
2962 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2963 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2964 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2965 int list0, int list1){
2966 MpegEncContext * const s = &h->s;
// Offsets are in chroma pixels; luma uses double.
2968 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2969 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2970 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2971 x_offset += 8*s->mb_x;
2972 y_offset += 8*(s->mb_y >> MB_MBAFF);
2975 /* don't optimize for luma-only case, since B-frames usually
2976 * use implicit weights => chroma too. */
// List-1 prediction goes into the scratchpad: cb at 0, cr at +8,
// luma after 8 uv lines.
2977 uint8_t *tmp_cb = s->obmc_scratchpad;
2978 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2979 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2980 int refn0 = h->ref_cache[0][ scan8[n] ];
2981 int refn1 = h->ref_cache[1][ scan8[n] ];
2983 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2984 dest_y, dest_cb, dest_cr,
2985 x_offset, y_offset, qpix_put, chroma_put);
2986 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2987 tmp_y, tmp_cb, tmp_cr,
2988 x_offset, y_offset, qpix_put, chroma_put);
2990 if(h->use_weight == 2){
// Implicit weighting: table weight for list0, complement (sum 64) for list1,
// log2 denom 5, offset 0.
2991 int weight0 = h->implicit_weight[refn0][refn1];
2992 int weight1 = 64 - weight0;
2993 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2994 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2995 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2997 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2998 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2999 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
3000 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3001 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3002 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3003 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3004 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3005 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-directional branch: whichever of list0/list1 is set.
3008 int list = list1 ? 1 : 0;
3009 int refn = h->ref_cache[list][ scan8[n] ];
3010 Picture *ref= &h->ref_list[list][refn];
3011 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3012 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3013 qpix_put, chroma_put);
3015 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3016 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3017 if(h->use_weight_chroma){
3018 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3019 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3020 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3021 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatch MC for one partition: use the weighted path for explicit weighting
// (use_weight==1), or implicit bi-prediction whose weight differs from the
// trivial 32/32 split; otherwise the cheaper standard put/avg path.
3026 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3027 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3028 int x_offset, int y_offset,
3029 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3030 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3031 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3032 int list0, int list1){
3033 if((h->use_weight==2 && list0 && list1
3034 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3035 || h->use_weight==1)
// weight_op[0]/[3] are the luma/chroma variants for this partition size.
3036 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3037 x_offset, y_offset, qpix_put, chroma_put,
3038 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3040 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3041 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
// Prefetch reference pixels for the motion vector of sub-block 0, ahead of
// actual MC (a refn validity check presumably guards the body in a line not
// shown here; confirm against the full file).
3044 static inline void prefetch_motion(H264Context *h, int list){
3045 /* fetch pixels for estimated mv 4 macroblocks ahead
3046 * optimized for 64byte cache lines */
3047 MpegEncContext * const s = &h->s;
3048 const int refn = h->ref_cache[list][scan8[0]];
3050 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3051 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3052 uint8_t **src= h->ref_list[list][refn].data;
// The (mb_x&3)/(mb_x&7) terms stagger rows so consecutive MBs touch
// different cache lines.
3053 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3054 s->dsp.prefetch(src[0]+off, s->linesize, 4);
3055 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
// Cb and Cr planes are prefetched together; src[2]-src[1] is the plane gap.
3056 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
// Perform all motion compensation for one inter macroblock: dispatch by
// partition shape (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) to mc_part
// with the matching qpel/chroma function sizes and weight-table slices.
3060 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3061 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3062 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3063 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3064 MpegEncContext * const s = &h->s;
3065 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3066 const int mb_type= s->current_picture.mb_type[mb_xy];
3068 assert(IS_INTER(mb_type));
// Prefetch list-0 reference before MC; list-1 follows after the work below.
3070 prefetch_motion(h, 0);
3072 if(IS_16X16(mb_type)){
3073 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3074 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3075 &weight_op[0], &weight_avg[0],
3076 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3077 }else if(IS_16X8(mb_type)){
// Two 16x8 halves: delta=8 shifts the second luma call 8 pixels (see
// mc_dir_part); second half uses partition index 8 and y_offset 4 (chroma px).
3078 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3079 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3080 &weight_op[1], &weight_avg[1],
3081 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3082 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3083 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3084 &weight_op[1], &weight_avg[1],
3085 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3086 }else if(IS_8X16(mb_type)){
// Two 8x16 halves: delta is 8 rows (8*mb_linesize) for the second luma call.
3087 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3088 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3089 &weight_op[2], &weight_avg[2],
3090 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3091 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3092 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3093 &weight_op[2], &weight_avg[2],
3094 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3098 assert(IS_8X8(mb_type));
// 8x8 partitions: loop over the four sub-macroblocks (loop header in lines
// not shown here); n indexes the sub-block's first 4x4 position.
3101 const int sub_mb_type= h->sub_mb_type[i];
3103 int x_offset= (i&1)<<2;
3104 int y_offset= (i&2)<<1;
3106 if(IS_SUB_8X8(sub_mb_type)){
3107 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3108 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3109 &weight_op[3], &weight_avg[3],
3110 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3111 }else if(IS_SUB_8X4(sub_mb_type)){
3112 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3113 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3114 &weight_op[4], &weight_avg[4],
3115 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3116 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3117 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3118 &weight_op[4], &weight_avg[4],
3119 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3120 }else if(IS_SUB_4X8(sub_mb_type)){
3121 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3122 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3123 &weight_op[5], &weight_avg[5],
3124 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3125 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3126 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3127 &weight_op[5], &weight_avg[5],
3128 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3131 assert(IS_SUB_4X4(sub_mb_type));
3133 int sub_x_offset= x_offset + 2*(j&1);
3134 int sub_y_offset= y_offset + (j&2);
3135 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3136 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3137 &weight_op[6], &weight_avg[6],
3138 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3144 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC VLC tables (coeff_token, total_zeros, run, and
 * their chroma-DC variants) used by residual decoding.
 * NOTE(review): the "done" flag presumably guards one-time initialisation
 * shared across decoder instances — the guard body is elided here; confirm.
 */
3147 static void decode_init_vlc(H264Context *h){
3148 static int done = 0;
/* chroma DC coeff_token: 4 total_coeff values x 5 trailing_ones classes */
3154 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3155 &chroma_dc_coeff_token_len [0], 1, 1,
3156 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* luma coeff_token: one table per nC context, 4x17 symbols each */
3159 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3160 &coeff_token_len [i][0], 1, 1,
3161 &coeff_token_bits[i][0], 1, 1, 1);
/* chroma DC total_zeros, indexed by total_coeff-1 */
3165 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3166 &chroma_dc_total_zeros_len [i][0], 1, 1,
3167 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* luma total_zeros, one table per total_coeff value 1..15 */
3169 for(i=0; i<15; i++){
3170 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3171 &total_zeros_len [i][0], 1, 1,
3172 &total_zeros_bits[i][0], 1, 1, 1);
/* run_before for zeros_left 1..6 ... */
3176 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3177 &run_len [i][0], 1, 1,
3178 &run_bits[i][0], 1, 1, 1);
/* ... and the dedicated table for zeros_left > 6 */
3180 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3181 &run_len [6][0], 1, 1,
3182 &run_bits[6][0], 1, 1, 1);
3187 * Sets the intra prediction function pointers.
3189 static void init_pred_ptrs(H264Context *h){
3190 // MpegEncContext * const s = &h->s;
/* 4x4 luma intra prediction modes (9 directional + DC fallbacks) */
3192 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3193 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3194 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3195 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3196 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3197 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3198 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3199 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3200 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
/* DC variants used when left/top neighbours are unavailable */
3201 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3202 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3203 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
/* 8x8 luma intra prediction (High profile 8x8 transform mode) */
3205 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3206 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3207 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3208 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3209 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3210 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3211 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3212 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3213 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3214 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3215 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3216 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
/* 8x8 chroma intra prediction */
3218 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3219 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3220 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3221 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3222 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3223 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3224 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 luma intra prediction (shares the 8x8 mode indices) */
3226 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3227 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3228 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3229 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3230 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3231 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3232 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Frees all per-stream tables allocated by alloc_tables().
 * Safe to call on a partially-allocated context: av_freep() NULLs each
 * pointer, so a later re-allocation starts clean.
 */
3235 static void free_tables(H264Context *h){
3236 av_freep(&h->intra4x4_pred_mode);
3237 av_freep(&h->chroma_pred_mode_table);
3238 av_freep(&h->cbp_table);
3239 av_freep(&h->mvd_table[0]);
3240 av_freep(&h->mvd_table[1]);
3241 av_freep(&h->direct_table);
3242 av_freep(&h->non_zero_count);
3243 av_freep(&h->slice_table_base);
3244 av_freep(&h->top_borders[1]);
3245 av_freep(&h->top_borders[0]);
/* slice_table is a non-owning pointer into slice_table_base — just clear it */
3246 h->slice_table= NULL;
3248 av_freep(&h->mb2b_xy);
3249 av_freep(&h->mb2b8_xy);
3251 av_freep(&h->s.obmc_scratchpad);
/**
 * Precomputes the 8x8 dequantization tables for all 52 QP values,
 * folding in the PPS scaling matrices. If both 8x8 scaling matrices are
 * identical, table 1 aliases table 0 to save work and memory.
 */
3254 static void init_dequant8_coeff_table(H264Context *h){
/* the asm idct stores coefficients transposed, so transpose the table to match */
3256 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3257 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3258 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3260 for(i=0; i<2; i++ ){
/* identical matrices -> share the intra table for inter */
3261 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3262 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3266 for(q=0; q<52; q++){
3267 int shift = div6[q];
/* (x>>3)|((x&7)<<3) swaps row/column within the 8x8 block when transposing */
3270 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3271 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3272 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precomputes the six 4x4 dequantization tables (Intra/Inter Y, Cb, Cr)
 * for all 52 QP values, folding in the PPS scaling matrices; tables with
 * identical scaling matrices share one buffer.
 */
3277 static void init_dequant4_coeff_table(H264Context *h){
/* the asm idct expects transposed coefficients — mirror that layout here */
3279 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3280 for(i=0; i<6; i++ ){
3281 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier table whose scaling matrix matches */
3283 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3284 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3291 for(q=0; q<52; q++){
3292 int shift = div6[q] + 2;
/* (x>>2)|((x<<2)&0xF) transposes the position within the 4x4 block */
3295 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3296 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3297 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Builds all dequant tables for the current PPS; the 8x8 tables only when
 * the PPS enables the 8x8 transform. With lossless transform bypass, QP 0
 * entries are forced to the identity scale (1<<6).
 */
3302 static void init_dequant_tables(H264Context *h){
3304 init_dequant4_coeff_table(h);
3305 if(h->pps.transform_8x8_mode)
3306 init_dequant8_coeff_table(h);
3307 if(h->sps.transform_bypass){
/* qpprime_y_zero_transform_bypass: QP 0 must dequantize to unity */
3310 h->dequant4_coeff[i][0][x] = 1<<6;
3311 if(h->pps.transform_8x8_mode)
3314 h->dequant8_coeff[i][0][x] = 1<<6;
3321 * needs width/height
3323 static int alloc_tables(H264Context *h){
3324 MpegEncContext * const s = &h->s;
/* one extra mb row as guard for neighbour accesses above row 0 */
3325 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3328 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3330 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3331 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3332 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3333 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3334 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables (mvd residuals, direct flags, chroma pred modes) */
3336 if( h->pps.cabac ) {
3337 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3338 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3339 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3340 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so out-of-frame neighbours hit guard */
3343 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3344 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> motion-vector (b) / 8x8 (b8) index lookup tables */
3346 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3347 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3348 for(y=0; y<s->mb_height; y++){
3349 for(x=0; x<s->mb_width; x++){
3350 const int mb_xy= x + y*s->mb_stride;
3351 const int b_xy = 4*x + 4*y*h->b_stride;
3352 const int b8_xy= 2*x + 2*y*h->b8_stride;
3354 h->mb2b_xy [mb_xy]= b_xy;
3355 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
3359 s->obmc_scratchpad = NULL;
3361 if(!h->dequant4_coeff[0])
3362 init_dequant_tables(h);
/**
 * Initialisation shared by decoder (and encoder) contexts: copies geometry
 * from the AVCodecContext and seeds flat (=16) default scaling matrices so
 * dequant tables are valid before any PPS is parsed.
 */
3370 static void common_init(H264Context *h){
3371 MpegEncContext * const s = &h->s;
3373 s->width = s->avctx->width;
3374 s->height = s->avctx->height;
3375 s->codec_id= s->avctx->codec->id;
/* -1 = no PPS applied yet; forces dequant re-init on first slice */
3379 h->dequant_coeff_pps= -1;
3380 s->unrestricted_mv=1;
3381 s->decode=1; //FIXME
/* default scaling matrices: all 16 == flat (no scaling) */
3383 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3384 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects AVC ("avcC", first extradata byte
 * == 1) versus Annex-B extradata.
 */
3387 static int decode_init(AVCodecContext *avctx){
3388 H264Context *h= avctx->priv_data;
3389 MpegEncContext * const s = &h->s;
3391 MPV_decode_defaults(s);
3396 s->out_format = FMT_H264;
3397 s->workaround_bugs= avctx->workaround_bugs;
3400 // s->decode_mb= ff_h263_decode_mb;
3402 avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata starts with configurationVersion == 1 */
3406 if(avctx->extradata_size > 0 && avctx->extradata &&
3407 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the MPV frame and error resilience, precomputes
 * the block -> pixel offset tables (frame and field variants), and lazily
 * allocates the bipred scratchpad once linesize is known.
 */
3417 static int frame_start(H264Context *h){
3418 MpegEncContext * const s = &h->s;
3421 if(MPV_frame_start(s, s->avctx) < 0)
3423 ff_er_frame_start(s);
3425 assert(s->linesize && s->uvlinesize);
/* luma offsets: [0..15] frame (4*linesize per row), [24..39] field (8*) */
3427 for(i=0; i<16; i++){
3428 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3429 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets: Cb/Cr share values; [16..23] frame, [40..47] field */
3432 h->block_offset[16+i]=
3433 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3434 h->block_offset[24+16+i]=
3435 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3438 /* can't be in alloc_tables because linesize isn't known there.
3439 * FIXME: redo bipred weight to not require extra buffer? */
3440 if(!s->obmc_scratchpad)
3441 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3443 /* some macroblocks will be accessed before they're available */
3445 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3447 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the bottom row and right column of the just-decoded macroblock into
 * top_borders[0]/left_border, so intra prediction and deblocking of the
 * neighbours below/right can read the pre-filter pixels.
 */
3451 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3452 MpegEncContext * const s = &h->s;
3456 src_cb -= uvlinesize;
3457 src_cr -= uvlinesize;
3459 // There are two lines saved, the line above the top macroblock of a pair,
3460 // and the line above the bottom macroblock
3461 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3462 for(i=1; i<17; i++){
3463 h->left_border[i]= src_y[15+i* linesize];
/* copy the mb's last luma row (16 pixels, two 64-bit stores) */
3466 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3467 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3469 if(!(s->flags&CODEC_FLAG_GRAY)){
/* chroma right columns: Cb at +17, Cr at +17+9 in left_border */
3470 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3471 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3473 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3474 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
/* chroma bottom rows: Cb at offset 16, Cr at 24 in top_borders */
3476 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3477 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg!=0) or copies the saved unfiltered border pixels with the
 * frame around the current macroblock, so intra prediction sees pre-deblock
 * neighbours; called with xchg=1 before and xchg=0 after prediction.
 */
3481 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3482 MpegEncContext * const s = &h->s;
3485 int deblock_left = (s->mb_x > 0);
3486 int deblock_top = (s->mb_y > 0);
/* step back to the top-left corner pixel of the bordered area */
3488 src_y -= linesize + 1;
3489 src_cb -= uvlinesize + 1;
3490 src_cr -= uvlinesize + 1;
3492 #define XCHG(a,b,t,xchg)\
/* left luma column (skip row 0 when there is no top neighbour) */
3499 for(i = !deblock_top; i<17; i++){
3500 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* top luma row; the +8 half is always swapped (needed by both passes) */
3505 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3506 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3507 if(s->mb_x+1 < s->mb_width){
3508 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3512 if(!(s->flags&CODEC_FLAG_GRAY)){
3514 for(i = !deblock_top; i<9; i++){
3515 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3516 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3520 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3521 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves borders for a whole macroblock
 * pair — two bottom rows (one per field/mb) into top_borders[0]/[1] and a
 * double-height right column into left_border.
 */
3526 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3527 MpegEncContext * const s = &h->s;
3530 src_y -= 2 * linesize;
3531 src_cb -= 2 * uvlinesize;
3532 src_cr -= 2 * uvlinesize;
3534 // There are two lines saved, the line above the top macroblock of a pair,
3535 // and the line above the bottom macroblock
3536 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3537 h->left_border[1]= h->top_borders[1][s->mb_x][15];
/* 32 luma rows of right-column pixels for the pair */
3538 for(i=2; i<34; i++){
3539 h->left_border[i]= src_y[15+i* linesize];
/* last two luma rows of the pair -> one per top_borders bank */
3542 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3543 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3544 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3545 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3547 if(!(s->flags&CODEC_FLAG_GRAY)){
/* chroma corners then right columns: Cb at +34, Cr at +34+18 */
3548 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3549 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3550 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3551 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3552 for(i=2; i<18; i++){
3553 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3554 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3556 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3557 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3558 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3559 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps/copies the saved pre-deblock
 * borders for a whole macroblock pair (two top rows, double-height left
 * column) so intra prediction of the pair sees unfiltered neighbours.
 */
3563 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3564 MpegEncContext * const s = &h->s;
3567 int deblock_left = (s->mb_x > 0);
/* top neighbour is a pair as well, hence mb_y > 1 */
3568 int deblock_top = (s->mb_y > 1);
3570 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3572 src_y -= 2 * linesize + 1;
3573 src_cb -= 2 * uvlinesize + 1;
3574 src_cr -= 2 * uvlinesize + 1;
3576 #define XCHG(a,b,t,xchg)\
/* left luma column for both mbs of the pair (skip 2 rows w/o top pair) */
3583 for(i = (!deblock_top)<<1; i<34; i++){
3584 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* two top luma rows, one per top_borders bank */
3589 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3590 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3591 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3592 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3593 if(s->mb_x+1 < s->mb_width){
3594 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3595 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3599 if(!(s->flags&CODEC_FLAG_GRAY)){
3601 for(i = (!deblock_top) << 1; i<18; i++){
3602 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3603 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3607 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3608 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3609 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3610 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: prediction (intra or motion
 * compensation), residual IDCT+add for luma and chroma, and deblocking.
 * Handles PCM, transform bypass, 8x8 DCT, MBAFF and the SVQ3 variant.
 */
3615 static void hl_decode_mb(H264Context *h){
3616 MpegEncContext * const s = &h->s;
3617 const int mb_x= s->mb_x;
3618 const int mb_y= s->mb_y;
3619 const int mb_xy= mb_x + mb_y*s->mb_stride;
3620 const int mb_type= s->current_picture.mb_type[mb_xy];
3621 uint8_t *dest_y, *dest_cb, *dest_cr;
3622 int linesize, uvlinesize /*dct_offset*/;
3624 int *block_offset = &h->block_offset[0];
3625 const unsigned int bottom = mb_y & 1;
/* lossless path: s->qscale==0 with SPS transform_bypass set */
3626 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3627 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3628 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers into the current picture planes */
3633 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3634 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3635 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* field macroblock: double the stride and use the field offset tables */
3638 linesize = h->mb_linesize = s->linesize * 2;
3639 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3640 block_offset = &h->block_offset[24];
3641 if(mb_y&1){ //FIXME move out of this func?
3642 dest_y -= s->linesize*15;
3643 dest_cb-= s->uvlinesize*7;
3644 dest_cr-= s->uvlinesize*7;
/* remap frame refs to field refs in the ref cache (MBAFF field mbs) */
3648 for(list=0; list<2; list++){
3649 if(!USES_LIST(mb_type, list))
3651 if(IS_16X16(mb_type)){
3652 int8_t *ref = &h->ref_cache[list][scan8[0]];
3653 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3655 for(i=0; i<16; i+=4){
3656 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3657 int ref = h->ref_cache[list][scan8[i]];
3659 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
/* frame macroblock: plain strides */
3665 linesize = h->mb_linesize = s->linesize;
3666 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3667 // dct_offset = s->linesize * 16;
/* pick the residual-add functions matching the transform in use */
3670 if(transform_bypass){
3672 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3673 }else if(IS_8x8DCT(mb_type)){
3674 idct_dc_add = s->dsp.h264_idct8_dc_add;
3675 idct_add = s->dsp.h264_idct8_add;
3677 idct_dc_add = s->dsp.h264_idct_dc_add;
3678 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in the unfiltered pair borders before prediction */
3681 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3682 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3683 int mbt_y = mb_y&~1;
3684 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3685 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3686 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3687 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were parsed into h->mb — copy them out verbatim */
3690 if (IS_INTRA_PCM(mb_type)) {
3693 // The pixels are stored in h->mb array in the same order as levels,
3694 // copy them in output in the correct order.
3695 for(i=0; i<16; i++) {
3696 for (y=0; y<4; y++) {
3697 for (x=0; x<4; x++) {
3698 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3702 for(i=16; i<16+4; i++) {
3703 for (y=0; y<4; y++) {
3704 for (x=0; x<4; x++) {
3705 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3709 for(i=20; i<20+4; i++) {
3710 for (y=0; y<4; y++) {
3711 for (x=0; x<4; x++) {
3712 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra prediction (borders swapped to pre-deblock values when filtering) */
3717 if(IS_INTRA(mb_type)){
3718 if(h->deblocking_filter && !FRAME_MBAFF)
3719 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3721 if(!(s->flags&CODEC_FLAG_GRAY)){
3722 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3723 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3726 if(IS_INTRA4x4(mb_type)){
/* 8x8 intra: predict and add residual per 8x8 block */
3728 if(IS_8x8DCT(mb_type)){
3729 for(i=0; i<16; i+=4){
3730 uint8_t * const ptr= dest_y + block_offset[i];
3731 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3732 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3733 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3734 (h->topright_samples_available<<(i+1))&0x8000, linesize);
/* nnz==1 with only the DC coeff set -> cheaper dc_add path */
3736 if(nnz == 1 && h->mb[i*16])
3737 idct_dc_add(ptr, h->mb + i*16, linesize);
3739 idct_add(ptr, h->mb + i*16, linesize);
/* 4x4 intra: predict and add residual per 4x4 block */
3743 for(i=0; i<16; i++){
3744 uint8_t * const ptr= dest_y + block_offset[i];
3746 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* modes needing top-right samples: synthesize them when unavailable */
3749 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3750 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3751 assert(mb_y || linesize <= block_offset[i]);
3752 if(!topright_avail){
3753 tr= ptr[3 - linesize]*0x01010101;
3754 topright= (uint8_t*) &tr;
3756 topright= ptr + 4 - linesize;
3760 h->pred4x4[ dir ](ptr, topright, linesize);
3761 nnz = h->non_zero_count_cache[ scan8[i] ];
3763 if(s->codec_id == CODEC_ID_H264){
3764 if(nnz == 1 && h->mb[i*16])
3765 idct_dc_add(ptr, h->mb + i*16, linesize);
3767 idct_add(ptr, h->mb + i*16, linesize);
3769 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: full-mb prediction then luma DC dequant/transform */
3774 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3775 if(s->codec_id == CODEC_ID_H264){
3776 if(!transform_bypass)
3777 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3779 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3781 if(h->deblocking_filter && !FRAME_MBAFF)
3782 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* inter macroblock: motion compensation */
3783 }else if(s->codec_id == CODEC_ID_H264){
3784 hl_motion(h, dest_y, dest_cb, dest_cr,
3785 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3786 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3787 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual (intra16x16 DC already handled above) */
3791 if(!IS_INTRA4x4(mb_type)){
3792 if(s->codec_id == CODEC_ID_H264){
3793 if(IS_INTRA16x16(mb_type)){
3794 for(i=0; i<16; i++){
3795 if(h->non_zero_count_cache[ scan8[i] ])
3796 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3797 else if(h->mb[i*16])
3798 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3801 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3802 for(i=0; i<16; i+=di){
3803 int nnz = h->non_zero_count_cache[ scan8[i] ];
3805 if(nnz==1 && h->mb[i*16])
3806 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3808 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3813 for(i=0; i<16; i++){
3814 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3815 uint8_t * const ptr= dest_y + block_offset[i];
3816 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: dequant the 2x2 DC blocks then add per 4x4 block */
3822 if(!(s->flags&CODEC_FLAG_GRAY)){
3823 uint8_t *dest[2] = {dest_cb, dest_cr};
3824 if(transform_bypass){
3825 idct_add = idct_dc_add = s->dsp.add_pixels4;
3827 idct_add = s->dsp.h264_idct_add;
3828 idct_dc_add = s->dsp.h264_idct_dc_add;
3829 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3830 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3832 if(s->codec_id == CODEC_ID_H264){
3833 for(i=16; i<16+8; i++){
3834 if(h->non_zero_count_cache[ scan8[i] ])
3835 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3836 else if(h->mb[i*16])
3837 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3840 for(i=16; i<16+8; i++){
3841 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3842 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3843 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters a whole pair once its bottom mb is done */
3849 if(h->deblocking_filter) {
3851 //FIXME try deblocking one mb at a time?
3852 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3853 const int mb_y = s->mb_y - 1;
3854 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3855 const int mb_xy= mb_x + mb_y*s->mb_stride;
3856 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3857 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3858 if (!bottom) return;
3859 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3860 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3861 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* restore post-prediction borders, or back them up if not intra */
3863 if(IS_INTRA(mb_type_top | mb_type_bottom))
3864 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3866 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3870 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3871 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3872 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3873 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3876 tprintf("call mbaff filter_mb\n");
3877 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3878 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3879 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* progressive path: backup borders then run the fast filter */
3881 tprintf("call filter_mb\n");
3882 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3883 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3884 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3890 * fills the default_ref_list.
3892 static int fill_default_ref_list(H264Context *h){
3893 MpegEncContext * const s = &h->s;
3895 int smallest_poc_greater_than_current = -1;
3896 Picture sorted_short_ref[32];
/* B slices: selection-sort short refs by POC; remember the split point
 * between refs before and after the current picture */
3898 if(h->slice_type==B_TYPE){
3902 /* sort frame according to poc in B slice */
3903 for(out_i=0; out_i<h->short_ref_count; out_i++){
3905 int best_poc=INT_MAX;
3907 for(i=0; i<h->short_ref_count; i++){
3908 const int poc= h->short_ref[i]->poc;
3909 if(poc > limit && poc < best_poc){
3915 assert(best_i != INT_MIN);
3918 sorted_short_ref[out_i]= *h->short_ref[best_i];
3919 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3920 if (-1 == smallest_poc_greater_than_current) {
3921 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3922 smallest_poc_greater_than_current = out_i;
3928 if(s->picture_structure == PICT_FRAME){
3929 if(h->slice_type==B_TYPE){
3931 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3933 // find the largest poc
/* L0 walks backwards then forwards from the split, L1 the other way */
3934 for(list=0; list<2; list++){
3937 int step= list ? -1 : 1;
3939 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3940 while(j<0 || j>= h->short_ref_count){
3941 if(j != -99 && step == (list ? -1 : 1))
3944 j= smallest_poc_greater_than_current + (step>>1);
/* only frame refs (reference==3 means both fields) are eligible */
3946 if(sorted_short_ref[j].reference != 3) continue;
3947 h->default_ref_list[list][index ]= sorted_short_ref[j];
3948 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* long-term refs are appended after the short-term ones */
3951 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3952 if(h->long_ref[i] == NULL) continue;
3953 if(h->long_ref[i]->reference != 3) continue;
3955 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3956 h->default_ref_list[ list ][index++].pic_id= i;; /* NOTE(review): stray double semicolon */
3959 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3960 // swap the two first elements of L1 when
3961 // L0 and L1 are identical
3962 Picture temp= h->default_ref_list[1][0];
3963 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3964 h->default_ref_list[1][1] = temp;
3967 if(index < h->ref_count[ list ])
3968 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P slices: short-term refs in decode order, then long-term refs */
3972 for(i=0; i<h->short_ref_count; i++){
3973 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3974 h->default_ref_list[0][index ]= *h->short_ref[i];
3975 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3977 for(i = 0; i < 16; i++){
3978 if(h->long_ref[i] == NULL) continue;
3979 if(h->long_ref[i]->reference != 3) continue;
3980 h->default_ref_list[0][index ]= *h->long_ref[i];
3981 h->default_ref_list[0][index++].pic_id= i;; /* NOTE(review): stray double semicolon */
3983 if(index < h->ref_count[0])
3984 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3987 if(h->slice_type==B_TYPE){
3989 //FIXME second field balh
3993 for (i=0; i<h->ref_count[0]; i++) {
3994 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3996 if(h->slice_type==B_TYPE){
3997 for (i=0; i<h->ref_count[1]; i++) {
3998 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4005 static void print_short_term(H264Context *h);
4006 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts from the default lists, then moves the signalled short-/long-term
 * pictures to the front positions (spec 8.2.4.3). Returns 0, or -1 on
 * bitstream errors.
 */
4008 static int decode_ref_pic_list_reordering(H264Context *h){
4009 MpegEncContext * const s = &h->s;
4012 print_short_term(h);
4014 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4016 for(list=0; list<2; list++){
4017 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
4019 if(get_bits1(&s->gb)){
4020 int pred= h->curr_pic_num;
4022 for(index=0; ; index++){
4023 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4026 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
4028 if(reordering_of_pic_nums_idc==3)
4031 if(index >= h->ref_count[list]){
4032 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4036 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term, signalled as +-abs_diff from the predictor */
4037 if(reordering_of_pic_nums_idc<2){
4038 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4040 if(abs_diff_pic_num >= h->max_pic_num){
4041 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4045 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4046 else pred+= abs_diff_pic_num;
/* modular wrap per spec; pred carries over to the next iteration */
4047 pred &= h->max_pic_num - 1;
4049 for(i= h->short_ref_count-1; i>=0; i--){
4050 ref = h->short_ref[i];
4051 assert(ref->reference == 3);
4052 assert(!ref->long_ref);
4053 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4057 ref->pic_id= ref->frame_num;
/* idc 2: long-term, addressed directly by long_term_pic_idx */
4059 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4060 ref = h->long_ref[pic_id];
4061 ref->pic_id= pic_id;
4062 assert(ref->reference == 3);
4063 assert(ref->long_ref);
4068 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4069 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift the list down to "index" and insert the found picture there */
4071 for(i=index; i+1<h->ref_count[list]; i++){
4072 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4075 for(; i > index; i--){
4076 h->ref_list[list][i]= h->ref_list[list][i-1];
4078 h->ref_list[list][index]= *ref;
4081 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4087 if(h->slice_type!=B_TYPE) break;
/* fill holes (missing references) with the current picture */
4089 for(list=0; list<2; list++){
4090 for(index= 0; index < h->ref_count[list]; index++){
4091 if(!h->ref_list[list][index].data[0])
4092 h->ref_list[list][index]= s->current_picture;
4094 if(h->slice_type!=B_TYPE) break;
4097 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4098 direct_dist_scale_factor(h);
4099 direct_ref_list_init(h);
/**
 * Derives per-field reference entries for MBAFF: each frame reference i
 * spawns two field pictures at slots [16+2*i] (top) and [16+2*i+1]
 * (bottom) with doubled linesize, and duplicates the weighted-prediction
 * factors for those slots.
 */
4103 static void fill_mbaff_ref_list(H264Context *h){
4105 for(list=0; list<2; list++){
4106 for(i=0; i<h->ref_count[list]; i++){
4107 Picture *frame = &h->ref_list[list][i];
4108 Picture *field = &h->ref_list[list][16+2*i];
/* a field is the frame with doubled stride; bottom starts one line down */
4111 field[0].linesize[j] <<= 1;
4112 field[1] = field[0];
4114 field[1].data[j] += frame->linesize[j];
/* both fields inherit the frame's explicit weights/offsets */
4116 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4117 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4119 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4120 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are duplicated along both ref list axes */
4124 for(j=0; j<h->ref_count[1]; j++){
4125 for(i=0; i<h->ref_count[0]; i++)
4126 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4127 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4128 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, falling back to the default
 * (1<<log2_denom, offset 0) when the per-ref flag is absent. Sets
 * use_weight/use_weight_chroma when any non-default factor is seen.
 */
4132 static int pred_weight_table(H264Context *h){
4133 MpegEncContext * const s = &h->s;
4135 int luma_def, chroma_def;
4138 h->use_weight_chroma= 0;
4139 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4140 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* implicit defaults are unity weight at the signalled denominator */
4141 luma_def = 1<<h->luma_log2_weight_denom;
4142 chroma_def = 1<<h->chroma_log2_weight_denom;
4144 for(list=0; list<2; list++){
4145 for(i=0; i<h->ref_count[list]; i++){
4146 int luma_weight_flag, chroma_weight_flag;
4148 luma_weight_flag= get_bits1(&s->gb);
4149 if(luma_weight_flag){
4150 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4151 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a non-default pair actually enables weighting */
4152 if( h->luma_weight[list][i] != luma_def
4153 || h->luma_offset[list][i] != 0)
4156 h->luma_weight[list][i]= luma_def;
4157 h->luma_offset[list][i]= 0;
4160 chroma_weight_flag= get_bits1(&s->gb);
4161 if(chroma_weight_flag){
4164 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4165 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4166 if( h->chroma_weight[list][i][j] != chroma_def
4167 || h->chroma_offset[list][i][j] != 0)
4168 h->use_weight_chroma= 1;
4173 h->chroma_weight[list][i][j]= chroma_def;
4174 h->chroma_offset[list][i][j]= 0;
/* list 1 weights exist only for B slices */
4178 if(h->slice_type != B_TYPE) break;
4180 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Fills h->implicit_weight[][] for implicit weighted bi-prediction
 * (weighted_bipred_idc == 2), per H.264 8.4.2.3.2: weights are derived
 * from the POC distances between the current picture and the two
 * references. A special case disables weighting when there is exactly one
 * reference per list and the current POC lies midway between them.
 * Out-of-range distance scale factors fall back to the neutral 32/32 split.
 * NOTE(review): listing is decimated — some branch/brace lines are missing.
 */
4184 static void implicit_weight_table(H264Context *h){
4185 MpegEncContext * const s = &h->s;
4187 int cur_poc = s->current_picture_ptr->poc;
// Midpoint special case: implicit weighting would be 32/32 anyway, skip it.
4189 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4190 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4192 h->use_weight_chroma= 0;
// use_weight(_chroma)==2 marks "implicit" mode for the motion compensation.
4197 h->use_weight_chroma= 2;
4198 h->luma_log2_weight_denom= 5;
4199 h->chroma_log2_weight_denom= 5;
4201 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4202 int poc0 = h->ref_list[0][ref0].poc;
4203 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4204 int poc1 = h->ref_list[1][ref1].poc;
// td/tb are the clipped POC distances of spec eq. 8-201..8-204.
4205 int td = clip(poc1 - poc0, -128, 127);
4207 int tb = clip(cur_poc - poc0, -128, 127);
4208 int tx = (16384 + (ABS(td) >> 1)) / td;
4209 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Outside the legal range the spec mandates the neutral weight (32).
4210 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4211 h->implicit_weight[ref0][ref1] = 32;
4213 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4215 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the reference status of a picture, unless it is still needed for
 * display: pictures pending in delayed_output_pic / delayed_pic keep their
 * buffers alive until output.
 * NOTE(review): decimated listing — the actual reference-clearing statements
 * are not visible in this view.
 */
4220 static inline void unreference_pic(H264Context *h, Picture *pic){
4223 if(pic == h->delayed_output_pic)
4226 for(i = 0; h->delayed_pic[i]; i++)
4227 if(pic == h->delayed_pic[i]){
4235 * instantaneous decoder refresh.
/**
 * Performs an instantaneous decoder refresh: unreferences and clears every
 * long-term and short-term reference picture, resetting both counts to 0.
 * Called when an IDR slice is encountered.
 */
4237 static void idr(H264Context *h){
// Release all long-term references.
4240 for(i=0; i<16; i++){
4241 if (h->long_ref[i] != NULL) {
4242 unreference_pic(h, h->long_ref[i]);
4243 h->long_ref[i]= NULL;
4246 h->long_ref_count=0;
// Release all short-term references.
4248 for(i=0; i<h->short_ref_count; i++){
4249 unreference_pic(h, h->short_ref[i]);
4250 h->short_ref[i]= NULL;
4252 h->short_ref_count=0;
4255 /* forget old pics after a seek */
/**
 * AVCodec flush callback: discards all delayed (reordering) pictures and the
 * current picture after a seek, clearing their reference flags so the
 * underlying frame buffers can be reused.
 * NOTE(review): decimated listing — in the full source this presumably also
 * calls idr()/mpeg flush helpers; only the visible statements are documented.
 */
4256 static void flush_dpb(AVCodecContext *avctx){
4257 H264Context *h= avctx->priv_data;
4259 for(i=0; i<16; i++) {
4260 if(h->delayed_pic[i])
4261 h->delayed_pic[i]->reference= 0;
4262 h->delayed_pic[i]= NULL;
4264 if(h->delayed_output_pic)
4265 h->delayed_output_pic->reference= 0;
4266 h->delayed_output_pic= NULL;
4268 if(h->s.current_picture_ptr)
4269 h->s.current_picture_ptr->reference= 0;
4274 * @return the removed picture or NULL if an error occurs
/**
 * Removes the short-term reference picture with the given frame_num from
 * h->short_ref[], compacting the array and decrementing short_ref_count.
 * Emits MMCO debug traces when FF_DEBUG_MMCO is enabled.
 * @return the removed picture, or NULL if no entry matches frame_num
 *         (return statements are outside this decimated view).
 */
4276 static Picture * remove_short(H264Context *h, int frame_num){
4277 MpegEncContext * const s = &h->s;
4280 if(s->avctx->debug&FF_DEBUG_MMCO)
4281 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4283 for(i=0; i<h->short_ref_count; i++){
4284 Picture *pic= h->short_ref[i];
4285 if(s->avctx->debug&FF_DEBUG_MMCO)
4286 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4287 if(pic->frame_num == frame_num){
4288 h->short_ref[i]= NULL;
// Close the gap left by the removed entry.
4289 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4290 h->short_ref_count--;
4299 * @return the removed picture or NULL if an error occurs
/**
 * Removes the long-term reference at index i from h->long_ref[].
 * @param i long-term reference index (slot in the 16-entry table)
 * @return the removed picture, or NULL if the slot was empty
 */
4301 static Picture * remove_long(H264Context *h, int i){
4304 pic= h->long_ref[i];
4305 h->long_ref[i]= NULL;
// Only decrement the count if the slot actually held a picture.
4306 if(pic) h->long_ref_count--;
4312 * print short term list
/**
 * Debug helper: dumps the short-term reference list (index, frame_num, poc,
 * data pointer) when FF_DEBUG_MMCO is enabled. No effect otherwise.
 */
4314 static void print_short_term(H264Context *h) {
4316 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4317 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4318 for(i=0; i<h->short_ref_count; i++){
4319 Picture *pic= h->short_ref[i];
4320 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4326 * print long term list
/**
 * Debug helper: dumps all 16 long-term reference slots (frame_num, poc,
 * data pointer) when FF_DEBUG_MMCO is enabled. No effect otherwise.
 */
4328 static void print_long_term(H264Context *h) {
4330 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4331 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4332 for(i = 0; i < 16; i++){
4333 Picture *pic= h->long_ref[i];
4335 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4342 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the memory management control operations (MMCO) of H.264
 * clause 8.2.5 on the decoded-reference lists: moving/removing short- and
 * long-term references, resetting the DPB, and finally inserting the
 * current picture as a short-term reference when no MMCO made it long-term.
 * @param mmco        array of parsed MMCO operations
 * @param mmco_count  number of valid entries in mmco
 * NOTE(review): decimated listing — case labels for MMCO_LONG/MMCO_RESET and
 * several braces/returns are not visible; comments follow the visible code.
 */
4344 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4345 MpegEncContext * const s = &h->s;
4347 int current_is_long=0;
4350 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4351 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4353 for(i=0; i<mmco_count; i++){
4354 if(s->avctx->debug&FF_DEBUG_MMCO)
4355 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4357 switch(mmco[i].opcode){
// Mark a short-term picture as unused for reference.
4358 case MMCO_SHORT2UNUSED:
4359 pic= remove_short(h, mmco[i].short_frame_num);
4361 unreference_pic(h, pic);
4362 else if(s->avctx->debug&FF_DEBUG_MMCO)
4363 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// Convert a short-term picture into a long-term one; any picture already
// occupying the target long-term slot is released first.
4365 case MMCO_SHORT2LONG:
4366 pic= remove_long(h, mmco[i].long_index);
4367 if(pic) unreference_pic(h, pic);
4369 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4370 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4371 h->long_ref_count++;
// Mark a long-term picture as unused for reference.
4373 case MMCO_LONG2UNUSED:
4374 pic= remove_long(h, mmco[i].long_index);
4376 unreference_pic(h, pic);
4377 else if(s->avctx->debug&FF_DEBUG_MMCO)
4378 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (MMCO_LONG, not visible here) Store the current picture long-term.
4381 pic= remove_long(h, mmco[i].long_index);
4382 if(pic) unreference_pic(h, pic);
4384 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4385 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4386 h->long_ref_count++;
4390 case MMCO_SET_MAX_LONG:
4391 assert(mmco[i].long_index <= 16);
4392 // just remove the long term which index is greater than new max
4393 for(j = mmco[i].long_index; j<16; j++){
4394 pic = remove_long(h, j);
4395 if (pic) unreference_pic(h, pic);
// (MMCO_RESET, not visible here) Empty the whole DPB.
4399 while(h->short_ref_count){
4400 pic= remove_short(h, h->short_ref[0]->frame_num);
4401 unreference_pic(h, pic);
4403 for(j = 0; j < 16; j++) {
4404 pic= remove_long(h, j);
4405 if(pic) unreference_pic(h, pic);
// If no MMCO made the current picture long-term, prepend it to the
// short-term list (sliding-window marking).
4412 if(!current_is_long){
4413 pic= remove_short(h, s->current_picture_ptr->frame_num);
4415 unreference_pic(h, pic);
4416 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4419 if(h->short_ref_count)
4420 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4422 h->short_ref[0]= s->current_picture_ptr;
4423 h->short_ref[0]->long_ref=0;
4424 h->short_ref_count++;
4427 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header: for IDR slices reads
 * no_output_of_prior_pics / long_term_reference_flag, otherwise parses the
 * adaptive MMCO list (up to MAX_MMCO_COUNT entries, terminated by MMCO_END).
 * When the adaptive flag is absent, synthesizes a sliding-window
 * MMCO_SHORT2UNUSED if the DPB is already at sps.ref_frame_count capacity.
 * NOTE(review): decimated listing — the mmco_index bookkeeping and return
 * statements are not visible here.
 */
4432 static int decode_ref_pic_marking(H264Context *h){
4433 MpegEncContext * const s = &h->s;
4436 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4437 s->broken_link= get_bits1(&s->gb) -1;
// -1 means the IDR picture is not kept as a long-term reference.
4438 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4439 if(h->mmco[0].long_index == -1)
4442 h->mmco[0].opcode= MMCO_LONG;
4446 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4447 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4448 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4450 h->mmco[i].opcode= opcode;
4451 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1, wrapped to the frame_num range.
4452 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4453 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4454 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4458 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4459 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4460 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4461 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4466 if(opcode > MMCO_LONG){
4467 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4470 if(opcode == MMCO_END)
// Sliding window: drop the oldest short-term ref once the DPB is full.
4475 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4477 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4478 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4479 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count (POC) of the current picture per
 * H.264 clause 8.2.1, handling all three poc_type modes:
 *   0 — explicit poc_lsb with msb wrap tracking,
 *   1 — expected POC derived from the offset_for_ref_frame cycle,
 *   2 — POC derived directly from frame_num (display order == coded order).
 * Stores the per-field POCs and the frame POC (min of the two fields)
 * into the current picture.
 * NOTE(review): decimated listing — field_poc declaration, the poc_msb
 * reset for IDR, and some else-branches are not visible here.
 */
4489 static int init_poc(H264Context *h){
4490 MpegEncContext * const s = &h->s;
4491 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates max_frame_num at each frame_num wraparound.
4494 if(h->nal_unit_type == NAL_IDR_SLICE){
4495 h->frame_num_offset= 0;
4497 if(h->frame_num < h->prev_frame_num)
4498 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4500 h->frame_num_offset= h->prev_frame_num_offset;
4503 if(h->sps.poc_type==0){
4504 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4506 if(h->nal_unit_type == NAL_IDR_SLICE){
// Detect lsb wraparound in either direction (spec eq. 8-3).
4511 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4512 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4513 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4514 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4516 h->poc_msb = h->prev_poc_msb;
4517 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4519 field_poc[1] = h->poc_msb + h->poc_lsb;
4520 if(s->picture_structure == PICT_FRAME)
4521 field_poc[1] += h->delta_poc_bottom;
4522 }else if(h->sps.poc_type==1){
4523 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4526 if(h->sps.poc_cycle_length != 0)
4527 abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures count one position earlier in the cycle.
4531 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4534 expected_delta_per_poc_cycle = 0;
4535 for(i=0; i < h->sps.poc_cycle_length; i++)
4536 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4538 if(abs_frame_num > 0){
4539 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4540 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4542 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4543 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4544 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4548 if(h->nal_ref_idc == 0)
4549 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4551 field_poc[0] = expectedpoc + h->delta_poc[0];
4552 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4554 if(s->picture_structure == PICT_FRAME)
4555 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC follows decoding order directly.
4558 if(h->nal_unit_type == NAL_IDR_SLICE){
4561 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4562 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Commit results to the current picture; fields only get their own POC.
4568 if(s->picture_structure != PICT_BOTTOM_FIELD)
4569 s->current_picture_ptr->field_poc[0]= field_poc[0];
4570 if(s->picture_structure != PICT_TOP_FIELD)
4571 s->current_picture_ptr->field_poc[1]= field_poc[1];
4572 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4573 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4579 * decodes a slice header.
4580 * this will allso call MPV_common_init() and frame_start() as needed
/**
 * Decodes a slice header (and, for the first slice, initializes the codec
 * context and starts the frame). Parses: slice type, PPS/SPS selection,
 * frame geometry, frame_num, field/MBAFF structure, POC syntax, reference
 * counts and reordering, weighted prediction tables, reference marking,
 * CABAC init index, QP, SP/SI parameters and deblocking filter controls.
 * @return 0 on success, negative on error (error paths mostly outside this
 *         decimated view).
 * NOTE(review): the listing is decimated — many braces, else-branches and
 * return statements are missing; comments describe visible code only.
 */
4582 static int decode_slice_header(H264Context *h){
4583 MpegEncContext * const s = &h->s;
4584 int first_mb_in_slice, pps_id;
4585 int num_ref_idx_active_override_flag;
// Maps coded slice_type (possibly reduced mod 5) to internal picture types.
4586 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4588 int default_ref_list_done = 0;
// nal_ref_idc==0 means this picture is never used as a reference.
4590 s->current_picture.reference= h->nal_ref_idc != 0;
4591 s->dropable= h->nal_ref_idc == 0;
4593 first_mb_in_slice= get_ue_golomb(&s->gb);
4595 slice_type= get_ue_golomb(&s->gb);
4597 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "fixed for the whole picture" per the spec.
4602 h->slice_type_fixed=1;
4604 h->slice_type_fixed=0;
4606 slice_type= slice_type_map[ slice_type ];
// Default ref list building can be skipped for I slices and for repeated
// slices of the same type within one picture.
4607 if (slice_type == I_TYPE
4608 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4609 default_ref_list_done = 1;
4611 h->slice_type= slice_type;
4613 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
/* --- PPS / SPS activation --- */
4615 pps_id= get_ue_golomb(&s->gb);
4617 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4620 h->pps= h->pps_buffer[pps_id];
// slice_group_count==0 marks a never-filled PPS slot.
4621 if(h->pps.slice_group_count == 0){
4622 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4626 h->sps= h->sps_buffer[ h->pps.sps_id ];
// log2_max_frame_num==0 likewise marks a never-filled SPS slot.
4627 if(h->sps.log2_max_frame_num == 0){
4628 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// Rebuild dequant tables only when the active PPS changes.
4632 if(h->dequant_coeff_pps != pps_id){
4633 h->dequant_coeff_pps = pps_id;
4634 init_dequant_tables(h);
/* --- Frame geometry from the SPS --- */
4637 s->mb_width= h->sps.mb_width;
4638 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4640 h->b_stride= s->mb_width*4;
4641 h->b8_stride= s->mb_width*2;
4643 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4644 if(h->sps.frame_mbs_only_flag)
4645 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4647 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4649 if (s->context_initialized
4650 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
/* --- One-time context init: scan table setup --- */
4654 if (!s->context_initialized) {
4655 if (MPV_common_init(s) < 0)
// The permutation T() adapts the scan order to the IDCT's coefficient
// layout; the plain tables are used only with the C reference IDCT.
4658 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4659 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4660 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4663 for(i=0; i<16; i++){
4664 #define T(x) (x>>2) | ((x<<2) & 0xF)
4665 h->zigzag_scan[i] = T(zigzag_scan[i]);
4666 h-> field_scan[i] = T( field_scan[i]);
4670 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4671 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4672 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4673 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4674 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4677 for(i=0; i<64; i++){
4678 #define T(x) (x>>3) | ((x&7)<<3)
4679 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4680 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4681 h->field_scan8x8[i] = T(field_scan8x8[i]);
4682 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// _q0 tables are the un-permuted scans used for lossless (QP 0) blocks.
4686 if(h->sps.transform_bypass){ //FIXME same ugly
4687 h->zigzag_scan_q0 = zigzag_scan;
4688 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4689 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4690 h->field_scan_q0 = field_scan;
4691 h->field_scan8x8_q0 = field_scan8x8;
4692 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4694 h->zigzag_scan_q0 = h->zigzag_scan;
4695 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4696 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4697 h->field_scan_q0 = h->field_scan;
4698 h->field_scan8x8_q0 = h->field_scan8x8;
4699 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/* --- Export geometry / SAR / time base to the AVCodecContext --- */
4704 s->avctx->width = s->width;
4705 s->avctx->height = s->height;
4706 s->avctx->sample_aspect_ratio= h->sps.sar;
4707 if(!s->avctx->sample_aspect_ratio.den)
4708 s->avctx->sample_aspect_ratio.den = 1;
4710 if(h->sps.timing_info_present_flag){
4711 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Workaround: old x264 builds wrote timing info off by a factor of 2.
4712 if(h->x264_build > 0 && h->x264_build < 44)
4713 s->avctx->time_base.den *= 2;
4714 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4715 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4719 if(h->slice_num == 0){
4720 if(frame_start(h) < 0)
4724 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4725 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- Picture structure: frame / field / MBAFF --- */
4728 h->mb_aff_frame = 0;
4729 if(h->sps.frame_mbs_only_flag){
4730 s->picture_structure= PICT_FRAME;
4732 if(get_bits1(&s->gb)) { //field_pic_flag
4733 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4734 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4736 s->picture_structure= PICT_FRAME;
4737 h->mb_aff_frame = h->sps.mb_aff;
// In MBAFF frames, slice addressing is in macroblock-pair rows.
4741 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4742 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4743 if(s->mb_y >= s->mb_height){
// Field pictures use pic nums twice as dense as frame pictures.
4747 if(s->picture_structure==PICT_FRAME){
4748 h->curr_pic_num= h->frame_num;
4749 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4751 h->curr_pic_num= 2*h->frame_num;
4752 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4755 if(h->nal_unit_type == NAL_IDR_SLICE){
4756 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC syntax elements --- */
4759 if(h->sps.poc_type==0){
4760 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4762 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4763 h->delta_poc_bottom= get_se_golomb(&s->gb);
4767 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4768 h->delta_poc[0]= get_se_golomb(&s->gb);
4770 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4771 h->delta_poc[1]= get_se_golomb(&s->gb);
4776 if(h->pps.redundant_pic_cnt_present){
4777 h->redundant_pic_count= get_ue_golomb(&s->gb);
/* --- Reference counts, list reordering, weighting --- */
4780 //set defaults, might be overriden a few line later
4781 h->ref_count[0]= h->pps.ref_count[0];
4782 h->ref_count[1]= h->pps.ref_count[1];
4784 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4785 if(h->slice_type == B_TYPE){
4786 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4787 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4788 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4790 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4792 if(num_ref_idx_active_override_flag){
4793 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4794 if(h->slice_type==B_TYPE)
4795 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4797 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4798 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4804 if(!default_ref_list_done){
4805 fill_default_ref_list(h);
4808 if(decode_ref_pic_list_reordering(h) < 0)
4811 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4812 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4813 pred_weight_table(h);
4814 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4815 implicit_weight_table(h);
4819 if(s->current_picture.reference)
4820 decode_ref_pic_marking(h);
4823 fill_mbaff_ref_list(h);
/* --- QP / SP / deblocking --- */
4825 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4826 h->cabac_init_idc = get_ue_golomb(&s->gb);
4828 h->last_qscale_diff = 0;
4829 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4830 if(s->qscale<0 || s->qscale>51){
4831 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4834 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4835 //FIXME qscale / qp ... stuff
4836 if(h->slice_type == SP_TYPE){
4837 get_bits1(&s->gb); /* sp_for_switch_flag */
4839 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4840 get_se_golomb(&s->gb); /* slice_qs_delta */
4843 h->deblocking_filter = 1;
4844 h->slice_alpha_c0_offset = 0;
4845 h->slice_beta_offset = 0;
4846 if( h->pps.deblocking_filter_parameters_present ) {
4847 h->deblocking_filter= get_ue_golomb(&s->gb);
// Bitstream uses 0=on, 1=off; internal meaning is inverted.
4848 if(h->deblocking_filter < 2)
4849 h->deblocking_filter^= 1; // 1<->0
4851 if( h->deblocking_filter ) {
4852 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4853 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// Honor the user's skip_loop_filter setting.
4856 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4857 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4858 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4859 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4860 h->deblocking_filter= 0;
// NOTE(review): the '?' bit count below is presumably inside disabled
// (#if 0) code in the full source — confirm before enabling.
4863 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4864 slice_group_change_cycle= get_bits(&s->gb, ?);
4869 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4870 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4872 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4873 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4875 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4877 av_get_pict_type_char(h->slice_type),
4878 pps_id, h->frame_num,
4879 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4880 h->ref_count[0], h->ref_count[1],
4882 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4884 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Fast mode: cheaper 2-tap qpel interpolation for non-reference frames.
4888 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4889 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4890 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4892 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4893 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit in the bit reader cache and consumes prefix+1 bits.
 * @return the prefix length (return statement outside this decimated view).
 */
4902 static inline int get_level_prefix(GetBitContext *gb){
4906 OPEN_READER(re, gb);
4907 UPDATE_CACHE(re, gb);
4908 buf=GET_CACHE(re, gb);
// log = position (from MSB) of the terminating one-bit, plus one.
4910 log= 32 - av_log2(buf);
4912 print_bin(buf>>(32-log), log);
4913 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4916 LAST_SKIP_BITS(re, gb, log);
4917 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * disallowed if any partition is smaller than 8x8, or if a direct sub-MB
 * is present without direct_8x8_inference.
 * NOTE(review): the surrounding loop and returns are outside this view.
 */
4922 static inline int get_dct8x8_allowed(H264Context *h){
4925 if(!IS_SUB_8X8(h->sub_mb_type[i])
4926 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4933 * decodes a residual block.
4934 * @param n block index
4935 * @param scantable scantable
4936 * @param max_coeff number of coefficients in the block
4937 * @return <0 if an error occured
/**
 * Decodes one CAVLC residual block (clause 9.2): coeff_token, trailing-one
 * signs, level prefixes/suffixes, total_zeros and run_before, scattering
 * the levels into `block` via `scantable` — with optional dequantization
 * through `qmul` when present.
 * @param n          block index (CHROMA_DC/LUMA_DC indices are special-cased)
 * @param scantable  coefficient scan order
 * @param qmul       dequant multipliers, or bypassed for the DC paths
 * @param max_coeff  number of coefficients in the block
 * @return <0 if an error occured
 * NOTE(review): decimated listing — several else/brace/return lines are
 * missing; comments follow the visible code only.
 */
4939 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4940 MpegEncContext * const s = &h->s;
// Maps predicted nnz to one of 4 coeff_token VLC tables (9.2.1).
4941 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4943 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4945 //FIXME put trailing_onex into the context
// coeff_token selection: chroma DC has its own VLC; luma uses a table
// chosen by the predicted non-zero count of neighboring blocks.
4947 if(n == CHROMA_DC_BLOCK_INDEX){
4948 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4949 total_coeff= coeff_token>>2;
4951 if(n == LUMA_DC_BLOCK_INDEX){
4952 total_coeff= pred_non_zero_count(h, 0);
4953 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4954 total_coeff= coeff_token>>2;
4956 total_coeff= pred_non_zero_count(h, n);
4957 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4958 total_coeff= coeff_token>>2;
4959 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4963 //FIXME set last_non_zero?
/* --- Level decoding --- */
// Low two bits of coeff_token are the trailing-ones count (sign-only levels).
4968 trailing_ones= coeff_token&3;
4969 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4970 assert(total_coeff<=16);
4972 for(i=0; i<trailing_ones; i++){
4973 level[i]= 1 - 2*get_bits1(gb);
4977 int level_code, mask;
// First non-T1 level: suffix length starts at 0 or 1 (9.2.2.1).
4978 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4979 int prefix= get_level_prefix(gb);
4981 //first coefficient has suffix_length equal to 0 or 1
4982 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4984 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4986 level_code= (prefix<<suffix_length); //part
4987 }else if(prefix==14){
4989 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4991 level_code= prefix + get_bits(gb, 4); //part
4992 }else if(prefix==15){
4993 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4994 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4996 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// With fewer than 3 trailing ones, codes 0/1 are reserved for |level|==1.
5000 if(trailing_ones < 3) level_code += 2;
// Zig-zag map even/odd level_code to +/- magnitudes.
5005 mask= -(level_code&1);
5006 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5009 //remaining coefficients have suffix_length > 0
5010 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length grows (9.2.2.1 step 6).
5011 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5012 prefix = get_level_prefix(gb);
5014 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5015 }else if(prefix==15){
5016 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5018 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5021 mask= -(level_code&1);
5022 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5023 if(level_code > suffix_limit[suffix_length])
/* --- total_zeros / run_before and coefficient placement --- */
// A full block has no room for zeros, so total_zeros is not coded.
5028 if(total_coeff == max_coeff)
5031 if(n == CHROMA_DC_BLOCK_INDEX)
5032 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5034 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5037 coeff_num = zeros_left + total_coeff - 1;
5038 j = scantable[coeff_num];
// Path without dequantization (qmul bypass): raw levels.
5040 block[j] = level[0];
5041 for(i=1;i<total_coeff;i++) {
5044 else if(zeros_left < 7){
5045 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5047 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5049 zeros_left -= run_before;
5050 coeff_num -= 1 + run_before;
5051 j= scantable[ coeff_num ];
// Path with dequantization: level * qmul, rounded, >>6.
5056 block[j] = (level[0] * qmul[j] + 32)>>6;
5057 for(i=1;i<total_coeff;i++) {
5060 else if(zeros_left < 7){
5061 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5063 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5065 zeros_left -= run_before;
5066 coeff_num -= 1 + run_before;
5067 j= scantable[ coeff_num ];
5069 block[j]= (level[i] * qmul[j] + 32)>>6;
// zeros_left going negative means the bitstream was inconsistent.
5074 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field-decoding flag for a skipped macroblock pair
 * from its left neighbor if that belongs to the same slice, else from the
 * above neighbor, else defaults to frame (0).
 */
5081 static void predict_field_decoding_flag(H264Context *h){
5082 MpegEncContext * const s = &h->s;
5083 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5084 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5085 ? s->current_picture.mb_type[mb_xy-1]
5086 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5087 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5089 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5093 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears residual state, derives
 * motion (direct prediction for B skip, P-skip MV prediction otherwise),
 * writes motion back and marks the MB as skipped in the picture arrays.
 * NOTE(review): decimated listing — the mb_type initialization and the
 * MB_FIELD condition guarding the interlaced flag are not visible here.
 */
5095 static void decode_mb_skip(H264Context *h){
5096 MpegEncContext * const s = &h->s;
5097 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped MB has no coded coefficients.
5100 memset(h->non_zero_count[mb_xy], 0, 16);
5101 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5104 mb_type|= MB_TYPE_INTERLACED;
5106 if( h->slice_type == B_TYPE )
5108 // just for fill_caches. pred_direct_motion will set the real mb_type
5109 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5111 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5112 pred_direct_motion(h, &mb_type);
// No motion-vector deltas for a skipped MB (CABAC mvd context reset).
5114 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5115 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP path: predicted MV from neighbors, reference index 0.
5121 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5123 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5124 pred_pskip_motion(h, &mx, &my);
5125 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5126 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5128 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5131 write_back_motion(h, mb_type);
5132 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
5133 s->current_picture.qscale_table[mb_xy]= s->qscale;
5134 h->slice_table[ mb_xy ]= h->slice_num;
5135 h->prev_mb_skipped= 1;
5139 * decodes a macroblock
5140 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5142 static int decode_mb_cavlc(H264Context *h){
5143 MpegEncContext * const s = &h->s;
5144 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5145 int mb_type, partition_count, cbp;
5146 int dct8x8_allowed= h->pps.transform_8x8_mode;
5148 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5150 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5151 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5153 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5154 if(s->mb_skip_run==-1)
5155 s->mb_skip_run= get_ue_golomb(&s->gb);
5157 if (s->mb_skip_run--) {
5158 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5159 if(s->mb_skip_run==0)
5160 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5162 predict_field_decoding_flag(h);
5169 if( (s->mb_y&1) == 0 )
5170 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5172 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5174 h->prev_mb_skipped= 0;
5176 mb_type= get_ue_golomb(&s->gb);
5177 if(h->slice_type == B_TYPE){
5179 partition_count= b_mb_type_info[mb_type].partition_count;
5180 mb_type= b_mb_type_info[mb_type].type;
5183 goto decode_intra_mb;
5185 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5187 partition_count= p_mb_type_info[mb_type].partition_count;
5188 mb_type= p_mb_type_info[mb_type].type;
5191 goto decode_intra_mb;
5194 assert(h->slice_type == I_TYPE);
5197 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5201 cbp= i_mb_type_info[mb_type].cbp;
5202 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5203 mb_type= i_mb_type_info[mb_type].type;
5207 mb_type |= MB_TYPE_INTERLACED;
5209 h->slice_table[ mb_xy ]= h->slice_num;
5211 if(IS_INTRA_PCM(mb_type)){
5214 // we assume these blocks are very rare so we dont optimize it
5215 align_get_bits(&s->gb);
5217 // The pixels are stored in the same order as levels in h->mb array.
5218 for(y=0; y<16; y++){
5219 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5220 for(x=0; x<16; x++){
5221 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5222 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5226 const int index= 256 + 4*(y&3) + 32*(y>>2);
5228 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5229 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5233 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5235 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5236 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5240 // In deblocking, the quantizer is 0
5241 s->current_picture.qscale_table[mb_xy]= 0;
5242 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5243 // All coeffs are present
5244 memset(h->non_zero_count[mb_xy], 16, 16);
5246 s->current_picture.mb_type[mb_xy]= mb_type;
5251 h->ref_count[0] <<= 1;
5252 h->ref_count[1] <<= 1;
5255 fill_caches(h, mb_type, 0);
5258 if(IS_INTRA(mb_type)){
5259 // init_top_left_availability(h);
5260 if(IS_INTRA4x4(mb_type)){
5263 if(dct8x8_allowed && get_bits1(&s->gb)){
5264 mb_type |= MB_TYPE_8x8DCT;
5268 // fill_intra4x4_pred_table(h);
5269 for(i=0; i<16; i+=di){
5270 int mode= pred_intra_mode(h, i);
5272 if(!get_bits1(&s->gb)){
5273 const int rem_mode= get_bits(&s->gb, 3);
5274 mode = rem_mode + (rem_mode >= mode);
5278 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5280 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5282 write_back_intra_pred_mode(h);
5283 if( check_intra4x4_pred_mode(h) < 0)
5286 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5287 if(h->intra16x16_pred_mode < 0)
5290 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5292 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5293 if(h->chroma_pred_mode < 0)
5295 }else if(partition_count==4){
5296 int i, j, sub_partition_count[4], list, ref[2][4];
5298 if(h->slice_type == B_TYPE){
5300 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5301 if(h->sub_mb_type[i] >=13){
5302 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5305 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5306 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5308 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5309 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5310 pred_direct_motion(h, &mb_type);
5311 h->ref_cache[0][scan8[4]] =
5312 h->ref_cache[1][scan8[4]] =
5313 h->ref_cache[0][scan8[12]] =
5314 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5317 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5319 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5320 if(h->sub_mb_type[i] >=4){
5321 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5324 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5325 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5329 for(list=0; list<2; list++){
5330 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5331 if(ref_count == 0) continue;
5333 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5334 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5335 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5344 dct8x8_allowed = get_dct8x8_allowed(h);
5346 for(list=0; list<2; list++){
5347 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5348 if(ref_count == 0) continue;
5351 if(IS_DIRECT(h->sub_mb_type[i])) {
5352 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5355 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5356 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5358 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5359 const int sub_mb_type= h->sub_mb_type[i];
5360 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5361 for(j=0; j<sub_partition_count[i]; j++){
5363 const int index= 4*i + block_width*j;
5364 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5365 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5366 mx += get_se_golomb(&s->gb);
5367 my += get_se_golomb(&s->gb);
5368 tprintf("final mv:%d %d\n", mx, my);
5370 if(IS_SUB_8X8(sub_mb_type)){
5371 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5372 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5373 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5374 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5375 }else if(IS_SUB_8X4(sub_mb_type)){
5376 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5377 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5378 }else if(IS_SUB_4X8(sub_mb_type)){
5379 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5380 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5382 assert(IS_SUB_4X4(sub_mb_type));
5383 mv_cache[ 0 ][0]= mx;
5384 mv_cache[ 0 ][1]= my;
5388 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5394 }else if(IS_DIRECT(mb_type)){
5395 pred_direct_motion(h, &mb_type);
5396 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5398 int list, mx, my, i;
5399 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5400 if(IS_16X16(mb_type)){
5401 for(list=0; list<2; list++){
5402 if(h->ref_count[list]>0){
5403 if(IS_DIR(mb_type, 0, list)){
5404 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5405 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5407 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5410 for(list=0; list<2; list++){
5411 if(IS_DIR(mb_type, 0, list)){
5412 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5413 mx += get_se_golomb(&s->gb);
5414 my += get_se_golomb(&s->gb);
5415 tprintf("final mv:%d %d\n", mx, my);
5417 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5419 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5422 else if(IS_16X8(mb_type)){
5423 for(list=0; list<2; list++){
5424 if(h->ref_count[list]>0){
5426 if(IS_DIR(mb_type, i, list)){
5427 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5428 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5430 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5434 for(list=0; list<2; list++){
5436 if(IS_DIR(mb_type, i, list)){
5437 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5438 mx += get_se_golomb(&s->gb);
5439 my += get_se_golomb(&s->gb);
5440 tprintf("final mv:%d %d\n", mx, my);
5442 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5444 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5448 assert(IS_8X16(mb_type));
5449 for(list=0; list<2; list++){
5450 if(h->ref_count[list]>0){
5452 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5453 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5454 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5456 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5460 for(list=0; list<2; list++){
5462 if(IS_DIR(mb_type, i, list)){
5463 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5464 mx += get_se_golomb(&s->gb);
5465 my += get_se_golomb(&s->gb);
5466 tprintf("final mv:%d %d\n", mx, my);
5468 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5470 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5476 if(IS_INTER(mb_type))
5477 write_back_motion(h, mb_type);
5479 if(!IS_INTRA16x16(mb_type)){
5480 cbp= get_ue_golomb(&s->gb);
5482 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5486 if(IS_INTRA4x4(mb_type))
5487 cbp= golomb_to_intra4x4_cbp[cbp];
5489 cbp= golomb_to_inter_cbp[cbp];
5493 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5494 if(get_bits1(&s->gb))
5495 mb_type |= MB_TYPE_8x8DCT;
5497 s->current_picture.mb_type[mb_xy]= mb_type;
5499 if(cbp || IS_INTRA16x16(mb_type)){
5500 int i8x8, i4x4, chroma_idx;
5501 int chroma_qp, dquant;
5502 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5503 const uint8_t *scan, *scan8x8, *dc_scan;
5505 // fill_non_zero_count_cache(h);
5507 if(IS_INTERLACED(mb_type)){
5508 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5509 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5510 dc_scan= luma_dc_field_scan;
5512 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5513 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5514 dc_scan= luma_dc_zigzag_scan;
5517 dquant= get_se_golomb(&s->gb);
5519 if( dquant > 25 || dquant < -26 ){
5520 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5524 s->qscale += dquant;
5525 if(((unsigned)s->qscale) > 51){
5526 if(s->qscale<0) s->qscale+= 52;
5527 else s->qscale-= 52;
5530 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5531 if(IS_INTRA16x16(mb_type)){
5532 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5533 return -1; //FIXME continue if partitioned and other return -1 too
5536 assert((cbp&15) == 0 || (cbp&15) == 15);
5539 for(i8x8=0; i8x8<4; i8x8++){
5540 for(i4x4=0; i4x4<4; i4x4++){
5541 const int index= i4x4 + 4*i8x8;
5542 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5548 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5551 for(i8x8=0; i8x8<4; i8x8++){
5552 if(cbp & (1<<i8x8)){
5553 if(IS_8x8DCT(mb_type)){
5554 DCTELEM *buf = &h->mb[64*i8x8];
5556 for(i4x4=0; i4x4<4; i4x4++){
5557 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5558 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5561 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5562 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5564 for(i4x4=0; i4x4<4; i4x4++){
5565 const int index= i4x4 + 4*i8x8;
5567 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5573 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5574 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5580 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5581 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5587 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5588 for(i4x4=0; i4x4<4; i4x4++){
5589 const int index= 16 + 4*chroma_idx + i4x4;
5590 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5596 uint8_t * const nnz= &h->non_zero_count_cache[0];
5597 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5598 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5601 uint8_t * const nnz= &h->non_zero_count_cache[0];
5602 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5603 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5604 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5606 s->current_picture.qscale_table[mb_xy]= s->qscale;
5607 write_back_non_zero_count(h);
5610 h->ref_count[0] >>= 1;
5611 h->ref_count[1] >>= 1;
/* Decodes the CABAC mb_field_decoding_flag for the current MBAFF MB pair.
 * The context index (70 + ctx) is derived from whether the left and the
 * above macroblock pairs belong to the same slice and are field (interlaced)
 * coded. NOTE(review): this listing is elided — the bodies of the two if
 * statements (presumably the ctx increments) are not visible here. */
5617 static int decode_cabac_field_decoding_flag(H264Context *h) {
5618 MpegEncContext * const s = &h->s;
// top MB of the current vertically-paired macroblock pair
5619 const int mb_x = s->mb_x;
5620 const int mb_y = s->mb_y & ~1;
// left neighbour pair and the pair two MB rows above
5621 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5622 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5624 unsigned int ctx = 0;
5626 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5629 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// cabac_state[70..72] are the mb_field_decoding_flag contexts
5633 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type with CABAC.
 * ctx_base selects the context set (it differs for I, P and B slices) and
 * intra_slice selects the context-increment variant used within that set.
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16 variant
 * (prediction mode, cbp_chroma, cbp_luma packed into the value).
 * NOTE(review): listing elided — declarations of ctx/mb_type and some braces
 * are not visible here. */
5636 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5637 uint8_t *state= &h->cabac_state[ctx_base];
5641 MpegEncContext * const s = &h->s;
5642 const int mba_xy = h->left_mb_xy[0];
5643 const int mbb_xy = h->top_mb_xy;
// ctx increment: one per same-slice neighbour that is not intra4x4
// (increment statements presumably on the elided lines)
5645 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5647 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5649 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5650 return 0; /* I4x4 */
// non-intra-slice variant: single context, no neighbour-derived increment
5653 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5654 return 0; /* I4x4 */
// end-of-slice style terminate bin distinguishes I_PCM
5657 if( get_cabac_terminate( &h->cabac ) )
5658 return 25; /* PCM */
// build the I_16x16 type index: +12 if luma cbp nonzero, +4/+8 from
// chroma cbp, +1/+2 from the 16x16 intra prediction mode
5660 mb_type = 1; /* I16x16 */
5661 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5662 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5663 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5664 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5665 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
/* Decodes mb_type with CABAC, dispatching on the slice type.
 * I slices use decode_cabac_intra_mb_type directly; P and B slices first
 * decode the inter partitioning and fall back to the intra path (with an
 * offset added) when the "intra in inter slice" prefix is read.
 * NOTE(review): listing elided — some braces and declarations (ctx, bits)
 * are on lines not visible here. */
5669 static int decode_cabac_mb_type( H264Context *h ) {
5670 MpegEncContext * const s = &h->s;
5672 if( h->slice_type == I_TYPE ) {
// ctx_base 3, intra_slice = 1
5673 return decode_cabac_intra_mb_type(h, 3, 1);
5674 } else if( h->slice_type == P_TYPE ) {
// cabac_state[14]: inter/intra decision for P slices
5675 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5677 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5678 /* P_L0_D16x16, P_8x8 */
5679 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5681 /* P_L0_D8x16, P_L0_D16x8 */
5682 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
// intra MB inside a P slice: types offset by 5
5685 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5687 } else if( h->slice_type == B_TYPE ) {
5688 const int mba_xy = h->left_mb_xy[0];
5689 const int mbb_xy = h->top_mb_xy;
// ctx: one increment per same-slice neighbour that is not B_Direct
// (increments presumably on the elided lines)
5693 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5695 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5698 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5699 return 0; /* B_Direct_16x16 */
5701 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5702 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix selects among the remaining bi-predictive / split types
5705 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5706 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5707 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5708 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5710 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5711 else if( bits == 13 ) {
// intra MB inside a B slice: types offset by 23
5712 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5713 } else if( bits == 14 )
5714 return 11; /* B_L1_L0_8x16 */
5715 else if( bits == 15 )
5716 return 22; /* B_8x8 */
// bits in {12, ...}: one extra bin refines the type
5718 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5719 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5721 /* TODO SI/SP frames? */
/* Decodes the CABAC mb_skip_flag for the MB at (mb_x, mb_y).
 * The context (11 + ctx, plus a B-slice offset applied on an elided line)
 * counts the same-slice left/above neighbours that are NOT skipped.
 * In MBAFF frames the neighbour MB addresses must first be adjusted for
 * field/frame coded neighbour pairs.
 * NOTE(review): listing elided — mba_xy/mbb_xy/ctx declarations and some
 * condition lines are not visible here. */
5726 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5727 MpegEncContext * const s = &h->s;
5731 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5732 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// pick the bottom MB of the left pair when its field/frame coding
// differs from the current MB
5735 && h->slice_table[mba_xy] == h->slice_num
5736 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
5737 mba_xy += s->mb_stride;
5739 mbb_xy = mb_xy - s->mb_stride;
5741 && h->slice_table[mbb_xy] == h->slice_num
5742 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5743 mbb_xy -= s->mb_stride;
5745 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// non-MBAFF: plain left / above neighbours
5747 int mb_xy = mb_x + mb_y*s->mb_stride;
5749 mbb_xy = mb_xy - s->mb_stride;
5752 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5754 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices use a different context set (offset applied on elided line)
5757 if( h->slice_type == B_TYPE )
5759 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes one intra4x4 prediction mode with CABAC.
 * A first bin (state 68) signals "use the predicted mode"; otherwise three
 * fixed bins (state 69) form rem_intra4x4_pred_mode, which is remapped so
 * the predicted mode is skipped. NOTE(review): listing elided — the
 * early-return of pred_mode and the final return/increment are not visible. */
5762 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5765 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// 3-bit remainder, LSB first
5768 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5769 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5770 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// skip over the predicted mode so all 9 modes remain reachable
5772 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode (0..3) with CABAC.
 * The first bin's context (64 + ctx) counts same-slice neighbours whose
 * stored chroma prediction mode is nonzero; subsequent bins use context
 * 64+3. NOTE(review): listing elided — ctx declaration/increments and the
 * returns between the bins are not visible here. */
5778 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5779 const int mba_xy = h->left_mb_xy[0];
5780 const int mbb_xy = h->top_mb_xy;
5784 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5785 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5788 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
// truncated-unary: 0 terminates, up to three bins total
5791 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5794 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5796 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping the 4x4 luma block scan index (0..15, zig-zag over
 * the four 8x8 quadrants) to its (x, y) position in 4x4-block units, and
 * the inverse map from (x, y) back to the scan index.
 * NOTE(review): the initializer rows of block_idx_xy are on elided lines. */
5802 static const uint8_t block_idx_x[16] = {
5803 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5805 static const uint8_t block_idx_y[16] = {
5806 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5808 static const uint8_t block_idx_xy[4][4] = {
/* Decodes the luma coded_block_pattern (one bin per 8x8 block) with CABAC.
 * For each 8x8 block the context (73 + ctx) depends on whether the left
 * and above neighbouring 8x8 blocks (possibly in the left/top MB, via
 * left_cbp/top_cbp) have no coded coefficients.
 * NOTE(review): listing elided — cbp accumulation, loop interior braces and
 * the final return are on lines not visible here. */
5815 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5820 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5822 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5825 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
// (x, y) of this 8x8 block in 4x4-block units
5830 x = block_idx_x[4*i8x8];
5831 y = block_idx_y[4*i8x8];
5835 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5836 cbp_a = h->left_cbp;
5837 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5843 /* No need to test for skip as we put 0 for skip block */
5844 /* No need to test for IPCM as we put 1 for IPCM block */
// left neighbour 8x8 block: ctx += 1 when it has no coefficients
5846 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5847 if( ((cbp_a >> i8x8a)&0x01) == 0 )
// above neighbour 8x8 block: ctx += 2 when it has no coefficients
5852 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5853 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5857 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decodes the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * with CABAC. Both bins use context 77 + ctx; the first bin's ctx is based
 * on neighbours having any chroma coefficients, the second on neighbours
 * having chroma AC (value 2). NOTE(review): listing elided — the return 0
 * after the first bin and a ctx reset are not visible here. */
5863 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// neighbour chroma cbp values live in bits 4..5 of left_cbp/top_cbp
5867 cbp_a = (h->left_cbp>>4)&0x03;
5868 cbp_b = (h-> top_cbp>>4)&0x03;
5871 if( cbp_a > 0 ) ctx++;
5872 if( cbp_b > 0 ) ctx += 2;
5873 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5877 if( cbp_a == 2 ) ctx++;
5878 if( cbp_b == 2 ) ctx += 2;
5879 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta with CABAC as a unary-coded magnitude (contexts
 * 60..63), then maps even values to positive and odd values to negative
 * deltas. NOTE(review): listing elided — the ctx/val declarations, the
 * ctx progression inside the loop and the positive-return path are not
 * visible here. */
5881 static int decode_cabac_mb_dqp( H264Context *h) {
5882 MpegEncContext * const s = &h->s;
// address of the previously decoded MB (wraps to end of previous row)
5888 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5890 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
// first-bin context depends on whether the previous MB changed qp
5892 if( h->last_qscale_diff != 0 )
5895 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5901 if(val > 102) //prevent infinite loop
// odd magnitudes map to negative deltas
5908 return -(val + 1)/2;
/* Decodes a P-slice sub_mb_type (8x8, 8x4, 4x8 or 4x4) from up to three
 * bins (contexts 21..23). NOTE(review): listing elided — the returns for
 * the individual sub-types are on lines not visible here. */
5910 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5911 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5913 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5915 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes a B-slice sub_mb_type with CABAC (contexts 36..39).
 * Returns 0 for B_Direct_8x8, 1..2 for single-list 8x8, and higher values
 * for bi-predictive / smaller partitions. NOTE(review): listing elided —
 * the type base value and the final return are not visible here. */
5919 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5921 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5922 return 0; /* B_Direct_8x8 */
5923 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5924 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5926 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5927 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5928 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// two refinement bins select within the remaining type group
5931 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5932 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context 399..401 selected by the number
 * of neighbouring MBs already using the 8x8 transform. */
5936 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5937 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx for block n of the given list with CABAC, unary coded
 * over contexts 54+ctx. The initial ctx is derived from the left/above
 * ref_cache entries; in B slices, direct-predicted neighbours are excluded
 * via direct_cache. NOTE(review): listing elided — the ctx computation,
 * loop interior and the return are on lines not visible here. */
5940 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
// cached ref indices of the left (-1) and above (-8) 4x4 neighbours
5941 int refa = h->ref_cache[list][scan8[n] - 1];
5942 int refb = h->ref_cache[list][scan8[n] - 8];
5946 if( h->slice_type == B_TYPE) {
5947 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5949 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
// unary: count continuation bins
5958 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decodes one motion vector difference component (l: 0 = x, 1 = y) for
 * block n of the given list: a UEG3 binarization — context-coded unary
 * prefix (up to 9 bins), an exp-Golomb bypass suffix, then a bypass sign.
 * The first-bin context depends on the summed |mvd| of the left and above
 * neighbours. NOTE(review): listing elided — the ctx thresholds, mvd
 * accumulation and the final returns are on lines not visible here. */
5968 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5969 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5970 abs( h->mvd_cache[list][scan8[n] - 8][l] );
// contexts 40..46 for the x component, 47..53 for the y component
5971 int ctxbase = (l == 0) ? 40 : 47;
5976 else if( amvd > 32 )
5981 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// unary prefix, capped at 9 context-coded bins
5986 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// exp-Golomb (k-th order) suffix in bypass mode
5994 while( get_cabac_bypass( &h->cabac ) ) {
5999 if( get_cabac_bypass( &h->cabac ) )
// bypass-coded sign bit
6003 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Computes the coded_block_flag context for block category cat and block
 * index idx: ctx in 0..3 from the left/above neighbours' non-zero status,
 * plus 4 * cat to select the per-category context group.
 * cat 0: luma DC (flag from cbp bit 8), cat 1/2: luma AC/4x4,
 * cat 3: chroma DC (cbp bits 6..7), else chroma AC.
 * NOTE(review): listing elided — the combination of nza/nzb into ctx is on
 * lines not visible here. */
6007 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
// luma DC flag is tracked in bit 8 of the neighbour cbp words
6012 nza = h->left_cbp&0x100;
6013 nzb = h-> top_cbp&0x100;
6014 } else if( cat == 1 || cat == 2 ) {
// left (-1) / above (-8) neighbours in the non_zero_count cache
6015 nza = h->non_zero_count_cache[scan8[idx] - 1];
6016 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6017 } else if( cat == 3 ) {
// chroma DC flags sit in cbp bits 6 (Cb) and 7 (Cr)
6018 nza = (h->left_cbp>>(6+idx))&0x01;
6019 nzb = (h-> top_cbp>>(6+idx))&0x01;
6022 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6023 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6032 return ctx + 4 * cat;
/* Decodes one CABAC residual block into block[]: coded_block_flag, the
 * significance map (position of each non-zero coefficient + last flag),
 * then the coefficient magnitudes (unary + exp-Golomb bypass) and signs,
 * written in reverse scan order. qmul, when non-NULL, dequantizes each
 * level ((level * qmul[j] + 32) >> 6); NULL is used for DC blocks that are
 * dequantized later. Returns 0 on success (error return paths are on
 * elided lines). NOTE(review): listing heavily elided — several
 * declarations (index[], last, i, coeff_abs, abslevel1), closing braces
 * and if/else keywords are not visible here. */
6035 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6036 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
// per-category context offsets; row 0 = frame MB, row 1 = field MB
6037 static const int significant_coeff_flag_offset[2][6] = {
6038 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6039 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6041 static const int last_coeff_flag_offset[2][6] = {
6042 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6043 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6045 static const int coeff_abs_level_m1_offset[6] = {
6046 227+0, 227+10, 227+20, 227+30, 227+39, 426
// 8x8 blocks share significance contexts between scan positions
6048 static const int significant_coeff_flag_offset_8x8[2][63] = {
6049 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6050 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6051 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6052 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6053 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6054 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6055 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6056 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6058 static const int last_coeff_flag_offset_8x8[63] = {
6059 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6060 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6061 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6062 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6068 int coeff_count = 0;
// number of coefficients decoded so far with |level| > 1
6071 int abslevelgt1 = 0;
6073 uint8_t *significant_coeff_ctx_base;
6074 uint8_t *last_coeff_ctx_base;
6075 uint8_t *abs_level_m1_ctx_base;
6077 /* cat: 0-> DC 16x16 n = 0
6078 * 1-> AC 16x16 n = luma4x4idx
6079 * 2-> Luma4x4 n = luma4x4idx
6080 * 3-> DC Chroma n = iCbCr
6081 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6082 * 5-> Luma8x8 n = 4 * luma8x8idx
6085 /* read coded block flag */
6087 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// block has no coefficients: zero the nnz cache entry and return early
6088 if( cat == 1 || cat == 2 )
6089 h->non_zero_count_cache[scan8[n]] = 0;
6091 h->non_zero_count_cache[scan8[16+n]] = 0;
// MB_FIELD selects the frame/field context row
6097 significant_coeff_ctx_base = h->cabac_state
6098 + significant_coeff_flag_offset[MB_FIELD][cat];
6099 last_coeff_ctx_base = h->cabac_state
6100 + last_coeff_flag_offset[MB_FIELD][cat];
6101 abs_level_m1_ctx_base = h->cabac_state
6102 + coeff_abs_level_m1_offset[cat];
// decode the significance map: for each scan position, a significant
// bin, and when set, the position is recorded and a last-coeff bin
// decides whether decoding of the map stops here
6105 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6106 for(last= 0; last < coefs; last++) { \
6107 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6108 if( get_cabac( &h->cabac, sig_ctx )) { \
6109 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6110 index[coeff_count++] = last; \
6111 if( get_cabac( &h->cabac, last_ctx ) ) { \
6117 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6118 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6120 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
// the final scan position has no significant_coeff_flag of its own
6122 if( last == max_coeff -1 ) {
6123 index[coeff_count++] = last;
6125 assert(coeff_count > 0);
// record non-zero status for neighbouring-context derivation
6128 h->cbp_table[mb_xy] |= 0x100;
6129 else if( cat == 1 || cat == 2 )
6130 h->non_zero_count_cache[scan8[n]] = coeff_count;
6132 h->cbp_table[mb_xy] |= 0x40 << n;
6134 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
// 8x8 block: propagate the count to all four 4x4 cache slots
6137 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
// decode magnitudes and signs in reverse scan order
6140 for( i = coeff_count - 1; i >= 0; i-- ) {
// coeff_abs_level_minus1 first-bin context: 0 once a |level|>1 has
// been seen, otherwise min(4, #decoded levels == 1)
6141 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6142 int j= scantable[index[i]];
6144 if( get_cabac( &h->cabac, ctx ) == 0 ) {
// |level| == 1: only a bypass sign bin follows
6146 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6149 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6150 else block[j] = ( qmul[j] + 32) >> 6;
// |level| > 1: unary continuation (contexts 5..9), then exp-Golomb
6156 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6157 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6161 if( coeff_abs >= 15 ) {
// exp-Golomb escape: bypass-coded prefix then suffix bits
6163 while( get_cabac_bypass( &h->cabac ) ) {
6164 coeff_abs += 1 << j;
6169 if( get_cabac_bypass( &h->cabac ) )
6170 coeff_abs += 1 << j ;
// sign bin, with optional dequantization via qmul
6175 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6176 else block[j] = coeff_abs;
6178 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6179 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * The simple frame-coded case is the MB directly above and to the left;
 * in MBAFF frames (the second branch, whose guard is on an elided line)
 * the addresses are corrected when the neighbouring MB pair's field/frame
 * coding differs from the current MB's. */
6188 static void inline compute_mb_neighbors(H264Context *h)
6190 MpegEncContext * const s = &h->s;
6191 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6192 h->top_mb_xy = mb_xy - s->mb_stride;
6193 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF adjustment: work in MB-pair coordinates
6195 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6196 const int top_pair_xy = pair_xy - s->mb_stride;
6197 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6198 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6199 const int curr_mb_frame_flag = !MB_FIELD;
6200 const int bottom = (s->mb_y & 1);
// choose top MB of the above pair depending on field/frame pairing
6202 ? !curr_mb_frame_flag // bottom macroblock
6203 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6205 h->top_mb_xy -= s->mb_stride;
// left neighbour: use the pair's top MB when pairings differ
6207 if (left_mb_frame_flag != curr_mb_frame_flag) {
6208 h->left_mb_xy[0] = pair_xy - 1;
6215 * decodes a macroblock
6216 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6218 static int decode_mb_cabac(H264Context *h) {
6219 MpegEncContext * const s = &h->s;
6220 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6221 int mb_type, partition_count, cbp = 0;
6222 int dct8x8_allowed= h->pps.transform_8x8_mode;
6224 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6226 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6227 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6229 /* a skipped mb needs the aff flag from the following mb */
6230 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6231 predict_field_decoding_flag(h);
6232 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6233 skip = h->next_mb_skipped;
6235 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6236 /* read skip flags */
6238 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6239 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6240 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6241 if(h->next_mb_skipped)
6242 predict_field_decoding_flag(h);
6244 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6249 h->cbp_table[mb_xy] = 0;
6250 h->chroma_pred_mode_table[mb_xy] = 0;
6251 h->last_qscale_diff = 0;
6258 if( (s->mb_y&1) == 0 )
6260 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6262 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6264 h->prev_mb_skipped = 0;
6266 compute_mb_neighbors(h);
6267 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6268 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6272 if( h->slice_type == B_TYPE ) {
6274 partition_count= b_mb_type_info[mb_type].partition_count;
6275 mb_type= b_mb_type_info[mb_type].type;
6278 goto decode_intra_mb;
6280 } else if( h->slice_type == P_TYPE ) {
6282 partition_count= p_mb_type_info[mb_type].partition_count;
6283 mb_type= p_mb_type_info[mb_type].type;
6286 goto decode_intra_mb;
6289 assert(h->slice_type == I_TYPE);
6291 partition_count = 0;
6292 cbp= i_mb_type_info[mb_type].cbp;
6293 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6294 mb_type= i_mb_type_info[mb_type].type;
6297 mb_type |= MB_TYPE_INTERLACED;
6299 h->slice_table[ mb_xy ]= h->slice_num;
6301 if(IS_INTRA_PCM(mb_type)) {
6305 // We assume these blocks are very rare so we dont optimize it.
6306 // FIXME The two following lines get the bitstream position in the cabac
6307 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6308 ptr= h->cabac.bytestream;
6309 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6311 // The pixels are stored in the same order as levels in h->mb array.
6312 for(y=0; y<16; y++){
6313 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6314 for(x=0; x<16; x++){
6315 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6316 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6320 const int index= 256 + 4*(y&3) + 32*(y>>2);
6322 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6323 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6327 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6329 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6330 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6334 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6336 // All blocks are present
6337 h->cbp_table[mb_xy] = 0x1ef;
6338 h->chroma_pred_mode_table[mb_xy] = 0;
6339 // In deblocking, the quantizer is 0
6340 s->current_picture.qscale_table[mb_xy]= 0;
6341 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6342 // All coeffs are present
6343 memset(h->non_zero_count[mb_xy], 16, 16);
6344 s->current_picture.mb_type[mb_xy]= mb_type;
6349 h->ref_count[0] <<= 1;
6350 h->ref_count[1] <<= 1;
6353 fill_caches(h, mb_type, 0);
6355 if( IS_INTRA( mb_type ) ) {
6357 if( IS_INTRA4x4( mb_type ) ) {
6358 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6359 mb_type |= MB_TYPE_8x8DCT;
6360 for( i = 0; i < 16; i+=4 ) {
6361 int pred = pred_intra_mode( h, i );
6362 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6363 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6366 for( i = 0; i < 16; i++ ) {
6367 int pred = pred_intra_mode( h, i );
6368 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6370 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6373 write_back_intra_pred_mode(h);
6374 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6376 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6377 if( h->intra16x16_pred_mode < 0 ) return -1;
6379 h->chroma_pred_mode_table[mb_xy] =
6380 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6382 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6383 if( h->chroma_pred_mode < 0 ) return -1;
6384 } else if( partition_count == 4 ) {
6385 int i, j, sub_partition_count[4], list, ref[2][4];
6387 if( h->slice_type == B_TYPE ) {
6388 for( i = 0; i < 4; i++ ) {
6389 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6390 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6391 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6393 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6394 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6395 pred_direct_motion(h, &mb_type);
6396 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6397 for( i = 0; i < 4; i++ )
6398 if( IS_DIRECT(h->sub_mb_type[i]) )
6399 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6403 for( i = 0; i < 4; i++ ) {
6404 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6405 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6406 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6410 for( list = 0; list < 2; list++ ) {
6411 if( h->ref_count[list] > 0 ) {
6412 for( i = 0; i < 4; i++ ) {
6413 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6414 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6415 if( h->ref_count[list] > 1 )
6416 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6422 h->ref_cache[list][ scan8[4*i]+1 ]=
6423 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6429 dct8x8_allowed = get_dct8x8_allowed(h);
6431 for(list=0; list<2; list++){
6433 if(IS_DIRECT(h->sub_mb_type[i])){
6434 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6437 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6439 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6440 const int sub_mb_type= h->sub_mb_type[i];
6441 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6442 for(j=0; j<sub_partition_count[i]; j++){
6445 const int index= 4*i + block_width*j;
6446 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6447 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6448 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6450 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6451 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6452 tprintf("final mv:%d %d\n", mx, my);
6454 if(IS_SUB_8X8(sub_mb_type)){
6455 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6456 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6457 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6458 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6460 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6461 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6462 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6463 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6464 }else if(IS_SUB_8X4(sub_mb_type)){
6465 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6466 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6468 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6469 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6470 }else if(IS_SUB_4X8(sub_mb_type)){
6471 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6472 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6474 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6475 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6477 assert(IS_SUB_4X4(sub_mb_type));
6478 mv_cache[ 0 ][0]= mx;
6479 mv_cache[ 0 ][1]= my;
6481 mvd_cache[ 0 ][0]= mx - mpx;
6482 mvd_cache[ 0 ][1]= my - mpy;
6486 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6487 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6488 p[0] = p[1] = p[8] = p[9] = 0;
6489 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6493 } else if( IS_DIRECT(mb_type) ) {
6494 pred_direct_motion(h, &mb_type);
6495 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6496 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6497 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6499 int list, mx, my, i, mpx, mpy;
6500 if(IS_16X16(mb_type)){
6501 for(list=0; list<2; list++){
6502 if(IS_DIR(mb_type, 0, list)){
6503 if(h->ref_count[list] > 0 ){
6504 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6505 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6508 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6510 for(list=0; list<2; list++){
6511 if(IS_DIR(mb_type, 0, list)){
6512 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6514 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6515 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6516 tprintf("final mv:%d %d\n", mx, my);
6518 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6521 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6524 else if(IS_16X8(mb_type)){
6525 for(list=0; list<2; list++){
6526 if(h->ref_count[list]>0){
6528 if(IS_DIR(mb_type, i, list)){
6529 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6530 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6532 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6536 for(list=0; list<2; list++){
6538 if(IS_DIR(mb_type, i, list)){
6539 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6540 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6541 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6542 tprintf("final mv:%d %d\n", mx, my);
6544 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6545 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6548 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6553 assert(IS_8X16(mb_type));
6554 for(list=0; list<2; list++){
6555 if(h->ref_count[list]>0){
6557 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6558 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6559 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6561 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6565 for(list=0; list<2; list++){
6567 if(IS_DIR(mb_type, i, list)){
6568 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6569 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6570 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6572 tprintf("final mv:%d %d\n", mx, my);
6573 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6574 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6576 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6577 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6584 if( IS_INTER( mb_type ) ) {
6585 h->chroma_pred_mode_table[mb_xy] = 0;
6586 write_back_motion( h, mb_type );
6589 if( !IS_INTRA16x16( mb_type ) ) {
6590 cbp = decode_cabac_mb_cbp_luma( h );
6591 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6594 h->cbp_table[mb_xy] = h->cbp = cbp;
6596 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6597 if( decode_cabac_mb_transform_size( h ) )
6598 mb_type |= MB_TYPE_8x8DCT;
6600 s->current_picture.mb_type[mb_xy]= mb_type;
6602 if( cbp || IS_INTRA16x16( mb_type ) ) {
6603 const uint8_t *scan, *scan8x8, *dc_scan;
6606 if(IS_INTERLACED(mb_type)){
6607 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6608 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6609 dc_scan= luma_dc_field_scan;
6611 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6612 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6613 dc_scan= luma_dc_zigzag_scan;
6616 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6617 if( dqp == INT_MIN ){
6618 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6622 if(((unsigned)s->qscale) > 51){
6623 if(s->qscale<0) s->qscale+= 52;
6624 else s->qscale-= 52;
6626 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6628 if( IS_INTRA16x16( mb_type ) ) {
6630 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6631 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6634 for( i = 0; i < 16; i++ ) {
6635 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6636 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6640 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6644 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6645 if( cbp & (1<<i8x8) ) {
6646 if( IS_8x8DCT(mb_type) ) {
6647 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6648 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6651 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6652 const int index = 4*i8x8 + i4x4;
6653 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6654 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6658 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6659 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6666 for( c = 0; c < 2; c++ ) {
6667 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6668 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6675 for( c = 0; c < 2; c++ ) {
6676 for( i = 0; i < 4; i++ ) {
6677 const int index = 16 + 4 * c + i;
6678 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6679 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6684 uint8_t * const nnz= &h->non_zero_count_cache[0];
6685 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6686 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6689 uint8_t * const nnz= &h->non_zero_count_cache[0];
6690 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6691 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6692 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6693 h->last_qscale_diff = 0;
6696 s->current_picture.qscale_table[mb_xy]= s->qscale;
6697 write_back_non_zero_count(h);
6700 h->ref_count[0] >>= 1;
6701 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixel rows).
 * pix points at the q0 column (right side of the edge); pix[-1..-4] are the
 * p-side samples. bS[4] gives the boundary strength per 4-row segment and
 * qp is the (pre-averaged) luma QP used to derive the alpha/beta/tc0
 * thresholds. NOTE(review): this chunk is missing interior lines (gaps in
 * the embedded numbering), e.g. the tc[] declaration and branch headers. */
6708 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* threshold table indices: QP plus slice offsets, clamped to the legal 0..51 range */
6710 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6711 const int alpha = alpha_table[index_a];
6712 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* normal-filter path (bS < 4): per-segment tc0 clipping, delegated to the
 * DSP routine; tc = -1 marks a bS == 0 segment (no filtering) */
6717 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6718 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6720 /* 16px edge length, because bS=4 is triggered by being at
6721 * the edge of an intra MB, so all 4 bS are the same */
/* strong-filter path (bS == 4), done row by row in C */
6722 for( d = 0; d < 16; d++ ) {
6723 const int p0 = pix[-1];
6724 const int p1 = pix[-2];
6725 const int p2 = pix[-3];
6727 const int q0 = pix[0];
6728 const int q1 = pix[1];
6729 const int q2 = pix[2];
/* filter only when the step across the edge and the side gradients are small */
6731 if( ABS( p0 - q0 ) < alpha &&
6732 ABS( p1 - p0 ) < beta &&
6733 ABS( q1 - q0 ) < beta ) {
/* very flat edge: use the long (3-sample-wide) smoothing filters */
6735 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6736 if( ABS( p2 - p0 ) < beta)
6738 const int p3 = pix[-4];
6740 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6741 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6742 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p-side not flat enough: only p0 gets the short 3-tap filter */
6745 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6747 if( ABS( q2 - q0 ) < beta)
6749 const int q3 = pix[3];
6751 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6752 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6753 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6756 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* fallback for a less-flat edge: only p0/q0 are touched */
6760 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6761 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6763 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: computes alpha/beta from qp and the
 * slice offsets, then dispatches to the DSP chroma loop filter — the
 * tc-clipped variant for bS < 4, the intra (strong) variant for bS == 4.
 * NOTE(review): interior lines (tc[] declaration, branch headers) are
 * missing from this extraction. */
6769 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6771 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6772 const int alpha = alpha_table[index_a];
6773 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma clipping uses tc0 + 1; 0 marks a bS == 0 segment (skip) */
6778 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6779 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6781 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the vertical left luma edge of an MBAFF macroblock pair.
 * Unlike the non-MBAFF path there are 8 boundary strengths (bS[8]) and two
 * QPs (qp[2], one per neighbouring MB of the pair), so every one of the 16
 * rows selects its own bS/qp and the filter runs fully in C, row by row. */
6785 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6787 for( i = 0; i < 16; i++, pix += stride) {
6793 int bS_index = (i >> 1);
6796 bS_index |= (i & 1);
6799 if( bS[bS_index] == 0 ) {
/* row -> QP selection differs between field and frame decoding of the pair */
6803 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6804 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6805 alpha = alpha_table[index_a];
6806 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal filter (bS < 4): tc0-clipped adjustment of p0/q0, optionally p1/q1 */
6808 if( bS[bS_index] < 4 ) {
6809 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6810 const int p0 = pix[-1];
6811 const int p1 = pix[-2];
6812 const int p2 = pix[-3];
6813 const int q0 = pix[0];
6814 const int q1 = pix[1];
6815 const int q2 = pix[2];
6817 if( ABS( p0 - q0 ) < alpha &&
6818 ABS( p1 - p0 ) < beta &&
6819 ABS( q1 - q0 ) < beta ) {
/* p1 is corrected only when the p side is flat (|p2-p0| < beta) */
6823 if( ABS( p2 - p0 ) < beta ) {
6824 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6827 if( ABS( q2 - q0 ) < beta ) {
6828 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6832 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6833 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6834 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6835 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter (bS == 4): same smoothing as filter_mb_edgev, per row */
6838 const int p0 = pix[-1];
6839 const int p1 = pix[-2];
6840 const int p2 = pix[-3];
6842 const int q0 = pix[0];
6843 const int q1 = pix[1];
6844 const int q2 = pix[2];
6846 if( ABS( p0 - q0 ) < alpha &&
6847 ABS( p1 - p0 ) < beta &&
6848 ABS( q1 - q0 ) < beta ) {
6850 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6851 if( ABS( p2 - p0 ) < beta)
6853 const int p3 = pix[-4];
6855 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6856 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6857 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6860 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6862 if( ABS( q2 - q0 ) < beta)
6864 const int q3 = pix[3];
6866 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6867 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6868 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6871 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6875 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6876 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6878 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the vertical left chroma edge of an MBAFF macroblock pair:
 * 8 chroma rows, each selecting its own bS and QP (qp[2], one per
 * neighbouring MB). Chroma touches only p0/q0 — never p1/q1. */
6883 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6885 for( i = 0; i < 8; i++, pix += stride) {
6893 if( bS[bS_index] == 0 ) {
/* row -> QP selection differs between field and frame decoding of the pair */
6897 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6898 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6899 alpha = alpha_table[index_a];
6900 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal filter (bS < 4): delta clipped to +/- (tc0 + 1) */
6902 if( bS[bS_index] < 4 ) {
6903 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6904 const int p0 = pix[-1];
6905 const int p1 = pix[-2];
6906 const int q0 = pix[0];
6907 const int q1 = pix[1];
6909 if( ABS( p0 - q0 ) < alpha &&
6910 ABS( p1 - p0 ) < beta &&
6911 ABS( q1 - q0 ) < beta ) {
6912 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6914 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6915 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6916 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter (bS == 4): 3-tap smoothing of p0/q0 */
6919 const int p0 = pix[-1];
6920 const int p1 = pix[-2];
6921 const int q0 = pix[0];
6922 const int q1 = pix[1];
6924 if( ABS( p0 - q0 ) < alpha &&
6925 ABS( p1 - p0 ) < beta &&
6926 ABS( q1 - q0 ) < beta ) {
6928 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6929 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6930 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixel columns).
 * pix points at the q0 row (below the edge); rows above are reached with
 * negative multiples of pix_next (== stride). Mirror of filter_mb_edgev
 * with row/column roles swapped. */
6936 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6938 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6939 const int alpha = alpha_table[index_a];
6940 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6941 const int pix_next = stride;
/* normal-filter path (bS < 4): delegated to the vertical DSP luma filter */
6946 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6947 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6949 /* 16px edge length, see filter_mb_edgev */
/* strong-filter path (bS == 4), column by column in C */
6950 for( d = 0; d < 16; d++ ) {
6951 const int p0 = pix[-1*pix_next];
6952 const int p1 = pix[-2*pix_next];
6953 const int p2 = pix[-3*pix_next];
6954 const int q0 = pix[0];
6955 const int q1 = pix[1*pix_next];
6956 const int q2 = pix[2*pix_next];
6958 if( ABS( p0 - q0 ) < alpha &&
6959 ABS( p1 - p0 ) < beta &&
6960 ABS( q1 - q0 ) < beta ) {
6962 const int p3 = pix[-4*pix_next];
6963 const int q3 = pix[ 3*pix_next];
/* very flat edge: long smoothing filters on whichever side is flat */
6965 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6966 if( ABS( p2 - p0 ) < beta) {
6968 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6969 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6970 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6973 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6975 if( ABS( q2 - q0 ) < beta) {
6977 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6978 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6979 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6982 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* fallback: only p0/q0 get the short 3-tap filter */
6986 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6987 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6989 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: computes alpha/beta and dispatches to
 * the vertical DSP chroma loop filter (intra variant when bS == 4).
 * Horizontal counterpart of filter_mb_edgecv. */
6996 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6998 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6999 const int alpha = alpha_table[index_a];
7000 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma clipping uses tc0 + 1; 0 marks a bS == 0 segment (skip) */
7005 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7006 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
7008 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock: usable only away from the
 * left/top picture border, without MBAFF, and when the DSP has a
 * h264_loop_filter_strength implementation; otherwise it falls through to
 * the general filter_mb(). Precomputes the edge QPs (averages with the
 * left/top neighbours), early-outs when every QP is below the threshold at
 * which filtering cannot change anything, then either uses constant
 * bS 4/3 tables (intra MB) or lets the DSP routine compute bS. */
7012 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7013 MpegEncContext * const s = &h->s;
7015 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* border MBs and missing DSP support take the slow, fully general path */
7017 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7018 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7021 assert(!FRAME_MBAFF);
7023 mb_xy = mb_x + mb_y*s->mb_stride;
7024 mb_type = s->current_picture.mb_type[mb_xy];
7025 qp = s->current_picture.qscale_table[mb_xy];
7026 qp0 = s->current_picture.qscale_table[mb_xy-1];
7027 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7028 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7029 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7030 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* inter-MB edges use the rounded average of the two MBs' QPs */
7031 qp0 = (qp + qp0 + 1) >> 1;
7032 qp1 = (qp + qp1 + 1) >> 1;
7033 qpc0 = (qpc + qpc0 + 1) >> 1;
7034 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP the filter is a no-op for any bS, so skip the whole MB */
7035 qp_thresh = 15 - h->slice_alpha_c0_offset;
7036 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7037 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: boundary strengths are constant (4 on MB edges, 3 inside) */
7040 if( IS_INTRA(mb_type) ) {
7041 int16_t bS4[4] = {4,4,4,4};
7042 int16_t bS3[4] = {3,3,3,3};
/* with 8x8 transform only every second internal edge is filtered */
7043 if( IS_8x8DCT(mb_type) ) {
7044 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7045 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7046 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7047 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7049 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7050 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7051 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7052 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7053 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7054 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7055 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7056 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only the MB edge and the central edge exist (8x8 plane) */
7058 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7059 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7060 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7061 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7062 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7063 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7064 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7065 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: let the DSP compute bS for all edges at once (bSv aliases bS
 * as one 64-bit word per edge so a whole edge can be set/tested at once) */
7068 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7069 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7071 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7073 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
7075 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7076 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7077 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7078 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7080 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7081 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7082 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7083 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS == 4 on the shared MB edge */
7085 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7086 bSv[0][0] = 0x0004000400040004ULL;
7087 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7088 bSv[1][0] = 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the averaged
 * neighbour QP (qp0/qp1), interior edges use this MB's QP */
7090 #define FILTER(hv,dir,edge)\
7091 if(bSv[dir][edge]) {\
7092 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7094 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7095 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7101 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock, handling picture
 * borders, MBAFF field/frame mixing and per-edge bS derivation.
 * For each direction (0 = vertical edges, 1 = horizontal edges) it walks
 * the up-to-4 edges, derives the four boundary strengths per edge from
 * intra flags, non-zero-coefficient counts, reference frames and motion
 * vector differences, then calls the appropriate filter_mb_edge* helper. */
7120 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7121 MpegEncContext * const s = &h->s;
7122 const int mb_xy= mb_x + mb_y*s->mb_stride;
7123 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs compare vertical MVs at half resolution (threshold 2, not 4) */
7124 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7125 int first_vertical_edge_done = 0;
7127 /* FIXME: A given frame may occupy more than one position in
7128 * the reference list. So ref2frm should be populated with
7129 * frame numbers, not indices. */
/* maps ref_cache values (which start at -2 for "none") to frame indices */
7130 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7131 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7133 //for sufficiently low qp, filtering wouldn't do anything
7134 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7136 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7137 int qp = s->current_picture.qscale_table[mb_xy];
7139 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7140 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left edge of a frame MB next to a field pair
 * (or vice versa) needs 8 bS values and 2 QPs instead of 4/1 */
7146 // left mb is in picture
7147 && h->slice_table[mb_xy-1] != 255
7148 // and current and left pair do not have the same interlaced type
7149 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7150 // and left mb is in the same slice if deblocking_filter == 2
7151 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7152 /* First vertical edge is different in MBAFF frames
7153 * There are 8 different bS to compute and 2 different Qp
7155 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7156 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7160 int mb_qp, mbn0_qp, mbn1_qp;
7162 first_vertical_edge_done = 1;
7164 if( IS_INTRA(mb_type) )
7165 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7167 for( i = 0; i < 8; i++ ) {
7168 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7170 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7172 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7173 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7174 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* per-neighbour averaged luma and chroma QPs for the two left MBs */
7181 mb_qp = s->current_picture.qscale_table[mb_xy];
7182 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7183 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7184 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7185 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7186 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7187 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7188 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7189 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7192 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7193 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7194 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7195 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7196 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7198 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7199 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour MB across edge 0 (left for dir 0, top for dir 1) */
7202 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7203 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7204 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* skipped 16x16 MBs have no internal edges to filter */
7206 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7207 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7208 // how often to recheck mv-based bS when iterating between edges
7209 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7210 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7211 // how often to recheck mv-based bS when iterating along each edge
7212 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7214 if (first_vertical_edge_done) {
7216 first_vertical_edge_done = 0;
/* deblocking_filter == 2: do not filter across slice boundaries */
7219 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7222 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7223 && !IS_INTERLACED(mb_type)
7224 && IS_INTERLACED(mbm_type)
7226 // This is a special case in the norm where the filtering must
7227 // be done twice (one each of the field) even if we are in a
7228 // frame macroblock.
7230 static const int nnz_idx[4] = {4,5,6,3};
7231 unsigned int tmp_linesize = 2 * linesize;
7232 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7233 int mbn_xy = mb_xy - 2 * s->mb_stride;
7238 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7239 if( IS_INTRA(mb_type) ||
7240 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7241 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7243 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7244 for( i = 0; i < 4; i++ ) {
7245 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7246 mbn_nnz[nnz_idx[i]] != 0 )
7252 // Do not use s->qscale as luma quantizer because it has not the same
7253 // value in IPCM macroblocks.
7254 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7255 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7256 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7257 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7258 chroma_qp = ( h->chroma_qp +
7259 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7260 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7261 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular per-edge loop: edge 0 borders the neighbour, 1..3 are internal */
7268 for( edge = start; edge < edges; edge++ ) {
7269 /* mbn_xy: neighbor macroblock */
7270 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7271 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist, skip them */
7275 if( (edge&1) && IS_8x8DCT(mb_type) )
/* any intra MB on either side -> strong bS (4 on an MB edge, 3 inside) */
7278 if( IS_INTRA(mb_type) ||
7279 IS_INTRA(mbn_type) ) {
7282 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7283 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7292 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* mask_edge: this edge lies inside one partition, so mv-based bS is 0 */
7297 if( edge & mask_edge ) {
7298 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mixed field/frame neighbours always get bS 1 */
7301 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7302 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition pair: one ref/mv comparison suffices */
7305 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7306 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7307 int bn_idx= b_idx - (dir ? 8:1);
7309 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7310 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7311 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7312 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7314 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* otherwise derive bS per 4x4 block along the edge */
7320 for( i = 0; i < 4; i++ ) {
7321 int x = dir == 0 ? edge : i;
7322 int y = dir == 0 ? i : edge;
7323 int b_idx= 8 + 4 + x + 8*y;
7324 int bn_idx= b_idx - (dir ? 8:1);
/* coded coefficients on either side -> bS 2 */
7326 if( h->non_zero_count_cache[b_idx] != 0 ||
7327 h->non_zero_count_cache[bn_idx] != 0 ) {
/* different refs or large mv difference -> bS 1 */
7333 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7334 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7335 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7336 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7344 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7349 // Do not use s->qscale as luma quantizer because it has not the same
7350 // value in IPCM macroblocks.
7351 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7352 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7353 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7354 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* chroma edges exist only at even luma edge positions (8x8 plane) */
7356 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7357 if( (edge&1) == 0 ) {
7358 int chroma_qp = ( h->chroma_qp +
7359 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7360 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7361 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7364 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7365 if( (edge&1) == 0 ) {
7366 int chroma_qp = ( h->chroma_qp +
7367 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7368 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7369 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7376 static int decode_slice(H264Context *h){
7377 MpegEncContext * const s = &h->s;
7378 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7382 if( h->pps.cabac ) {
7386 align_get_bits( &s->gb );
7389 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7390 ff_init_cabac_decoder( &h->cabac,
7391 s->gb.buffer + get_bits_count(&s->gb)/8,
7392 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7393 /* calculate pre-state */
7394 for( i= 0; i < 460; i++ ) {
7396 if( h->slice_type == I_TYPE )
7397 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7399 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7402 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7404 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7408 int ret = decode_mb_cabac(h);
7411 if(ret>=0) hl_decode_mb(h);
7413 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7416 if(ret>=0) ret = decode_mb_cabac(h);
7418 if(ret>=0) hl_decode_mb(h);
7421 eos = get_cabac_terminate( &h->cabac );
7423 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
7424 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7425 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7429 if( ++s->mb_x >= s->mb_width ) {
7431 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7438 if( eos || s->mb_y >= s->mb_height ) {
7439 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7440 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7447 int ret = decode_mb_cavlc(h);
7449 if(ret>=0) hl_decode_mb(h);
7451 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7453 ret = decode_mb_cavlc(h);
7455 if(ret>=0) hl_decode_mb(h);
7460 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7461 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7466 if(++s->mb_x >= s->mb_width){
7468 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7473 if(s->mb_y >= s->mb_height){
7474 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7476 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7477 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7481 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7488 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7489 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7490 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7491 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7495 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7504 for(;s->mb_y < s->mb_height; s->mb_y++){
7505 for(;s->mb_x < s->mb_width; s->mb_x++){
7506 int ret= decode_mb(h);
7511 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7512 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7517 if(++s->mb_x >= s->mb_width){
7519 if(++s->mb_y >= s->mb_height){
7520 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7521 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7525 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7532 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7533 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7534 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7538 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7545 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7548 return -1; //not reached
/* Parses an SEI "user data unregistered" payload (NOTE(review): listing is
 * elided here; some original lines are not visible).
 * Copies up to sizeof(user_data)-1 payload bytes into a local buffer, then
 * scans past the first 16 bytes (presumably the 16-byte UUID of the payload
 * — confirm against the full source) for an x264 version banner, storing the
 * build number in h->x264_build so later code can work around encoder bugs. */
7551 static int decode_unregistered_user_data(H264Context *h, int size){
7552 MpegEncContext * const s = &h->s;
7553 uint8_t user_data[16+256];
7559 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7560 user_data[i]= get_bits(&s->gb, 8);
/* e==1 means the "%d" build number was successfully matched. */
7564 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7565 if(e==1 && build>=0)
7566 h->x264_build= build;
7568 if(s->avctx->debug & FF_DEBUG_BUGS)
7569 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip whatever payload bytes remain beyond the local buffer. */
7572 skip_bits(&s->gb, 8);
/* Parses the SEI NAL unit (NOTE(review): listing elided; loop bodies and the
 * payload-type dispatch are only partially visible).
 * SEI payload type and size are each coded as a run of 0xFF bytes plus a
 * terminating byte < 255, accumulated below; each payload is byte-aligned. */
7577 static int decode_sei(H264Context *h){
7578 MpegEncContext * const s = &h->s;
7580 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* Accumulate payloadType: every 0xFF byte adds 255, last byte terminates. */
7585 type+= show_bits(&s->gb, 8);
7586 }while(get_bits(&s->gb, 8) == 255);
/* Accumulate payloadSize the same way. */
7590 size+= show_bits(&s->gb, 8);
7591 }while(get_bits(&s->gb, 8) == 255);
7595 if(decode_unregistered_user_data(h, size) < 0)
/* Unhandled payload types are skipped wholesale. */
7599 skip_bits(&s->gb, 8*size);
7602 //FIXME check bits here
7603 align_get_bits(&s->gb);
/* Parses hrd_parameters() from the VUI (H.264 Annex E). All fields are read
 * and discarded except via side effects on the bitstream position — the
 * decoder only needs to consume them to stay in sync. */
7609 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7610 MpegEncContext * const s = &h->s;
7612 cpb_count = get_ue_golomb(&s->gb) + 1;
7613 get_bits(&s->gb, 4); /* bit_rate_scale */
7614 get_bits(&s->gb, 4); /* cpb_size_scale */
7615 for(i=0; i<cpb_count; i++){
7616 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7617 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7618 get_bits1(&s->gb); /* cbr_flag */
7620 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7621 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7622 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7623 get_bits(&s->gb, 5); /* time_offset_length */
/* Parses vui_parameters() from the SPS (H.264 Annex E). Stores sample aspect
 * ratio, timing info and the bitstream restriction data (num_reorder_frames)
 * in *sps; everything else is consumed and discarded. */
7626 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7627 MpegEncContext * const s = &h->s;
7628 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7629 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7631 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7633 if( aspect_ratio_info_present_flag ) {
7634 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: SAR numerator/denominator are coded explicitly. */
7635 if( aspect_ratio_idc == EXTENDED_SAR ) {
7636 sps->sar.num= get_bits(&s->gb, 16);
7637 sps->sar.den= get_bits(&s->gb, 16);
/* idc 1..13 index the predefined pixel_aspect table. */
7638 }else if(aspect_ratio_idc < 14){
7639 sps->sar= pixel_aspect[aspect_ratio_idc];
7641 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7648 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7650 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7651 get_bits1(&s->gb); /* overscan_appropriate_flag */
7654 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7655 get_bits(&s->gb, 3); /* video_format */
7656 get_bits1(&s->gb); /* video_full_range_flag */
7657 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7658 get_bits(&s->gb, 8); /* colour_primaries */
7659 get_bits(&s->gb, 8); /* transfer_characteristics */
7660 get_bits(&s->gb, 8); /* matrix_coefficients */
7664 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7665 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7666 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7669 sps->timing_info_present_flag = get_bits1(&s->gb);
7670 if(sps->timing_info_present_flag){
7671 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7672 sps->time_scale = get_bits_long(&s->gb, 32);
7673 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear once for NAL and once for VCL. */
7676 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7677 if(nal_hrd_parameters_present_flag)
7678 decode_hrd_parameters(h, sps);
7679 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7680 if(vcl_hrd_parameters_present_flag)
7681 decode_hrd_parameters(h, sps);
7682 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7683 get_bits1(&s->gb); /* low_delay_hrd_flag */
7684 get_bits1(&s->gb); /* pic_struct_present_flag */
7686 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7687 if(sps->bitstream_restriction_flag){
7688 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7689 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7690 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7691 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7692 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames drives the B-frame output-delay logic in decode_frame(). */
7693 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7694 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parses one scaling list of `size` (16 or 4x4, 64 for 8x8) coefficients.
 * Three outcomes: list absent -> copy fallback_list; first delta makes the
 * first coefficient 0 -> copy the JVT default list; otherwise decode deltas
 * in zigzag order, repeating the last value once next==0 is reached. */
7700 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7701 const uint8_t *jvt_list, const uint8_t *fallback_list){
7702 MpegEncContext * const s = &h->s;
7703 int i, last = 8, next = 8;
7704 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7705 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7706 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7708 for(i=0;i<size;i++){
/* delta_scale is signed; coefficients wrap modulo 256. */
7710 next = (last + get_se_golomb(&s->gb)) & 0xff;
7711 if(!i && !next){ /* matrix not written, we use the preset one */
7712 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the rest of the list". */
7715 last = factors[scan[i]] = next ? next : last;
/* Parses the full scaling-matrix syntax shared by SPS and PPS.
 * When called for a PPS (is_sps==0) and the SPS carried matrices, those act
 * as the fallback; otherwise the flat/JVT defaults do. Each 4x4 chroma list
 * falls back to the previously decoded list of the same intra/inter class. */
7719 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7720 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7721 MpegEncContext * const s = &h->s;
7722 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7723 const uint8_t *fallback[4] = {
7724 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7725 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7726 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7727 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7729 if(get_bits1(&s->gb)){
7730 sps->scaling_matrix_present |= is_sps;
7731 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7732 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7733 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7734 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7735 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7736 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform. */
7737 if(is_sps || pps->transform_8x8_mode){
7738 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7739 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* No matrices in this PPS: inherit the SPS matrices wholesale. */
7741 } else if(fallback_sps) {
7742 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7743 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parses a Sequence Parameter Set NAL into h->sps_buffer[sps_id]
 * (NOTE(review): listing elided; several error-return paths and closing
 * braces are not visible here). */
7747 static inline int decode_seq_parameter_set(H264Context *h){
7748 MpegEncContext * const s = &h->s;
7749 int profile_idc, level_idc;
7753 profile_idc= get_bits(&s->gb, 8);
7754 get_bits1(&s->gb); //constraint_set0_flag
7755 get_bits1(&s->gb); //constraint_set1_flag
7756 get_bits1(&s->gb); //constraint_set2_flag
7757 get_bits1(&s->gb); //constraint_set3_flag
7758 get_bits(&s->gb, 4); // reserved
7759 level_idc= get_bits(&s->gb, 8);
7760 sps_id= get_ue_golomb(&s->gb);
/* NOTE(review): no visible bounds check of sps_id against MAX_SPS_COUNT
 * before indexing sps_buffer — confirm it exists in the elided lines. */
7762 sps= &h->sps_buffer[ sps_id ];
7763 sps->profile_idc= profile_idc;
7764 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices. */
7766 if(sps->profile_idc >= 100){ //high profile
7767 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7768 get_bits1(&s->gb); //residual_color_transform_flag
7769 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7770 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7771 sps->transform_bypass = get_bits1(&s->gb);
7772 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7774 sps->scaling_matrix_present = 0;
7776 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7777 sps->poc_type= get_ue_golomb(&s->gb);
7779 if(sps->poc_type == 0){ //FIXME #define
7780 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7781 } else if(sps->poc_type == 1){//FIXME #define
7782 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7783 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7784 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7785 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7787 for(i=0; i<sps->poc_cycle_length; i++)
7788 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7790 if(sps->poc_type > 2){
7791 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7795 sps->ref_frame_count= get_ue_golomb(&s->gb);
7796 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7797 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7799 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7800 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7801 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard against overflow in 16*mb_width/height and insane dimensions. */
7802 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7803 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7806 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7807 if(!sps->frame_mbs_only_flag)
7808 sps->mb_aff= get_bits1(&s->gb);
7812 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7814 #ifndef ALLOW_INTERLACE
7816 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n");
7818 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7819 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7821 sps->crop= get_bits1(&s->gb);
7823 sps->crop_left = get_ue_golomb(&s->gb);
7824 sps->crop_right = get_ue_golomb(&s->gb);
7825 sps->crop_top = get_ue_golomb(&s->gb);
7826 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Left/top cropping is only partially implemented downstream. */
7827 if(sps->crop_left || sps->crop_top){
7828 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7834 sps->crop_bottom= 0;
7837 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7838 if( sps->vui_parameters_present_flag )
7839 decode_vui_parameters(h, sps);
7841 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7842 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7843 sps_id, sps->profile_idc, sps->level_idc,
7845 sps->ref_frame_count,
7846 sps->mb_width, sps->mb_height,
7847 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7848 sps->direct_8x8_inference_flag ? "8B8" : "",
7849 sps->crop_left, sps->crop_right,
7850 sps->crop_top, sps->crop_bottom,
7851 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parses a Picture Parameter Set NAL into h->pps_buffer[pps_id]. The
 * embedded "| ... |" table fragments below are quotes of the spec's FMO
 * syntax, kept inside the (elided) switch as documentation of unimplemented
 * slice-group map types. bit_length lets us detect the optional trailing
 * 8x8/scaling-matrix fields added by later profiles. */
7857 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7858 MpegEncContext * const s = &h->s;
7859 int pps_id= get_ue_golomb(&s->gb);
/* NOTE(review): no visible bounds check of pps_id against MAX_PPS_COUNT
 * before indexing pps_buffer — confirm it exists in the elided lines. */
7860 PPS *pps= &h->pps_buffer[pps_id];
7862 pps->sps_id= get_ue_golomb(&s->gb);
7863 pps->cabac= get_bits1(&s->gb);
7864 pps->pic_order_present= get_bits1(&s->gb);
7865 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (multiple slice groups) is parsed but not supported. */
7866 if(pps->slice_group_count > 1 ){
7867 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7868 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7869 switch(pps->mb_slice_group_map_type){
7872 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7873 | run_length[ i ] |1 |ue(v) |
7878 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7880 | top_left_mb[ i ] |1 |ue(v) |
7881 | bottom_right_mb[ i ] |1 |ue(v) |
7889 | slice_group_change_direction_flag |1 |u(1) |
7890 | slice_group_change_rate_minus1 |1 |ue(v) |
7895 | slice_group_id_cnt_minus1 |1 |ue(v) |
7896 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7898 | slice_group_id[ i ] |1 |u(v) |
7903 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7904 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7905 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7906 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7910 pps->weighted_pred= get_bits1(&s->gb);
7911 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7912 pps->init_qp= get_se_golomb(&s->gb) + 26;
7913 pps->init_qs= get_se_golomb(&s->gb) + 26;
7914 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7915 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7916 pps->constrained_intra_pred= get_bits1(&s->gb);
7917 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* Defaults for streams without the optional high-profile extension. */
7919 pps->transform_8x8_mode= 0;
7920 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7921 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7922 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Extra data past the mandatory fields => high-profile PPS extension. */
7924 if(get_bits_count(&s->gb) < bit_length){
7925 pps->transform_8x8_mode= get_bits1(&s->gb);
7926 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7927 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7930 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7931 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7932 pps_id, pps->sps_id,
7933 pps->cabac ? "CABAC" : "CAVLC",
7934 pps->slice_group_count,
7935 pps->ref_count[0], pps->ref_count[1],
7936 pps->weighted_pred ? "weighted" : "",
7937 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7938 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7939 pps->constrained_intra_pred ? "CONSTR" : "",
7940 pps->redundant_pic_cnt_present ? "REDU" : "",
7941 pps->transform_8x8_mode ? "8x8DCT" : ""
7949 * finds the end of the current frame in the bitstream.
7950 * @return the position of the first byte of the next frame, or -1
/* Byte-wise start-code scanner used by the parser and CODEC_FLAG_TRUNCATED
 * path. `state` is a rolling window of the last 4 bytes; masking with
 * 0xFFFFFF1F isolates the 00 00 01 prefix plus the 5-bit nal_unit_type. */
7952 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7955 ParseContext *pc = &(h->s.parse_context);
7956 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7957 // mb_addr= pc->mb_addr - 1;
7959 for(i=0; i<=buf_size; i++){
/* 0x101/0x102/0x105: slice, DPA or IDR-slice NAL start code. */
7960 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7961 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7962 if(pc->frame_start_found){
7963 // If there isn't one more byte in the buffer
7964 // the test on first_mb_in_slice cannot be done yet
7965 // do it at next call.
7966 if (i >= buf_size) break;
/* ue(v) first_mb_in_slice==0 encodes as a leading 1 bit => new picture. */
7967 if (buf[i] & 0x80) {
7968 // first_mb_in_slice is 0, probably the first nal of a new
7970 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7972 pc->frame_start_found= 0;
7976 pc->frame_start_found = 1;
/* 0x107/0x108/0x109: SPS, PPS or AUD also terminate the current frame. */
7978 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7979 if(pc->frame_start_found){
7981 pc->frame_start_found= 0;
7986 state= (state<<8) | buf[i];
7990 return END_NOT_FOUND;
7993 #ifdef CONFIG_H264_PARSER
/* AVCodecParser callback: accumulates input until find_frame_end() locates a
 * complete access unit, then hands the (possibly reassembled) buffer back via
 * *poutbuf. ff_combine_frame() may stash a partial frame and return <0,
 * meaning "need more data". */
7994 static int h264_parse(AVCodecParserContext *s,
7995 AVCodecContext *avctx,
7996 uint8_t **poutbuf, int *poutbuf_size,
7997 const uint8_t *buf, int buf_size)
7999 H264Context *h = s->priv_data;
8000 ParseContext *pc = &h->s.parse_context;
8003 next= find_frame_end(h, buf, buf_size);
8005 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8011 *poutbuf = (uint8_t *)buf;
8012 *poutbuf_size = buf_size;
/* AVCodecParser split callback: returns the offset of the first VCL-ish NAL
 * so the caller can separate out-of-band headers (SPS/PPS) from frame data.
 * Requires an SPS (0x107) to have been seen before splitting. */
8016 static int h264_split(AVCodecContext *avctx,
8017 const uint8_t *buf, int buf_size)
8020 uint32_t state = -1;
8023 for(i=0; i<=buf_size; i++){
8024 if((state&0xFFFFFF1F) == 0x107)
8026 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* Any start code that is not SPS/PPS/AUD ends the header section; back up
 * over any zero bytes preceding the 4-byte start code. */
8028 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
8030 while(i>4 && buf[i-5]==0) i--;
8035 state= (state<<8) | buf[i];
8039 #endif /* CONFIG_H264_PARSER */
/* Main NAL-unit loop: walks `buf`, extracting one NAL at a time either by
 * explicit length prefix (AVC/mp4 mode, h->is_avc) or by Annex-B start-code
 * search, unescapes it with decode_nal(), and dispatches on nal_unit_type
 * (NOTE(review): listing elided — the case labels for slice/SPS/PPS NALs and
 * several braces are not visible here). Returns the number of bytes consumed. */
8041 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8042 MpegEncContext * const s = &h->s;
8043 AVCodecContext * const avctx= s->avctx;
8047 for(i=0; i<50; i++){
8048 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8052 s->current_picture_ptr= NULL;
8061 if(buf_index >= buf_size) break;
/* AVC mode: NAL length is big-endian, nal_length_size bytes wide. */
8063 for(i = 0; i < h->nal_length_size; i++)
8064 nalsize = (nalsize << 8) | buf[buf_index++];
8070 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8075 // start code prefix search
8076 for(; buf_index + 3 < buf_size; buf_index++){
8077 // this should always succeed in the first iteration
8078 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8082 if(buf_index+3 >= buf_size) break;
/* decode_nal strips emulation-prevention bytes; trailing zero bytes are
 * then trimmed before computing the exact bit length of the RBSP. */
8087 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8088 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8090 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8092 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8093 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8096 if (h->is_avc && (nalsize != consumed))
8097 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8099 buf_index += consumed;
/* Skip non-reference NALs entirely when the user asked to hurry/skip. */
8101 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME don't discard SEI id
8102 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8105 switch(h->nal_unit_type){
8107 idr(h); //FIXME ensure we don't lose some frames if there is reordering
/* Plain slice: single bitstream reader, no data partitioning. */
8109 init_get_bits(&s->gb, ptr, bit_length);
8111 h->inter_gb_ptr= &s->gb;
8112 s->data_partitioning = 0;
8114 if(decode_slice_header(h) < 0){
8115 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8118 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* Decode only if the user's skip settings allow this slice type. */
8119 if(h->redundant_pic_count==0 && s->hurry_up < 5
8120 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8121 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8122 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8123 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: header only; B/C partitions arrive separately. */
8127 init_get_bits(&s->gb, ptr, bit_length);
8129 h->inter_gb_ptr= NULL;
8130 s->data_partitioning = 1;
8132 if(decode_slice_header(h) < 0){
8133 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Partition B feeds intra residuals, partition C inter residuals. */
8137 init_get_bits(&h->intra_gb, ptr, bit_length);
8138 h->intra_gb_ptr= &h->intra_gb;
8141 init_get_bits(&h->inter_gb, ptr, bit_length);
8142 h->inter_gb_ptr= &h->inter_gb;
8144 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8146 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8147 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8148 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8149 && avctx->skip_frame < AVDISCARD_ALL)
8153 init_get_bits(&s->gb, ptr, bit_length);
8157 init_get_bits(&s->gb, ptr, bit_length);
8158 decode_seq_parameter_set(h);
8160 if(s->flags& CODEC_FLAG_LOW_DELAY)
8163 if(avctx->has_b_frames < 2)
8164 avctx->has_b_frames= !s->low_delay;
8167 init_get_bits(&s->gb, ptr, bit_length);
8169 decode_picture_parameter_set(h, bit_length);
8173 case NAL_END_SEQUENCE:
8174 case NAL_END_STREAM:
8175 case NAL_FILLER_DATA:
8177 case NAL_AUXILIARY_SLICE:
8180 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8184 if(!s->current_picture_ptr) return buf_index; //no frame
/* Picture bookkeeping: carry POC/frame_num state into the next picture. */
8186 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8187 s->current_picture_ptr->pict_type= s->pict_type;
8189 h->prev_frame_num_offset= h->frame_num_offset;
8190 h->prev_frame_num= h->frame_num;
8191 if(s->current_picture_ptr->reference){
8192 h->prev_poc_msb= h->poc_msb;
8193 h->prev_poc_lsb= h->poc_lsb;
8195 if(s->current_picture_ptr->reference)
8196 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8206 * returns the number of bytes consumed for building the current frame
/* In truncated mode the ParseContext may have buffered part of the frame in
 * a previous call, so subtract what was carried over; the clamps below keep
 * the caller's consumption loop making forward progress. */
8208 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8209 if(s->flags&CODEC_FLAG_TRUNCATED){
8210 pos -= s->parse_context.last_index;
8211 if(pos<0) pos=0; // FIXME remove (unneeded?)
8215 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8216 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback (NOTE(review): listing elided; several
 * early returns and closing braces are not visible). Handles: flush on empty
 * input, truncated-input reassembly, one-time parsing of the avcC extradata
 * (AVC/mp4 mode), decoding of all NAL units in `buf`, and finally reordering
 * of decoded pictures into display order before copying one into *pict. */
8222 static int decode_frame(AVCodecContext *avctx,
8223 void *data, int *data_size,
8224 uint8_t *buf, int buf_size)
8226 H264Context *h = avctx->priv_data;
8227 MpegEncContext *s = &h->s;
8228 AVFrame *pict = data;
8231 s->flags= avctx->flags;
8232 s->flags2= avctx->flags2;
8234 /* no supplementary picture */
8235 if (buf_size == 0) {
/* CODEC_FLAG_TRUNCATED: input may end mid-frame; buffer and recombine. */
8239 if(s->flags&CODEC_FLAG_TRUNCATED){
8240 int next= find_frame_end(h, buf, buf_size);
8242 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8244 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata box (ISO/IEC 14496-15 layout). */
8247 if(h->is_avc && !h->got_avcC) {
8248 int i, cnt, nalsize;
8249 unsigned char *p = avctx->extradata;
8250 if(avctx->extradata_size < 7) {
8251 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8255 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8258 /* sps and pps in the avcC always have length coded with 2 bytes,
8259 so put a fake nal_length_size = 2 while parsing them */
8260 h->nal_length_size = 2;
8261 // Decode sps from avcC
8262 cnt = *(p+5) & 0x1f; // Number of sps
8264 for (i = 0; i < cnt; i++) {
8265 nalsize = BE_16(p) + 2;
8266 if(decode_nal_units(h, p, nalsize) < 0) {
8267 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8272 // Decode pps from avcC
8273 cnt = *(p++); // Number of pps
8274 for (i = 0; i < cnt; i++) {
8275 nalsize = BE_16(p) + 2;
8276 if(decode_nal_units(h, p, nalsize) != nalsize) {
8277 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8282 // Now store the right NAL length size, to be used to parse all other NALs
8283 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8284 // Do not reparse avcC
/* Annex-B extradata (e.g. from raw .h264): decode it once, up front. */
8288 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8289 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8293 buf_index=decode_nal_units(h, buf, buf_size);
8297 //FIXME do something with unavailable reference frames
8299 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8300 if(!s->current_picture_ptr){
8301 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8306 Picture *out = s->current_picture_ptr;
8307 #if 0 //decode order
8308 *data_size = sizeof(AVFrame);
8310 /* Sort B-frames into display order */
8311 Picture *cur = s->current_picture_ptr;
8312 Picture *prev = h->delayed_output_pic;
8313 int i, pics, cross_idr, out_of_order, out_idx;
/* VUI num_reorder_frames, when present, bounds the output delay. */
8315 if(h->sps.bitstream_restriction_flag
8316 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8317 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the just-decoded picture to the delayed-output queue. */
8322 while(h->delayed_pic[pics]) pics++;
8323 h->delayed_pic[pics++] = cur;
8324 if(cur->reference == 0)
8328 for(i=0; h->delayed_pic[i]; i++)
8329 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the queued picture with the smallest POC (not crossing an IDR). */
8332 out = h->delayed_pic[0];
8334 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8335 if(h->delayed_pic[i]->poc < out->poc){
8336 out = h->delayed_pic[i];
8340 out_of_order = !cross_idr && prev && out->poc < prev->poc;
/* Heuristics: grow has_b_frames when the stream shows more reordering
 * than we currently assume (capped; see pics < 15). */
8341 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8343 else if(prev && pics <= s->avctx->has_b_frames)
8345 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8347 ((!cross_idr && prev && out->poc > prev->poc + 2)
8348 || cur->pict_type == B_TYPE)))
8351 s->avctx->has_b_frames++;
8354 else if(out_of_order)
/* Remove the emitted picture from the queue by shifting the tail down. */
8357 if(out_of_order || pics > s->avctx->has_b_frames){
8358 for(i=out_idx; h->delayed_pic[i]; i++)
8359 h->delayed_pic[i] = h->delayed_pic[i+1];
8365 *data_size = sizeof(AVFrame);
/* The previously output picture no longer needs to be kept as a reference. */
8366 if(prev && prev != out && prev->reference == 1)
8367 prev->reference = 0;
8368 h->delayed_output_pic = out;
8372 *pict= *(AVFrame*)out;
8374 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8377 assert(pict->data[0] || !*data_size);
8378 ff_print_debug_info(s, pict);
8379 //printf("out %d\n", (int)pict->data[0]);
8382 /* Return the Picture timestamp as the frame number */
8383 /* we subtract 1 because it is added in utils.c */
8384 avctx->frame_number = s->picture_number - 1;
8386 return get_consumed_bytes(s, buf_index, buf_size);
/* Fills h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour is available only if it exists inside the picture
 * AND belongs to the same slice (slice_table match). Indices 0..2 are the
 * top-left/top/top-right row, 3 is the left neighbour (NOTE(review): the
 * guard for the first MB row is in elided lines above index 0..2). */
8389 static inline void fill_mb_avail(H264Context *h){
8390 MpegEncContext * const s = &h->s;
8391 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8394 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8395 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8396 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8402 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8403 h->mb_avail[4]= 1; //FIXME move out
8404 h->mb_avail[5]= 0; //FIXME move out
8410 #define SIZE (COUNT*40)
/* Built-in self test (compiled under a TEST define; the enclosing main() and
 * many declarations are in elided lines). Exercises, in order: unsigned and
 * signed Exp-Golomb round-trips, the 4x4 (I)DCT with a crude quantizer, the
 * quantizer itself, and NAL escaping/unescaping round-trips. */
8416 // int int_temp[10000];
8418 AVCodecContext avctx;
8420 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write COUNT codes, read them back --- */
8422 init_put_bits(&pb, temp, SIZE);
8423 printf("testing unsigned exp golomb\n");
8424 for(i=0; i<COUNT; i++){
8426 set_ue_golomb(&pb, i);
8427 STOP_TIMER("set_ue_golomb");
8429 flush_put_bits(&pb);
8431 init_get_bits(&gb, temp, 8*SIZE);
8432 for(i=0; i<COUNT; i++){
8435 s= show_bits(&gb, 24);
8438 j= get_ue_golomb(&gb);
8440 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8443 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: values centred around zero --- */
8447 init_put_bits(&pb, temp, SIZE);
8448 printf("testing signed exp golomb\n");
8449 for(i=0; i<COUNT; i++){
8451 set_se_golomb(&pb, i - COUNT/2);
8452 STOP_TIMER("set_se_golomb");
8454 flush_put_bits(&pb);
8456 init_get_bits(&gb, temp, 8*SIZE);
8457 for(i=0; i<COUNT; i++){
8460 s= show_bits(&gb, 24);
8463 j= get_se_golomb(&gb);
8464 if(j != i - COUNT/2){
8465 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8468 STOP_TIMER("get_se_golomb");
/* --- 4x4 forward DCT + coarse quantization + IDCT, measuring error --- */
8471 printf("testing 4x4 (I)DCT\n");
8474 uint8_t src[16], ref[16];
8475 uint64_t error= 0, max_error=0;
8477 for(i=0; i<COUNT; i++){
8479 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8480 for(j=0; j<16; j++){
8481 ref[j]= random()%255;
8482 src[j]= random()%255;
8485 h264_diff_dct_c(block, src, ref, 4);
/* Approximate quantization: scale by 4, then 4/5 per odd row/column. */
8488 for(j=0; j<16; j++){
8489 // printf("%d ", block[j]);
8490 block[j]= block[j]*4;
8491 if(j&1) block[j]= (block[j]*4 + 2)/5;
8492 if(j&4) block[j]= (block[j]*4 + 2)/5;
8496 s->dsp.h264_idct_add(ref, block, 4);
8497 /* for(j=0; j<16; j++){
8498 printf("%d ", ref[j]);
8502 for(j=0; j<16; j++){
8503 int diff= ABS(src[j] - ref[j]);
8506 max_error= FFMAX(max_error, diff);
8509 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8511 printf("testing quantizer\n");
8512 for(qp=0; qp<52; qp++){
8514 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: random payloads with injected zero bytes must
 *     round-trip exactly through encode_nal()/decode_nal() --- */
8518 printf("Testing NAL layer\n");
8520 uint8_t bitstream[COUNT];
8521 uint8_t nal[COUNT*2];
8523 memset(&h, 0, sizeof(H264Context));
8525 for(i=0; i<COUNT; i++){
8533 for(j=0; j<COUNT; j++){
8534 bitstream[j]= (random() % 255) + 1;
8537 for(j=0; j<zeros; j++){
8538 int pos= random() % COUNT;
8539 while(bitstream[pos] == 0){
8548 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8550 printf("encoding failed\n");
8554 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8558 if(out_length != COUNT){
8559 printf("incorrect length %d %d\n", out_length, COUNT);
8563 if(consumed != nal_length){
8564 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8568 if(memcmp(bitstream, out, COUNT)){
8569 printf("missmatch\n");
8574 printf("Testing RBSP\n");
/* AVCodec close callback: frees the RBSP scratch buffer and per-context
 * tables. (NOTE(review): listing elided; the return statement is not
 * visible here.) */
8582 static int decode_end(AVCodecContext *avctx)
8584 H264Context *h = avctx->priv_data;
8585 MpegEncContext *s = &h->s;
8587 av_freep(&h->rbsp_buffer);
8588 free_tables(h); //FIXME cleanup init stuff perhaps
8591 // memset(h, 0, sizeof(H264Context));
8597 AVCodec h264_decoder = {
8601 sizeof(H264Context),
8606 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8610 #ifdef CONFIG_H264_PARSER
8611 AVCodecParser h264_parser = {
8613 sizeof(H264Context),