2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag; ///< eponymous bitstream flag
87 int offset_for_non_ref_pic; ///< eponymous bitstream element
88 int offset_for_top_to_bottom_field; ///< eponymous bitstream element
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag; ///< eponymous bitstream flag
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag; ///< eponymous bitstream flag
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag; ///< eponymous bitstream flag
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag; ///< eponymous bitstream flag
104 int timing_info_present_flag; ///< eponymous VUI flag
105 uint32_t num_units_in_tick; ///< eponymous VUI timing element
107 int fixed_frame_rate_flag; ///< eponymous VUI flag
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag; ///< eponymous VUI flag
110 int num_reorder_frames; ///< eponymous VUI element
111 int scaling_matrix_present; ///< nonzero if scaling lists were coded — inferred from name, verify
112 uint8_t scaling_matrix4[6][16]; ///< six 16-coefficient 4x4 scaling lists
113 uint8_t scaling_matrix8[2][64]; ///< two 64-coefficient 8x8 scaling lists
117 * Picture parameter set
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type; ///< slice_group_map_type
125 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc; ///< eponymous bitstream element
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset; ///< eponymous bitstream element
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
135 uint8_t scaling_matrix4[6][16]; ///< six 16-coefficient 4x4 scaling lists
136 uint8_t scaling_matrix8[2][64]; ///< two 64-coefficient 8x8 scaling lists
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
164 typedef struct H264Context{
// NOTE(review): the embedded line numbers jump repeatedly, so many members
// (and the closing brace) of this struct are elided from this listing.
// NAL unit type codes (subset; earlier values elided here).
172 #define NAL_IDR_SLICE 5
177 #define NAL_END_SEQUENCE 10
178 #define NAL_END_STREAM 11
179 #define NAL_FILLER_DATA 12
180 #define NAL_SPS_EXT 13
181 #define NAL_AUXILIARY_SLICE 19
182 uint8_t *rbsp_buffer; ///< RBSP scratch buffer
183 unsigned int rbsp_buffer_size; ///< allocated size of rbsp_buffer
186 * Used to parse AVC variant of h264
188 int is_avc; ///< this flag is != 0 if codec is avc1
189 int got_avcC; ///< flag used to parse avcC data only once
190 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
198 int chroma_pred_mode;
199 int intra16x16_pred_mode;
204 int8_t intra4x4_pred_mode_cache[5*8]; ///< 5x8 cache, indexed via scan8[]
205 int8_t (*intra4x4_pred_mode)[8];
// intra prediction function pointer tables (per prediction mode)
206 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
207 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
208 void (*pred8x8 [4+3])(uint8_t *src, int stride);
209 void (*pred16x16[4+3])(uint8_t *src, int stride);
210 unsigned int topleft_samples_available;
211 unsigned int top_samples_available;
212 unsigned int topright_samples_available;
213 unsigned int left_samples_available;
214 uint8_t (*top_borders[2])[16+2*8];
215 uint8_t left_border[2*(17+2*9)];
218 * non zero coeff count cache.
219 * is 64 if not available.
221 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
222 uint8_t (*non_zero_count)[16];
225 * Motion vector cache.
227 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
228 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
229 #define LIST_NOT_USED -1 //FIXME rename?
230 #define PART_NOT_AVAILABLE -2
233 * is 1 if the specific list MV&references are set to 0,0,-2.
235 int mv_cache_clean[2];
238 * number of neighbors (top and/or left) that used 8x8 dct
240 int neighbor_transform_size;
243 * block_offset[ 0..23] for frame macroblocks
244 * block_offset[24..47] for field macroblocks
246 int block_offset[2*(16+8)];
248 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
250 int b_stride; //FIXME use s->b4_stride
253 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
262 int unknown_svq3_flag;
263 int next_slice_index;
265 SPS sps_buffer[MAX_SPS_COUNT];
266 SPS sps; ///< current sps
268 PPS pps_buffer[MAX_PPS_COUNT];
272 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
// dequantization tables, indexed by [list][qp][coeff]
274 uint32_t dequant4_buffer[6][52][16];
275 uint32_t dequant8_buffer[2][52][64];
276 uint32_t (*dequant4_coeff[6])[16];
277 uint32_t (*dequant8_coeff[2])[64];
278 int dequant_coeff_pps; ///< reinit tables when pps changes
281 uint8_t *slice_table_base;
282 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
284 int slice_type_fixed;
286 //interlacing specific flags
288 int mb_field_decoding_flag;
289 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
296 int delta_poc_bottom;
299 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
300 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
301 int frame_num_offset; ///< for POC type 2
302 int prev_frame_num_offset; ///< for POC type 2
303 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
306 * frame_num for frames or 2*frame_num for field pics.
311 * max_frame_num or 2*max_frame_num for field pics.
315 //Weighted pred stuff
317 int use_weight_chroma;
318 int luma_log2_weight_denom;
319 int chroma_log2_weight_denom;
320 int luma_weight[2][48];
321 int luma_offset[2][48];
322 int chroma_weight[2][48][2];
323 int chroma_offset[2][48][2];
324 int implicit_weight[48][48];
327 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
328 int slice_alpha_c0_offset;
329 int slice_beta_offset;
331 int redundant_pic_count;
333 int direct_spatial_mv_pred;
334 int dist_scale_factor[16];
335 int dist_scale_factor_field[32];
336 int map_col_to_list0[2][16];
337 int map_col_to_list0_field[2][32];
340 * num_ref_idx_l0/1_active_minus1 + 1
342 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
343 Picture *short_ref[32];
344 Picture *long_ref[32];
345 Picture default_ref_list[2][32];
346 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
347 Picture *delayed_pic[16]; //FIXME size?
348 Picture *delayed_output_pic;
351 * memory management control operations buffer.
353 MMCO mmco[MAX_MMCO_COUNT];
356 int long_ref_count; ///< number of actual long term references
357 int short_ref_count; ///< number of actual short term references
360 GetBitContext intra_gb;
361 GetBitContext inter_gb;
362 GetBitContext *intra_gb_ptr;
363 GetBitContext *inter_gb_ptr;
365 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); ///< residual coefficients: 24 blocks of 16 DCTELEMs
371 uint8_t cabac_state[460]; ///< CABAC context states (460 contexts)
374 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
379 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
380 uint8_t *chroma_pred_mode_table;
381 int last_qscale_diff;
382 int16_t (*mvd_table[2])[2];
383 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
384 uint8_t *direct_table;
385 uint8_t direct_cache[5*8];
// coefficient scan orders; the *_q0 variants below appear to keep
// unpermuted copies — TODO confirm against the elided init code
387 uint8_t zigzag_scan[16];
388 uint8_t zigzag_scan8x8[64];
389 uint8_t zigzag_scan8x8_cavlc[64];
390 uint8_t field_scan[16];
391 uint8_t field_scan8x8[64];
392 uint8_t field_scan8x8_cavlc[64];
393 const uint8_t *zigzag_scan_q0;
394 const uint8_t *zigzag_scan8x8_q0;
395 const uint8_t *zigzag_scan8x8_cavlc_q0;
396 const uint8_t *field_scan_q0;
397 const uint8_t *field_scan8x8_q0;
398 const uint8_t *field_scan8x8_cavlc_q0;
403 static VLC coeff_token_vlc[4];
404 static VLC chroma_dc_coeff_token_vlc;
406 static VLC total_zeros_vlc[15];
407 static VLC chroma_dc_total_zeros_vlc[3];
409 static VLC run_vlc[6];
412 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
413 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
414 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
415 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Packs two 16-bit halves into one 32-bit word such that `a` occupies the
// first int16 in memory on either endianness.
// NOTE(review): the #else/#endif and closing brace are elided in this listing.
417 static always_inline uint32_t pack16to32(int a, int b){
418 #ifdef WORDS_BIGENDIAN
419 return (b&0xFFFF) + (a<<16);
421 return (a&0xFFFF) + (b<<16);
427 * @param h height of the rectangle, should be a constant
428 * @param w width of the rectangle, should be a constant
429 * @param size the size of val (1 or 4), should be a constant
431 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
// NOTE(review): the embedded line numbers jump (e.g. 433->439, 440->442),
// so the branch selectors choosing between the width/size cases below are
// elided from this listing; only the per-case store sequences are visible.
432 uint8_t *p= (uint8_t*)vp;
433 assert(size==1 || size==4);
439 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
440 assert((stride&(w-1))==0);
// case: 2 bytes per row — replicate val into a 16-bit pattern
442 const uint16_t v= size==4 ? val : val*0x0101;
443 *(uint16_t*)(p + 0*stride)= v;
445 *(uint16_t*)(p + 1*stride)= v;
447 *(uint16_t*)(p + 2*stride)=
448 *(uint16_t*)(p + 3*stride)= v;
// case: 4 bytes per row — replicate val into a 32-bit pattern
450 const uint32_t v= size==4 ? val : val*0x01010101;
451 *(uint32_t*)(p + 0*stride)= v;
453 *(uint32_t*)(p + 1*stride)= v;
455 *(uint32_t*)(p + 2*stride)=
456 *(uint32_t*)(p + 3*stride)= v;
458 //gcc can't optimize 64bit math on x86_32
459 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
// case: 8 bytes per row via one 64-bit store (val doubled into both halves)
460 const uint64_t v= val*0x0100000001ULL;
461 *(uint64_t*)(p + 0*stride)= v;
463 *(uint64_t*)(p + 1*stride)= v;
465 *(uint64_t*)(p + 2*stride)=
466 *(uint64_t*)(p + 3*stride)= v;
// case: 16 bytes per row via two 64-bit stores
468 const uint64_t v= val*0x0100000001ULL;
469 *(uint64_t*)(p + 0+0*stride)=
470 *(uint64_t*)(p + 8+0*stride)=
471 *(uint64_t*)(p + 0+1*stride)=
472 *(uint64_t*)(p + 8+1*stride)= v;
474 *(uint64_t*)(p + 0+2*stride)=
475 *(uint64_t*)(p + 8+2*stride)=
476 *(uint64_t*)(p + 0+3*stride)=
477 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit fallback: same rectangles with 32-bit stores
479 *(uint32_t*)(p + 0+0*stride)=
480 *(uint32_t*)(p + 4+0*stride)= val;
482 *(uint32_t*)(p + 0+1*stride)=
483 *(uint32_t*)(p + 4+1*stride)= val;
485 *(uint32_t*)(p + 0+2*stride)=
486 *(uint32_t*)(p + 4+2*stride)=
487 *(uint32_t*)(p + 0+3*stride)=
488 *(uint32_t*)(p + 4+3*stride)= val;
490 *(uint32_t*)(p + 0+0*stride)=
491 *(uint32_t*)(p + 4+0*stride)=
492 *(uint32_t*)(p + 8+0*stride)=
493 *(uint32_t*)(p +12+0*stride)=
494 *(uint32_t*)(p + 0+1*stride)=
495 *(uint32_t*)(p + 4+1*stride)=
496 *(uint32_t*)(p + 8+1*stride)=
497 *(uint32_t*)(p +12+1*stride)= val;
499 *(uint32_t*)(p + 0+2*stride)=
500 *(uint32_t*)(p + 4+2*stride)=
501 *(uint32_t*)(p + 8+2*stride)=
502 *(uint32_t*)(p +12+2*stride)=
503 *(uint32_t*)(p + 0+3*stride)=
504 *(uint32_t*)(p + 4+3*stride)=
505 *(uint32_t*)(p + 8+3*stride)=
506 *(uint32_t*)(p +12+3*stride)= val;
// Fills the per-macroblock neighbour caches (intra modes, nnz, mv/ref/mvd,
// direct flags) from the neighbouring macroblocks' stored state.
// NOTE(review): the embedded line numbers jump throughout (e.g. 517->521,
// 625->631), so many statements, branch bodies and closing braces of this
// function are elided from this listing; comments describe only visible code.
513 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
514 MpegEncContext * const s = &h->s;
515 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
516 int topleft_xy, top_xy, topright_xy, left_xy[2];
517 int topleft_type, top_type, topright_type, left_type[2];
521 //FIXME deblocking could skip the intra and nnz parts.
522 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
525 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// default (progressive) neighbour addressing
527 top_xy = mb_xy - s->mb_stride;
528 topleft_xy = top_xy - 1;
529 topright_xy= top_xy + 1;
530 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: neighbours are addressed in terms of macroblock pairs
540 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
541 const int top_pair_xy = pair_xy - s->mb_stride;
542 const int topleft_pair_xy = top_pair_xy - 1;
543 const int topright_pair_xy = top_pair_xy + 1;
544 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
545 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
546 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
547 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
548 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
549 const int bottom = (s->mb_y & 1);
550 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// the following ?: fragments adjust top/topleft/topright to the correct
// field of the neighbouring pair (enclosing conditions elided here)
552 ? !curr_mb_frame_flag // bottom macroblock
553 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
555 top_xy -= s->mb_stride;
558 ? !curr_mb_frame_flag // bottom macroblock
559 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
561 topleft_xy -= s->mb_stride;
564 ? !curr_mb_frame_flag // bottom macroblock
565 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
567 topright_xy -= s->mb_stride;
569 if (left_mb_frame_flag != curr_mb_frame_flag) {
570 left_xy[1] = left_xy[0] = pair_xy - 1;
571 if (curr_mb_frame_flag) {
592 left_xy[1] += s->mb_stride;
605 h->top_mb_xy = top_xy;
606 h->left_mb_xy[0] = left_xy[0];
607 h->left_mb_xy[1] = left_xy[1];
// deblock path: slice_table < 255 means the mb was decoded at all
611 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
612 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
613 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
615 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// unpack per-4x4 luma nnz flags stored as a 16-bit mask at [14]
617 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
619 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
620 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
621 if(USES_LIST(mb_type,list)){
622 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
623 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
624 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
625 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
631 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
632 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
634 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
635 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
637 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
638 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// decode path: neighbour types are only valid from the same slice
643 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
644 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
645 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
646 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
647 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// intra: compute per-sample availability bitmasks for the predictors
650 if(IS_INTRA(mb_type)){
651 h->topleft_samples_available=
652 h->top_samples_available=
653 h->left_samples_available= 0xFFFF;
654 h->topright_samples_available= 0xEEEA;
656 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
657 h->topleft_samples_available= 0xB3FF;
658 h->top_samples_available= 0x33FF;
659 h->topright_samples_available= 0x26EA;
662 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
663 h->topleft_samples_available&= 0xDF5F;
664 h->left_samples_available&= 0x5F5F;
668 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
669 h->topleft_samples_available&= 0x7FFF;
671 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
672 h->topright_samples_available&= 0xFBFF;
// import neighbouring intra4x4 prediction modes into the cache
674 if(IS_INTRA4x4(mb_type)){
675 if(IS_INTRA4x4(top_type)){
676 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
677 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
678 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
679 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
682 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
687 h->intra4x4_pred_mode_cache[4+8*0]=
688 h->intra4x4_pred_mode_cache[5+8*0]=
689 h->intra4x4_pred_mode_cache[6+8*0]=
690 h->intra4x4_pred_mode_cache[7+8*0]= pred;
693 if(IS_INTRA4x4(left_type[i])){
694 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
695 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
698 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
703 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
704 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// import neighbouring non-zero-coefficient counts
719 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
721 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
722 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
723 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
724 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
726 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
727 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
729 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
730 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// no top neighbour: CAVLC needs "unavailable" (64), CABAC inter needs 0
733 h->non_zero_count_cache[4+8*0]=
734 h->non_zero_count_cache[5+8*0]=
735 h->non_zero_count_cache[6+8*0]=
736 h->non_zero_count_cache[7+8*0]=
738 h->non_zero_count_cache[1+8*0]=
739 h->non_zero_count_cache[2+8*0]=
741 h->non_zero_count_cache[1+8*3]=
742 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
746 for (i=0; i<2; i++) {
748 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
749 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
750 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
751 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
753 h->non_zero_count_cache[3+8*1 + 2*8*i]=
754 h->non_zero_count_cache[3+8*2 + 2*8*i]=
755 h->non_zero_count_cache[0+8*1 + 8*i]=
756 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CBP of the top/left neighbours, for CABAC context derivation
763 h->top_cbp = h->cbp_table[top_xy];
764 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
772 } else if(IS_INTRA(mb_type)) {
778 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
781 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// inter/direct: fill the mv/ref caches from the four neighbours
786 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
788 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
789 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
790 /*if(!h->mv_cache_clean[list]){
791 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
792 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
793 h->mv_cache_clean[list]= 1;
797 h->mv_cache_clean[list]= 0;
799 if(USES_LIST(top_type, list)){
800 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
801 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
805 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
806 h->ref_cache[list][scan8[0] + 0 - 1*8]=
807 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
808 h->ref_cache[list][scan8[0] + 2 - 1*8]=
809 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
814 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
815 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
818 //FIXME unify cleanup or sth
819 if(USES_LIST(left_type[0], list)){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
823 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
824 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
825 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
828 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
829 h->ref_cache[list][scan8[0] - 1 + 0*8]=
830 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
833 if(USES_LIST(left_type[1], list)){
834 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
835 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
837 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
838 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
839 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
842 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
843 h->ref_cache[list][scan8[0] - 1 + 2*8]=
844 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
845 assert((!left_type[0]) == (!left_type[1]));
848 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
851 if(USES_LIST(topleft_type, list)){
852 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
853 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
854 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
855 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
857 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
858 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
861 if(USES_LIST(topright_type, list)){
862 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
863 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
864 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
865 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
867 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
868 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// pad the cache border cells that prediction may read
874 h->ref_cache[list][scan8[5 ]+1] =
875 h->ref_cache[list][scan8[7 ]+1] =
876 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
877 h->ref_cache[list][scan8[4 ]] =
878 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
879 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
880 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
881 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
882 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
883 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
886 /* XXX beurk, Load mvd */
887 if(USES_LIST(top_type, list)){
888 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
891 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
892 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
896 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
897 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
899 if(USES_LIST(left_type[0], list)){
900 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
901 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
902 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
904 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
905 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
907 if(USES_LIST(left_type[1], list)){
908 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
909 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
910 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
912 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
913 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
915 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
916 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
917 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
918 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
919 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache which neighbour 8x8 blocks used direct prediction
921 if(h->slice_type == B_TYPE){
922 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
924 if(IS_DIRECT(top_type)){
925 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
926 }else if(IS_8X8(top_type)){
927 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
928 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
929 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
931 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
934 if(IS_DIRECT(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
936 else if(IS_8X8(left_type[0]))
937 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
939 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
941 if(IS_DIRECT(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
943 else if(IS_8X8(left_type[1]))
944 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
946 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MAP_F2F: remap cached refs/mvs between frame and field neighbours —
// the frame->field variant halves mv[1] and doubles the ref index,
// the field->frame variant does the inverse (see the two definitions below)
952 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
953 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
956 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
957 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
958 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
959 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
960 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
961 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
963 #define MAP_F2F(idx, mb_type)\
964 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
965 h->ref_cache[list][idx] <<= 1;\
966 h->mv_cache[list][idx][1] /= 2;\
967 h->mvd_cache[list][idx][1] /= 2;\
972 #define MAP_F2F(idx, mb_type)\
973 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
974 h->ref_cache[list][idx] >>= 1;\
975 h->mv_cache[list][idx][1] <<= 1;\
976 h->mvd_cache[list][idx][1] <<= 1;\
986 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Writes the current mb's intra4x4 prediction modes from the cache back into
// the per-macroblock intra4x4_pred_mode array (entries 0..6 visible here;
// the remainder and the closing brace are elided in this listing).
989 static inline void write_back_intra_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
993 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
994 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
995 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
996 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
997 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
998 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
999 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1003 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1005 static inline int check_intra4x4_pred_mode(H264Context *h){
1006 MpegEncContext * const s = &h->s;
// remap tables: for each intra4x4 mode, the substitute mode to use when the
// top (resp. left) neighbour samples are unavailable; -1 marks an illegal
// combination (the loops and error-return lines are elided in this listing)
1007 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1008 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1011 if(!(h->top_samples_available&0x8000)){
1013 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1015 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1018 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1023 if(!(h->left_samples_available&0x8000)){
1025 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1027 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1030 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1036 } //FIXME cleanup like next
1039 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Validates an intra chroma/16x16 prediction mode against neighbour sample
// availability, remapping to a usable DC variant where possible.
// NOTE(review): the remapping statements and return lines are elided here;
// only the range check and the error logging are visible.
1041 static inline int check_intra_pred_mode(H264Context *h, int mode){
1042 MpegEncContext * const s = &h->s;
1043 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1044 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1046 if(mode < 0 || mode > 6) {
1047 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1051 if(!(h->top_samples_available&0x8000)){
1054 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1059 if(!(h->left_samples_available&0x8000)){
1062 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* Predicted intra4x4 mode for block n: the minimum of the left and top
 * neighbours' cached modes; falls back to DC_PRED when either neighbour
 * is unavailable (negative cache entry). */
1071 * gets the predicted intra4x4 prediction mode.
1073 static inline int pred_intra_mode(H264Context *h, int n){
1074 const int index8= scan8[n];
1075 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1076 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1077 const int min= FFMIN(left, top);
1079 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* Negative means "neighbour not available" -> use DC prediction. */
1081 if(min<0) return DC_PRED;
/* Copies the per-block non-zero coefficient counts from the 8-wide decode
 * cache back into the per-macroblock non_zero_count[] array (edge entries
 * only, in the layout neighbouring MBs expect to read). */
1085 static inline void write_back_non_zero_count(H264Context *h){
1086 MpegEncContext * const s = &h->s;
1087 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1089 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1090 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1091 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1092 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1093 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1094 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1095 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma edge entries (two planes, reversed index order). */
1097 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1098 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1099 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1101 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1102 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1103 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/* Pack one nnz flag per luma 4x4 block into a 16-bit mask for deblocking. */
1106 // store all luma nnzs, for deblocking
1109 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1110 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/* CAVLC nC prediction: average of the left and top neighbours' coefficient
 * counts (rounded up) when both are available; details of the availability
 * handling fall in lines not shown in this listing. */
1115 * gets the predicted number of non zero coefficients.
1116 * @param n block index
1118 static inline int pred_non_zero_count(H264Context *h, int n){
1119 const int index8= scan8[n];
1120 const int left= h->non_zero_count_cache[index8 - 1];
1121 const int top = h->non_zero_count_cache[index8 - 8];
/* i<64 => both neighbours valid: round the sum up to the mean. */
1124 if(i<64) i= (i+1)>>1;
1126 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the diagonal (top-right, or top-left as
 * fallback) neighbour for MV prediction and points *C at its motion vector.
 * The MBAFF paths rescale MVs between field and frame coordinates
 * (SET_DIAG_MV applies the *2 / >>1 adjustment and remaps the ref index). */
1131 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1132 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1134 /* there is no consistent mapping of mvs to neighboring locations that will
1135 * make mbaff happy, so we can't move all this logic to fill_caches */
1137 MpegEncContext *s = &h->s;
1138 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache slot scan8[0]-2 holds the rescaled diagonal MV. */
1140 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1141 *C = h->mv_cache[list][scan8[0]-2];
1144 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1145 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1146 if(IS_INTERLACED(mb_types[topright_xy])){
1147 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1148 const int x4 = X4, y4 = Y4;\
1149 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1150 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1151 return LIST_NOT_USED;\
1152 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1153 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1154 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1155 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* frame MB reading a field neighbour: double the vertical MV, halve the ref */
1157 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1160 if(topright_ref == PART_NOT_AVAILABLE
1161 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1162 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1164 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1165 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1168 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1169 && i >= scan8[0]+8){
1170 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1171 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / simple case: use top-right if available, else top-left. */
1177 if(topright_ref != PART_NOT_AVAILABLE){
1178 *C= h->mv_cache[list][ i - 8 + part_width ];
1179 return topright_ref;
1181 tprintf("topright MV not available\n");
1183 *C= h->mv_cache[list][ i - 8 - 1 ];
1184 return h->ref_cache[list][ i - 8 - 1 ];
/* Median motion-vector prediction from the left (A), top (B) and diagonal (C)
 * neighbours. When exactly one neighbour uses the same reference, that
 * neighbour's MV is used directly; otherwise the component-wise median. */
1189 * gets the predicted MV.
1190 * @param n the block index
1191 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1192 * @param mx the x component of the predicted motion vector
1193 * @param my the y component of the predicted motion vector
1195 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1196 const int index8= scan8[n];
1197 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1198 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1199 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1200 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1202 int diagonal_ref, match_count;
1204 assert(part_width==1 || part_width==2 || part_width==4);
1214 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbours use the same reference picture. */
1215 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1216 tprintf("pred_motion match_count=%d\n", match_count);
1217 if(match_count > 1){ //most common
1218 *mx= mid_pred(A[0], B[0], C[0]);
1219 *my= mid_pred(A[1], B[1], C[1]);
1220 }else if(match_count==1){
1224 }else if(top_ref==ref){
/* No matching neighbour: only the left block available -> copy A, else median. */
1232 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1236 *mx= mid_pred(A[0], B[0], C[0]);
1237 *my= mid_pred(A[1], B[1], C[1]);
1241 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/* Directional MV prediction for 16x8 partitions: top partition prefers the
 * top neighbour's MV, bottom partition prefers the left neighbour's, when
 * that neighbour shares the reference; otherwise fall back to pred_motion. */
1245 * gets the directionally predicted 16x8 MV.
1246 * @param n the block index
1247 * @param mx the x component of the predicted motion vector
1248 * @param my the y component of the predicted motion vector
1250 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1252 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1253 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1255 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom half (n != 0): use the left neighbour of row scan8[8]. */
1263 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1264 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1266 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1268 if(left_ref == ref){
/* Directional shortcut not applicable -> generic median prediction. */
1276 pred_motion(h, n, 4, list, ref, mx, my);
/* Directional MV prediction for 8x16 partitions: left partition prefers the
 * left neighbour, right partition prefers the diagonal neighbour, when the
 * references match; otherwise fall back to pred_motion. */
1280 * gets the directionally predicted 8x16 MV.
1281 * @param n the block index
1282 * @param mx the x component of the predicted motion vector
1283 * @param my the y component of the predicted motion vector
1285 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1287 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1288 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1290 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1292 if(left_ref == ref){
/* Right half: take the diagonal neighbour of block scan8[4]. */
1301 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1303 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1305 if(diagonal_ref == ref){
1313 pred_motion(h, n, 2, list, ref, mx, my);
/* P_Skip MV prediction: the MV is forced to zero when either neighbour is
 * unavailable or a zero-MV/ref-0 neighbour exists; otherwise the regular
 * 16x16 list-0 prediction is used. */
1316 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1317 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1318 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1320 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1322 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1323 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1324 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1330 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factor per list-0 reference:
 * dist_scale_factor[i] = clip((tb*tx + 32) >> 6) with tx derived from the POC
 * distance td, per the H.264 temporal direct derivation. Also mirrors the
 * values into the field-pair table. */
1335 static inline void direct_dist_scale_factor(H264Context * const h){
1336 const int poc = h->s.current_picture_ptr->poc;
1337 const int poc1 = h->ref_list[1][0].poc;
1339 for(i=0; i<h->ref_count[0]; i++){
1340 int poc0 = h->ref_list[0][i].poc;
1341 int td = clip(poc1 - poc0, -128, 127);
1342 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* Equal POCs -> neutral scale (256 == 1.0 in 8-bit fixed point). */
1343 h->dist_scale_factor[i] = 256;
1345 int tb = clip(poc - poc0, -128, 127);
1346 int tx = (16384 + (ABS(td) >> 1)) / td;
1347 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* Duplicate each frame entry for the two fields of a field pair. */
1351 for(i=0; i<h->ref_count[0]; i++){
1352 h->dist_scale_factor_field[2*i] =
1353 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's ref counts/POCs and, for temporal direct
 * mode, builds map_col_to_list0[]: for each reference of the co-located
 * picture, the index of the list-0 reference with the same POC (0 if the
 * frame is missing). Field variants are derived by doubling the indices. */
1357 static inline void direct_ref_list_init(H264Context * const h){
1358 MpegEncContext * const s = &h->s;
1359 Picture * const ref1 = &h->ref_list[1][0];
1360 Picture * const cur = s->current_picture_ptr;
1362 if(cur->pict_type == I_TYPE)
1363 cur->ref_count[0] = 0;
1364 if(cur->pict_type != B_TYPE)
1365 cur->ref_count[1] = 0;
1366 for(list=0; list<2; list++){
1367 cur->ref_count[list] = h->ref_count[list];
1368 for(j=0; j<h->ref_count[list]; j++)
1369 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* Mapping only needed for temporal direct B-frames. */
1371 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1373 for(list=0; list<2; list++){
1374 for(i=0; i<ref1->ref_count[list]; i++){
1375 const int poc = ref1->ref_poc[list][i];
1376 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1377 for(j=0; j<h->ref_count[list]; j++)
1378 if(h->ref_list[list][j].poc == poc){
1379 h->map_col_to_list0[list][i] = j;
/* Field mapping: each frame ref expands to a top/bottom field pair. */
1385 for(list=0; list<2; list++){
1386 for(i=0; i<ref1->ref_count[list]; i++){
1387 j = h->map_col_to_list0[list][i];
1388 h->map_col_to_list0_field[list][2*i] = 2*j;
1389 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Derives motion data for B-direct macroblocks/partitions. Two branches:
 * spatial direct (refs = min of neighbours, MVs from pred_motion, zeroed
 * when the co-located block is static) and temporal direct (scale the
 * co-located list-1 picture's MVs by dist_scale_factor). Handles MBAFF
 * frame<->field scaling of the co-located data. Reads the l1mv0/l1mv1 and
 * l1ref0/l1ref1 arrays of ref_list[1][0]; writes mv_cache/ref_cache,
 * sub_mb_type[] and *mb_type.
 * NOTE(review): this listing omits many interior lines; comments below only
 * describe what is visible. */
1395 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1396 MpegEncContext * const s = &h->s;
1397 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1398 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1399 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1400 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1401 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1402 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1403 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1404 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1405 const int is_b8x8 = IS_8X8(*mb_type);
1409 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose partitioning of the direct MB from the co-located MB's type. */
1410 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1411 /* FIXME save sub mb types from previous frames (or derive from MVs)
1412 * so we know exactly what block size to use */
1413 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1414 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1415 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1419 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1420 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1423 *mb_type |= MB_TYPE_DIRECT2;
1425 *mb_type |= MB_TYPE_INTERLACED;
1427 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1429 if(h->direct_spatial_mv_pred){
1434 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1436 /* ref = min(neighbors) */
1437 for(list=0; list<2; list++){
1438 int refa = h->ref_cache[list][scan8[0] - 1];
1439 int refb = h->ref_cache[list][scan8[0] - 8];
1440 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1442 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1444 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1446 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* No neighbour gives a reference -> both refs 0, both MVs zero. */
1452 if(ref[0] < 0 && ref[1] < 0){
1453 ref[0] = ref[1] = 0;
1454 mv[0][0] = mv[0][1] =
1455 mv[1][0] = mv[1][1] = 0;
1457 for(list=0; list<2; list++){
1459 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1461 mv[list][0] = mv[list][1] = 0;
/* Drop the unused prediction list from the MB/sub-MB type flags. */
1466 *mb_type &= ~MB_TYPE_P0L1;
1467 sub_mb_type &= ~MB_TYPE_P0L1;
1468 }else if(ref[0] < 0){
1469 *mb_type &= ~MB_TYPE_P0L0;
1470 sub_mb_type &= ~MB_TYPE_P0L0;
1473 if(IS_16X16(*mb_type)){
1474 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1475 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* Co-located block nearly static (|mv| <= 1, ref 0) -> force zero MVs.
 * The x264_build check works around an old x264 bug. */
1476 if(!IS_INTRA(mb_type_col)
1477 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1478 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1479 && (h->x264_build>33 || !h->x264_build)))){
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1483 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1489 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1490 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* Per-8x8 spatial direct, with per-4x4 static checks when needed. */
1493 for(i8=0; i8<4; i8++){
1494 const int x8 = i8&1;
1495 const int y8 = i8>>1;
1497 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1499 h->sub_mb_type[i8] = sub_mb_type;
1501 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1502 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1503 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1504 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1507 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1508 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1509 && (h->x264_build>33 || !h->x264_build)))){
1510 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1511 if(IS_SUB_8X8(sub_mb_type)){
1512 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1513 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1515 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1517 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1520 for(i4=0; i4<4; i4++){
1521 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1522 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1524 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1526 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1532 }else{ /* direct temporal mv pred */
1533 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1534 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the field variants of the mapping tables. */
1537 if(IS_INTERLACED(*mb_type)){
1538 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1539 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1540 dist_scale_factor = h->dist_scale_factor_field;
/* Co-located picture has the opposite frame/field structure:
 * re-point l1ref*/l1mv* into the matching field/frame rows. */
1542 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1543 /* FIXME assumes direct_8x8_inference == 1 */
1544 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1545 int mb_types_col[2];
1548 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1549 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1550 | (*mb_type & MB_TYPE_INTERLACED);
1551 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1553 if(IS_INTERLACED(*mb_type)){
1554 /* frame to field scaling */
1555 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1556 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1558 l1ref0 -= 2*h->b8_stride;
1559 l1ref1 -= 2*h->b8_stride;
1560 l1mv0 -= 4*h->b_stride;
1561 l1mv1 -= 4*h->b_stride;
1565 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1566 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1568 *mb_type |= MB_TYPE_16x8;
1570 *mb_type |= MB_TYPE_8x8;
1572 /* field to frame scaling */
1573 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1574 * but in MBAFF, top and bottom POC are equal */
1575 int dy = (s->mb_y&1) ? 1 : 2;
1577 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1578 l1ref0 += dy*h->b8_stride;
1579 l1ref1 += dy*h->b8_stride;
1580 l1mv0 += 2*dy*h->b_stride;
1581 l1mv1 += 2*dy*h->b_stride;
1584 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1586 *mb_type |= MB_TYPE_16x16;
1588 *mb_type |= MB_TYPE_8x8;
/* Per-8x8 temporal direct with y_shift-based frame<->field MV scaling. */
1591 for(i8=0; i8<4; i8++){
1592 const int x8 = i8&1;
1593 const int y8 = i8>>1;
1595 const int16_t (*l1mv)[2]= l1mv0;
1597 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1599 h->sub_mb_type[i8] = sub_mb_type;
1601 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1602 if(IS_INTRA(mb_types_col[y8])){
1603 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1604 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1609 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1611 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1613 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1616 scale = dist_scale_factor[ref0];
1617 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1620 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1621 int my_col = (mv_col[1]<<y_shift)/2;
1622 int mx = (scale * mv_col[0] + 128) >> 8;
1623 int my = (scale * my_col + 128) >> 8;
1624 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1625 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1632 /* one-to-one mv scaling */
/* Same frame/field structure: scale co-located MVs directly. */
1634 if(IS_16X16(*mb_type)){
1635 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1636 if(IS_INTRA(mb_type_col)){
1637 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1638 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1639 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1641 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1642 : map_col_to_list0[1][l1ref1[0]];
1643 const int scale = dist_scale_factor[ref0];
1644 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1646 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1647 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1648 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1649 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
/* list1 MV = list0 MV - colocated MV (standard temporal direct). */
1650 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1653 for(i8=0; i8<4; i8++){
1654 const int x8 = i8&1;
1655 const int y8 = i8>>1;
1657 const int16_t (*l1mv)[2]= l1mv0;
1659 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1661 h->sub_mb_type[i8] = sub_mb_type;
1662 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 if(IS_INTRA(mb_type_col)){
1664 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1665 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1670 ref0 = l1ref0[x8 + y8*h->b8_stride];
1672 ref0 = map_col_to_list0[0][ref0];
1674 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1677 scale = dist_scale_factor[ref0];
1679 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1680 if(IS_SUB_8X8(sub_mb_type)){
1681 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1682 int mx = (scale * mv_col[0] + 128) >> 8;
1683 int my = (scale * mv_col[1] + 128) >> 8;
1684 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1685 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1687 for(i4=0; i4<4; i4++){
1688 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1689 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1690 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1691 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1692 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1693 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Flushes the per-MB motion caches (MVs, ref indices, CABAC mvd, direct
 * flags) back into the current picture's frame-wide arrays. */
1700 static inline void write_back_motion(H264Context *h, int mb_type){
1701 MpegEncContext * const s = &h->s;
1702 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1703 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* List 0 unused: mark the 2x2 ref_index block as LIST_NOT_USED. */
1706 if(!USES_LIST(mb_type, 0))
1707 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1709 for(list=0; list<2; list++){
1711 if(!USES_LIST(mb_type, list))
/* Copy the 4x4 MV grid row by row, two MVs (64 bits) at a time. */
1715 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1716 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1718 if( h->pps.cabac ) {
1719 if(IS_SKIP(mb_type))
1720 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1723 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1724 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* Write back the four 8x8-block reference indices. */
1729 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1730 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1731 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1732 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1733 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B-frames also persist the per-8x8 direct-mode flags. */
1737 if(h->slice_type == B_TYPE && h->pps.cabac){
1738 if(IS_8X8(mb_type)){
1739 uint8_t *direct_table = &h->direct_table[b8_xy];
1740 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1741 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1742 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/* Parses a NAL unit header (ref_idc, unit type) and removes the 00 00 03
 * emulation-prevention bytes from the RBSP payload. Returns the source
 * buffer directly when no escapes are present; otherwise decodes into
 * h->rbsp_buffer. */
1748 * Decodes a network abstraction layer unit.
1749 * @param consumed is the number of bytes used as input
1750 * @param length is the length of the array
1751 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1752 * @returns decoded bytes, might be src+1 if no escapes
1754 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1758 // src[0]&0x80; //forbidden bit
1759 h->nal_ref_idc= src[0]>>5;
1760 h->nal_unit_type= src[0]&0x1F;
1764 for(i=0; i<length; i++)
1765 printf("%2X ", src[i]);
/* Scan for the first 00 00 0x sequence (escape or next start code). */
1767 for(i=0; i+1<length; i+=2){
1768 if(src[i]) continue;
1769 if(i>0 && src[i-1]==0) i--;
1770 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1772 /* startcode, so we must be past the end */
1779 if(i>=length-1){ //no escaped 0
1780 *dst_length= length;
1781 *consumed= length+1; //+1 for the header
1785 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1786 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
/* Inverse of decode_nal: writes the NAL header byte, then copies src into
 * dst+1 while inserting 0x03 emulation-prevention bytes after each 00 00
 * pair that would otherwise form a start-code prefix. */
1813 * @param src the data which should be escaped
1814 * @param dst the target buffer, dst+1 == src is allowed as a special case
1815 * @param length the length of the src data
1816 * @param dst_length the length of the dst array
1817 * @returns length of escaped data in bytes or -1 if an error occured
1819 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1820 int i, escape_count, si, di;
1824 assert(dst_length>0);
/* NAL header byte: nal_ref_idc in bits 5-6, unit type in bits 0-4. */
1826 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1828 if(length==0) return 1;
/* First pass: count positions that need an escape byte. */
1831 for(i=0; i<length; i+=2){
1832 if(src[i]) continue;
1833 if(i>0 && src[i-1]==0)
1835 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1841 if(escape_count==0){
1843 memcpy(dst+1, src, length);
1847 if(length + escape_count + 1> dst_length)
1850 //this should be damn rare (hopefully)
1852 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1853 temp= h->rbsp_buffer;
1854 //printf("encoding esc\n");
/* Second pass: copy while inserting 0x03 after each 00 00 pair. */
1859 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1860 temp[di++]= 0; si++;
1861 temp[di++]= 0; si++;
1863 temp[di++]= src[si++];
1866 temp[di++]= src[si++];
1868 memcpy(dst+1, temp, length+escape_count);
1870 assert(di == length+escape_count);
/* Emits rbsp_trailing_bits: a 1 bit (on a line not shown here) followed by
 * zero bits up to the next byte boundary. */
1876 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1878 static void encode_rbsp_trailing(PutBitContext *pb){
/* Number of zero bits needed to reach byte alignment. */
1881 length= (-put_bits_count(pb))&7;
1882 if(length) put_bits(pb, length, 0);
/* Locates the rbsp_stop_one_bit in the last byte to find the exact end of
 * the bitstream; returns 0 if the trailing pattern is damaged. */
1887 * identifies the exact end of the bitstream
1888 * @return the length of the trailing, or 0 if damaged
1890 static int decode_rbsp_trailing(uint8_t *src){
1894 tprintf("rbsp trailing %X\n", v);
/* 4x4 Hadamard inverse transform of the 16 luma DC coefficients followed by
 * dequantization ((x*qmul + 128) >> 8); operates in place on the DC
 * positions of the 16x16 block array (stride 16). */
1904 * idct tranforms the 16 dc values and dequantize them.
1905 * @param qp quantization parameter
1907 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1910 int temp[16]; //FIXME check if this is a good idea
1911 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1912 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1914 //memset(block, 64, 2*256);
/* Horizontal butterfly pass into temp[]. */
1917 const int offset= y_offset[i];
1918 const int z0= block[offset+stride*0] + block[offset+stride*4];
1919 const int z1= block[offset+stride*0] - block[offset+stride*4];
1920 const int z2= block[offset+stride*1] - block[offset+stride*5];
1921 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterfly pass + dequant back into the block. */
1930 const int offset= x_offset[i];
1931 const int z0= temp[4*0+i] + temp[4*2+i];
1932 const int z1= temp[4*0+i] - temp[4*2+i];
1933 const int z2= temp[4*1+i] - temp[4*3+i];
1934 const int z3= temp[4*1+i] + temp[4*3+i];
1936 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1937 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1938 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1939 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
 * with a >>1 normalization on output; same block layout as the inverse. */
1945 * dct tranforms the 16 dc values.
1946 * @param qp quantization parameter ??? FIXME
1948 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1949 // const int qmul= dequant_coeff[qp][0];
1951 int temp[16]; //FIXME check if this is a good idea
1952 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1953 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* First butterfly pass into temp[]. */
1956 const int offset= y_offset[i];
1957 const int z0= block[offset+stride*0] + block[offset+stride*4];
1958 const int z1= block[offset+stride*0] - block[offset+stride*4];
1959 const int z2= block[offset+stride*1] - block[offset+stride*5];
1960 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Second pass with >>1 scaling written back to the block. */
1969 const int offset= x_offset[i];
1970 const int z0= temp[4*0+i] + temp[4*2+i];
1971 const int z1= temp[4*0+i] - temp[4*2+i];
1972 const int z2= temp[4*1+i] - temp[4*3+i];
1973 const int z3= temp[4*1+i] + temp[4*3+i];
1975 block[stride*0 +offset]= (z0 + z3)>>1;
1976 block[stride*2 +offset]= (z1 + z2)>>1;
1977 block[stride*8 +offset]= (z1 - z2)>>1;
1978 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 Hadamard inverse transform + dequantization of the chroma DC
 * coefficients, in place (stride 32, xStride 16). */
1986 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1987 const int stride= 16*2;
1988 const int xStride= 16;
1991 a= block[stride*0 + xStride*0];
1992 b= block[stride*0 + xStride*1];
1993 c= block[stride*1 + xStride*0];
1994 d= block[stride*1 + xStride*1];
/* 2x2 butterfly (e derived from b/d on lines not shown), then dequant. */
2001 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
2002 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
2003 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
2004 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC values (encoder side);
 * same layout as chroma_dc_dequant_idct_c, no scaling applied. */
2008 static void chroma_dc_dct_c(DCTELEM *block){
2009 const int stride= 16*2;
2010 const int xStride= 16;
2013 a= block[stride*0 + xStride*0];
2014 b= block[stride*0 + xStride*1];
2015 c= block[stride*1 + xStride*0];
2016 d= block[stride*1 + xStride*1];
2023 block[stride*0 + xStride*0]= (a+c);
2024 block[stride*0 + xStride*1]= (e+b);
2025 block[stride*1 + xStride*0]= (a-c);
2026 block[stride*1 + xStride*1]= (e-b);
/* Maps the luma QP plus the PPS chroma offset through the chroma_qp[] table,
 * clipping the index to the legal 0..51 range. */
2031 * gets the chroma qp.
2033 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2035 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Forward 4x4 H.264 integer transform of the pixel difference src1 - src2,
 * writing coefficients to block[]: row pass on the differences, then a
 * column pass over the intermediate results. */
2040 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2042 //FIXME try int temp instead of block
/* Row transform of the 4 residual samples of each line. */
2045 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2046 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2047 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2048 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2049 const int z0= d0 + d3;
2050 const int z3= d0 - d3;
2051 const int z1= d1 + d2;
2052 const int z2= d1 - d2;
2054 block[0 + 4*i]= z0 + z1;
2055 block[1 + 4*i]= 2*z3 + z2;
2056 block[2 + 4*i]= z0 - z1;
2057 block[3 + 4*i]= z3 - 2*z2;
/* Column transform in place. */
2061 const int z0= block[0*4 + i] + block[3*4 + i];
2062 const int z3= block[0*4 + i] - block[3*4 + i];
2063 const int z1= block[1*4 + i] + block[2*4 + i];
2064 const int z2= block[1*4 + i] - block[2*4 + i];
2066 block[0*4 + i]= z0 + z1;
2067 block[1*4 + i]= 2*z3 + z2;
2068 block[2*4 + i]= z0 - z1;
2069 block[3*4 + i]= z3 - 2*z2;
/* Quantizes a 4x4 coefficient block in scan order with a dead zone
 * (intra bias 1/3, inter 1/6 of the quant step). The seperate_dc paths use
 * a different shift for the DC coefficient (QUANT_SHIFT-2 for the luma DC
 * table at qscale+18, QUANT_SHIFT+1 otherwise). Returns the last non-zero
 * index. The threshold1/2 comparison skips the division for coefficients
 * that quantize to zero. */
2074 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2075 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
2076 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2078 const int * const quant_table= quant_coeff[qscale];
2079 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
2080 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2081 const unsigned int threshold2= (threshold1<<1);
/* DC handled separately: luma-DC branch uses quant_coeff[qscale+18]. */
2087 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2088 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2089 const unsigned int dc_threshold2= (dc_threshold1<<1);
2091 int level= block[0]*quant_coeff[qscale+18][0];
2092 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2094 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2097 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2100 // last_non_zero = i;
/* Alternative DC branch with QUANT_SHIFT+1 scaling. */
2105 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2106 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2107 const unsigned int dc_threshold2= (dc_threshold1<<1);
2109 int level= block[0]*quant_table[0];
2110 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2112 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2115 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2118 // last_non_zero = i;
/* AC coefficients in scan order. */
2131 const int j= scantable[i];
2132 int level= block[j]*quant_table[j];
2134 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2135 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2136 if(((unsigned)(level+threshold1))>threshold2){
2138 level= (bias + level)>>QUANT_SHIFT;
2141 level= (bias - level)>>QUANT_SHIFT;
2150 return last_non_zero;
/* 4x4 vertical intra prediction: replicate the row above into all 4 rows
 * (one 32-bit store per row). */
2153 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2154 const uint32_t a= ((uint32_t*)(src-stride))[0];
2155 ((uint32_t*)(src+0*stride))[0]= a;
2156 ((uint32_t*)(src+1*stride))[0]= a;
2157 ((uint32_t*)(src+2*stride))[0]= a;
2158 ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal intra prediction: replicate each row's left neighbour
 * across the row (x * 0x01010101 broadcasts a byte into a word). */
2161 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2162 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2163 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2164 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2165 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC intra prediction: fill with the rounded mean of the 4 top and
 * 4 left neighbour samples. */
2168 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2169 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2170 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2172 ((uint32_t*)(src+0*stride))[0]=
2173 ((uint32_t*)(src+1*stride))[0]=
2174 ((uint32_t*)(src+2*stride))[0]=
2175 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC prediction from the 4 left neighbours only (top row unavailable). */
    const int dc= (src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
    int y;

    for(y=0; y<4; y++)
        memset(src + y*stride, dc, 4);
}
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC prediction from the 4 top neighbours only (left column unavailable). */
    const int dc= (src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >> 2;
    int y;

    for(y=0; y<4; y++)
        memset(src + y*stride, dc, 4);
}
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* No neighbours available: predict mid-grey (128 for 8-bit samples). */
    int y;

    for(y=0; y<4; y++)
        memset(src + y*stride, 128, 4);
}
/* Helper macros for the directional 4x4 predictors below: declare the
 * neighbouring sample values as locals (t4..t7 from the top-right block,
 * l0..l3 from the left column, t0..t3 from the row above). Each macro body
 * is continued with trailing backslashes; NOTE(review): the blank lines that
 * originally terminated each macro are missing from this extract. */
2204 #define LOAD_TOP_RIGHT_EDGE\
2205 const int t4= topright[0];\
2206 const int t5= topright[1];\
2207 const int t6= topright[2];\
2208 const int t7= topright[3];\
2210 #define LOAD_LEFT_EDGE\
2211 const int l0= src[-1+0*stride];\
2212 const int l1= src[-1+1*stride];\
2213 const int l2= src[-1+2*stride];\
2214 const int l3= src[-1+3*stride];\
2216 #define LOAD_TOP_EDGE\
2217 const int t0= src[ 0-1*stride];\
2218 const int t1= src[ 1-1*stride];\
2219 const int t2= src[ 2-1*stride];\
2220 const int t3= src[ 3-1*stride];\
/* Directional 4x4 intra predictors. Each filters the neighbouring samples
 * (lt = top-left corner, t0.. = top row, l0.. = left column, via the
 * LOAD_*_EDGE macros) with the spec's (a+2b+c+2)>>2 / (a+b+1)>>1 taps.
 * NOTE(review): the LOAD_*_EDGE invocation lines and several assignments are
 * missing from this extract (original line numbers jump); kept verbatim. */
2222 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2223 const int lt= src[-1-1*stride];
2227 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2229 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2232 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2236 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2239 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2241 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2242 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* diagonal down-left: uses the top and top-right rows */
2245 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2250 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2252 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2255 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2259 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2262 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2264 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2265 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* vertical-right: mixes 2-tap averages (top row) and 3-tap filters */
2268 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2269 const int lt= src[-1-1*stride];
2272 const __attribute__((unused)) int unu= l3;
2275 src[1+2*stride]=(lt + t0 + 1)>>1;
2277 src[2+2*stride]=(t0 + t1 + 1)>>1;
2279 src[3+2*stride]=(t1 + t2 + 1)>>1;
2280 src[3+0*stride]=(t2 + t3 + 1)>>1;
2282 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2284 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2286 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2287 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2288 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2289 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* vertical-left: top + top-right neighbours only */
2292 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2295 const __attribute__((unused)) int unu= t7;
2297 src[0+0*stride]=(t0 + t1 + 1)>>1;
2299 src[0+2*stride]=(t1 + t2 + 1)>>1;
2301 src[1+2*stride]=(t2 + t3 + 1)>>1;
2303 src[2+2*stride]=(t3 + t4+ 1)>>1;
2304 src[3+2*stride]=(t4 + t5+ 1)>>1;
2305 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2307 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2309 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2311 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2312 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* horizontal-up: left column only; lower rows (missing here) saturate to l3 */
2315 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2318 src[0+0*stride]=(l0 + l1 + 1)>>1;
2319 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2321 src[0+1*stride]=(l1 + l2 + 1)>>1;
2323 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2325 src[0+2*stride]=(l2 + l3 + 1)>>1;
2327 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* horizontal-down: left column + top-left corner + top row */
2336 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2337 const int lt= src[-1-1*stride];
2340 const __attribute__((unused)) int unu= t3;
2343 src[2+1*stride]=(lt + l0 + 1)>>1;
2345 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2346 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2347 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2349 src[2+2*stride]=(l0 + l1 + 1)>>1;
2351 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2353 src[2+3*stride]=(l1 + l2+ 1)>>1;
2355 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2356 src[0+3*stride]=(l2 + l3 + 1)>>1;
2357 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int y;

    /* Vertical 16x16 prediction: replicate the 16 pixels above the
     * macroblock into all 16 rows. memcpy replaces the four uint32_t*
     * punned stores per row (strict-aliasing/alignment UB). */
    for(y=0; y<16; y++)
        memcpy(src + y*stride, src - stride, 16);
}
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y;

    /* Horizontal 16x16 prediction: each row is filled with the pixel to its
     * immediate left (memset replicates the byte, avoiding the original's
     * uint32_t* type-punning UB). */
    for(y=0; y<16; y++)
        memset(src + y*stride, src[-1 + y*stride], 16);
}
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    /* DC 16x16 prediction: rounded mean of the 16 left and 16 top
     * neighbours, written to every pixel of the macroblock.
     * (Restores the top-row accumulation loop and declarations that are
     * missing from this extract; fill uses memset instead of the original
     * uint32_t* punned stores.) */
    for(i=0; i<16; i++)
        dc+= src[-1+i*stride];
    for(i=0; i<16; i++)
        dc+= src[i-stride];

    dc= (dc + 16) >> 5;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    /* DC from the 16 left neighbours only (top row unavailable). */
    for(i=0; i<16; i++)
        dc+= src[-1+i*stride];

    dc= (dc + 8) >> 4;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    /* DC from the 16 top neighbours only (left column unavailable).
     * (Restores the accumulation loop missing from this extract.) */
    for(i=0; i<16; i++)
        dc+= src[i-stride];

    dc= (dc + 8) >> 4;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    /* No neighbours available: fill the macroblock with mid-grey (128). */
    for(i=0; i<16; i++)
        memset(src + i*stride, 128, 16);
}
/* Plane (gradient) 16x16 prediction shared between H.264 and SVQ3.
 * Fits H (horizontal) and V (vertical) gradients from the border samples,
 * then fills the block with a clipped linear ramp via the cropTbl clip LUT.
 * NOTE(review): several declaration and loop-body lines are missing from
 * this extract (original line numbers jump); code kept verbatim. */
2451 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2454 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2455 const uint8_t * const src0 = src+7-stride;
2456 const uint8_t *src1 = src+8*stride-1;
2457 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2458 int H = src0[1] - src0[-1];
2459 int V = src1[0] - src2[ 0];
/* weighted sum of border differences, weights 2..8 */
2460 for(k=2; k<=8; ++k) {
2461 src1 += stride; src2 -= stride;
2462 H += k*(src0[k] - src0[-k]);
2463 V += k*(src1[0] - src2[ 0]);
/* SVQ3 variant: different rounding and swapped H/V */
2466 H = ( 5*(H/4) ) / 16;
2467 V = ( 5*(V/4) ) / 16;
2469 /* required for 100% accuracy */
2470 i = H; H = V; V = i;
/* H.264 variant: (5*H+32)>>6 per the spec */
2472 H = ( 5*H+32 ) >> 6;
2473 V = ( 5*V+32 ) >> 6;
2476 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2477 for(j=16; j>0; --j) {
2480 for(i=-16; i<0; i+=4) {
2481 src[16+i] = cm[ (b ) >> 5 ];
2482 src[17+i] = cm[ (b+ H) >> 5 ];
2483 src[18+i] = cm[ (b+2*H) >> 5 ];
2484 src[19+i] = cm[ (b+3*H) >> 5 ];
/* thin H.264 wrapper: svq3 == 0 selects the standard rounding path */
2491 static void pred16x16_plane_c(uint8_t *src, int stride){
2492 pred16x16_plane_compat_c(src, stride, 0);
static void pred8x8_vertical_c(uint8_t *src, int stride){
    int y;

    /* Vertical 8x8 (chroma) prediction: replicate the 8 pixels above the
     * block into all 8 rows; memcpy avoids the original's uint32_t*
     * type-punning UB and restores the loop lines missing from this extract. */
    for(y=0; y<8; y++)
        memcpy(src + y*stride, src - stride, 8);
}
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int y;

    /* Horizontal 8x8 prediction: each row is filled with its left neighbour. */
    for(y=0; y<8; y++)
        memset(src + y*stride, src[-1 + y*stride], 8);
}
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int y;

    /* No neighbours available: fill the 8x8 block with mid-grey (128). */
    for(y=0; y<8; y++)
        memset(src + y*stride, 128, 8);
}
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int dc0=0, dc2=0;

    /* Separate DC per 4-row half, each from its own 4 left neighbours
     * (top row unavailable). memset replaces the uint32_t* punned stores. */
    for(i=0; i<4; i++){
        dc0+= src[-1+ i   *stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0= (dc0 + 2) >> 2;
    dc2= (dc2 + 2) >> 2;

    for(i=0; i<4; i++)
        memset(src + i*stride, dc0, 8);
    for(i=4; i<8; i++)
        memset(src + i*stride, dc2, 8);
}
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0=0, dc1=0;

    /* Separate DC per 4-column half, each from its own 4 top neighbours
     * (left column unavailable). All 8 rows get the same two halves. */
    for(i=0; i<4; i++){
        dc0+= src[i  -stride];
        dc1+= src[4+i-stride];
    }
    dc0= (dc0 + 2) >> 2;
    dc1= (dc1 + 2) >> 2;

    for(i=0; i<8; i++){
        memset(src + i*stride,     dc0, 4);
        memset(src + i*stride + 4, dc1, 4);
    }
}
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum1=0, sum2=0;
    int dc0, dc1, dc2, dc3;

    /* One DC per 4x4 quadrant of the 8x8 chroma block:
     * sum0 = top-left 4 top + 4 left, sum1 = top-right 4 top,
     * sum2 = bottom-left 4 left.  The bottom-right quadrant averages the
     * raw top-right and bottom-left sums (as in the original, which
     * computed dc3 before rescaling dc1/dc2). */
    for(i=0; i<4; i++){
        sum0+= src[-1+i*stride] + src[i-stride];
        sum1+= src[4+i-stride];
        sum2+= src[-1+(i+4)*stride];
    }
    dc0= (sum0 + 4) >> 3;
    dc1= (sum1 + 2) >> 2;
    dc2= (sum2 + 2) >> 2;
    dc3= (sum1 + sum2 + 4) >> 3;

    for(i=0; i<4; i++){
        memset(src + i*stride,     dc0, 4);
        memset(src + i*stride + 4, dc1, 4);
    }
    for(i=4; i<8; i++){
        memset(src + i*stride,     dc2, 4);
        memset(src + i*stride + 4, dc3, 4);
    }
}
/* Plane (gradient) 8x8 chroma prediction: fit H/V gradients from the border
 * samples, then fill with a clipped linear ramp through the cropTbl LUT.
 * NOTE(review): declaration lines (k, a, b, i, j) and the per-row b update
 * are missing from this extract; code kept verbatim. */
2594 static void pred8x8_plane_c(uint8_t *src, int stride){
2597 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2598 const uint8_t * const src0 = src+3-stride;
2599 const uint8_t *src1 = src+4*stride-1;
2600 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2601 int H = src0[1] - src0[-1];
2602 int V = src1[0] - src2[ 0];
2603 for(k=2; k<=4; ++k) {
2604 src1 += stride; src2 -= stride;
2605 H += k*(src0[k] - src0[-k]);
2606 V += k*(src1[0] - src2[ 0]);
/* chroma gradient scaling per the spec: (17*x+16)>>5 */
2608 H = ( 17*H+16 ) >> 5;
2609 V = ( 17*V+16 ) >> 5;
2611 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2612 for(j=8; j>0; --j) {
2615 src[0] = cm[ (b ) >> 5 ];
2616 src[1] = cm[ (b+ H) >> 5 ];
2617 src[2] = cm[ (b+2*H) >> 5 ];
2618 src[3] = cm[ (b+3*H) >> 5 ];
2619 src[4] = cm[ (b+4*H) >> 5 ];
2620 src[5] = cm[ (b+5*H) >> 5 ];
2621 src[6] = cm[ (b+6*H) >> 5 ];
2622 src[7] = cm[ (b+7*H) >> 5 ];
/* 8x8 luma (High Profile) prediction helpers.  Unlike the 4x4 case, the
 * neighbouring samples are low-pass filtered ((a+2b+c+2)>>2) before use, as
 * the spec requires for 8x8 intra.  has_topleft/has_topright select the
 * fallback sample when a neighbour block is unavailable.
 * NOTE(review): some lines (e.g. the PL/PT/PTR #define headers and blank
 * separators) are missing from this extract; code kept verbatim. */
2627 #define SRC(x,y) src[(x)+(y)*stride]
2629 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2630 #define PREDICT_8x8_LOAD_LEFT \
2631 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2632 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2633 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2634 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2637 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2638 #define PREDICT_8x8_LOAD_TOP \
2639 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2640 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2641 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2642 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2643 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2646 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2647 #define PREDICT_8x8_LOAD_TOPRIGHT \
2648 int t8, t9, t10, t11, t12, t13, t14, t15; \
2649 if(has_topright) { \
2650 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2651 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2652 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2654 #define PREDICT_8x8_LOAD_TOPLEFT \
2655 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2657 #define PREDICT_8x8_DC(v) \
2659 for( y = 0; y < 8; y++ ) { \
2660 ((uint32_t*)src)[0] = \
2661 ((uint32_t*)src)[1] = v; \
2665 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2667 PREDICT_8x8_DC(0x80808080);
2669 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2671 PREDICT_8x8_LOAD_LEFT;
2672 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2675 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677 PREDICT_8x8_LOAD_TOP;
2678 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2681 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2683 PREDICT_8x8_LOAD_LEFT;
2684 PREDICT_8x8_LOAD_TOP;
2685 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2686 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2689 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2691 PREDICT_8x8_LOAD_LEFT;
2692 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2693 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2694 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2697 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2700 PREDICT_8x8_LOAD_TOP;
2709 for( y = 1; y < 8; y++ )
2710 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* Directional 8x8 luma predictors.  Each SRC(x,y)=... line writes one
 * anti-diagonal of equal predicted values, using the filtered neighbours
 * (t0..t15 top, l0..l7 left, lt top-left) set up by the PREDICT_8x8_LOAD_*
 * macros above.  Kept verbatim: the exact assignment order and tap choices
 * follow the H.264 spec and are easy to break. */
2712 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2714 PREDICT_8x8_LOAD_TOP;
2715 PREDICT_8x8_LOAD_TOPRIGHT;
2716 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2717 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2718 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2719 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2720 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2721 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2722 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2723 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2724 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2725 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2726 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2727 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2728 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2729 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2730 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* down-right: diagonals run from bottom-left (left samples) through the
 * corner (lt) to top-right (top samples) */
2732 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2734 PREDICT_8x8_LOAD_TOP;
2735 PREDICT_8x8_LOAD_LEFT;
2736 PREDICT_8x8_LOAD_TOPLEFT;
2737 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2738 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2739 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2740 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2741 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2742 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2743 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2744 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2745 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2746 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2747 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2748 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2749 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2750 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2751 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* vertical-right: alternates 2-tap averages and 3-tap filtered values */
2754 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2756 PREDICT_8x8_LOAD_TOP;
2757 PREDICT_8x8_LOAD_LEFT;
2758 PREDICT_8x8_LOAD_TOPLEFT;
2759 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2760 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2761 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2762 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2763 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2764 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2765 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2766 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2767 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2768 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2769 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2770 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2771 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2772 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2773 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2774 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2775 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2776 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2777 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2778 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2779 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2780 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* horizontal-down */
2782 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2784 PREDICT_8x8_LOAD_TOP;
2785 PREDICT_8x8_LOAD_LEFT;
2786 PREDICT_8x8_LOAD_TOPLEFT;
2787 SRC(0,7)= (l6 + l7 + 1) >> 1;
2788 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2789 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2790 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2791 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2792 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2793 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2794 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2795 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2796 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2797 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2798 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2799 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2800 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2801 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2802 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2803 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2804 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2805 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2806 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2807 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2808 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* vertical-left: top + top-right neighbours only */
2810 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2812 PREDICT_8x8_LOAD_TOP;
2813 PREDICT_8x8_LOAD_TOPRIGHT;
2814 SRC(0,0)= (t0 + t1 + 1) >> 1;
2815 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2816 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2817 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2818 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2819 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2820 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2821 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2822 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2823 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2824 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2825 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2826 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2827 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2828 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2829 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2830 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2831 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2832 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2833 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2834 SRC(7,6)= (t10 + t11 + 1) >> 1;
2835 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* horizontal-up: left column only; lower-right triangle saturates to l7 */
2837 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2839 PREDICT_8x8_LOAD_LEFT;
2840 SRC(0,0)= (l0 + l1 + 1) >> 1;
2841 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2842 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2843 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2844 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2845 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2846 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2847 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2848 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2849 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2850 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2851 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2852 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2853 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2854 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2855 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2856 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2857 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* scope the helper macros to this section only */
2859 #undef PREDICT_8x8_LOAD_LEFT
2860 #undef PREDICT_8x8_LOAD_TOP
2861 #undef PREDICT_8x8_LOAD_TOPLEFT
2862 #undef PREDICT_8x8_LOAD_TOPRIGHT
2863 #undef PREDICT_8x8_DC
/* Motion compensation for one partition, one reference list.
 * Luma is interpolated with the quarter-pel qpix_op selected by the
 * fractional MV bits (luma_xy); chroma with the eighth-pel chroma_op.
 * When the reference area extends past the picture, the samples are first
 * copied into edge_emu_buffer with edge replication (ff_emulated_edge_mc).
 * NOTE(review): several lines are missing from this extract (e.g. the emu
 * flag computation and some braces); code kept verbatim. */
2869 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2870 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2871 int src_x_offset, int src_y_offset,
2872 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2873 MpegEncContext * const s = &h->s;
2874 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2875 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2876 const int luma_xy= (mx&3) + ((my&3)<<2);
2877 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2878 uint8_t * src_cb, * src_cr;
2879 int extra_width= h->emu_edge_width;
2880 int extra_height= h->emu_edge_height;
2882 const int full_mx= mx>>2;
2883 const int full_my= my>>2;
2884 const int pic_width = 16*s->mb_width;
2885 const int pic_height = 16*s->mb_height >> MB_MBAFF;
/* sub-pel interpolation reads 3 extra samples on the filtered axis */
2890 if(mx&7) extra_width -= 3;
2891 if(my&7) extra_height -= 3;
2893 if( full_mx < 0-extra_width
2894 || full_my < 0-extra_height
2895 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2896 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2897 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2898 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2902 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partition: second half at +delta (8 pixels right or below) */
2904 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2907 if(s->flags&CODEC_FLAG_GRAY) return;
2910 // chroma offset when predicting from a field of opposite parity
2911 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2912 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2914 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2915 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2918 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919 src_cb= s->edge_emu_buffer;
2921 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2924 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2925 src_cr= s->edge_emu_buffer;
2927 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted (standard) prediction for one partition: run the "put" op for
 * the first available list, then switch to the "avg" op so a second list,
 * if present, is averaged into the same destination (bi-prediction).
 * NOTE(review): the if(list0)/if(list1) guard lines and the op switch for
 * qpix are missing from this extract; code kept verbatim. */
2930 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2931 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2932 int x_offset, int y_offset,
2933 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2934 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2935 int list0, int list1){
2936 MpegEncContext * const s = &h->s;
2937 qpel_mc_func *qpix_op= qpix_put;
2938 h264_chroma_mc_func chroma_op= chroma_put;
/* destination advanced to the partition; offsets then made frame-absolute */
2940 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2941 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2942 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2943 x_offset += 8*s->mb_x;
2944 y_offset += 8*(s->mb_y >> MB_MBAFF);
2947 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2948 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2949 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2950 qpix_op, chroma_op);
/* second list averages over the first list's result */
2953 chroma_op= chroma_avg;
2957 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2958 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2959 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2960 qpix_op, chroma_op);
/* Weighted prediction for one partition.  Bi-directional case: predict each
 * list into separate buffers (obmc_scratchpad for list 1) and blend with
 * either implicit weights (use_weight == 2, weight pair summing to 64) or
 * explicit per-ref luma/chroma weights and offsets.  Uni-directional case:
 * predict in place, then apply the single-list weight op.
 * NOTE(review): the if(list0 && list1)/else branch lines are missing from
 * this extract; code kept verbatim. */
2964 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2965 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2966 int x_offset, int y_offset,
2967 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2968 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2969 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2970 int list0, int list1){
2971 MpegEncContext * const s = &h->s;
2973 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2974 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2975 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2976 x_offset += 8*s->mb_x;
2977 y_offset += 8*(s->mb_y >> MB_MBAFF);
2980 /* don't optimize for luma-only case, since B-frames usually
2981 * use implicit weights => chroma too. */
2982 uint8_t *tmp_cb = s->obmc_scratchpad;
2983 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2984 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2985 int refn0 = h->ref_cache[0][ scan8[n] ];
2986 int refn1 = h->ref_cache[1][ scan8[n] ];
2988 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2989 dest_y, dest_cb, dest_cr,
2990 x_offset, y_offset, qpix_put, chroma_put);
2991 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2992 tmp_y, tmp_cb, tmp_cr,
2993 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: table-driven weight0, complement weight1, denom 5 */
2995 if(h->use_weight == 2){
2996 int weight0 = h->implicit_weight[refn0][refn1];
2997 int weight1 = 64 - weight0;
2998 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2999 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
3000 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-reference weights and additive offsets */
3002 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3003 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
3004 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
3005 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3006 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3007 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3008 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3009 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3010 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted path */
3013 int list = list1 ? 1 : 0;
3014 int refn = h->ref_cache[list][ scan8[n] ];
3015 Picture *ref= &h->ref_list[list][refn];
3016 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3017 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3018 qpix_put, chroma_put);
3020 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3021 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3022 if(h->use_weight_chroma){
3023 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3024 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3025 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3026 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to the weighted or standard MC path.  The weighted
 * path is taken for explicit weighting (use_weight==1) or for implicit
 * bi-prediction whose weight differs from the 32/32 plain average. */
3031 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3032 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3033 int x_offset, int y_offset,
3034 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3035 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3036 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3037 int list0, int list1){
3038 if((h->use_weight==2 && list0 && list1
3039 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3040 || h->use_weight==1)
3041 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3042 x_offset, y_offset, qpix_put, chroma_put,
3043 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3045 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3046 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* NOTE(review): the guard that skips prefetching for intra/unavailable refs
 * (original line 3054) is missing from this extract; kept verbatim. */
3049 static inline void prefetch_motion(H264Context *h, int list){
3050 /* fetch pixels for estimated mv 4 macroblocks ahead
3051 * optimized for 64byte cache lines */
3052 MpegEncContext * const s = &h->s;
3053 const int refn = h->ref_cache[list][scan8[0]];
3055 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3056 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3057 uint8_t **src= h->ref_list[list][refn].data;
3058 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3059 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous, so one prefetch covers cb and cr */
3060 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3061 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter prediction for a whole macroblock: split by partition shape
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and call mc_part for
 * each, selecting the qpix/chroma op and weight-op width matching the
 * partition size.  Partition index n maps into the scan8[] MV cache.
 * NOTE(review): loop headers (for(i...), for(j...)) and some braces are
 * missing from this extract; code kept verbatim. */
3065 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3066 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3067 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3068 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3069 MpegEncContext * const s = &h->s;
3070 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3071 const int mb_type= s->current_picture.mb_type[mb_xy];
3073 assert(IS_INTER(mb_type));
3075 prefetch_motion(h, 0);
3077 if(IS_16X16(mb_type)){
3078 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3079 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3080 &weight_op[0], &weight_avg[0],
3081 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3082 }else if(IS_16X8(mb_type)){
3083 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3084 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3085 &weight_op[1], &weight_avg[1],
3086 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3087 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3088 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3089 &weight_op[1], &weight_avg[1],
3090 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3091 }else if(IS_8X16(mb_type)){
3092 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3093 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3094 &weight_op[2], &weight_avg[2],
3095 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3096 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3097 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3098 &weight_op[2], &weight_avg[2],
3099 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 path: each quarter has its own sub_mb_type */
3103 assert(IS_8X8(mb_type));
3106 const int sub_mb_type= h->sub_mb_type[i];
3108 int x_offset= (i&1)<<2;
3109 int y_offset= (i&2)<<1;
3111 if(IS_SUB_8X8(sub_mb_type)){
3112 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3113 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3114 &weight_op[3], &weight_avg[3],
3115 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3116 }else if(IS_SUB_8X4(sub_mb_type)){
3117 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3118 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3119 &weight_op[4], &weight_avg[4],
3120 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3121 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3122 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3123 &weight_op[4], &weight_avg[4],
3124 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3125 }else if(IS_SUB_4X8(sub_mb_type)){
3126 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3127 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3128 &weight_op[5], &weight_avg[5],
3129 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3130 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3131 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3132 &weight_op[5], &weight_avg[5],
3133 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3136 assert(IS_SUB_4X4(sub_mb_type));
3138 int sub_x_offset= x_offset + 2*(j&1);
3139 int sub_y_offset= y_offset + (j&2);
3140 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3141 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3142 &weight_op[6], &weight_avg[6],
3143 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3149 prefetch_motion(h, 1);
// Builds the static CAVLC lookup tables (coeff_token, total_zeros, run)
// used by the residual decoder.  NOTE(review): lines are elided in this
// listing; the 'done' flag presumably guards one-time initialization and
// the init_vlc() calls sit inside loops whose headers are not visible —
// confirm against the full file.
3152 static void decode_init_vlc(H264Context *h){
3153 static int done = 0;
// coeff_token table for the chroma DC block (4*5 entries).
3159 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3160 &chroma_dc_coeff_token_len [0], 1, 1,
3161 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// Luma coeff_token tables, one per nC context (4*17 entries each).
3164 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3165 &coeff_token_len [i][0], 1, 1,
3166 &coeff_token_bits[i][0], 1, 1, 1);
3170 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3171 &chroma_dc_total_zeros_len [i][0], 1, 1,
3172 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// total_zeros tables for 1..15 remaining coefficients.
3174 for(i=0; i<15; i++){
3175 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3176 &total_zeros_len [i][0], 1, 1,
3177 &total_zeros_bits[i][0], 1, 1, 1);
// run_before tables; runs >= 7 share the dedicated run7 table below.
3181 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3182 &run_len [i][0], 1, 1,
3183 &run_bits[i][0], 1, 1, 1);
3185 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3186 &run_len [6][0], 1, 1,
3187 &run_bits[6][0], 1, 1, 1);
3192 * Sets the intra prediction function pointers.
// Fills the intra-prediction function-pointer tables with the C reference
// implementations: pred4x4 (9 modes + DC fallbacks), pred8x8l (8x8 luma,
// transform_8x8 mode), pred8x8 (chroma), pred16x16 (Intra_16x16 luma).
// The *_DC_PRED / DC_128 entries are the edge-case DC variants used when
// left/top neighbours are unavailable.
3194 static void init_pred_ptrs(H264Context *h){
3195 // MpegEncContext * const s = &h->s;
3197 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3198 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3199 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3200 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3201 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3202 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3203 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3204 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3205 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3206 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3207 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3208 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3210 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3211 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3212 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3213 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3214 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3215 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3216 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3217 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3218 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3219 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3220 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3221 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3223 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3224 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3225 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3226 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3227 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3228 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3229 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
// 16x16 prediction reuses the 8x8 (chroma) mode indices.
3231 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3232 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3233 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3234 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3235 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3236 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3237 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
// Releases every per-stream table allocated by alloc_tables(); the inverse
// of that function.  av_freep() NULLs the pointers, so calling this twice
// is safe.  slice_table is only an offset view into slice_table_base, hence
// it is cleared rather than freed.
3240 static void free_tables(H264Context *h){
3241 av_freep(&h->intra4x4_pred_mode);
3242 av_freep(&h->chroma_pred_mode_table);
3243 av_freep(&h->cbp_table);
3244 av_freep(&h->mvd_table[0]);
3245 av_freep(&h->mvd_table[1]);
3246 av_freep(&h->direct_table);
3247 av_freep(&h->non_zero_count);
3248 av_freep(&h->slice_table_base);
3249 av_freep(&h->top_borders[1]);
3250 av_freep(&h->top_borders[0]);
3251 h->slice_table= NULL; // alias into slice_table_base, freed above
3253 av_freep(&h->mb2b_xy);
3254 av_freep(&h->mb2b8_xy);
3256 av_freep(&h->s.obmc_scratchpad);
// Precomputes the 8x8 dequantization tables for all 52 QP values from the
// PPS scaling matrices.  If both intra and inter matrices are identical the
// second table aliases the first to save memory.  The coefficients are
// stored transposed when a non-reference (SIMD) idct8 is in use, to match
// its expected layout.  NOTE(review): the loop headers over x and the idx
// computation are elided in this listing.
3259 static void init_dequant8_coeff_table(H264Context *h){
3261 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3262 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3263 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3265 for(i=0; i<2; i++ ){
// Share table 0 when intra and inter 8x8 scaling matrices match.
3266 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3267 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3271 for(q=0; q<52; q++){
3272 int shift = div6[q];
3275 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3276 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3277 h->pps.scaling_matrix8[i][x]) << shift;
// Precomputes the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb,
// Cr) for all 52 QPs from the PPS scaling matrices, aliasing duplicates.
// Entries are stored transposed when a non-reference idct is in use.
// NOTE(review): the inner loops over j/x and the idx computation are elided
// in this listing.
3282 static void init_dequant4_coeff_table(H264Context *h){
3284 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3285 for(i=0; i<6; i++ ){
3286 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// Alias an earlier buffer when the scaling matrices are identical.
3288 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3289 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3296 for(q=0; q<52; q++){
3297 int shift = div6[q] + 2;
3300 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3301 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3302 h->pps.scaling_matrix4[i][x]) << shift;
// Builds the 4x4 (and, when the PPS enables 8x8 transforms, 8x8) dequant
// tables, then overwrites the QP==0 rows with the identity scale 1<<6 for
// lossless (transform-bypass) streams.  NOTE(review): the loop headers over
// i/x are elided in this listing.
3307 static void init_dequant_tables(H264Context *h){
3309 init_dequant4_coeff_table(h);
3310 if(h->pps.transform_8x8_mode)
3311 init_dequant8_coeff_table(h);
3312 if(h->sps.transform_bypass){
3315 h->dequant4_coeff[i][0][x] = 1<<6; // identity for lossless QP 0
3316 if(h->pps.transform_8x8_mode)
3319 h->dequant8_coeff[i][0][x] = 1<<6;
3326 * needs width/height
// Allocates all per-stream tables sized from mb_width/mb_height (so the
// dimensions must already be known).  big_mb_num includes one extra MB row
// of padding so neighbour lookups above row 0 stay in bounds.
// NOTE(review): CHECKED_ALLOCZ presumably jumps to an error path (elided
// from this listing) that frees partial allocations — confirm in full file.
3328 static int alloc_tables(H264Context *h){
3329 MpegEncContext * const s = &h->s;
3330 const int big_mb_num= s->mb_stride * (s->mb_height+1); // +1 row padding
3333 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3335 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3336 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3338 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3339 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC-only side tables.
3341 if( h->pps.cabac ) {
3342 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3343 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3344 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3345 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset past the padding row.
3348 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3349 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// MB index -> motion-vector (b) / reference (b8) index maps.
3351 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3352 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3353 for(y=0; y<s->mb_height; y++){
3354 for(x=0; x<s->mb_width; x++){
3355 const int mb_xy= x + y*s->mb_stride;
3356 const int b_xy = 4*x + 4*y*h->b_stride;
3357 const int b8_xy= 2*x + 2*y*h->b8_stride;
3359 h->mb2b_xy [mb_xy]= b_xy;
3360 h->mb2b8_xy[mb_xy]= b8_xy;
// Allocated lazily in frame_start() once linesize is known.
3364 s->obmc_scratchpad = NULL;
3366 if(!h->dequant4_coeff[0])
3367 init_dequant_tables(h);
// Initialization shared by decoder (and encoder) setup: copies dimensions
// from the AVCodecContext and seeds the scaling matrices with the spec's
// Flat_4x4_16 / Flat_8x8_16 default (all 16s), used until a SPS/PPS
// provides real ones.
3375 static void common_init(H264Context *h){
3376 MpegEncContext * const s = &h->s;
3378 s->width = s->avctx->width;
3379 s->height = s->avctx->height;
3380 s->codec_id= s->avctx->codec->id;
3384 h->dequant_coeff_pps= -1; // no PPS has filled the dequant tables yet
3385 s->unrestricted_mv=1;
3386 s->decode=1; //FIXME
// Default "flat" scaling lists (value 16 == unity scaling).
3388 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3389 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// AVCodec init callback: sets up the MpegEncContext for H.264 decoding and
// detects AVC-style (avcC) extradata by its leading version byte 1, as
// opposed to Annex-B start-code extradata.  NOTE(review): the tail of this
// function (avcC handling, return) is elided from this listing.
3392 static int decode_init(AVCodecContext *avctx){
3393 H264Context *h= avctx->priv_data;
3394 MpegEncContext * const s = &h->s;
3396 MPV_decode_defaults(s);
3401 s->out_format = FMT_H264;
3402 s->workaround_bugs= avctx->workaround_bugs;
3405 // s->decode_mb= ff_h263_decode_mb;
3407 avctx->pix_fmt= PIX_FMT_YUV420P;
// avcC extradata starts with configurationVersion == 1.
3411 if(avctx->extradata_size > 0 && avctx->extradata &&
3412 *(char *)avctx->extradata == 1){
// Per-frame setup: starts the MPV frame and error resilience, precomputes
// the scan8-ordered block->pixel offsets (entries 0..23 for frame/progressive
// strides, 24..47 for the doubled MBAFF field strides), lazily allocates the
// bipred scratchpad, and resets slice_table.
3422 static int frame_start(H264Context *h){
3423 MpegEncContext * const s = &h->s;
3426 if(MPV_frame_start(s, s->avctx) < 0)
3428 ff_er_frame_start(s);
3430 assert(s->linesize && s->uvlinesize);
// Luma offsets: [i] frame stride, [24+i] field (doubled) stride.
3432 for(i=0; i<16; i++){
3433 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3434 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// Chroma offsets (Cb and Cr share the same layout).
3437 h->block_offset[16+i]=
3438 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3439 h->block_offset[24+16+i]=
3440 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3443 /* can't be in alloc_tables because linesize isn't known there.
3444 * FIXME: redo bipred weight to not require extra buffer? */
3445 if(!s->obmc_scratchpad)
3446 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3448 /* some macroblocks will be accessed before they're available */
3450 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3452 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
// Saves the right column (left_border for the next MB) and the bottom row
// (top_borders for the next MB row) of the just-decoded macroblock, so the
// deblocking filter can still read unfiltered neighbour samples.
3456 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3457 MpegEncContext * const s = &h->s;
3461 src_cb -= uvlinesize;
3462 src_cr -= uvlinesize;
3464 // There are two lines saved, the line above the top macroblock of a pair,
3465 // and the line above the bottom macroblock
// Carry over the top-left corner sample, then the 16 right-column samples.
3466 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3467 for(i=1; i<17; i++){
3468 h->left_border[i]= src_y[15+i* linesize];
// Bottom luma row, copied as two 8-byte words.
3471 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3472 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// Chroma borders are skipped entirely in grayscale-only mode.
3474 if(!(s->flags&CODEC_FLAG_GRAY)){
3475 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3476 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3478 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3479 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3481 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3482 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border samples
// with the current MB's edge pixels, so intra prediction sees unfiltered
// neighbours while the deblocked picture is preserved.  NOTE(review): the
// XCHG macro body is elided in this listing; the third-arg constant 1
// appears to force the copy direction for the save-only positions —
// confirm against the full macro definition.
3486 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3487 MpegEncContext * const s = &h->s;
3490 int deblock_left = (s->mb_x > 0);
3491 int deblock_top = (s->mb_y > 0);
// Step back to the row/column above-left of the MB.
3493 src_y -= linesize + 1;
3494 src_cb -= uvlinesize + 1;
3495 src_cr -= uvlinesize + 1;
3497 #define XCHG(a,b,t,xchg)\
// Left luma column (skip row 0 when there is no top neighbour).
3504 for(i = !deblock_top; i<17; i++){
3505 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Top luma row; the second half and the top-right word are always stored.
3510 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3511 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3512 if(s->mb_x+1 < s->mb_width){
3513 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3517 if(!(s->flags&CODEC_FLAG_GRAY)){
3519 for(i = !deblock_top; i<9; i++){
3520 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3521 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3525 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3526 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border: saves borders for a whole macroblock
// PAIR (two stacked MBs), hence two top lines (top_borders[0] and [1]) and
// 32 luma / 2x16 chroma left-column samples.
3531 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3532 MpegEncContext * const s = &h->s;
3535 src_y -= 2 * linesize;
3536 src_cb -= 2 * uvlinesize;
3537 src_cr -= 2 * uvlinesize;
3539 // There are two lines saved, the line above the top macroblock of a pair,
3540 // and the line above the bottom macroblock
3541 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3542 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3543 for(i=2; i<34; i++){
3544 h->left_border[i]= src_y[15+i* linesize];
// Bottom two luma rows of the pair (rows 32 and 33).
3547 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3548 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3549 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3550 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3552 if(!(s->flags&CODEC_FLAG_GRAY)){
3553 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3554 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3555 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3556 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3557 for(i=2; i<18; i++){
3558 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3559 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3561 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3562 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3563 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3564 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF variant of xchg_mb_border: swaps/restores the saved unfiltered
// borders for a macroblock pair (two top lines, 32+2x16 left samples).
// deblock_top requires mb_y > 1 because a pair occupies two MB rows.
// NOTE(review): the XCHG macro body is elided in this listing.
3568 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3569 MpegEncContext * const s = &h->s;
3572 int deblock_left = (s->mb_x > 0);
3573 int deblock_top = (s->mb_y > 1);
3575 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3577 src_y -= 2 * linesize + 1;
3578 src_cb -= 2 * uvlinesize + 1;
3579 src_cr -= 2 * uvlinesize + 1;
3581 #define XCHG(a,b,t,xchg)\
// Skip the first two rows when there is no top pair.
3588 for(i = (!deblock_top)<<1; i<34; i++){
3589 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3594 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3595 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3596 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3597 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3598 if(s->mb_x+1 < s->mb_width){
3599 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3600 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3604 if(!(s->flags&CODEC_FLAG_GRAY)){
3606 for(i = (!deblock_top) << 1; i<18; i++){
3607 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3608 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3612 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3613 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3614 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3615 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level per-macroblock reconstruction: computes destination pointers,
// selects idct/idct_dc function pointers, runs intra prediction or motion
// compensation, adds the residual, and finally deblocks.  Also contains the
// MBAFF-specific path (doubled strides, ref_cache field remapping, pair
// border exchange).  NOTE(review): several closing braces and a few
// conditionals are elided from this listing; comments on elided control
// flow are hedged accordingly.
3620 static void hl_decode_mb(H264Context *h){
3621 MpegEncContext * const s = &h->s;
3622 const int mb_x= s->mb_x;
3623 const int mb_y= s->mb_y;
3624 const int mb_xy= mb_x + mb_y*s->mb_stride;
3625 const int mb_type= s->current_picture.mb_type[mb_xy];
3626 uint8_t *dest_y, *dest_cb, *dest_cr;
3627 int linesize, uvlinesize /*dct_offset*/;
3629 int *block_offset = &h->block_offset[0];
3630 const unsigned int bottom = mb_y & 1; // bottom MB of an MBAFF pair
3631 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3632 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3633 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3638 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3639 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3640 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Field decoding: double the strides and use the field block offsets.
3643 linesize = h->mb_linesize = s->linesize * 2;
3644 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3645 block_offset = &h->block_offset[24];
3646 if(mb_y&1){ //FIXME move out of this func?
3647 dest_y -= s->linesize*15;
3648 dest_cb-= s->uvlinesize*7;
3649 dest_cr-= s->uvlinesize*7;
// Remap cached refs to field parity (16+ref^(mb_y&1)).
3653 for(list=0; list<2; list++){
3654 if(!USES_LIST(mb_type, list))
3656 if(IS_16X16(mb_type)){
3657 int8_t *ref = &h->ref_cache[list][scan8[0]];
3658 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3660 for(i=0; i<16; i+=4){
3661 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3662 int ref = h->ref_cache[list][scan8[i]];
3664 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// Non-MBAFF (or frame-coded) path: normal strides.
3670 linesize = h->mb_linesize = s->linesize;
3671 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3672 // dct_offset = s->linesize * 16;
// Select the residual-add functions for this MB's transform type.
3675 if(transform_bypass){
3677 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3678 }else if(IS_8x8DCT(mb_type)){
3679 idct_dc_add = s->dsp.h264_idct8_dc_add;
3680 idct_add = s->dsp.h264_idct8_add;
3682 idct_dc_add = s->dsp.h264_idct_dc_add;
3683 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: temporarily restore unfiltered pair borders.
3686 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3687 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3688 int mbt_y = mb_y&~1;
3689 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3690 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3691 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3692 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM macroblock: raw samples, copied straight from h->mb.
3695 if (IS_INTRA_PCM(mb_type)) {
3698 // The pixels are stored in h->mb array in the same order as levels,
3699 // copy them in output in the correct order.
3700 for(i=0; i<16; i++) {
3701 for (y=0; y<4; y++) {
3702 for (x=0; x<4; x++) {
3703 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3707 for(i=16; i<16+4; i++) {
3708 for (y=0; y<4; y++) {
3709 for (x=0; x<4; x++) {
3710 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3714 for(i=20; i<20+4; i++) {
3715 for (y=0; y<4; y++) {
3716 for (x=0; x<4; x++) {
3717 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: predict, then (below) add the residual.
3722 if(IS_INTRA(mb_type)){
3723 if(h->deblocking_filter && !FRAME_MBAFF)
3724 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3726 if(!(s->flags&CODEC_FLAG_GRAY)){
3727 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3728 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3731 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: four 8x8 luma predictions + idct8 residual.
3733 if(IS_8x8DCT(mb_type)){
3734 for(i=0; i<16; i+=4){
3735 uint8_t * const ptr= dest_y + block_offset[i];
3736 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3737 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3738 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3739 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3741 if(nnz == 1 && h->mb[i*16])
3742 idct_dc_add(ptr, h->mb + i*16, linesize); // DC-only fast path
3744 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 transform: sixteen 4x4 predictions + residual.
3748 for(i=0; i<16; i++){
3749 uint8_t * const ptr= dest_y + block_offset[i];
3751 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// These modes read top-right samples; synthesize them by replicating
// the rightmost top sample when the true top-right is unavailable.
3754 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3755 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3756 assert(mb_y || linesize <= block_offset[i]);
3757 if(!topright_avail){
3758 tr= ptr[3 - linesize]*0x01010101;
3759 topright= (uint8_t*) &tr;
3761 topright= ptr + 4 - linesize;
3765 h->pred4x4[ dir ](ptr, topright, linesize);
3766 nnz = h->non_zero_count_cache[ scan8[i] ];
3768 if(s->codec_id == CODEC_ID_H264){
3769 if(nnz == 1 && h->mb[i*16])
3770 idct_dc_add(ptr, h->mb + i*16, linesize);
3772 idct_add(ptr, h->mb + i*16, linesize);
// This decoder is shared with SVQ3, which has its own idct.
3774 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra_16x16: one full-MB prediction + separate luma DC transform.
3779 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3780 if(s->codec_id == CODEC_ID_H264){
3781 if(!transform_bypass)
3782 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3784 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3786 if(h->deblocking_filter && !FRAME_MBAFF)
3787 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock: motion compensation.
3788 }else if(s->codec_id == CODEC_ID_H264){
3789 hl_motion(h, dest_y, dest_cb, dest_cr,
3790 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3791 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3792 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add the luma residual (Intra4x4 already added it above).
3796 if(!IS_INTRA4x4(mb_type)){
3797 if(s->codec_id == CODEC_ID_H264){
3798 if(IS_INTRA16x16(mb_type)){
3799 for(i=0; i<16; i++){
3800 if(h->non_zero_count_cache[ scan8[i] ])
3801 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3802 else if(h->mb[i*16])
3803 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3806 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3807 for(i=0; i<16; i+=di){
3808 int nnz = h->non_zero_count_cache[ scan8[i] ];
3810 if(nnz==1 && h->mb[i*16])
3811 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3813 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3818 for(i=0; i<16; i++){
3819 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3820 uint8_t * const ptr= dest_y + block_offset[i];
3821 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: dequant the 2x2 DC blocks, then add per 4x4 block.
3827 if(!(s->flags&CODEC_FLAG_GRAY)){
3828 uint8_t *dest[2] = {dest_cb, dest_cr};
3829 if(transform_bypass){
3830 idct_add = idct_dc_add = s->dsp.add_pixels4;
3832 idct_add = s->dsp.h264_idct_add;
3833 idct_dc_add = s->dsp.h264_idct_dc_add;
3834 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3835 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3837 if(s->codec_id == CODEC_ID_H264){
3838 for(i=16; i<16+8; i++){
3839 if(h->non_zero_count_cache[ scan8[i] ])
3840 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3841 else if(h->mb[i*16])
3842 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3845 for(i=16; i<16+8; i++){
3846 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3847 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3848 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.  In MBAFF mode the pair is filtered once, after its bottom
// MB has been reconstructed (hence the early return when !bottom).
3854 if(h->deblocking_filter) {
3856 //FIXME try deblocking one mb at a time?
3857 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3858 const int mb_y = s->mb_y - 1; // shadows outer mb_y: top MB of the pair
3859 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3860 const int mb_xy= mb_x + mb_y*s->mb_stride;
3861 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3862 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3863 if (!bottom) return;
3864 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3865 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3866 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3868 if(IS_INTRA(mb_type_top | mb_type_bottom))
3869 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3871 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// Filter the top, then the bottom MB of the pair.
3875 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3876 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3878 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3881 tprintf("call mbaff filter_mb\n");
3882 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3883 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3884 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// Non-MBAFF path: back up borders and use the fast filter.
3886 tprintf("call filter_mb\n");
3887 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3888 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3889 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3895 * fills the default_ref_list.
// Builds the default reference picture lists per H.264 8.2.4.2.
// P slices: short-term refs in decoding order, then long-term by index.
// B slices: short-term refs sorted by POC relative to the current picture
// (L0: past then future, L1: future then past), then long-term; L1's first
// two entries are swapped if it would otherwise equal L0.
3897 static int fill_default_ref_list(H264Context *h){
3898 MpegEncContext * const s = &h->s;
3900 int smallest_poc_greater_than_current = -1;
3901 Picture sorted_short_ref[32];
3903 if(h->slice_type==B_TYPE){
3907 /* sort frame according to poc in B slice */
// Selection sort by ascending POC; remembers where "future" pictures start.
3908 for(out_i=0; out_i<h->short_ref_count; out_i++){
3910 int best_poc=INT_MAX;
3912 for(i=0; i<h->short_ref_count; i++){
3913 const int poc= h->short_ref[i]->poc;
3914 if(poc > limit && poc < best_poc){
3920 assert(best_i != INT_MIN);
3923 sorted_short_ref[out_i]= *h->short_ref[best_i];
3924 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3925 if (-1 == smallest_poc_greater_than_current) {
3926 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3927 smallest_poc_greater_than_current = out_i;
3933 if(s->picture_structure == PICT_FRAME){
3934 if(h->slice_type==B_TYPE){
3936 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3938 // find the largest poc
// Walk the sorted array outward from the current POC, in opposite
// directions for L0 and L1; wrap to the other side when falling off.
3939 for(list=0; list<2; list++){
3942 int step= list ? -1 : 1;
3944 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3945 while(j<0 || j>= h->short_ref_count){
3946 if(j != -99 && step == (list ? -1 : 1))
3949 j= smallest_poc_greater_than_current + (step>>1);
3951 if(sorted_short_ref[j].reference != 3) continue;
3952 h->default_ref_list[list][index ]= sorted_short_ref[j];
3953 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Append long-term refs in ascending index order.
3956 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3957 if(h->long_ref[i] == NULL) continue;
3958 if(h->long_ref[i]->reference != 3) continue;
3960 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3961 h->default_ref_list[ list ][index++].pic_id= i;; // NOTE(review): stray ';'
3964 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3965 // swap the two first elements of L1 when
3966 // L0 and L1 are identical
3967 Picture temp= h->default_ref_list[1][0];
3968 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3969 h->default_ref_list[1][1] = temp;
3972 if(index < h->ref_count[ list ])
3973 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P-slice (non-B) path: short-term refs first, then long-term.
3977 for(i=0; i<h->short_ref_count; i++){
3978 if(h->short_ref[i]->reference != 3) continue; //FIXME reference-field handling
3979 h->default_ref_list[0][index ]= *h->short_ref[i];
3980 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3982 for(i = 0; i < 16; i++){
3983 if(h->long_ref[i] == NULL) continue;
3984 if(h->long_ref[i]->reference != 3) continue;
3985 h->default_ref_list[0][index ]= *h->long_ref[i];
3986 h->default_ref_list[0][index++].pic_id= i;; // NOTE(review): stray ';'
3988 if(index < h->ref_count[0])
3989 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3992 if(h->slice_type==B_TYPE){
3994 //FIXME second field handling
3998 for (i=0; i<h->ref_count[0]; i++) {
3999 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
4001 if(h->slice_type==B_TYPE){
4002 for (i=0; i<h->ref_count[1]; i++) {
4003 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4010 static void print_short_term(H264Context *h);
4011 static void print_long_term(H264Context *h);
// Parses ref_pic_list_reordering() from the slice header (H.264 7.3.3.1 /
// 8.2.4.3): starts from the default lists and, per reordering command,
// moves the named short-term (idc 0/1, by frame_num delta) or long-term
// (idc 2, by index) picture to the front of the remaining list.  Missing
// entries are padded with the current picture.  Returns 0 on success,
// negative on a syntax/overflow error.
4013 static int decode_ref_pic_list_reordering(H264Context *h){
4014 MpegEncContext * const s = &h->s;
4017 print_short_term(h);
4019 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4021 for(list=0; list<2; list++){
4022 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
4024 if(get_bits1(&s->gb)){
4025 int pred= h->curr_pic_num;
4027 for(index=0; ; index++){
4028 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4031 Picture *ref = NULL;
4033 if(reordering_of_pic_nums_idc==3) // end of reordering commands
4036 if(index >= h->ref_count[list]){
4037 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4041 if(reordering_of_pic_nums_idc<3){
4042 if(reordering_of_pic_nums_idc<2){
// Short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num
// to the running prediction, modulo max_pic_num.
4043 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4045 if(abs_diff_pic_num >= h->max_pic_num){
4046 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4050 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4051 else pred+= abs_diff_pic_num;
4052 pred &= h->max_pic_num - 1;
4054 for(i= h->short_ref_count-1; i>=0; i--){
4055 ref = h->short_ref[i];
4056 assert(ref->reference == 3);
4057 assert(!ref->long_ref);
4058 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4062 ref->pic_id= ref->frame_num;
// Long-term: looked up directly by index.
4064 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4065 ref = h->long_ref[pic_id];
4066 ref->pic_id= pic_id;
4067 assert(ref->reference == 3);
4068 assert(ref->long_ref);
4073 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4074 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the remaining entries down and insert the picture at 'index'.
4076 for(i=index; i+1<h->ref_count[list]; i++){
4077 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4080 for(; i > index; i--){
4081 h->ref_list[list][i]= h->ref_list[list][i-1];
4083 h->ref_list[list][index]= *ref;
4086 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4092 if(h->slice_type!=B_TYPE) break; // only B slices have list 1
// Replace any empty (missing) entries with the current picture.
4094 for(list=0; list<2; list++){
4095 for(index= 0; index < h->ref_count[list]; index++){
4096 if(!h->ref_list[list][index].data[0])
4097 h->ref_list[list][index]= s->current_picture;
4099 if(h->slice_type!=B_TYPE) break;
4102 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4103 direct_dist_scale_factor(h);
4104 direct_ref_list_init(h);
// For MBAFF decoding, derives per-field reference entries from each frame
// reference: slots [16+2*i] (top field) and [16+2*i+1] (bottom field) get
// doubled linesizes, with the bottom field's data pointers offset by one
// line.  Weighted-prediction parameters are duplicated for both fields.
4108 static void fill_mbaff_ref_list(H264Context *h){
4110 for(list=0; list<2; list++){
4111 for(i=0; i<h->ref_count[list]; i++){
4112 Picture *frame = &h->ref_list[list][i];
4113 Picture *field = &h->ref_list[list][16+2*i];
4116 field[0].linesize[j] <<= 1; // field stride = 2x frame stride
4117 field[1] = field[0];
4119 field[1].data[j] += frame->linesize[j]; // bottom field starts one line down
4121 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4122 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4124 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4125 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Duplicate implicit bipred weights along both ref axes.
4129 for(j=0; j<h->ref_count[1]; j++){
4130 for(i=0; i<h->ref_count[0]; i++)
4131 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4132 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4133 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses pred_weight_table() from the slice header (H.264 7.3.3.2):
// explicit weighted-prediction weights/offsets per reference, for luma and
// chroma, for L0 and (in B slices) L1.  Entries without a flag get the
// default weight 1<<log2_denom and offset 0.  Sets h->use_weight[_chroma]
// when any entry deviates from the default.
4137 static int pred_weight_table(H264Context *h){
4138 MpegEncContext * const s = &h->s;
4140 int luma_def, chroma_def;
4143 h->use_weight_chroma= 0;
4144 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4145 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4146 luma_def = 1<<h->luma_log2_weight_denom; // identity luma weight
4147 chroma_def = 1<<h->chroma_log2_weight_denom; // identity chroma weight
4149 for(list=0; list<2; list++){
4150 for(i=0; i<h->ref_count[list]; i++){
4151 int luma_weight_flag, chroma_weight_flag;
4153 luma_weight_flag= get_bits1(&s->gb);
4154 if(luma_weight_flag){
4155 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4156 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4157 if( h->luma_weight[list][i] != luma_def
4158 || h->luma_offset[list][i] != 0)
4161 h->luma_weight[list][i]= luma_def;
4162 h->luma_offset[list][i]= 0;
4165 chroma_weight_flag= get_bits1(&s->gb);
4166 if(chroma_weight_flag){
// One weight/offset pair each for Cb and Cr.
4169 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4170 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4171 if( h->chroma_weight[list][i][j] != chroma_def
4172 || h->chroma_offset[list][i][j] != 0)
4173 h->use_weight_chroma= 1;
4178 h->chroma_weight[list][i][j]= chroma_def;
4179 h->chroma_offset[list][i][j]= 0;
4183 if(h->slice_type != B_TYPE) break; // only B slices carry L1 weights
4185 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Derives the implicit bi-prediction weight table for B slices
 * (weighted_bipred_idc == 2) from picture order counts, per the POC
 * distance formula: td/tb clipped to [-128,127], dist_scale_factor
 * clipped, and weights forced to 32/32 outside [-64,128].
 * NOTE(review): excerpt elided — the early-return body for the
 * single-ref equidistant case and some conditions are not visible. */
4189 static void implicit_weight_table(H264Context *h){
4190 MpegEncContext * const s = &h->s;
4192 int cur_poc = s->current_picture_ptr->poc;
/* fast path: one ref each side, equidistant from current POC */
4194 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4195 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4197 h->use_weight_chroma= 0;
4202 h->use_weight_chroma= 2;
4203 h->luma_log2_weight_denom= 5;
4204 h->chroma_log2_weight_denom= 5;
4206 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4207 int poc0 = h->ref_list[0][ref0].poc;
4208 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4209 int poc1 = h->ref_list[1][ref1].poc;
4210 int td = clip(poc1 - poc0, -128, 127);
4212 int tb = clip(cur_poc - poc0, -128, 127);
4213 int tx = (16384 + (ABS(td) >> 1)) / td;
4214 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors degrade to equal 32/32 weighting */
4215 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4216 h->implicit_weight[ref0][ref1] = 32;
4218 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4220 h->implicit_weight[ref0][ref1] = 32;
/* Drops the decoder's reference on a picture; the delayed-output picture
 * and pictures still queued in delayed_pic[] are treated specially.
 * NOTE(review): body is heavily elided in this excerpt — the actual
 * reference-clearing statements are not visible; confirm against the
 * complete file before relying on details. */
4225 static inline void unreference_pic(H264Context *h, Picture *pic){
4228 if(pic == h->delayed_output_pic)
4231 for(i = 0; h->delayed_pic[i]; i++)
4232 if(pic == h->delayed_pic[i]){
4240 * instantaneous decoder refresh.
/* Handles an IDR: releases every long-term and short-term reference
 * picture and resets both reference counts to zero. */
4242 static void idr(H264Context *h){
4245 for(i=0; i<16; i++){
4246 if (h->long_ref[i] != NULL) {
4247 unreference_pic(h, h->long_ref[i]);
4248 h->long_ref[i]= NULL;
4251 h->long_ref_count=0;
4253 for(i=0; i<h->short_ref_count; i++){
4254 unreference_pic(h, h->short_ref[i]);
4255 h->short_ref[i]= NULL;
4257 h->short_ref_count=0;
4260 /* forget old pics after a seek */
/* avctx->flush() callback: clears the delayed-picture queue, the
 * delayed-output picture, and the current picture's reference flag so
 * stale DPB state cannot leak across a seek. */
4261 static void flush_dpb(AVCodecContext *avctx){
4262 H264Context *h= avctx->priv_data;
4264 for(i=0; i<16; i++) {
4265 if(h->delayed_pic[i])
4266 h->delayed_pic[i]->reference= 0;
4267 h->delayed_pic[i]= NULL;
4269 if(h->delayed_output_pic)
4270 h->delayed_output_pic->reference= 0;
4271 h->delayed_output_pic= NULL;
4273 if(h->s.current_picture_ptr)
4274 h->s.current_picture_ptr->reference= 0;
4279 * @return the removed picture or NULL if an error occurs
/* Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array and decrementing the count.
 * Emits MMCO debug traces when FF_DEBUG_MMCO is set.
 * NOTE(review): the return statements are elided from this excerpt. */
4281 static Picture * remove_short(H264Context *h, int frame_num){
4282 MpegEncContext * const s = &h->s;
4285 if(s->avctx->debug&FF_DEBUG_MMCO)
4286 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4288 for(i=0; i<h->short_ref_count; i++){
4289 Picture *pic= h->short_ref[i];
4290 if(s->avctx->debug&FF_DEBUG_MMCO)
4291 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4292 if(pic->frame_num == frame_num){
4293 h->short_ref[i]= NULL;
/* close the gap left by the removed entry */
4294 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4295 h->short_ref_count--;
4304 * @return the removed picture or NULL if an error occurs
/* Clears long-term reference slot i and decrements long_ref_count if a
 * picture was actually stored there.  (Return statement elided from
 * this excerpt.) */
4306 static Picture * remove_long(H264Context *h, int i){
4309 pic= h->long_ref[i];
4310 h->long_ref[i]= NULL;
4311 if(pic) h->long_ref_count--;
4317 * print short term list
/* Debug helper: dumps the short-term reference list (index, frame_num,
 * poc, data pointer) when FF_DEBUG_MMCO is enabled. */
4319 static void print_short_term(H264Context *h) {
4321 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4322 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4323 for(i=0; i<h->short_ref_count; i++){
4324 Picture *pic= h->short_ref[i];
4325 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4331 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when
 * FF_DEBUG_MMCO is enabled.  (The NULL-slot guard between the loop and
 * the av_log is elided from this excerpt.) */
4333 static void print_long_term(H264Context *h) {
4335 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4336 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4337 for(i = 0; i < 16; i++){
4338 Picture *pic= h->long_ref[i];
4340 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4347 * Executes the reference picture marking (memory management control operations).
/* Applies the decoded MMCO list to the DPB state: unmarking short/long
 * refs, promoting short->long, marking the current picture long-term,
 * trimming the max long-term index, and full reset.  If the current
 * picture was not marked long-term, it is pushed onto the front of the
 * sliding-window short-term list.
 * NOTE(review): several case labels, break statements and braces are
 * elided from this excerpt. */
4349 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4350 MpegEncContext * const s = &h->s;
4352 int current_is_long=0;
4355 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4356 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4358 for(i=0; i<mmco_count; i++){
4359 if(s->avctx->debug&FF_DEBUG_MMCO)
4360 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4362 switch(mmco[i].opcode){
4363 case MMCO_SHORT2UNUSED:
4364 pic= remove_short(h, mmco[i].short_frame_num);
4366 unreference_pic(h, pic);
4367 else if(s->avctx->debug&FF_DEBUG_MMCO)
4368 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4370 case MMCO_SHORT2LONG:
/* free any picture already occupying the target long-term slot */
4371 pic= remove_long(h, mmco[i].long_index);
4372 if(pic) unreference_pic(h, pic);
4374 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4375 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4376 h->long_ref_count++;
4378 case MMCO_LONG2UNUSED:
4379 pic= remove_long(h, mmco[i].long_index);
4381 unreference_pic(h, pic);
4382 else if(s->avctx->debug&FF_DEBUG_MMCO)
4383 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (elided case label, presumably MMCO_LONG) mark current pic long-term */
4386 pic= remove_long(h, mmco[i].long_index);
4387 if(pic) unreference_pic(h, pic);
4389 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4390 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4391 h->long_ref_count++;
4395 case MMCO_SET_MAX_LONG:
4396 assert(mmco[i].long_index <= 16);
4397 // just remove the long term which index is greater than new max
4398 for(j = mmco[i].long_index; j<16; j++){
4399 pic = remove_long(h, j);
4400 if (pic) unreference_pic(h, pic);
/* (elided case, presumably MMCO_RESET): drop every reference */
4404 while(h->short_ref_count){
4405 pic= remove_short(h, h->short_ref[0]->frame_num);
4406 unreference_pic(h, pic);
4408 for(j = 0; j < 16; j++) {
4409 pic= remove_long(h, j);
4410 if(pic) unreference_pic(h, pic);
4417 if(!current_is_long){
/* sliding window: evict a duplicate frame_num if present, then push
 * the current picture at the head of the short-term list */
4418 pic= remove_short(h, s->current_picture_ptr->frame_num);
4420 unreference_pic(h, pic);
4421 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4424 if(h->short_ref_count)
4425 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4427 h->short_ref[0]= s->current_picture_ptr;
4428 h->short_ref[0]->long_ref=0;
4429 h->short_ref_count++;
4432 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header: for IDR slices,
 * no_output_of_prior_pics / long_term_reference flags; otherwise the
 * adaptive MMCO list (up to MAX_MMCO_COUNT), or an implicit sliding-
 * window SHORT2UNUSED when the reference buffer is full.
 * NOTE(review): excerpt elided — mmco count bookkeeping and returns are
 * not visible. */
4437 static int decode_ref_pic_marking(H264Context *h){
4438 MpegEncContext * const s = &h->s;
4441 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4442 s->broken_link= get_bits1(&s->gb) -1;
4443 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4444 if(h->mmco[0].long_index == -1)
4447 h->mmco[0].opcode= MMCO_LONG;
4451 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4452 for(i= 0; i<MAX_MMCO_COUNT; i++) {
/* NOTE(review): stray double semicolon below — harmless empty
 * statement, present in the original source */
4453 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4455 h->mmco[i].opcode= opcode;
4456 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4457 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4458 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4459 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4463 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4464 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4465 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4471 if(opcode > MMCO_LONG){
4472 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4475 if(opcode == MMCO_END)
/* implicit sliding window: evict the oldest short-term ref */
4480 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4482 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4483 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4484 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Computes the picture order count (POC) for the current picture using
 * one of the three poc_type derivations (0: msb/lsb wraparound,
 * 1: expected-delta cycle, 2: frame_num based), then stores the field
 * POCs and the frame POC (min of both fields).
 * NOTE(review): excerpt elided — poc_type 0 msb/lsb reset and the
 * field_poc[0] assignment for type 0, among others, are not visible. */
4494 static int init_poc(H264Context *h){
4495 MpegEncContext * const s = &h->s;
4496 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4499 if(h->nal_unit_type == NAL_IDR_SLICE){
4500 h->frame_num_offset= 0;
/* frame_num wrapped -> advance the offset by one wrap period */
4502 if(h->frame_num < h->prev_frame_num)
4503 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4505 h->frame_num_offset= h->prev_frame_num_offset;
4508 if(h->sps.poc_type==0){
4509 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4511 if(h->nal_unit_type == NAL_IDR_SLICE){
/* detect poc_lsb wraparound in either direction and adjust msb */
4516 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4517 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4518 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4519 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4521 h->poc_msb = h->prev_poc_msb;
4522 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4524 field_poc[1] = h->poc_msb + h->poc_lsb;
4525 if(s->picture_structure == PICT_FRAME)
4526 field_poc[1] += h->delta_poc_bottom;
4527 }else if(h->sps.poc_type==1){
4528 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4531 if(h->sps.poc_cycle_length != 0)
4532 abs_frame_num = h->frame_num_offset + h->frame_num;
4536 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4539 expected_delta_per_poc_cycle = 0;
4540 for(i=0; i < h->sps.poc_cycle_length; i++)
4541 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4543 if(abs_frame_num > 0){
4544 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4545 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4547 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4548 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4549 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4553 if(h->nal_ref_idc == 0)
4554 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4556 field_poc[0] = expectedpoc + h->delta_poc[0];
4557 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4559 if(s->picture_structure == PICT_FRAME)
4560 field_poc[1] += h->delta_poc[1];
/* (elided) poc_type 2: POC derived directly from frame_num */
4563 if(h->nal_unit_type == NAL_IDR_SLICE){
4566 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4567 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4573 if(s->picture_structure != PICT_BOTTOM_FIELD)
4574 s->current_picture_ptr->field_poc[0]= field_poc[0];
4575 if(s->picture_structure != PICT_TOP_FIELD)
4576 s->current_picture_ptr->field_poc[1]= field_poc[1];
4577 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4578 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4584 * decodes a slice header.
4585 * this will allso call MPV_common_init() and frame_start() as needed
/* Parses the full slice header: slice type, PPS/SPS selection, picture
 * geometry (with context re-init when dimensions change), scan-table
 * setup keyed to the active IDCT implementation, frame_num, picture
 * structure / MBAFF, POC syntax, reference counts and list reordering,
 * weighted prediction tables, ref pic marking, QP, and the deblocking
 * filter parameters.
 * NOTE(review): this excerpt elides many interior lines (error returns,
 * else branches, braces, an #if 0 FMO region); comments below cover
 * only the visible statements. */
4587 static int decode_slice_header(H264Context *h){
4588 MpegEncContext * const s = &h->s;
4589 int first_mb_in_slice, pps_id;
4590 int num_ref_idx_active_override_flag;
4591 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4593 int default_ref_list_done = 0;
4595 s->current_picture.reference= h->nal_ref_idc != 0;
4596 s->dropable= h->nal_ref_idc == 0;
4598 first_mb_in_slice= get_ue_golomb(&s->gb);
4600 slice_type= get_ue_golomb(&s->gb);
4602 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type values 5..9 mean "fixed for the whole picture" */
4607 h->slice_type_fixed=1;
4609 h->slice_type_fixed=0;
4611 slice_type= slice_type_map[ slice_type ];
4612 if (slice_type == I_TYPE
4613 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4614 default_ref_list_done = 1;
4616 h->slice_type= slice_type;
4618 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4620 pps_id= get_ue_golomb(&s->gb);
4622 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4625 h->pps= h->pps_buffer[pps_id];
/* slice_group_count==0 marks an unpopulated PPS slot */
4626 if(h->pps.slice_group_count == 0){
4627 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4631 h->sps= h->sps_buffer[ h->pps.sps_id ];
4632 if(h->sps.log2_max_frame_num == 0){
4633 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4637 if(h->dequant_coeff_pps != pps_id){
4638 h->dequant_coeff_pps = pps_id;
4639 init_dequant_tables(h);
4642 s->mb_width= h->sps.mb_width;
4643 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4645 h->b_stride= s->mb_width*4;
4646 h->b8_stride= s->mb_width*2;
4648 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4649 if(h->sps.frame_mbs_only_flag)
4650 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4652 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4654 if (s->context_initialized
4655 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4659 if (!s->context_initialized) {
4660 if (MPV_common_init(s) < 0)
/* scan tables: identity copies for the C IDCT, permuted otherwise */
4663 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4664 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4665 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4668 for(i=0; i<16; i++){
4669 #define T(x) (x>>2) | ((x<<2) & 0xF)
4670 h->zigzag_scan[i] = T(zigzag_scan[i]);
4671 h-> field_scan[i] = T( field_scan[i]);
4675 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4676 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4677 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4678 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4679 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4682 for(i=0; i<64; i++){
4683 #define T(x) (x>>3) | ((x&7)<<3)
4684 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4685 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4686 h->field_scan8x8[i] = T(field_scan8x8[i]);
4687 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* qp==0 tables: unpermuted scans when transform bypass is active */
4691 if(h->sps.transform_bypass){ //FIXME same ugly
4692 h->zigzag_scan_q0 = zigzag_scan;
4693 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4694 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4695 h->field_scan_q0 = field_scan;
4696 h->field_scan8x8_q0 = field_scan8x8;
4697 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4699 h->zigzag_scan_q0 = h->zigzag_scan;
4700 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4701 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4702 h->field_scan_q0 = h->field_scan;
4703 h->field_scan8x8_q0 = h->field_scan8x8;
4704 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4709 s->avctx->width = s->width;
4710 s->avctx->height = s->height;
4711 s->avctx->sample_aspect_ratio= h->sps.sar;
4712 if(!s->avctx->sample_aspect_ratio.den)
4713 s->avctx->sample_aspect_ratio.den = 1;
4715 if(h->sps.timing_info_present_flag){
4716 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* old x264 builds wrote half the spec time_scale; compensate */
4717 if(h->x264_build > 0 && h->x264_build < 44)
4718 s->avctx->time_base.den *= 2;
4719 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4720 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4724 if(h->slice_num == 0){
4725 if(frame_start(h) < 0)
4729 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4730 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4733 h->mb_aff_frame = 0;
4734 if(h->sps.frame_mbs_only_flag){
4735 s->picture_structure= PICT_FRAME;
4737 if(get_bits1(&s->gb)) { //field_pic_flag
4738 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4739 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4741 s->picture_structure= PICT_FRAME;
4742 h->mb_aff_frame = h->sps.mb_aff;
4746 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4747 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4748 if(s->mb_y >= s->mb_height){
4752 if(s->picture_structure==PICT_FRAME){
4753 h->curr_pic_num= h->frame_num;
4754 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4756 h->curr_pic_num= 2*h->frame_num;
4757 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4760 if(h->nal_unit_type == NAL_IDR_SLICE){
4761 get_ue_golomb(&s->gb); /* idr_pic_id */
4764 if(h->sps.poc_type==0){
4765 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4767 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4768 h->delta_poc_bottom= get_se_golomb(&s->gb);
4772 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4773 h->delta_poc[0]= get_se_golomb(&s->gb);
4775 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4776 h->delta_poc[1]= get_se_golomb(&s->gb);
4781 if(h->pps.redundant_pic_cnt_present){
4782 h->redundant_pic_count= get_ue_golomb(&s->gb);
4785 //set defaults, might be overriden a few line later
4786 h->ref_count[0]= h->pps.ref_count[0];
4787 h->ref_count[1]= h->pps.ref_count[1];
4789 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4790 if(h->slice_type == B_TYPE){
4791 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4792 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4793 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4795 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4797 if(num_ref_idx_active_override_flag){
4798 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4799 if(h->slice_type==B_TYPE)
4800 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4802 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4803 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4809 if(!default_ref_list_done){
4810 fill_default_ref_list(h);
4813 if(decode_ref_pic_list_reordering(h) < 0)
4816 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4817 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4818 pred_weight_table(h);
4819 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4820 implicit_weight_table(h);
4824 if(s->current_picture.reference)
4825 decode_ref_pic_marking(h);
4828 fill_mbaff_ref_list(h);
4830 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4831 h->cabac_init_idc = get_ue_golomb(&s->gb);
4833 h->last_qscale_diff = 0;
4834 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4835 if(s->qscale<0 || s->qscale>51){
4836 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4839 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4840 //FIXME qscale / qp ... stuff
4841 if(h->slice_type == SP_TYPE){
4842 get_bits1(&s->gb); /* sp_for_switch_flag */
4844 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4845 get_se_golomb(&s->gb); /* slice_qs_delta */
4848 h->deblocking_filter = 1;
4849 h->slice_alpha_c0_offset = 0;
4850 h->slice_beta_offset = 0;
4851 if( h->pps.deblocking_filter_parameters_present ) {
4852 h->deblocking_filter= get_ue_golomb(&s->gb);
4853 if(h->deblocking_filter < 2)
4854 h->deblocking_filter^= 1; // 1<->0
4856 if( h->deblocking_filter ) {
4857 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4858 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4861 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4862 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4863 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4864 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4865 h->deblocking_filter= 0;
/* NOTE(review): the literal '?' below is a bit-width placeholder; in
 * the complete source this FMO line sits inside a compiled-out region
 * (#if 0) elided from this excerpt — confirm against the full file */
4868 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4869 slice_group_change_cycle= get_bits(&s->gb, ?);
4874 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4875 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4877 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4878 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4880 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4882 av_get_pict_type_char(h->slice_type),
4883 pps_id, h->frame_num,
4884 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4885 h->ref_count[0], h->ref_count[1],
4887 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4889 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* non-reference frames may use the faster 2-tap qpel when FAST is set */
4893 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4894 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4895 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4897 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4898 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/* Reads the CAVLC level_prefix: counts leading zero bits in the cache
 * via av_log2 (log = position of the terminating 1 bit), skips them,
 * and returns the prefix length (return elided from this excerpt). */
4907 static inline int get_level_prefix(GetBitContext *gb){
4911 OPEN_READER(re, gb);
4912 UPDATE_CACHE(re, gb);
4913 buf=GET_CACHE(re, gb);
4915 log= 32 - av_log2(buf);
4917 print_bin(buf>>(32-log), log);
4918 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4921 LAST_SKIP_BITS(re, gb, log);
4922 CLOSE_READER(re, gb);
/* Returns whether the 8x8 DCT may be used for the current macroblock:
 * disallowed if any sub-macroblock partition is smaller than 8x8, or is
 * DIRECT without direct_8x8_inference.  (Loop header and returns are
 * elided from this excerpt.) */
4927 static inline int get_dct8x8_allowed(H264Context *h){
4930 if(!IS_SUB_8X8(h->sub_mb_type[i])
4931 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4938 * decodes a residual block.
4939 * @param n block index
4940 * @param scantable scantable
4941 * @param max_coeff number of coefficients in the block
4942 * @return <0 if an error occured
/* CAVLC residual decode: reads coeff_token (table chosen by predicted
 * nonzero count), trailing ones, the level prefix/suffix codes,
 * total_zeros and run_before, then scatters the levels into `block`
 * via `scantable` — dequantizing with `qmul` on the second path.
 * NOTE(review): excerpt elided — some braces, early returns and the
 * branch split between the qmul/no-qmul output loops are not visible. */
4944 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4945 MpegEncContext * const s = &h->s;
4946 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4948 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4950 //FIXME put trailing_onex into the context
4952 if(n == CHROMA_DC_BLOCK_INDEX){
4953 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4954 total_coeff= coeff_token>>2;
4956 if(n == LUMA_DC_BLOCK_INDEX){
4957 total_coeff= pred_non_zero_count(h, 0);
4958 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4959 total_coeff= coeff_token>>2;
4961 total_coeff= pred_non_zero_count(h, n);
4962 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4963 total_coeff= coeff_token>>2;
4964 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4968 //FIXME set last_non_zero?
/* coeff_token packs trailing_ones in the low 2 bits */
4973 trailing_ones= coeff_token&3;
4974 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4975 assert(total_coeff<=16);
/* trailing ones carry only a sign bit each: 1 -> -1, 0 -> +1 */
4977 for(i=0; i<trailing_ones; i++){
4978 level[i]= 1 - 2*get_bits1(gb);
4982 int level_code, mask;
4983 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4984 int prefix= get_level_prefix(gb);
4986 //first coefficient has suffix_length equal to 0 or 1
4987 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4989 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4991 level_code= (prefix<<suffix_length); //part
4992 }else if(prefix==14){
4994 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4996 level_code= prefix + get_bits(gb, 4); //part
4997 }else if(prefix==15){
4998 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4999 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
5001 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5005 if(trailing_ones < 3) level_code += 2;
/* zigzag-map level_code to a signed level: even -> +, odd -> - */
5010 mask= -(level_code&1);
5011 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5014 //remaining coefficients have suffix_length > 0
5015 for(;i<total_coeff;i++) {
5016 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5017 prefix = get_level_prefix(gb);
5019 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5020 }else if(prefix==15){
5021 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5023 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5026 mask= -(level_code&1);
5027 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* grow suffix_length once levels exceed the current limit */
5028 if(level_code > suffix_limit[suffix_length])
5033 if(total_coeff == max_coeff)
5036 if(n == CHROMA_DC_BLOCK_INDEX)
5037 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5039 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5042 coeff_num = zeros_left + total_coeff - 1;
5043 j = scantable[coeff_num];
/* first output path (no qmul): raw levels into scan positions */
5045 block[j] = level[0];
5046 for(i=1;i<total_coeff;i++) {
5049 else if(zeros_left < 7){
5050 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5052 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5054 zeros_left -= run_before;
5055 coeff_num -= 1 + run_before;
5056 j= scantable[ coeff_num ];
/* second output path: dequantize with qmul while scattering */
5061 block[j] = (level[0] * qmul[j] + 32)>>6;
5062 for(i=1;i<total_coeff;i++) {
5065 else if(zeros_left < 7){
5066 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5068 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5070 zeros_left -= run_before;
5071 coeff_num -= 1 + run_before;
5072 j= scantable[ coeff_num ];
5074 block[j]= (level[i] * qmul[j] + 32)>>6;
/* a negative zeros_left means the bitstream was inconsistent */
5079 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For MBAFF skip runs: predicts the field decoding flag of the current
 * MB pair from the left neighbour if it belongs to this slice, else the
 * top neighbour, else a fallback (elided from this excerpt). */
5086 static void predict_field_decoding_flag(H264Context *h){
5087 MpegEncContext * const s = &h->s;
5088 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5089 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5090 ? s->current_picture.mb_type[mb_xy-1]
5091 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5092 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5094 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5098 * decodes a P_SKIP or B_SKIP macroblock
/* Reconstructs a skipped macroblock: zeroes the nonzero-count caches,
 * builds a synthetic 16x16 skip mb_type (direct-predicted for B
 * slices, P-skip motion otherwise), then writes back motion, mb_type,
 * qscale and the slice table entry.
 * NOTE(review): excerpt elided — mb_type initialization and the MBAFF
 * condition guarding MB_TYPE_INTERLACED are not visible. */
5100 static void decode_mb_skip(H264Context *h){
5101 MpegEncContext * const s = &h->s;
5102 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5105 memset(h->non_zero_count[mb_xy], 0, 16);
5106 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5109 mb_type|= MB_TYPE_INTERLACED;
5111 if( h->slice_type == B_TYPE )
5113 // just for fill_caches. pred_direct_motion will set the real mb_type
5114 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5116 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5117 pred_direct_motion(h, &mb_type);
5118 mb_type|= MB_TYPE_SKIP;
/* (elided else branch): P-skip path */
5123 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5125 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5126 pred_pskip_motion(h, &mx, &my);
5127 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5128 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5131 write_back_motion(h, mb_type);
5132 s->current_picture.mb_type[mb_xy]= mb_type;
5133 s->current_picture.qscale_table[mb_xy]= s->qscale;
5134 h->slice_table[ mb_xy ]= h->slice_num;
5135 h->prev_mb_skipped= 1;
5139 * decodes a macroblock
5140 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5142 static int decode_mb_cavlc(H264Context *h){
5143 MpegEncContext * const s = &h->s;
5144 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5145 int mb_type, partition_count, cbp;
5146 int dct8x8_allowed= h->pps.transform_8x8_mode;
5148 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5150 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5151 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5153 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5154 if(s->mb_skip_run==-1)
5155 s->mb_skip_run= get_ue_golomb(&s->gb);
5157 if (s->mb_skip_run--) {
5158 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5159 if(s->mb_skip_run==0)
5160 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5162 predict_field_decoding_flag(h);
5169 if( (s->mb_y&1) == 0 )
5170 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5172 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5174 h->prev_mb_skipped= 0;
5176 mb_type= get_ue_golomb(&s->gb);
5177 if(h->slice_type == B_TYPE){
5179 partition_count= b_mb_type_info[mb_type].partition_count;
5180 mb_type= b_mb_type_info[mb_type].type;
5183 goto decode_intra_mb;
5185 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5187 partition_count= p_mb_type_info[mb_type].partition_count;
5188 mb_type= p_mb_type_info[mb_type].type;
5191 goto decode_intra_mb;
5194 assert(h->slice_type == I_TYPE);
5197 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5201 cbp= i_mb_type_info[mb_type].cbp;
5202 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5203 mb_type= i_mb_type_info[mb_type].type;
5207 mb_type |= MB_TYPE_INTERLACED;
5209 h->slice_table[ mb_xy ]= h->slice_num;
5211 if(IS_INTRA_PCM(mb_type)){
5214 // we assume these blocks are very rare so we dont optimize it
5215 align_get_bits(&s->gb);
5217 // The pixels are stored in the same order as levels in h->mb array.
5218 for(y=0; y<16; y++){
5219 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5220 for(x=0; x<16; x++){
5221 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5222 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5226 const int index= 256 + 4*(y&3) + 32*(y>>2);
5228 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5229 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5233 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5235 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5236 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5240 // In deblocking, the quantizer is 0
5241 s->current_picture.qscale_table[mb_xy]= 0;
5242 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5243 // All coeffs are present
5244 memset(h->non_zero_count[mb_xy], 16, 16);
5246 s->current_picture.mb_type[mb_xy]= mb_type;
5251 h->ref_count[0] <<= 1;
5252 h->ref_count[1] <<= 1;
5255 fill_caches(h, mb_type, 0);
5258 if(IS_INTRA(mb_type)){
5259 // init_top_left_availability(h);
5260 if(IS_INTRA4x4(mb_type)){
5263 if(dct8x8_allowed && get_bits1(&s->gb)){
5264 mb_type |= MB_TYPE_8x8DCT;
5268 // fill_intra4x4_pred_table(h);
5269 for(i=0; i<16; i+=di){
5270 int mode= pred_intra_mode(h, i);
5272 if(!get_bits1(&s->gb)){
5273 const int rem_mode= get_bits(&s->gb, 3);
5274 mode = rem_mode + (rem_mode >= mode);
5278 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5280 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5282 write_back_intra_pred_mode(h);
5283 if( check_intra4x4_pred_mode(h) < 0)
5286 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5287 if(h->intra16x16_pred_mode < 0)
5290 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5292 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5293 if(h->chroma_pred_mode < 0)
5295 }else if(partition_count==4){
5296 int i, j, sub_partition_count[4], list, ref[2][4];
5298 if(h->slice_type == B_TYPE){
5300 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5301 if(h->sub_mb_type[i] >=13){
5302 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5305 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5306 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5308 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5309 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5310 pred_direct_motion(h, &mb_type);
5311 h->ref_cache[0][scan8[4]] =
5312 h->ref_cache[1][scan8[4]] =
5313 h->ref_cache[0][scan8[12]] =
5314 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5317 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5319 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5320 if(h->sub_mb_type[i] >=4){
5321 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5324 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5325 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5329 for(list=0; list<2; list++){
5330 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5331 if(ref_count == 0) continue;
5333 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5334 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5335 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5344 dct8x8_allowed = get_dct8x8_allowed(h);
5346 for(list=0; list<2; list++){
5347 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5348 if(ref_count == 0) continue;
5351 if(IS_DIRECT(h->sub_mb_type[i])) {
5352 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5355 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5356 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5358 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5359 const int sub_mb_type= h->sub_mb_type[i];
5360 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5361 for(j=0; j<sub_partition_count[i]; j++){
5363 const int index= 4*i + block_width*j;
5364 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5365 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5366 mx += get_se_golomb(&s->gb);
5367 my += get_se_golomb(&s->gb);
5368 tprintf("final mv:%d %d\n", mx, my);
5370 if(IS_SUB_8X8(sub_mb_type)){
5371 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5372 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5373 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5374 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5375 }else if(IS_SUB_8X4(sub_mb_type)){
5376 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5377 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5378 }else if(IS_SUB_4X8(sub_mb_type)){
5379 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5380 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5382 assert(IS_SUB_4X4(sub_mb_type));
5383 mv_cache[ 0 ][0]= mx;
5384 mv_cache[ 0 ][1]= my;
5388 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5394 }else if(IS_DIRECT(mb_type)){
5395 pred_direct_motion(h, &mb_type);
5396 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5398 int list, mx, my, i;
5399 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5400 if(IS_16X16(mb_type)){
5401 for(list=0; list<2; list++){
5402 if(h->ref_count[list]>0){
5403 if(IS_DIR(mb_type, 0, list)){
5404 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5405 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5407 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5410 for(list=0; list<2; list++){
5411 if(IS_DIR(mb_type, 0, list)){
5412 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5413 mx += get_se_golomb(&s->gb);
5414 my += get_se_golomb(&s->gb);
5415 tprintf("final mv:%d %d\n", mx, my);
5417 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5419 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5422 else if(IS_16X8(mb_type)){
5423 for(list=0; list<2; list++){
5424 if(h->ref_count[list]>0){
5426 if(IS_DIR(mb_type, i, list)){
5427 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5428 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5430 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5434 for(list=0; list<2; list++){
5436 if(IS_DIR(mb_type, i, list)){
5437 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5438 mx += get_se_golomb(&s->gb);
5439 my += get_se_golomb(&s->gb);
5440 tprintf("final mv:%d %d\n", mx, my);
5442 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5444 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5448 assert(IS_8X16(mb_type));
5449 for(list=0; list<2; list++){
5450 if(h->ref_count[list]>0){
5452 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5453 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5454 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5456 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5460 for(list=0; list<2; list++){
5462 if(IS_DIR(mb_type, i, list)){
5463 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5464 mx += get_se_golomb(&s->gb);
5465 my += get_se_golomb(&s->gb);
5466 tprintf("final mv:%d %d\n", mx, my);
5468 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5470 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5476 if(IS_INTER(mb_type))
5477 write_back_motion(h, mb_type);
5479 if(!IS_INTRA16x16(mb_type)){
5480 cbp= get_ue_golomb(&s->gb);
5482 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5486 if(IS_INTRA4x4(mb_type))
5487 cbp= golomb_to_intra4x4_cbp[cbp];
5489 cbp= golomb_to_inter_cbp[cbp];
5493 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5494 if(get_bits1(&s->gb))
5495 mb_type |= MB_TYPE_8x8DCT;
5497 s->current_picture.mb_type[mb_xy]= mb_type;
5499 if(cbp || IS_INTRA16x16(mb_type)){
5500 int i8x8, i4x4, chroma_idx;
5501 int chroma_qp, dquant;
5502 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5503 const uint8_t *scan, *scan8x8, *dc_scan;
5505 // fill_non_zero_count_cache(h);
5507 if(IS_INTERLACED(mb_type)){
5508 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5509 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5510 dc_scan= luma_dc_field_scan;
5512 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5513 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5514 dc_scan= luma_dc_zigzag_scan;
5517 dquant= get_se_golomb(&s->gb);
5519 if( dquant > 25 || dquant < -26 ){
5520 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5524 s->qscale += dquant;
5525 if(((unsigned)s->qscale) > 51){
5526 if(s->qscale<0) s->qscale+= 52;
5527 else s->qscale-= 52;
5530 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5531 if(IS_INTRA16x16(mb_type)){
5532 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5533 return -1; //FIXME continue if partitioned and other return -1 too
5536 assert((cbp&15) == 0 || (cbp&15) == 15);
5539 for(i8x8=0; i8x8<4; i8x8++){
5540 for(i4x4=0; i4x4<4; i4x4++){
5541 const int index= i4x4 + 4*i8x8;
5542 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5548 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5551 for(i8x8=0; i8x8<4; i8x8++){
5552 if(cbp & (1<<i8x8)){
5553 if(IS_8x8DCT(mb_type)){
5554 DCTELEM *buf = &h->mb[64*i8x8];
5556 for(i4x4=0; i4x4<4; i4x4++){
5557 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5558 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5561 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5562 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5564 for(i4x4=0; i4x4<4; i4x4++){
5565 const int index= i4x4 + 4*i8x8;
5567 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5573 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5574 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5580 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5581 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5587 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5588 for(i4x4=0; i4x4<4; i4x4++){
5589 const int index= 16 + 4*chroma_idx + i4x4;
5590 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5596 uint8_t * const nnz= &h->non_zero_count_cache[0];
5597 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5598 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5601 uint8_t * const nnz= &h->non_zero_count_cache[0];
5602 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5603 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5604 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5606 s->current_picture.qscale_table[mb_xy]= s->qscale;
5607 write_back_non_zero_count(h);
5610 h->ref_count[0] >>= 1;
5611 h->ref_count[1] >>= 1;
5617 static int decode_cabac_field_decoding_flag(H264Context *h) {
5618 MpegEncContext * const s = &h->s;
5619 const int mb_x = s->mb_x;
5620 const int mb_y = s->mb_y & ~1;
5621 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5622 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5624 unsigned int ctx = 0;
5626 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5629 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5633 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
5636 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5637 uint8_t *state= &h->cabac_state[ctx_base];
5641 MpegEncContext * const s = &h->s;
5642 const int mba_xy = h->left_mb_xy[0];
5643 const int mbb_xy = h->top_mb_xy;
5645 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5647 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5649 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5650 return 0; /* I4x4 */
5653 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5654 return 0; /* I4x4 */
5657 if( get_cabac_terminate( &h->cabac ) )
5658 return 25; /* PCM */
5660 mb_type = 1; /* I16x16 */
5661 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5662 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5663 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5664 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5665 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
5669 static int decode_cabac_mb_type( H264Context *h ) {
5670 MpegEncContext * const s = &h->s;
5672 if( h->slice_type == I_TYPE ) {
5673 return decode_cabac_intra_mb_type(h, 3, 1);
5674 } else if( h->slice_type == P_TYPE ) {
5675 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5677 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5678 /* P_L0_D16x16, P_8x8 */
5679 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5681 /* P_L0_D8x16, P_L0_D16x8 */
5682 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
5685 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5687 } else if( h->slice_type == B_TYPE ) {
5688 const int mba_xy = h->left_mb_xy[0];
5689 const int mbb_xy = h->top_mb_xy;
5693 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5695 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5698 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5699 return 0; /* B_Direct_16x16 */
5701 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5702 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5705 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5706 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5707 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5708 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5710 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5711 else if( bits == 13 ) {
5712 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5713 } else if( bits == 14 )
5714 return 11; /* B_L1_L0_8x16 */
5715 else if( bits == 15 )
5716 return 22; /* B_8x8 */
5718 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5719 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5721 /* TODO SI/SP frames? */
5726 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5727 MpegEncContext * const s = &h->s;
5731 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5732 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5735 && h->slice_table[mba_xy] == h->slice_num
5736 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5737 mba_xy += s->mb_stride;
5739 mbb_xy = mb_xy - s->mb_stride;
5741 && h->slice_table[mbb_xy] == h->slice_num
5742 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5743 mbb_xy -= s->mb_stride;
5745 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5747 int mb_xy = mb_x + mb_y*s->mb_stride;
5749 mbb_xy = mb_xy - s->mb_stride;
5752 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5754 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5757 if( h->slice_type == B_TYPE )
5759 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5762 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5765 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5768 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5769 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5770 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5772 if( mode >= pred_mode )
5778 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5779 const int mba_xy = h->left_mb_xy[0];
5780 const int mbb_xy = h->top_mb_xy;
5784 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5785 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5788 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5791 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5794 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5796 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5802 static const uint8_t block_idx_x[16] = {
5803 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5805 static const uint8_t block_idx_y[16] = {
5806 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5808 static const uint8_t block_idx_xy[4][4] = {
5815 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5820 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5822 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5825 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5830 x = block_idx_x[4*i8x8];
5831 y = block_idx_y[4*i8x8];
5835 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5836 cbp_a = h->left_cbp;
5837 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5843 /* No need to test for skip as we put 0 for skip block */
5844 /* No need to test for IPCM as we put 1 for IPCM block */
5846 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5847 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5852 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5853 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5857 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5863 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5867 cbp_a = (h->left_cbp>>4)&0x03;
5868 cbp_b = (h-> top_cbp>>4)&0x03;
5871 if( cbp_a > 0 ) ctx++;
5872 if( cbp_b > 0 ) ctx += 2;
5873 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5877 if( cbp_a == 2 ) ctx++;
5878 if( cbp_b == 2 ) ctx += 2;
5879 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
5881 static int decode_cabac_mb_dqp( H264Context *h) {
5882 MpegEncContext * const s = &h->s;
5888 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5890 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5892 if( h->last_qscale_diff != 0 )
5895 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5901 if(val > 102) //prevent infinite loop
5908 return -(val + 1)/2;
5910 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5911 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5913 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5915 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5919 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5921 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5922 return 0; /* B_Direct_8x8 */
5923 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5924 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5926 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5927 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5928 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5931 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5932 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5936 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5937 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5940 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5941 int refa = h->ref_cache[list][scan8[n] - 1];
5942 int refb = h->ref_cache[list][scan8[n] - 8];
5946 if( h->slice_type == B_TYPE) {
5947 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5949 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5958 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5968 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5969 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5970 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5971 int ctxbase = (l == 0) ? 40 : 47;
5976 else if( amvd > 32 )
5981 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5986 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5994 while( get_cabac_bypass( &h->cabac ) ) {
5999 if( get_cabac_bypass( &h->cabac ) )
6003 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
6007 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6012 nza = h->left_cbp&0x100;
6013 nzb = h-> top_cbp&0x100;
6014 } else if( cat == 1 || cat == 2 ) {
6015 nza = h->non_zero_count_cache[scan8[idx] - 1];
6016 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6017 } else if( cat == 3 ) {
6018 nza = (h->left_cbp>>(6+idx))&0x01;
6019 nzb = (h-> top_cbp>>(6+idx))&0x01;
6022 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6023 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6032 return ctx + 4 * cat;
6035 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6036 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6037 static const int significant_coeff_flag_offset[2][6] = {
6038 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6039 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6041 static const int last_coeff_flag_offset[2][6] = {
6042 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6043 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6045 static const int coeff_abs_level_m1_offset[6] = {
6046 227+0, 227+10, 227+20, 227+30, 227+39, 426
6048 static const int significant_coeff_flag_offset_8x8[2][63] = {
6049 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6050 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6051 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6052 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6053 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6054 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6055 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6056 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6058 static const int last_coeff_flag_offset_8x8[63] = {
6059 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6060 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6061 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6062 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6068 int coeff_count = 0;
6071 int abslevelgt1 = 0;
6073 uint8_t *significant_coeff_ctx_base;
6074 uint8_t *last_coeff_ctx_base;
6075 uint8_t *abs_level_m1_ctx_base;
6077 /* cat: 0-> DC 16x16 n = 0
6078 * 1-> AC 16x16 n = luma4x4idx
6079 * 2-> Luma4x4 n = luma4x4idx
6080 * 3-> DC Chroma n = iCbCr
6081 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6082 * 5-> Luma8x8 n = 4 * luma8x8idx
6085 /* read coded block flag */
6087 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6088 if( cat == 1 || cat == 2 )
6089 h->non_zero_count_cache[scan8[n]] = 0;
6091 h->non_zero_count_cache[scan8[16+n]] = 0;
6097 significant_coeff_ctx_base = h->cabac_state
6098 + significant_coeff_flag_offset[MB_FIELD][cat];
6099 last_coeff_ctx_base = h->cabac_state
6100 + last_coeff_flag_offset[MB_FIELD][cat];
6101 abs_level_m1_ctx_base = h->cabac_state
6102 + coeff_abs_level_m1_offset[cat];
6105 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6106 for(last= 0; last < coefs; last++) { \
6107 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6108 if( get_cabac( &h->cabac, sig_ctx )) { \
6109 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6110 index[coeff_count++] = last; \
6111 if( get_cabac( &h->cabac, last_ctx ) ) { \
6117 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6118 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6120 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6122 if( last == max_coeff -1 ) {
6123 index[coeff_count++] = last;
6125 assert(coeff_count > 0);
6128 h->cbp_table[mb_xy] |= 0x100;
6129 else if( cat == 1 || cat == 2 )
6130 h->non_zero_count_cache[scan8[n]] = coeff_count;
6132 h->cbp_table[mb_xy] |= 0x40 << n;
6134 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6137 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6140 for( i = coeff_count - 1; i >= 0; i-- ) {
6141 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6142 int j= scantable[index[i]];
6144 if( get_cabac( &h->cabac, ctx ) == 0 ) {
6146 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6149 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6150 else block[j] = ( qmul[j] + 32) >> 6;
6156 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6157 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6161 if( coeff_abs >= 15 ) {
6163 while( get_cabac_bypass( &h->cabac ) ) {
6164 coeff_abs += 1 << j;
6169 if( get_cabac_bypass( &h->cabac ) )
6170 coeff_abs += 1 << j ;
6175 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6176 else block[j] = coeff_abs;
6178 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6179 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6188 static void inline compute_mb_neighbors(H264Context *h)
6190 MpegEncContext * const s = &h->s;
6191 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6192 h->top_mb_xy = mb_xy - s->mb_stride;
6193 h->left_mb_xy[0] = mb_xy - 1;
6195 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6196 const int top_pair_xy = pair_xy - s->mb_stride;
6197 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6198 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6199 const int curr_mb_frame_flag = !MB_FIELD;
6200 const int bottom = (s->mb_y & 1);
6202 ? !curr_mb_frame_flag // bottom macroblock
6203 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6205 h->top_mb_xy -= s->mb_stride;
6207 if (left_mb_frame_flag != curr_mb_frame_flag) {
6208 h->left_mb_xy[0] = pair_xy - 1;
6215 * decodes a macroblock
6216 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6218 static int decode_mb_cabac(H264Context *h) {
6219 MpegEncContext * const s = &h->s;
6220 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6221 int mb_type, partition_count, cbp = 0;
6222 int dct8x8_allowed= h->pps.transform_8x8_mode;
6224 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6226 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6227 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6229 /* a skipped mb needs the aff flag from the following mb */
6230 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6231 predict_field_decoding_flag(h);
6232 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6233 skip = h->next_mb_skipped;
6235 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6236 /* read skip flags */
6238 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6239 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6240 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6241 if(h->next_mb_skipped)
6242 predict_field_decoding_flag(h);
6244 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6249 h->cbp_table[mb_xy] = 0;
6250 h->chroma_pred_mode_table[mb_xy] = 0;
6251 h->last_qscale_diff = 0;
6258 if( (s->mb_y&1) == 0 )
6260 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6262 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6264 h->prev_mb_skipped = 0;
6266 compute_mb_neighbors(h);
6267 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6268 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6272 if( h->slice_type == B_TYPE ) {
6274 partition_count= b_mb_type_info[mb_type].partition_count;
6275 mb_type= b_mb_type_info[mb_type].type;
6278 goto decode_intra_mb;
6280 } else if( h->slice_type == P_TYPE ) {
6282 partition_count= p_mb_type_info[mb_type].partition_count;
6283 mb_type= p_mb_type_info[mb_type].type;
6286 goto decode_intra_mb;
6289 assert(h->slice_type == I_TYPE);
6291 partition_count = 0;
6292 cbp= i_mb_type_info[mb_type].cbp;
6293 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6294 mb_type= i_mb_type_info[mb_type].type;
6297 mb_type |= MB_TYPE_INTERLACED;
6299 h->slice_table[ mb_xy ]= h->slice_num;
6301 if(IS_INTRA_PCM(mb_type)) {
6305 // We assume these blocks are very rare so we dont optimize it.
6306 // FIXME The two following lines get the bitstream position in the cabac
6307 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6308 ptr= h->cabac.bytestream;
6309 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6311 // The pixels are stored in the same order as levels in h->mb array.
6312 for(y=0; y<16; y++){
6313 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6314 for(x=0; x<16; x++){
6315 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6316 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6320 const int index= 256 + 4*(y&3) + 32*(y>>2);
6322 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6323 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6327 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6329 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6330 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6334 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6336 // All blocks are present
6337 h->cbp_table[mb_xy] = 0x1ef;
6338 h->chroma_pred_mode_table[mb_xy] = 0;
6339 // In deblocking, the quantizer is 0
6340 s->current_picture.qscale_table[mb_xy]= 0;
6341 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6342 // All coeffs are present
6343 memset(h->non_zero_count[mb_xy], 16, 16);
6344 s->current_picture.mb_type[mb_xy]= mb_type;
6349 h->ref_count[0] <<= 1;
6350 h->ref_count[1] <<= 1;
6353 fill_caches(h, mb_type, 0);
6355 if( IS_INTRA( mb_type ) ) {
6357 if( IS_INTRA4x4( mb_type ) ) {
6358 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6359 mb_type |= MB_TYPE_8x8DCT;
6360 for( i = 0; i < 16; i+=4 ) {
6361 int pred = pred_intra_mode( h, i );
6362 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6363 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6366 for( i = 0; i < 16; i++ ) {
6367 int pred = pred_intra_mode( h, i );
6368 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6370 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6373 write_back_intra_pred_mode(h);
6374 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6376 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6377 if( h->intra16x16_pred_mode < 0 ) return -1;
6379 h->chroma_pred_mode_table[mb_xy] =
6380 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6382 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6383 if( h->chroma_pred_mode < 0 ) return -1;
6384 } else if( partition_count == 4 ) {
6385 int i, j, sub_partition_count[4], list, ref[2][4];
6387 if( h->slice_type == B_TYPE ) {
6388 for( i = 0; i < 4; i++ ) {
6389 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6390 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6391 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6393 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6394 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6395 pred_direct_motion(h, &mb_type);
6396 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6397 for( i = 0; i < 4; i++ )
6398 if( IS_DIRECT(h->sub_mb_type[i]) )
6399 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6403 for( i = 0; i < 4; i++ ) {
6404 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6405 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6406 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6410 for( list = 0; list < 2; list++ ) {
6411 if( h->ref_count[list] > 0 ) {
6412 for( i = 0; i < 4; i++ ) {
6413 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6414 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6415 if( h->ref_count[list] > 1 )
6416 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6422 h->ref_cache[list][ scan8[4*i]+1 ]=
6423 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6429 dct8x8_allowed = get_dct8x8_allowed(h);
6431 for(list=0; list<2; list++){
6433 if(IS_DIRECT(h->sub_mb_type[i])){
6434 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6437 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6439 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6440 const int sub_mb_type= h->sub_mb_type[i];
6441 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6442 for(j=0; j<sub_partition_count[i]; j++){
6445 const int index= 4*i + block_width*j;
6446 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6447 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6448 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6450 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6451 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6452 tprintf("final mv:%d %d\n", mx, my);
6454 if(IS_SUB_8X8(sub_mb_type)){
6455 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6456 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6457 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6458 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6460 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6461 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6462 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6463 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6464 }else if(IS_SUB_8X4(sub_mb_type)){
6465 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6466 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6468 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6469 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6470 }else if(IS_SUB_4X8(sub_mb_type)){
6471 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6472 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6474 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6475 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6477 assert(IS_SUB_4X4(sub_mb_type));
6478 mv_cache[ 0 ][0]= mx;
6479 mv_cache[ 0 ][1]= my;
6481 mvd_cache[ 0 ][0]= mx - mpx;
6482 mvd_cache[ 0 ][1]= my - mpy;
6486 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6487 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6488 p[0] = p[1] = p[8] = p[9] = 0;
6489 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6493 } else if( IS_DIRECT(mb_type) ) {
6494 pred_direct_motion(h, &mb_type);
6495 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6496 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6497 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6499 int list, mx, my, i, mpx, mpy;
6500 if(IS_16X16(mb_type)){
6501 for(list=0; list<2; list++){
6502 if(IS_DIR(mb_type, 0, list)){
6503 if(h->ref_count[list] > 0 ){
6504 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6505 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6508 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6510 for(list=0; list<2; list++){
6511 if(IS_DIR(mb_type, 0, list)){
6512 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6514 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6515 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6516 tprintf("final mv:%d %d\n", mx, my);
6518 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6521 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6524 else if(IS_16X8(mb_type)){
6525 for(list=0; list<2; list++){
6526 if(h->ref_count[list]>0){
6528 if(IS_DIR(mb_type, i, list)){
6529 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6530 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6532 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6536 for(list=0; list<2; list++){
6538 if(IS_DIR(mb_type, i, list)){
6539 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6540 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6541 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6542 tprintf("final mv:%d %d\n", mx, my);
6544 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6545 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6548 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6553 assert(IS_8X16(mb_type));
6554 for(list=0; list<2; list++){
6555 if(h->ref_count[list]>0){
6557 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6558 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6559 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6561 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6565 for(list=0; list<2; list++){
6567 if(IS_DIR(mb_type, i, list)){
6568 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6569 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6570 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6572 tprintf("final mv:%d %d\n", mx, my);
6573 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6574 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6576 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6577 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6584 if( IS_INTER( mb_type ) ) {
6585 h->chroma_pred_mode_table[mb_xy] = 0;
6586 write_back_motion( h, mb_type );
6589 if( !IS_INTRA16x16( mb_type ) ) {
6590 cbp = decode_cabac_mb_cbp_luma( h );
6591 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6594 h->cbp_table[mb_xy] = h->cbp = cbp;
6596 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6597 if( decode_cabac_mb_transform_size( h ) )
6598 mb_type |= MB_TYPE_8x8DCT;
6600 s->current_picture.mb_type[mb_xy]= mb_type;
6602 if( cbp || IS_INTRA16x16( mb_type ) ) {
6603 const uint8_t *scan, *scan8x8, *dc_scan;
6606 if(IS_INTERLACED(mb_type)){
6607 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6608 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6609 dc_scan= luma_dc_field_scan;
6611 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6612 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6613 dc_scan= luma_dc_zigzag_scan;
6616 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6617 if( dqp == INT_MIN ){
6618 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6622 if(((unsigned)s->qscale) > 51){
6623 if(s->qscale<0) s->qscale+= 52;
6624 else s->qscale-= 52;
6626 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6628 if( IS_INTRA16x16( mb_type ) ) {
6630 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6631 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6634 for( i = 0; i < 16; i++ ) {
6635 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6636 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6640 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6644 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6645 if( cbp & (1<<i8x8) ) {
6646 if( IS_8x8DCT(mb_type) ) {
6647 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6648 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6651 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6652 const int index = 4*i8x8 + i4x4;
6653 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6654 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6658 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6659 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6666 for( c = 0; c < 2; c++ ) {
6667 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6668 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6675 for( c = 0; c < 2; c++ ) {
6676 for( i = 0; i < 4; i++ ) {
6677 const int index = 16 + 4 * c + i;
6678 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6679 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6684 uint8_t * const nnz= &h->non_zero_count_cache[0];
6685 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6686 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6689 uint8_t * const nnz= &h->non_zero_count_cache[0];
6690 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6691 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6692 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6693 h->last_qscale_diff = 0;
6696 s->current_picture.qscale_table[mb_xy]= s->qscale;
6697 write_back_non_zero_count(h);
6700 h->ref_count[0] >>= 1;
6701 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 rows tall).  pix points at the first
 * pixel to the right of the edge, so p0..p3 live at pix[-1..-4] and q0..q3
 * at pix[0..3].  bS holds four boundary strengths (one per 4-row group) and
 * qp the quantizer used to derive the alpha/beta/tc0 thresholds.
 * bS < 4 uses the normal tc-clipped filter (dispatched to dsputil); bS == 4
 * uses the strong intra filter implemented inline below.
 * NOTE(review): some statements between the visible lines are elided in this
 * excerpt (local declarations, the bS<4 branch header). */
6708 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* alpha/beta thresholds, index clipped into the valid 0..51 QP range */
6710 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6711 const int alpha = alpha_table[index_a];
6712 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* normal filter: tc0 lookup per group, -1 marks "do not filter" (bS==0) */
6717 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6718 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6720 /* 16px edge length, because bS=4 is triggered by being at
6721 * the edge of an intra MB, so all 4 bS are the same */
6722 for( d = 0; d < 16; d++ ) {
6723 const int p0 = pix[-1];
6724 const int p1 = pix[-2];
6725 const int p2 = pix[-3];
6727 const int q0 = pix[0];
6728 const int q1 = pix[1];
6729 const int q2 = pix[2];
/* only filter where the discontinuity looks like a blocking artifact
 * rather than a real image edge */
6731 if( ABS( p0 - q0 ) < alpha &&
6732 ABS( p1 - p0 ) < beta &&
6733 ABS( q1 - q0 ) < beta ) {
/* strong filter: may rewrite up to 3 pixels on each side of the edge */
6735 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6736 if( ABS( p2 - p0 ) < beta)
6738 const int p3 = pix[-4];
6740 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6741 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6742 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p-side not smooth enough: fall back to the 1-pixel update */
6745 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6747 if( ABS( q2 - q0 ) < beta)
6749 const int q3 = pix[3];
6751 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6752 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6753 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6756 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large p0/q0 step: only p0 and q0 are replaced */
6760 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6761 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6763 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge.  Same alpha/beta derivation as the luma
 * version; chroma encodes tc as tc0+1 with 0 meaning "skip this group".
 * bS == 4 dispatches to the intra (strong) chroma filter instead. */
6769 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6771 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6772 const int alpha = alpha_table[index_a];
6773 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* normal chroma filter path (bS < 4) */
6778 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6779 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* intra/strong chroma filter path (bS == 4), no tc clipping */
6781 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the left vertical luma edge of an MBAFF macroblock pair.
 * Unlike filter_mb_edgev this works row by row, because each of the 16 rows
 * may take a different boundary strength (8 bS entries) and one of two QPs
 * (qp[0]/qp[1], one per neighbouring macroblock of the pair). */
6785 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6787 for( i = 0; i < 16; i++, pix += stride) {
/* map the current row onto one of the 8 bS entries */
6793 int bS_index = (i >> 1);
6796 bS_index |= (i & 1);
6799 if( bS[bS_index] == 0 ) {
/* choose the neighbour QP for this row; the row->neighbour mapping
 * differs between field (i>>3) and frame (i&1) decoding */
6803 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6804 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6805 alpha = alpha_table[index_a];
6806 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* bS < 4: normal tc-clipped filter */
6808 if( bS[bS_index] < 4 ) {
6809 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6810 const int p0 = pix[-1];
6811 const int p1 = pix[-2];
6812 const int p2 = pix[-3];
6813 const int q0 = pix[0];
6814 const int q1 = pix[1];
6815 const int q2 = pix[2];
6817 if( ABS( p0 - q0 ) < alpha &&
6818 ABS( p1 - p0 ) < beta &&
6819 ABS( q1 - q0 ) < beta ) {
/* p1/q1 are only adjusted when their side is smooth (|p2-p0|,
 * |q2-q0| below beta); the update is clipped to +-tc0 */
6823 if( ABS( p2 - p0 ) < beta ) {
6824 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6827 if( ABS( q2 - q0 ) < beta ) {
6828 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6832 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6833 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6834 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6835 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filter, same computation as filter_mb_edgev */
6838 const int p0 = pix[-1];
6839 const int p1 = pix[-2];
6840 const int p2 = pix[-3];
6842 const int q0 = pix[0];
6843 const int q1 = pix[1];
6844 const int q2 = pix[2];
6846 if( ABS( p0 - q0 ) < alpha &&
6847 ABS( p1 - p0 ) < beta &&
6848 ABS( q1 - q0 ) < beta ) {
6850 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6851 if( ABS( p2 - p0 ) < beta)
6853 const int p3 = pix[-4];
6855 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6856 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6857 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6860 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6862 if( ABS( q2 - q0 ) < beta)
6864 const int q3 = pix[3];
6866 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6867 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6868 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6871 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6875 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6876 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6878 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the left vertical chroma edge of an MBAFF macroblock pair,
 * row by row over the 8 chroma rows.  As with the luma MBAFF filter,
 * each row selects its own bS entry and one of the two neighbour QPs. */
6883 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6885 for( i = 0; i < 8; i++, pix += stride) {
6893 if( bS[bS_index] == 0 ) {
/* field decoding splits the 8 rows 4/4 between neighbours (i>>2),
 * frame decoding alternates them (i&1) */
6897 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6898 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6899 alpha = alpha_table[index_a];
6900 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* bS < 4: normal chroma filter, tc = tc0 + 1 */
6902 if( bS[bS_index] < 4 ) {
6903 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6904 const int p0 = pix[-1];
6905 const int p1 = pix[-2];
6906 const int q0 = pix[0];
6907 const int q1 = pix[1];
6909 if( ABS( p0 - q0 ) < alpha &&
6910 ABS( p1 - p0 ) < beta &&
6911 ABS( q1 - q0 ) < beta ) {
6912 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6914 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6915 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6916 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter, only p0/q0 are rewritten */
6919 const int p0 = pix[-1];
6920 const int p1 = pix[-2];
6921 const int q0 = pix[0];
6922 const int q1 = pix[1];
6924 if( ABS( p0 - q0 ) < alpha &&
6925 ABS( p1 - p0 ) < beta &&
6926 ABS( q1 - q0 ) < beta ) {
6928 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6929 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6930 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 columns wide).  pix points at the
 * first row below the edge, so p-samples are above (negative multiples of
 * stride) and q-samples below.  Mirrors filter_mb_edgev: bS < 4 goes to the
 * dsputil routine, bS == 4 runs the strong intra filter inline. */
6936 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6938 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6939 const int alpha = alpha_table[index_a];
6940 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6941 const int pix_next = stride;
/* normal filter path (bS < 4); -1 disables filtering for bS==0 groups */
6946 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6947 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6949 /* 16px edge length, see filter_mb_edgev */
6950 for( d = 0; d < 16; d++ ) {
6951 const int p0 = pix[-1*pix_next];
6952 const int p1 = pix[-2*pix_next];
6953 const int p2 = pix[-3*pix_next];
6954 const int q0 = pix[0];
6955 const int q1 = pix[1*pix_next];
6956 const int q2 = pix[2*pix_next];
6958 if( ABS( p0 - q0 ) < alpha &&
6959 ABS( p1 - p0 ) < beta &&
6960 ABS( q1 - q0 ) < beta ) {
6962 const int p3 = pix[-4*pix_next];
6963 const int q3 = pix[ 3*pix_next];
/* strong filter: up to 3 pixels per side when the step is small */
6965 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6966 if( ABS( p2 - p0 ) < beta) {
6968 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6969 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6970 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6973 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6975 if( ABS( q2 - q0 ) < beta) {
6977 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6978 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6979 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6982 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large step: only p0/q0 are replaced */
6986 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6987 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6989 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.  Chroma tc is tc0+1 with 0 meaning
 * "skip"; bS == 4 dispatches to the intra (strong) chroma filter. */
6996 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6998 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6999 const int alpha = alpha_table[index_a];
7000 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* normal path (bS < 4) */
7005 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7006 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* intra/strong path (bS == 4) */
7008 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock: computes boundary strengths in
 * bulk (via dsp.h264_loop_filter_strength) instead of per edge.  Falls back
 * to the generic filter_mb() for picture-border macroblocks or when no
 * optimized strength routine is available.  Not used with MBAFF (asserted).
 * NOTE(review): the tail of this function (the non-intra FILTER() invocations
 * and the 8x8DCT branch) is elided in this excerpt. */
7012 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7013 MpegEncContext * const s = &h->s;
7015 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
7017 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7018 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7021 assert(!FRAME_MBAFF);
7023 mb_xy = mb_x + mb_y*s->mb_stride;
7024 mb_type = s->current_picture.mb_type[mb_xy];
/* luma QPs of this MB and its left/top neighbours, plus the chroma QPs */
7025 qp = s->current_picture.qscale_table[mb_xy];
7026 qp0 = s->current_picture.qscale_table[mb_xy-1];
7027 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7028 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7029 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7030 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* edge QPs are the rounded averages of the two adjacent MBs' QPs */
7031 qp0 = (qp + qp0 + 1) >> 1;
7032 qp1 = (qp + qp1 + 1) >> 1;
7033 qpc0 = (qpc + qpc0 + 1) >> 1;
7034 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP the filter provably changes nothing: skip the whole MB */
7035 qp_thresh = 15 - h->slice_alpha_c0_offset;
7036 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7037 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed strengths: 4 on MB borders, 3 on internal edges */
7040 if( IS_INTRA(mb_type) ) {
7041 int16_t bS4[4] = {4,4,4,4};
7042 int16_t bS3[4] = {3,3,3,3};
/* with 8x8 transform only every second internal edge is filtered */
7043 if( IS_8x8DCT(mb_type) ) {
7044 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7045 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7046 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7047 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7049 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7050 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7051 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7052 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7053 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7054 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7055 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7056 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only the MB border and the central edge exist (8x8 plane) */
7058 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7059 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7060 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7061 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7062 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7063 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7064 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7065 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute all bS values at once, 8-byte aligned for the
 * uint64_t aliasing tricks below */
7068 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7069 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7071 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
/* all edges get bS=2: set four int16 strengths per 64-bit store */
7073 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often the mv-based bS must be recomputed, derived
 * from the partition shapes of this MB and the left neighbour */
7075 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7076 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7077 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7078 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7080 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7081 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7082 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7083 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the shared border edge */
7085 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7086 bSv[0][0] = 0x0004000400040004ULL;
7087 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7088 bSv[1][0] = 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): run the luma + chroma filters for one edge,
 * using the border QP (qp0/qp1) for edge 0 and the MB QP otherwise */
7090 #define FILTER(hv,dir,edge)\
7091 if(bSv[dir][edge]) {\
7092 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7094 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7095 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7101 } else if( IS_8x8DCT(mb_type) ) {
/* Generic (slow-path) deblocking of one macroblock: computes the boundary
 * strength bS for every 4-pixel edge segment from intra/nnz/mv/ref data and
 * applies the vertical and horizontal edge filters to luma and chroma.
 * Handles MBAFF and field/frame mixing cases that filter_mb_fast cannot. */
7120 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7121 MpegEncContext * const s = &h->s;
7122 const int mb_xy= mb_x + mb_y*s->mb_stride;
7123 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs compare vertical mv components with a tighter limit */
7124 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7125 int first_vertical_edge_done = 0;
7127 /* FIXME: A given frame may occupy more than one position in
7128 * the reference list. So ref2frm should be populated with
7129 * frame numbers, not indices. */
7130 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7131 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7133 //for sufficiently low qp, filtering wouldn't do anything
7134 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7136 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7137 int qp = s->current_picture.qscale_table[mb_xy];
7139 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7140 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7146 // left mb is in picture
7147 && h->slice_table[mb_xy-1] != 255
7148 // and current and left pair do not have the same interlaced type
7149 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7150 // and left mb is in the same slice if deblocking_filter == 2
7151 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7152 /* First vertical edge is different in MBAFF frames
7153 * There are 8 different bS to compute and 2 different Qp
7155 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7156 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7160 int mb_qp, mbn0_qp, mbn1_qp;
7162 first_vertical_edge_done = 1;
7164 if( IS_INTRA(mb_type) )
7165 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7167 for( i = 0; i < 8; i++ ) {
/* pick which of the two left MBs this row borders on */
7168 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7170 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7172 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7173 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7174 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two edge QPs: this MB averaged against each left neighbour */
7181 mb_qp = s->current_picture.qscale_table[mb_xy];
7182 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7183 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7184 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7185 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7186 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7187 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7188 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7189 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7192 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7193 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7194 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7195 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7196 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7198 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7199 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbour across the MB border in this direction */
7202 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7203 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table==255 means the neighbour is outside the picture, so
 * skip edge 0 (the MB border) and start at the first internal edge */
7204 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7206 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7207 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7208 // how often to recheck mv-based bS when iterating between edges
7209 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7210 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7211 // how often to recheck mv-based bS when iterating along each edge
7212 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7214 if (first_vertical_edge_done) {
7216 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
7219 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7222 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7223 && !IS_INTERLACED(mb_type)
7224 && IS_INTERLACED(mbm_type)
7226 // This is a special case in the norm where the filtering must
7227 // be done twice (one each of the field) even if we are in a
7228 // frame macroblock.
7230 static const int nnz_idx[4] = {4,5,6,3};
7231 unsigned int tmp_linesize = 2 * linesize;
7232 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7233 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter against each field MB of the pair above */
7238 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7239 if( IS_INTRA(mb_type) ||
7240 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7241 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7243 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7244 for( i = 0; i < 4; i++ ) {
7245 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7246 mbn_nnz[nnz_idx[i]] != 0 )
7252 // Do not use s->qscale as luma quantizer because it has not the same
7253 // value in IPCM macroblocks.
7254 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7255 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7256 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7257 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7258 chroma_qp = ( h->chroma_qp +
7259 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7260 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7261 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular edge loop: edge 0 is the MB border, 1..3 are internal */
7268 for( edge = start; edge < edges; edge++ ) {
7269 /* mbn_xy: neighbor macroblock */
7270 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7271 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: the odd internal edges do not exist */
7275 if( (edge&1) && IS_8x8DCT(mb_type) )
/* bS derivation, strongest condition first: intra => 4 on the MB
 * border (3 internally), then coded coefficients => 2, then
 * ref/mv mismatch => 1, else 0 */
7278 if( IS_INTRA(mb_type) ||
7279 IS_INTRA(mbn_type) ) {
7282 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7283 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7292 bS[0] = bS[1] = bS[2] = bS[3] = value;
7297 if( edge & mask_edge ) {
7298 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7301 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7302 bS[0] = bS[1] = bS[2] = bS[3] = 1;
7305 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7306 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7307 int bn_idx= b_idx - (dir ? 8:1);
7309 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7310 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7311 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7312 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7314 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 bS: nnz on either side => 2, else mv/ref compare */
7320 for( i = 0; i < 4; i++ ) {
7321 int x = dir == 0 ? edge : i;
7322 int y = dir == 0 ? i : edge;
7323 int b_idx= 8 + 4 + x + 8*y;
7324 int bn_idx= b_idx - (dir ? 8:1);
7326 if( h->non_zero_count_cache[b_idx] != 0 ||
7327 h->non_zero_count_cache[bn_idx] != 0 ) {
7333 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7334 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7335 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7336 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7344 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7349 // Do not use s->qscale as luma quantizer because it has not the same
7350 // value in IPCM macroblocks.
7351 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7352 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7353 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7354 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* dir==0: vertical edges; chroma only has edges at even positions */
7356 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7357 if( (edge&1) == 0 ) {
7358 int chroma_qp = ( h->chroma_qp +
7359 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7360 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7361 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* dir==1: horizontal edges */
7364 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7365 if( (edge&1) == 0 ) {
7366 int chroma_qp = ( h->chroma_qp +
7367 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7368 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7369 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7376 static int decode_slice(H264Context *h){
7377 MpegEncContext * const s = &h->s;
7378 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7382 if( h->pps.cabac ) {
7386 align_get_bits( &s->gb );
7389 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7390 ff_init_cabac_decoder( &h->cabac,
7391 s->gb.buffer + get_bits_count(&s->gb)/8,
7392 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7393 /* calculate pre-state */
7394 for( i= 0; i < 460; i++ ) {
7396 if( h->slice_type == I_TYPE )
7397 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7399 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7402 h->cabac_state[i] = 2 * ( 63 - pre ) + 2;
7404 h->cabac_state[i] = 2 * ( pre - 64 ) + 3;
7408 int ret = decode_mb_cabac(h);
7411 if(ret>=0) hl_decode_mb(h);
7413 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7416 if(ret>=0) ret = decode_mb_cabac(h);
7418 if(ret>=0) hl_decode_mb(h);
7421 eos = get_cabac_terminate( &h->cabac );
7423 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7424 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7425 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7429 if( ++s->mb_x >= s->mb_width ) {
7431 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7438 if( eos || s->mb_y >= s->mb_height ) {
7439 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7440 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7447 int ret = decode_mb_cavlc(h);
7449 if(ret>=0) hl_decode_mb(h);
7451 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7453 ret = decode_mb_cavlc(h);
7455 if(ret>=0) hl_decode_mb(h);
7460 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7461 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7466 if(++s->mb_x >= s->mb_width){
7468 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7473 if(s->mb_y >= s->mb_height){
7474 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7476 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7477 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7481 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7488 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7489 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7490 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7491 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7495 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7504 for(;s->mb_y < s->mb_height; s->mb_y++){
7505 for(;s->mb_x < s->mb_width; s->mb_x++){
7506 int ret= decode_mb(h);
7511 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7512 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7517 if(++s->mb_x >= s->mb_width){
7519 if(++s->mb_y >= s->mb_height){
7520 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7521 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7525 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7532 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7533 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7534 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7538 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7545 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7548 return -1; //not reached
7551 static int decode_unregistered_user_data(H264Context *h, int size){
7552 MpegEncContext * const s = &h->s;
7553 uint8_t user_data[16+256];
7559 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7560 user_data[i]= get_bits(&s->gb, 8);
7564 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7565 if(e==1 && build>=0)
7566 h->x264_build= build;
7568 if(s->avctx->debug & FF_DEBUG_BUGS)
7569 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
7572 skip_bits(&s->gb, 8);
7577 static int decode_sei(H264Context *h){
7578 MpegEncContext * const s = &h->s;
7580 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7585 type+= show_bits(&s->gb, 8);
7586 }while(get_bits(&s->gb, 8) == 255);
7590 size+= show_bits(&s->gb, 8);
7591 }while(get_bits(&s->gb, 8) == 255);
7595 if(decode_unregistered_user_data(h, size) < 0)
7599 skip_bits(&s->gb, 8*size);
7602 //FIXME check bits here
7603 align_get_bits(&s->gb);
7609 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7610 MpegEncContext * const s = &h->s;
7612 cpb_count = get_ue_golomb(&s->gb) + 1;
7613 get_bits(&s->gb, 4); /* bit_rate_scale */
7614 get_bits(&s->gb, 4); /* cpb_size_scale */
7615 for(i=0; i<cpb_count; i++){
7616 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7617 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7618 get_bits1(&s->gb); /* cbr_flag */
7620 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7621 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7622 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7623 get_bits(&s->gb, 5); /* time_offset_length */
7626 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7627 MpegEncContext * const s = &h->s;
7628 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7629 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7631 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7633 if( aspect_ratio_info_present_flag ) {
7634 aspect_ratio_idc= get_bits(&s->gb, 8);
7635 if( aspect_ratio_idc == EXTENDED_SAR ) {
7636 sps->sar.num= get_bits(&s->gb, 16);
7637 sps->sar.den= get_bits(&s->gb, 16);
7638 }else if(aspect_ratio_idc < 14){
7639 sps->sar= pixel_aspect[aspect_ratio_idc];
7641 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7648 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7650 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7651 get_bits1(&s->gb); /* overscan_appropriate_flag */
7654 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7655 get_bits(&s->gb, 3); /* video_format */
7656 get_bits1(&s->gb); /* video_full_range_flag */
7657 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7658 get_bits(&s->gb, 8); /* colour_primaries */
7659 get_bits(&s->gb, 8); /* transfer_characteristics */
7660 get_bits(&s->gb, 8); /* matrix_coefficients */
7664 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7665 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7666 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7669 sps->timing_info_present_flag = get_bits1(&s->gb);
7670 if(sps->timing_info_present_flag){
7671 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7672 sps->time_scale = get_bits_long(&s->gb, 32);
7673 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7676 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7677 if(nal_hrd_parameters_present_flag)
7678 decode_hrd_parameters(h, sps);
7679 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7680 if(vcl_hrd_parameters_present_flag)
7681 decode_hrd_parameters(h, sps);
7682 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7683 get_bits1(&s->gb); /* low_delay_hrd_flag */
7684 get_bits1(&s->gb); /* pic_struct_present_flag */
7686 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7687 if(sps->bitstream_restriction_flag){
7688 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7689 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7690 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7691 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7692 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7693 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7694 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
7700 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7701 const uint8_t *jvt_list, const uint8_t *fallback_list){
7702 MpegEncContext * const s = &h->s;
7703 int i, last = 8, next = 8;
7704 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7705 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7706 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7708 for(i=0;i<size;i++){
7710 next = (last + get_se_golomb(&s->gb)) & 0xff;
7711 if(!i && !next){ /* matrix not written, we use the preset one */
7712 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7715 last = factors[scan[i]] = next ? next : last;
7719 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7720 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7721 MpegEncContext * const s = &h->s;
7722 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7723 const uint8_t *fallback[4] = {
7724 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7725 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7726 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7727 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7729 if(get_bits1(&s->gb)){
7730 sps->scaling_matrix_present |= is_sps;
7731 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7732 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7733 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7734 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7735 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7736 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7737 if(is_sps || pps->transform_8x8_mode){
7738 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7739 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7741 } else if(fallback_sps) {
7742 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7743 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7747 static inline int decode_seq_parameter_set(H264Context *h){
7748 MpegEncContext * const s = &h->s;
7749 int profile_idc, level_idc;
7753 profile_idc= get_bits(&s->gb, 8);
7754 get_bits1(&s->gb); //constraint_set0_flag
7755 get_bits1(&s->gb); //constraint_set1_flag
7756 get_bits1(&s->gb); //constraint_set2_flag
7757 get_bits1(&s->gb); //constraint_set3_flag
7758 get_bits(&s->gb, 4); // reserved
7759 level_idc= get_bits(&s->gb, 8);
7760 sps_id= get_ue_golomb(&s->gb);
7762 sps= &h->sps_buffer[ sps_id ];
7763 sps->profile_idc= profile_idc;
7764 sps->level_idc= level_idc;
7766 if(sps->profile_idc >= 100){ //high profile
7767 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7768 get_bits1(&s->gb); //residual_color_transform_flag
7769 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7770 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7771 sps->transform_bypass = get_bits1(&s->gb);
7772 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7774 sps->scaling_matrix_present = 0;
7776 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7777 sps->poc_type= get_ue_golomb(&s->gb);
7779 if(sps->poc_type == 0){ //FIXME #define
7780 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7781 } else if(sps->poc_type == 1){//FIXME #define
7782 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7783 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7784 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7785 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7787 for(i=0; i<sps->poc_cycle_length; i++)
7788 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7790 if(sps->poc_type > 2){
7791 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7795 sps->ref_frame_count= get_ue_golomb(&s->gb);
7796 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7797 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7799 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7800 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7801 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7802 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7803 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7806 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7807 if(!sps->frame_mbs_only_flag)
7808 sps->mb_aff= get_bits1(&s->gb);
7812 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7814 #ifndef ALLOW_INTERLACE
7816 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7818 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7819 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7821 sps->crop= get_bits1(&s->gb);
7823 sps->crop_left = get_ue_golomb(&s->gb);
7824 sps->crop_right = get_ue_golomb(&s->gb);
7825 sps->crop_top = get_ue_golomb(&s->gb);
7826 sps->crop_bottom= get_ue_golomb(&s->gb);
7827 if(sps->crop_left || sps->crop_top){
7828 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7834 sps->crop_bottom= 0;
7837 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7838 if( sps->vui_parameters_present_flag )
7839 decode_vui_parameters(h, sps);
7841 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7842 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7843 sps_id, sps->profile_idc, sps->level_idc,
7845 sps->ref_frame_count,
7846 sps->mb_width, sps->mb_height,
7847 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7848 sps->direct_8x8_inference_flag ? "8B8" : "",
7849 sps->crop_left, sps->crop_right,
7850 sps->crop_top, sps->crop_bottom,
7851 sps->vui_parameters_present_flag ? "VUI" : ""
7857 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7858 MpegEncContext * const s = &h->s;
7859 int pps_id= get_ue_golomb(&s->gb);
7860 PPS *pps= &h->pps_buffer[pps_id];
7862 pps->sps_id= get_ue_golomb(&s->gb);
7863 pps->cabac= get_bits1(&s->gb);
7864 pps->pic_order_present= get_bits1(&s->gb);
7865 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7866 if(pps->slice_group_count > 1 ){
7867 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7868 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7869 switch(pps->mb_slice_group_map_type){
7872 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7873 | run_length[ i ] |1 |ue(v) |
7878 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7880 | top_left_mb[ i ] |1 |ue(v) |
7881 | bottom_right_mb[ i ] |1 |ue(v) |
7889 | slice_group_change_direction_flag |1 |u(1) |
7890 | slice_group_change_rate_minus1 |1 |ue(v) |
7895 | slice_group_id_cnt_minus1 |1 |ue(v) |
7896 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7898 | slice_group_id[ i ] |1 |u(v) |
7903 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7904 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7905 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7906 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7910 pps->weighted_pred= get_bits1(&s->gb);
7911 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7912 pps->init_qp= get_se_golomb(&s->gb) + 26;
7913 pps->init_qs= get_se_golomb(&s->gb) + 26;
7914 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7915 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7916 pps->constrained_intra_pred= get_bits1(&s->gb);
7917 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7919 pps->transform_8x8_mode= 0;
7920 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7921 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7922 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7924 if(get_bits_count(&s->gb) < bit_length){
7925 pps->transform_8x8_mode= get_bits1(&s->gb);
7926 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7927 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7930 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7931 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7932 pps_id, pps->sps_id,
7933 pps->cabac ? "CABAC" : "CAVLC",
7934 pps->slice_group_count,
7935 pps->ref_count[0], pps->ref_count[1],
7936 pps->weighted_pred ? "weighted" : "",
7937 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7938 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7939 pps->constrained_intra_pred ? "CONSTR" : "",
7940 pps->redundant_pic_cnt_present ? "REDU" : "",
7941 pps->transform_8x8_mode ? "8x8DCT" : ""
7949 * finds the end of the current frame in the bitstream.
7950 * @return the position of the first byte of the next frame, or -1
7952 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7955 ParseContext *pc = &(h->s.parse_context);
7956 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7957 // mb_addr= pc->mb_addr - 1;
7959 for(i=0; i<=buf_size; i++){
7960 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7961 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7962 if(pc->frame_start_found){
7963 // If there isn't one more byte in the buffer
7964 // the test on first_mb_in_slice cannot be done yet
7965 // do it at next call.
7966 if (i >= buf_size) break;
7967 if (buf[i] & 0x80) {
7968 // first_mb_in_slice is 0, probably the first nal of a new
7970 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7972 pc->frame_start_found= 0;
7976 pc->frame_start_found = 1;
7978 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7979 if(pc->frame_start_found){
7981 pc->frame_start_found= 0;
7986 state= (state<<8) | buf[i];
7990 return END_NOT_FOUND;
7993 #ifdef CONFIG_H264_PARSER
7994 static int h264_parse(AVCodecParserContext *s,
7995 AVCodecContext *avctx,
7996 uint8_t **poutbuf, int *poutbuf_size,
7997 const uint8_t *buf, int buf_size)
7999 H264Context *h = s->priv_data;
8000 ParseContext *pc = &h->s.parse_context;
8003 next= find_frame_end(h, buf, buf_size);
8005 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8011 *poutbuf = (uint8_t *)buf;
8012 *poutbuf_size = buf_size;
8016 static int h264_split(AVCodecContext *avctx,
8017 const uint8_t *buf, int buf_size)
8020 uint32_t state = -1;
8023 for(i=0; i<=buf_size; i++){
8024 if((state&0xFFFFFF1F) == 0x107)
8026 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
8028 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
8030 while(i>4 && buf[i-5]==0) i--;
8035 state= (state<<8) | buf[i];
8039 #endif /* CONFIG_H264_PARSER */
8041 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8042 MpegEncContext * const s = &h->s;
8043 AVCodecContext * const avctx= s->avctx;
8047 for(i=0; i<50; i++){
8048 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8052 s->current_picture_ptr= NULL;
8061 if(buf_index >= buf_size) break;
8063 for(i = 0; i < h->nal_length_size; i++)
8064 nalsize = (nalsize << 8) | buf[buf_index++];
8070 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8075 // start code prefix search
8076 for(; buf_index + 3 < buf_size; buf_index++){
8077 // this should allways succeed in the first iteration
8078 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8082 if(buf_index+3 >= buf_size) break;
8087 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8088 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8090 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8092 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8093 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8096 if (h->is_avc && (nalsize != consumed))
8097 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8099 buf_index += consumed;
8101 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8102 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8105 switch(h->nal_unit_type){
8107 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8109 init_get_bits(&s->gb, ptr, bit_length);
8111 h->inter_gb_ptr= &s->gb;
8112 s->data_partitioning = 0;
8114 if(decode_slice_header(h) < 0){
8115 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8118 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
8119 if(h->redundant_pic_count==0 && s->hurry_up < 5
8120 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8121 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8122 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8123 && avctx->skip_frame < AVDISCARD_ALL)
8127 init_get_bits(&s->gb, ptr, bit_length);
8129 h->inter_gb_ptr= NULL;
8130 s->data_partitioning = 1;
8132 if(decode_slice_header(h) < 0){
8133 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8137 init_get_bits(&h->intra_gb, ptr, bit_length);
8138 h->intra_gb_ptr= &h->intra_gb;
8141 init_get_bits(&h->inter_gb, ptr, bit_length);
8142 h->inter_gb_ptr= &h->inter_gb;
8144 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8146 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8147 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8148 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8149 && avctx->skip_frame < AVDISCARD_ALL)
8153 init_get_bits(&s->gb, ptr, bit_length);
8157 init_get_bits(&s->gb, ptr, bit_length);
8158 decode_seq_parameter_set(h);
8160 if(s->flags& CODEC_FLAG_LOW_DELAY)
8163 if(avctx->has_b_frames < 2)
8164 avctx->has_b_frames= !s->low_delay;
8167 init_get_bits(&s->gb, ptr, bit_length);
8169 decode_picture_parameter_set(h, bit_length);
8173 case NAL_END_SEQUENCE:
8174 case NAL_END_STREAM:
8175 case NAL_FILLER_DATA:
8177 case NAL_AUXILIARY_SLICE:
8180 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8184 if(!s->current_picture_ptr) return buf_index; //no frame
8186 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8187 s->current_picture_ptr->pict_type= s->pict_type;
8189 h->prev_frame_num_offset= h->frame_num_offset;
8190 h->prev_frame_num= h->frame_num;
8191 if(s->current_picture_ptr->reference){
8192 h->prev_poc_msb= h->poc_msb;
8193 h->prev_poc_lsb= h->poc_lsb;
8195 if(s->current_picture_ptr->reference)
8196 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8206 * returns the number of bytes consumed for building the current frame
8208 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8209 if(s->flags&CODEC_FLAG_TRUNCATED){
8210 pos -= s->parse_context.last_index;
8211 if(pos<0) pos=0; // FIXME remove (unneeded?)
8215 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8216 if(pos+10>buf_size) pos=buf_size; // oops ;)
8222 static int decode_frame(AVCodecContext *avctx,
8223 void *data, int *data_size,
8224 uint8_t *buf, int buf_size)
8226 H264Context *h = avctx->priv_data;
8227 MpegEncContext *s = &h->s;
8228 AVFrame *pict = data;
8231 s->flags= avctx->flags;
8232 s->flags2= avctx->flags2;
8234 /* no supplementary picture */
8235 if (buf_size == 0) {
8239 if(s->flags&CODEC_FLAG_TRUNCATED){
8240 int next= find_frame_end(h, buf, buf_size);
8242 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8244 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
8247 if(h->is_avc && !h->got_avcC) {
8248 int i, cnt, nalsize;
8249 unsigned char *p = avctx->extradata;
8250 if(avctx->extradata_size < 7) {
8251 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8255 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8258 /* sps and pps in the avcC always have length coded with 2 bytes,
8259 so put a fake nal_length_size = 2 while parsing them */
8260 h->nal_length_size = 2;
8261 // Decode sps from avcC
8262 cnt = *(p+5) & 0x1f; // Number of sps
8264 for (i = 0; i < cnt; i++) {
8265 nalsize = BE_16(p) + 2;
8266 if(decode_nal_units(h, p, nalsize) < 0) {
8267 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8272 // Decode pps from avcC
8273 cnt = *(p++); // Number of pps
8274 for (i = 0; i < cnt; i++) {
8275 nalsize = BE_16(p) + 2;
8276 if(decode_nal_units(h, p, nalsize) != nalsize) {
8277 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8282 // Now store right nal length size, that will be use to parse all other nals
8283 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8284 // Do not reparse avcC
8288 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8289 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8293 buf_index=decode_nal_units(h, buf, buf_size);
8297 //FIXME do something with unavailable reference frames
8299 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8300 if(!s->current_picture_ptr){
8301 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8306 Picture *out = s->current_picture_ptr;
8307 #if 0 //decode order
8308 *data_size = sizeof(AVFrame);
8310 /* Sort B-frames into display order */
8311 Picture *cur = s->current_picture_ptr;
8312 Picture *prev = h->delayed_output_pic;
8313 int i, pics, cross_idr, out_of_order, out_idx;
8315 if(h->sps.bitstream_restriction_flag
8316 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8317 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8322 while(h->delayed_pic[pics]) pics++;
8323 h->delayed_pic[pics++] = cur;
8324 if(cur->reference == 0)
8328 for(i=0; h->delayed_pic[i]; i++)
8329 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
8332 out = h->delayed_pic[0];
8334 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8335 if(h->delayed_pic[i]->poc < out->poc){
8336 out = h->delayed_pic[i];
8340 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8341 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8343 else if(prev && pics <= s->avctx->has_b_frames)
8345 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8347 ((!cross_idr && prev && out->poc > prev->poc + 2)
8348 || cur->pict_type == B_TYPE)))
8351 s->avctx->has_b_frames++;
8354 else if(out_of_order)
8357 if(out_of_order || pics > s->avctx->has_b_frames){
8358 for(i=out_idx; h->delayed_pic[i]; i++)
8359 h->delayed_pic[i] = h->delayed_pic[i+1];
8365 *data_size = sizeof(AVFrame);
8366 if(prev && prev != out && prev->reference == 1)
8367 prev->reference = 0;
8368 h->delayed_output_pic = out;
8372 *pict= *(AVFrame*)out;
8374 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8377 assert(pict->data[0] || !*data_size);
8378 ff_print_debug_info(s, pict);
8379 //printf("out %d\n", (int)pict->data[0]);
8382 /* Return the Picture timestamp as the frame number */
8383 /* we substract 1 because it is added on utils.c */
8384 avctx->frame_number = s->picture_number - 1;
8386 return get_consumed_bytes(s, buf_index, buf_size);
8389 static inline void fill_mb_avail(H264Context *h){
8390 MpegEncContext * const s = &h->s;
8391 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8394 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8395 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8396 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8402 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8403 h->mb_avail[4]= 1; //FIXME move out
8404 h->mb_avail[5]= 0; //FIXME move out
8410 #define SIZE (COUNT*40)
8416 // int int_temp[10000];
8418 AVCodecContext avctx;
8420 dsputil_init(&dsp, &avctx);
8422 init_put_bits(&pb, temp, SIZE);
8423 printf("testing unsigned exp golomb\n");
8424 for(i=0; i<COUNT; i++){
8426 set_ue_golomb(&pb, i);
8427 STOP_TIMER("set_ue_golomb");
8429 flush_put_bits(&pb);
8431 init_get_bits(&gb, temp, 8*SIZE);
8432 for(i=0; i<COUNT; i++){
8435 s= show_bits(&gb, 24);
8438 j= get_ue_golomb(&gb);
8440 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8443 STOP_TIMER("get_ue_golomb");
8447 init_put_bits(&pb, temp, SIZE);
8448 printf("testing signed exp golomb\n");
8449 for(i=0; i<COUNT; i++){
8451 set_se_golomb(&pb, i - COUNT/2);
8452 STOP_TIMER("set_se_golomb");
8454 flush_put_bits(&pb);
8456 init_get_bits(&gb, temp, 8*SIZE);
8457 for(i=0; i<COUNT; i++){
8460 s= show_bits(&gb, 24);
8463 j= get_se_golomb(&gb);
8464 if(j != i - COUNT/2){
8465 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8468 STOP_TIMER("get_se_golomb");
8471 printf("testing 4x4 (I)DCT\n");
8474 uint8_t src[16], ref[16];
8475 uint64_t error= 0, max_error=0;
8477 for(i=0; i<COUNT; i++){
8479 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8480 for(j=0; j<16; j++){
8481 ref[j]= random()%255;
8482 src[j]= random()%255;
8485 h264_diff_dct_c(block, src, ref, 4);
8488 for(j=0; j<16; j++){
8489 // printf("%d ", block[j]);
8490 block[j]= block[j]*4;
8491 if(j&1) block[j]= (block[j]*4 + 2)/5;
8492 if(j&4) block[j]= (block[j]*4 + 2)/5;
8496 s->dsp.h264_idct_add(ref, block, 4);
8497 /* for(j=0; j<16; j++){
8498 printf("%d ", ref[j]);
8502 for(j=0; j<16; j++){
8503 int diff= ABS(src[j] - ref[j]);
8506 max_error= FFMAX(max_error, diff);
8509 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8511 printf("testing quantizer\n");
8512 for(qp=0; qp<52; qp++){
8514 src1_block[i]= src2_block[i]= random()%255;
8518 printf("Testing NAL layer\n");
8520 uint8_t bitstream[COUNT];
8521 uint8_t nal[COUNT*2];
8523 memset(&h, 0, sizeof(H264Context));
8525 for(i=0; i<COUNT; i++){
8533 for(j=0; j<COUNT; j++){
8534 bitstream[j]= (random() % 255) + 1;
8537 for(j=0; j<zeros; j++){
8538 int pos= random() % COUNT;
8539 while(bitstream[pos] == 0){
8548 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8550 printf("encoding failed\n");
8554 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8558 if(out_length != COUNT){
8559 printf("incorrect length %d %d\n", out_length, COUNT);
8563 if(consumed != nal_length){
8564 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8568 if(memcmp(bitstream, out, COUNT)){
8569 printf("missmatch\n");
8574 printf("Testing RBSP\n");
8582 static int decode_end(AVCodecContext *avctx)
8584 H264Context *h = avctx->priv_data;
8585 MpegEncContext *s = &h->s;
8587 av_freep(&h->rbsp_buffer);
8588 free_tables(h); //FIXME cleanup init stuff perhaps
8591 // memset(h, 0, sizeof(H264Context));
8597 AVCodec h264_decoder = {
8601 sizeof(H264Context),
8606 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8610 #ifdef CONFIG_H264_PARSER
8611 AVCodecParser h264_parser = {
8613 sizeof(H264Context),