2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16];
113 uint8_t scaling_matrix8[2][64];
117 * Picture parameter set
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
164 typedef struct H264Context{
172 #define NAL_IDR_SLICE 5
177 #define NAL_END_SEQUENCE 10
178 #define NAL_END_STREAM 11
179 #define NAL_FILLER_DATA 12
180 #define NAL_SPS_EXT 13
181 #define NAL_AUXILIARY_SLICE 19
182 uint8_t *rbsp_buffer;
183 unsigned int rbsp_buffer_size;
186 * Used to parse AVC variant of h264
188 int is_avc; ///< this flag is != 0 if codec is avc1
189 int got_avcC; ///< flag used to parse avcC data only once
190 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
198 int chroma_pred_mode;
199 int intra16x16_pred_mode;
204 int8_t intra4x4_pred_mode_cache[5*8];
205 int8_t (*intra4x4_pred_mode)[8];
206 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
207 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
208 void (*pred8x8 [4+3])(uint8_t *src, int stride);
209 void (*pred16x16[4+3])(uint8_t *src, int stride);
210 unsigned int topleft_samples_available;
211 unsigned int top_samples_available;
212 unsigned int topright_samples_available;
213 unsigned int left_samples_available;
214 uint8_t (*top_borders[2])[16+2*8];
215 uint8_t left_border[2*(17+2*9)];
218 * non zero coeff count cache.
219 * is 64 if not available.
221 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
222 uint8_t (*non_zero_count)[16];
225 * Motion vector cache.
227 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
228 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
229 #define LIST_NOT_USED -1 //FIXME rename?
230 #define PART_NOT_AVAILABLE -2
233 * is 1 if the specific list MV&references are set to 0,0,-2.
235 int mv_cache_clean[2];
238 * number of neighbors (top and/or left) that used 8x8 dct
240 int neighbor_transform_size;
243 * block_offset[ 0..23] for frame macroblocks
244 * block_offset[24..47] for field macroblocks
246 int block_offset[2*(16+8)];
248 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
250 int b_stride; //FIXME use s->b4_stride
253 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
262 int unknown_svq3_flag;
263 int next_slice_index;
265 SPS sps_buffer[MAX_SPS_COUNT];
266 SPS sps; ///< current sps
268 PPS pps_buffer[MAX_PPS_COUNT];
272 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
274 uint32_t dequant4_buffer[6][52][16];
275 uint32_t dequant8_buffer[2][52][64];
276 uint32_t (*dequant4_coeff[6])[16];
277 uint32_t (*dequant8_coeff[2])[64];
278 int dequant_coeff_pps; ///< reinit tables when pps changes
281 uint8_t *slice_table_base;
282 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
284 int slice_type_fixed;
286 //interlacing specific flags
288 int mb_field_decoding_flag;
289 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
296 int delta_poc_bottom;
299 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
300 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
301 int frame_num_offset; ///< for POC type 2
302 int prev_frame_num_offset; ///< for POC type 2
303 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
306 * frame_num for frames or 2*frame_num for field pics.
311 * max_frame_num or 2*max_frame_num for field pics.
315 //Weighted pred stuff
317 int use_weight_chroma;
318 int luma_log2_weight_denom;
319 int chroma_log2_weight_denom;
320 int luma_weight[2][48];
321 int luma_offset[2][48];
322 int chroma_weight[2][48][2];
323 int chroma_offset[2][48][2];
324 int implicit_weight[48][48];
327 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
328 int slice_alpha_c0_offset;
329 int slice_beta_offset;
331 int redundant_pic_count;
333 int direct_spatial_mv_pred;
334 int dist_scale_factor[16];
335 int dist_scale_factor_field[32];
336 int map_col_to_list0[2][16];
337 int map_col_to_list0_field[2][32];
340 * num_ref_idx_l0/1_active_minus1 + 1
342 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
343 Picture *short_ref[32];
344 Picture *long_ref[32];
345 Picture default_ref_list[2][32];
346 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
347 Picture *delayed_pic[16]; //FIXME size?
348 Picture *delayed_output_pic;
351 * memory management control operations buffer.
353 MMCO mmco[MAX_MMCO_COUNT];
356 int long_ref_count; ///< number of actual long term references
357 int short_ref_count; ///< number of actual short term references
360 GetBitContext intra_gb;
361 GetBitContext inter_gb;
362 GetBitContext *intra_gb_ptr;
363 GetBitContext *inter_gb_ptr;
365 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
371 uint8_t cabac_state[460];
374 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
379 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
380 uint8_t *chroma_pred_mode_table;
381 int last_qscale_diff;
382 int16_t (*mvd_table[2])[2];
383 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
384 uint8_t *direct_table;
385 uint8_t direct_cache[5*8];
387 uint8_t zigzag_scan[16];
388 uint8_t zigzag_scan8x8[64];
389 uint8_t zigzag_scan8x8_cavlc[64];
390 uint8_t field_scan[16];
391 uint8_t field_scan8x8[64];
392 uint8_t field_scan8x8_cavlc[64];
393 const uint8_t *zigzag_scan_q0;
394 const uint8_t *zigzag_scan8x8_q0;
395 const uint8_t *zigzag_scan8x8_cavlc_q0;
396 const uint8_t *field_scan_q0;
397 const uint8_t *field_scan8x8_q0;
398 const uint8_t *field_scan8x8_cavlc_q0;
403 static VLC coeff_token_vlc[4];
404 static VLC chroma_dc_coeff_token_vlc;
406 static VLC total_zeros_vlc[15];
407 static VLC chroma_dc_total_zeros_vlc[3];
409 static VLC run_vlc[6];
412 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
413 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
414 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
415 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
417 static always_inline uint32_t pack16to32(int a, int b){
418 #ifdef WORDS_BIGENDIAN
419 return (b&0xFFFF) + (a<<16);
421 return (a&0xFFFF) + (b<<16);
/**
 * Fills a rectangle of w*h elements at vp with val, using the widest
 * aligned stores available for the given width and element size.
 * @param vp top-left corner of the rectangle
 * @param w width of the rectangle, should be a constant
 * @param h height of the rectangle, should be a constant
 * @param stride distance in bytes between the starts of consecutive rows
 * @param val the value to fill with; replicated across bytes when size==1
 * @param size the size of val (1 or 4), should be a constant
 */
431 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
432 uint8_t *p= (uint8_t*)vp;
433 assert(size==1 || size==4);
/* NOTE(review): the stores below type-pun p through uint16/32/64 pointers;
 * this relies on the destination alignment asserted here and on lax
 * compiler aliasing behavior. */
439 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
440 assert((stride&(w-1))==0);
/* 2-byte rows: replicate the byte for size==1, one 16-bit store per row */
442 const uint16_t v= size==4 ? val : val*0x0101;
443 *(uint16_t*)(p + 0*stride)= v;
445 *(uint16_t*)(p + 1*stride)= v;
447 *(uint16_t*)(p + 2*stride)=
448 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte rows: one 32-bit store per row */
450 const uint32_t v= size==4 ? val : val*0x01010101;
451 *(uint32_t*)(p + 0*stride)= v;
453 *(uint32_t*)(p + 1*stride)= v;
455 *(uint32_t*)(p + 2*stride)=
456 *(uint32_t*)(p + 3*stride)= v;
458 //gcc can't optimize 64bit math on x86_32
459 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
/* 8-byte rows on 64-bit targets: one 64-bit store per row */
460 const uint64_t v= val*0x0100000001ULL;
461 *(uint64_t*)(p + 0*stride)= v;
463 *(uint64_t*)(p + 1*stride)= v;
465 *(uint64_t*)(p + 2*stride)=
466 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte rows on 64-bit targets: two 64-bit stores per row */
468 const uint64_t v= val*0x0100000001ULL;
469 *(uint64_t*)(p + 0+0*stride)=
470 *(uint64_t*)(p + 8+0*stride)=
471 *(uint64_t*)(p + 0+1*stride)=
472 *(uint64_t*)(p + 8+1*stride)= v;
474 *(uint64_t*)(p + 0+2*stride)=
475 *(uint64_t*)(p + 8+2*stride)=
476 *(uint64_t*)(p + 0+3*stride)=
477 *(uint64_t*)(p + 8+3*stride)= v;
/* 8-byte rows on 32-bit targets: two 32-bit stores per row */
479 *(uint32_t*)(p + 0+0*stride)=
480 *(uint32_t*)(p + 4+0*stride)= val;
482 *(uint32_t*)(p + 0+1*stride)=
483 *(uint32_t*)(p + 4+1*stride)= val;
485 *(uint32_t*)(p + 0+2*stride)=
486 *(uint32_t*)(p + 4+2*stride)=
487 *(uint32_t*)(p + 0+3*stride)=
488 *(uint32_t*)(p + 4+3*stride)= val;
/* 16-byte rows on 32-bit targets: four 32-bit stores per row */
490 *(uint32_t*)(p + 0+0*stride)=
491 *(uint32_t*)(p + 4+0*stride)=
492 *(uint32_t*)(p + 8+0*stride)=
493 *(uint32_t*)(p +12+0*stride)=
494 *(uint32_t*)(p + 0+1*stride)=
495 *(uint32_t*)(p + 4+1*stride)=
496 *(uint32_t*)(p + 8+1*stride)=
497 *(uint32_t*)(p +12+1*stride)= val;
499 *(uint32_t*)(p + 0+2*stride)=
500 *(uint32_t*)(p + 4+2*stride)=
501 *(uint32_t*)(p + 8+2*stride)=
502 *(uint32_t*)(p +12+2*stride)=
503 *(uint32_t*)(p + 0+3*stride)=
504 *(uint32_t*)(p + 4+3*stride)=
505 *(uint32_t*)(p + 8+3*stride)=
506 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Fills the neighbour caches of the current macroblock (intra prediction
 * modes, non-zero coefficient counts, motion vectors, reference indices,
 * mvd values and direct-mode flags) from the state stored for the
 * neighbouring macroblocks.
 * @param h           decoder context
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when the caches are filled for the deblocking
 *                    filter rather than for decoding (see the for_deblock
 *                    conditions below, which skip/alter some parts)
 */
513 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
514 MpegEncContext * const s = &h->s;
515 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
516 int topleft_xy, top_xy, topright_xy, left_xy[2];
517 int topleft_type, top_type, topright_type, left_type[2];
521 //FIXME deblocking could skip the intra and nnz parts.
522 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
525 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* neighbour macroblock indices in the simple progressive-scan layout */
527 top_xy = mb_xy - s->mb_stride;
528 topleft_xy = top_xy - 1;
529 topright_xy= top_xy + 1;
530 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are addressed through the enclosing macroblock pair;
 * *_pair_xy index the top macroblock of each pair */
540 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
541 const int top_pair_xy = pair_xy - s->mb_stride;
542 const int topleft_pair_xy = top_pair_xy - 1;
543 const int topright_pair_xy = top_pair_xy + 1;
544 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
545 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
546 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
547 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
548 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
549 const int bottom = (s->mb_y & 1);
550 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
552 ? !curr_mb_frame_flag // bottom macroblock
553 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
555 top_xy -= s->mb_stride;
558 ? !curr_mb_frame_flag // bottom macroblock
559 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
561 topleft_xy -= s->mb_stride;
564 ? !curr_mb_frame_flag // bottom macroblock
565 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
567 topright_xy -= s->mb_stride;
569 if (left_mb_frame_flag != curr_mb_frame_flag) {
570 left_xy[1] = left_xy[0] = pair_xy - 1;
571 if (curr_mb_frame_flag) {
592 left_xy[1] += s->mb_stride;
605 h->top_mb_xy = top_xy;
606 h->left_mb_xy[0] = left_xy[0];
607 h->left_mb_xy[1] = left_xy[1];
/* NOTE(review): this "<255" slice-table variant appears to be the
 * deblocking path; the decoding path below uses same-slice checks instead
 * — confirm against the elided branch condition. */
611 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
612 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
613 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
615 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
617 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
619 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
620 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
621 if(USES_LIST(mb_type,list)){
622 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
623 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
624 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
625 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
631 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
632 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
634 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
635 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
637 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
638 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* decoding path: neighbours outside the current slice are treated as
 * unavailable (type 0) */
643 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
644 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
645 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
646 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
647 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra: compute the availability bitmasks of the neighbouring samples,
 * then clear bits for neighbours that are missing or (with constrained
 * intra prediction) inter-coded */
650 if(IS_INTRA(mb_type)){
651 h->topleft_samples_available=
652 h->top_samples_available=
653 h->left_samples_available= 0xFFFF;
654 h->topright_samples_available= 0xEEEA;
656 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
657 h->topleft_samples_available= 0xB3FF;
658 h->top_samples_available= 0x33FF;
659 h->topright_samples_available= 0x26EA;
662 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
663 h->topleft_samples_available&= 0xDF5F;
664 h->left_samples_available&= 0x5F5F;
668 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
669 h->topleft_samples_available&= 0x7FFF;
671 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
672 h->topright_samples_available&= 0xFBFF;
/* cache the intra4x4 prediction modes of the top/left neighbours */
674 if(IS_INTRA4x4(mb_type)){
675 if(IS_INTRA4x4(top_type)){
676 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
677 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
678 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
679 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
682 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
687 h->intra4x4_pred_mode_cache[4+8*0]=
688 h->intra4x4_pred_mode_cache[5+8*0]=
689 h->intra4x4_pred_mode_cache[6+8*0]=
690 h->intra4x4_pred_mode_cache[7+8*0]= pred;
693 if(IS_INTRA4x4(left_type[i])){
694 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
695 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
698 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
703 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
704 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
719 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
/* top row of the non-zero coefficient count cache */
721 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
722 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
723 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
724 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
726 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
727 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
729 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
730 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
733 h->non_zero_count_cache[4+8*0]=
734 h->non_zero_count_cache[5+8*0]=
735 h->non_zero_count_cache[6+8*0]=
736 h->non_zero_count_cache[7+8*0]=
738 h->non_zero_count_cache[1+8*0]=
739 h->non_zero_count_cache[2+8*0]=
741 h->non_zero_count_cache[1+8*3]=
742 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* left column of the non-zero coefficient count cache */
746 for (i=0; i<2; i++) {
748 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
749 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
750 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
751 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
753 h->non_zero_count_cache[3+8*1 + 2*8*i]=
754 h->non_zero_count_cache[3+8*2 + 2*8*i]=
755 h->non_zero_count_cache[0+8*1 + 8*i]=
756 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* cached coded-block patterns of the top/left neighbours */
763 h->top_cbp = h->cbp_table[top_xy];
764 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
772 } else if(IS_INTRA(mb_type)) {
778 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
781 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter/direct: fill the motion vector and reference index caches from
 * the neighbouring macroblocks */
786 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
788 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
789 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
790 /*if(!h->mv_cache_clean[list]){
791 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
792 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
793 h->mv_cache_clean[list]= 1;
797 h->mv_cache_clean[list]= 0;
799 if(USES_LIST(top_type, list)){
800 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
801 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
805 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
806 h->ref_cache[list][scan8[0] + 0 - 1*8]=
807 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
808 h->ref_cache[list][scan8[0] + 2 - 1*8]=
809 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
814 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
815 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
818 //FIXME unify cleanup or sth
819 if(USES_LIST(left_type[0], list)){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
823 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
824 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
825 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
828 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
829 h->ref_cache[list][scan8[0] - 1 + 0*8]=
830 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
833 if(USES_LIST(left_type[1], list)){
834 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
835 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
837 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
838 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
839 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
842 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
843 h->ref_cache[list][scan8[0] - 1 + 2*8]=
844 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
845 assert((!left_type[0]) == (!left_type[1]));
848 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
851 if(USES_LIST(topleft_type, list)){
852 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
853 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
854 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
855 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
857 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
858 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
861 if(USES_LIST(topright_type, list)){
862 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
863 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
864 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
865 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
867 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
868 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
874 h->ref_cache[list][scan8[5 ]+1] =
875 h->ref_cache[list][scan8[7 ]+1] =
876 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
877 h->ref_cache[list][scan8[4 ]] =
878 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
879 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
880 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
881 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
882 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
883 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
886 /* XXX beurk, Load mvd */
887 if(USES_LIST(top_type, list)){
888 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
891 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
892 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
896 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
897 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
899 if(USES_LIST(left_type[0], list)){
900 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
901 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
902 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
904 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
905 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
907 if(USES_LIST(left_type[1], list)){
908 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
909 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
910 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
912 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
913 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
915 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
916 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
917 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
918 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
919 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the direct-mode flags of the neighbours */
921 if(h->slice_type == B_TYPE){
922 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
924 if(IS_DIRECT(top_type)){
925 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
926 }else if(IS_8X8(top_type)){
927 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
928 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
929 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
931 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
934 if(IS_DIRECT(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
936 else if(IS_8X8(left_type[0]))
937 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
939 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
941 if(IS_DIRECT(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
943 else if(IS_8X8(left_type[1]))
944 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
946 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
952 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
953 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
956 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
957 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
958 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
959 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
960 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
961 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* MBAFF: remap cached refs/mvs of neighbours whose frame/field coding
 * differs from the current MB — vertical mv components are halved or
 * doubled and reference indices shifted accordingly */
963 #define MAP_F2F(idx, mb_type)\
964 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
965 h->ref_cache[list][idx] <<= 1;\
966 h->mv_cache[list][idx][1] /= 2;\
967 h->mvd_cache[list][idx][1] /= 2;\
972 #define MAP_F2F(idx, mb_type)\
973 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
974 h->ref_cache[list][idx] >>= 1;\
975 h->mv_cache[list][idx][1] <<= 1;\
976 h->mvd_cache[list][idx][1] <<= 1;\
986 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
989 static inline void write_back_intra_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
993 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
994 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
995 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
996 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
997 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
998 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
999 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Checks if the top & left blocks are available if needed and changes the
 * dc mode so it only uses the available blocks: each cached intra4x4 mode
 * is remapped through the top[]/left[] tables when the corresponding
 * samples are missing, and an error is logged for modes that require an
 * unavailable neighbour.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
1006 MpegEncContext * const s = &h->s;
/* remap tables: mode -> replacement when top/left samples are missing;
 * negative entries appear to mark unrepairable modes — TODO confirm
 * against the elided error checks */
1007 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1008 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1011 if(!(h->top_samples_available&0x8000)){
1013 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1015 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1018 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1023 if(!(h->left_samples_available&0x8000)){
1025 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1027 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1030 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1036 } //FIXME cleanup like next
/**
 * Checks if the top & left blocks are available if needed and changes the
 * dc mode so it only uses the available blocks. Rejects out-of-range
 * modes (the valid range is 0..6; the error message refers to intra
 * chroma prediction).
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
1042 MpegEncContext * const s = &h->s;
/* remap tables: mode -> replacement when top/left samples are missing */
1043 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1044 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1046 if(mode < 0 || mode > 6) {
1047 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1051 if(!(h->top_samples_available&0x8000)){
1054 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1059 if(!(h->left_samples_available&0x8000)){
1062 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
// NOTE(review): excerpt elides some original lines; code kept byte-identical.
1071 * gets the predicted intra4x4 prediction mode.
// Predicts block n's mode as the minimum of its left and top neighbours'
// cached modes; a negative neighbour (unavailable) forces DC_PRED.
1073 static inline int pred_intra_mode(H264Context *h, int n){
1074 const int index8= scan8[n];
1075 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1076 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1077 const int min= FFMIN(left, top);
1079 tprintf("mode:%d %d min:%d\n", left ,top, min);
1081 if(min<0) return DC_PRED;
// NOTE(review): excerpt elides some original lines (loop headers etc.);
// code kept byte-identical.
// Copies the per-block non-zero coefficient counts from the decode-time
// cache (8-wide layout) back into the per-macroblock non_zero_count table.
1085 static inline void write_back_non_zero_count(H264Context *h){
1086 MpegEncContext * const s = &h->s;
1087 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Entries 0..6: right column / bottom row of luma blocks (used as top/left
// neighbours by the next macroblocks).
1089 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1090 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1091 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1092 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1093 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1094 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1095 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Entries 7..12: chroma block counts (two planes).
1097 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1098 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1099 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1101 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1102 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1103 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1106 // store all luma nnzs, for deblocking
// Packs one "has non-zero coeffs" bit per luma 4x4 block into a 16-bit mask.
1109 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1110 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
// NOTE(review): excerpt elides some original lines; code kept byte-identical.
1115 * gets the predicted number of non zero coefficients.
1116 * @param n block index
// Predicts block n's nnz from the cached left and top neighbour counts
// (the averaging/selection logic between them is elided in this excerpt).
1118 static inline int pred_non_zero_count(H264Context *h, int n){
1119 const int index8= scan8[n];
1120 const int left= h->non_zero_count_cache[index8 - 1];
1121 const int top = h->non_zero_count_cache[index8 - 8];
// i<64 presumably means both neighbours were available — average rounded up.
// TODO(review): confirm against the elided lines that set i.
1124 if(i<64) i= (i+1)>>1;
1126 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// NOTE(review): excerpt elides several original lines; code kept
// byte-identical. No comments are inserted inside the SET_DIAG_MV macro
// body because its lines are backslash-continued.
// Selects the "C" predictor (topright, falling back to topleft) for motion
// vector prediction: sets *C to the chosen mv and returns its ref index.
// Contains MBAFF special cases that rescale mvs between field and frame
// coordinates.
1131 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1132 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1134 /* there is no consistent mapping of mvs to neighboring locations that will
1135 * make mbaff happy, so we can't move all this logic to fill_caches */
1137 MpegEncContext *s = &h->s;
1138 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Scratch slot scan8[0]-2 is used to hold a rescaled mv for the caller.
1140 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1141 *C = h->mv_cache[list][scan8[0]-2];
1144 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1145 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1146 if(IS_INTERLACED(mb_types[topright_xy])){
1147 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1148 const int x4 = X4, y4 = Y4;\
1149 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1150 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1151 return LIST_NOT_USED;\
1152 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1153 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1154 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1155 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
// frame MB reading a field neighbour: double mv.y, halve ref.
1157 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Topright missing: try the topleft neighbour instead (left column only).
1160 if(topright_ref == PART_NOT_AVAILABLE
1161 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1162 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1164 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1165 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1168 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1169 && i >= scan8[0]+8){
1170 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1171 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF / normal path: use topright if available, else topleft.
1177 if(topright_ref != PART_NOT_AVAILABLE){
1178 *C= h->mv_cache[list][ i - 8 + part_width ];
1179 return topright_ref;
1181 tprintf("topright MV not available\n");
1183 *C= h->mv_cache[list][ i - 8 - 1 ];
1184 return h->ref_cache[list][ i - 8 - 1 ];
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
1189 * gets the predicted MV.
1190 * @param n the block index
1191 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1192 * @param mx the x component of the predicted motion vector
1193 * @param my the y component of the predicted motion vector
// Standard H.264 median motion vector prediction from neighbours
// A (left), B (top) and C (diagonal, via fetch_diagonal_mv).
1195 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1196 const int index8= scan8[n];
1197 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1198 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1199 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1200 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1202 int diagonal_ref, match_count;
1204 assert(part_width==1 || part_width==2 || part_width==4);
1214 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
// Count how many neighbours use the same reference picture as this block.
1215 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1216 tprintf("pred_motion match_count=%d\n", match_count);
1217 if(match_count > 1){ //most common
1218 *mx= mid_pred(A[0], B[0], C[0]);
1219 *my= mid_pred(A[1], B[1], C[1]);
// Exactly one neighbour matches: use its mv directly (branches elided).
1220 }else if(match_count==1){
1224 }else if(top_ref==ref){
// No match: only A available -> use A, otherwise fall back to the median.
1232 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1236 *mx= mid_pred(A[0], B[0], C[0]);
1237 *my= mid_pred(A[1], B[1], C[1]);
1241 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
1245 * gets the directionally predicted 16x8 MV.
1246 * @param n the block index
1247 * @param mx the x component of the predicted motion vector
1248 * @param my the y component of the predicted motion vector
// 16x8 partitions: the top half prefers the B (top) neighbour and the bottom
// half prefers the A (left) neighbour when that neighbour uses the same ref;
// otherwise it falls back to the generic median predictor.
1250 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1252 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1253 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1255 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Bottom half (block index 8): left neighbour.
1263 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1264 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1266 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1268 if(left_ref == ref){
// Fallback: regular median prediction.
1276 pred_motion(h, n, 4, list, ref, mx, my);
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
1280 * gets the directionally predicted 8x16 MV.
1281 * @param n the block index
1282 * @param mx the x component of the predicted motion vector
1283 * @param my the y component of the predicted motion vector
// 8x16 partitions: the left half prefers the A (left) neighbour, the right
// half prefers the diagonal C neighbour, falling back to median prediction.
1285 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1287 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1288 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1290 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1292 if(left_ref == ref){
// Right half (block index 4): diagonal neighbour via fetch_diagonal_mv.
1301 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1303 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1305 if(diagonal_ref == ref){
// Fallback: regular median prediction.
1313 pred_motion(h, n, 2, list, ref, mx, my);
// NOTE(review): excerpt elides some original lines; code kept byte-identical.
// P_Skip motion prediction: the mv is forced to zero when a neighbour is
// missing or a zero-mv/ref-0 neighbour exists (branch body elided),
// otherwise normal median prediction of the 16x16 block is used.
1316 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1317 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1318 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1320 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1322 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1323 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1324 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1330 pred_motion(h, 0, 4, 0, 0, mx, my);
// NOTE(review): excerpt elides some original lines; code kept byte-identical.
// Precomputes the temporal-direct distance scale factors (per list-0 ref)
// from picture order counts: factor = clip((tb*tx + 32)>>6, -1024, 1023)
// with tx = (16384 + |td|/2)/td, as in the H.264 temporal direct derivation.
1335 static inline void direct_dist_scale_factor(H264Context * const h){
1336 const int poc = h->s.current_picture_ptr->poc;
1337 const int poc1 = h->ref_list[1][0].poc;
1339 for(i=0; i<h->ref_count[0]; i++){
1340 int poc0 = h->ref_list[0][i].poc;
1341 int td = clip(poc1 - poc0, -128, 127);
1342 if(td == 0 /* FIXME || pic0 is a long-term ref */){
// td==0: co-located distance is zero, use the identity scale (256 == 1.0).
1343 h->dist_scale_factor[i] = 256;
1345 int tb = clip(poc - poc0, -128, 127);
1346 int tx = (16384 + (FFABS(td) >> 1)) / td;
1347 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
// Field variant: duplicate each frame factor for the two field parities.
1351 for(i=0; i<h->ref_count[0]; i++){
1352 h->dist_scale_factor_field[2*i] =
1353 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
// NOTE(review): excerpt elides some original lines; code kept byte-identical.
// Records the current picture's ref counts/POCs and builds
// map_col_to_list0[]: for each ref of the co-located (list-1[0]) picture,
// the index of the list-0 ref with a matching POC (0 when missing).
1357 static inline void direct_ref_list_init(H264Context * const h){
1358 MpegEncContext * const s = &h->s;
1359 Picture * const ref1 = &h->ref_list[1][0];
1360 Picture * const cur = s->current_picture_ptr;
1362 if(cur->pict_type == I_TYPE)
1363 cur->ref_count[0] = 0;
1364 if(cur->pict_type != B_TYPE)
1365 cur->ref_count[1] = 0;
1366 for(list=0; list<2; list++){
1367 cur->ref_count[list] = h->ref_count[list];
1368 for(j=0; j<h->ref_count[list]; j++)
1369 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// Mapping is only needed for temporal direct prediction in B slices.
1371 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1373 for(list=0; list<2; list++){
1374 for(i=0; i<ref1->ref_count[list]; i++){
1375 const int poc = ref1->ref_poc[list][i];
1376 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1377 for(j=0; j<h->ref_count[list]; j++)
1378 if(h->ref_list[list][j].poc == poc){
1379 h->map_col_to_list0[list][i] = j;
// Field variant of the mapping (two parities per frame index).
1385 for(list=0; list<2; list++){
1386 for(i=0; i<ref1->ref_count[list]; i++){
1387 j = h->map_col_to_list0[list][i];
1388 h->map_col_to_list0_field[list][2*i] = 2*j;
1389 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
// NOTE(review): this function is heavily elided in the excerpt (many loop
// headers, conditions, and closing braces are missing). All visible code is
// kept byte-identical; comments describe only what the visible lines show.
// Derives B-slice direct-mode motion (spatial or temporal), filling the
// ref_cache/mv_cache rectangles and possibly refining *mb_type/sub_mb_type.
1395 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1396 MpegEncContext * const s = &h->s;
1397 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1398 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1399 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// Co-located macroblock data from the first list-1 reference picture.
1400 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1401 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1402 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1403 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1404 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1405 const int is_b8x8 = IS_8X8(*mb_type);
1409 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Pick direct-mode partition size from the co-located MB type.
1410 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1411 /* FIXME save sub mb types from previous frames (or derive from MVs)
1412 * so we know exactly what block size to use */
1413 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1414 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1415 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1419 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1420 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1423 *mb_type |= MB_TYPE_DIRECT2;
1425 *mb_type |= MB_TYPE_INTERLACED;
1427 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- Spatial direct prediction ----
1429 if(h->direct_spatial_mv_pred){
1434 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1436 /* ref = min(neighbors) */
1437 for(list=0; list<2; list++){
1438 int refa = h->ref_cache[list][scan8[0] - 1];
1439 int refb = h->ref_cache[list][scan8[0] - 8];
1440 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1442 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1444 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1446 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// Neither list has a usable ref: direct mv is (0,0) with ref 0 in both lists.
1452 if(ref[0] < 0 && ref[1] < 0){
1453 ref[0] = ref[1] = 0;
1454 mv[0][0] = mv[0][1] =
1455 mv[1][0] = mv[1][1] = 0;
1457 for(list=0; list<2; list++){
1459 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1461 mv[list][0] = mv[list][1] = 0;
// Drop the prediction flag of an unused list.
1466 *mb_type &= ~MB_TYPE_P0L1;
1467 sub_mb_type &= ~MB_TYPE_P0L1;
1468 }else if(ref[0] < 0){
1469 *mb_type &= ~MB_TYPE_P0L0;
1470 sub_mb_type &= ~MB_TYPE_P0L0;
// 16x16 direct: fill the whole 4x4 cache rectangle; co-located near-zero
// mvs (|mv| <= 1 with ref 0) force a zero mv per the spatial-direct rule.
1473 if(IS_16X16(*mb_type)){
1474 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1475 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1476 if(!IS_INTRA(mb_type_col)
1477 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1478 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a different behaviour here (bug workaround).
1479 && (h->x264_build>33 || !h->x264_build)))){
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1483 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1489 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1490 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
// 8x8 direct: per-partition version of the same rule.
1493 for(i8=0; i8<4; i8++){
1494 const int x8 = i8&1;
1495 const int y8 = i8>>1;
1497 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1499 h->sub_mb_type[i8] = sub_mb_type;
1501 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1502 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1503 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1504 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1507 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1508 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1509 && (h->x264_build>33 || !h->x264_build)))){
1510 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1511 if(IS_SUB_8X8(sub_mb_type)){
1512 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1513 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1515 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1517 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1520 for(i4=0; i4<4; i4++){
1521 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1522 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1524 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1526 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1532 }else{ /* direct temporal mv pred */
1533 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1534 const int *dist_scale_factor = h->dist_scale_factor;
// Field macroblocks use the field variants of the mapping tables.
1537 if(IS_INTERLACED(*mb_type)){
1538 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1539 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1540 dist_scale_factor = h->dist_scale_factor_field;
// Mixed frame/field between current MB and co-located MB: rescale the
// co-located data pointers and mvs between the two geometries.
1542 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1543 /* FIXME assumes direct_8x8_inference == 1 */
1544 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1545 int mb_types_col[2];
1548 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1549 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1550 | (*mb_type & MB_TYPE_INTERLACED);
1551 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1553 if(IS_INTERLACED(*mb_type)){
1554 /* frame to field scaling */
1555 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1556 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1558 l1ref0 -= 2*h->b8_stride;
1559 l1ref1 -= 2*h->b8_stride;
1560 l1mv0 -= 4*h->b_stride;
1561 l1mv1 -= 4*h->b_stride;
1565 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1566 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1568 *mb_type |= MB_TYPE_16x8;
1570 *mb_type |= MB_TYPE_8x8;
1572 /* field to frame scaling */
1573 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1574 * but in MBAFF, top and bottom POC are equal */
1575 int dy = (s->mb_y&1) ? 1 : 2;
1577 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1578 l1ref0 += dy*h->b8_stride;
1579 l1ref1 += dy*h->b8_stride;
1580 l1mv0 += 2*dy*h->b_stride;
1581 l1mv1 += 2*dy*h->b_stride;
1584 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1586 *mb_type |= MB_TYPE_16x16;
1588 *mb_type |= MB_TYPE_8x8;
// Per-8x8 temporal scaling in the mixed frame/field case.
1591 for(i8=0; i8<4; i8++){
1592 const int x8 = i8&1;
1593 const int y8 = i8>>1;
1595 const int16_t (*l1mv)[2]= l1mv0;
1597 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1599 h->sub_mb_type[i8] = sub_mb_type;
1601 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra co-located block: zero refs and mvs.
1602 if(IS_INTRA(mb_types_col[y8])){
1603 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1604 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1609 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1611 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1613 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1616 scale = dist_scale_factor[ref0];
1617 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
// Scale the co-located mv: l0 mv = scale*mv_col, l1 mv = l0 - mv_col.
1620 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1621 int my_col = (mv_col[1]<<y_shift)/2;
1622 int mx = (scale * mv_col[0] + 128) >> 8;
1623 int my = (scale * my_col + 128) >> 8;
1624 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1625 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1632 /* one-to-one mv scaling */
1634 if(IS_16X16(*mb_type)){
1635 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1636 if(IS_INTRA(mb_type_col)){
1637 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1638 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1639 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1641 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1642 : map_col_to_list0[1][l1ref1[0]];
1643 const int scale = dist_scale_factor[ref0];
1644 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1646 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1647 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1648 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1649 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1650 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
// Per-8x8 (and per-4x4 when needed) temporal scaling, same geometry.
1653 for(i8=0; i8<4; i8++){
1654 const int x8 = i8&1;
1655 const int y8 = i8>>1;
1657 const int16_t (*l1mv)[2]= l1mv0;
1659 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1661 h->sub_mb_type[i8] = sub_mb_type;
1662 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 if(IS_INTRA(mb_type_col)){
1664 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1665 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1670 ref0 = l1ref0[x8 + y8*h->b8_stride];
1672 ref0 = map_col_to_list0[0][ref0];
1674 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1677 scale = dist_scale_factor[ref0];
1679 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1680 if(IS_SUB_8X8(sub_mb_type)){
1681 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1682 int mx = (scale * mv_col[0] + 128) >> 8;
1683 int my = (scale * mv_col[1] + 128) >> 8;
1684 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1685 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1687 for(i4=0; i4<4; i4++){
1688 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1689 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1690 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1691 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1692 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1693 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
// Writes the per-macroblock motion data (mvs, ref indices, and for CABAC the
// mvd and direct tables) from the decode caches back into the picture arrays.
1700 static inline void write_back_motion(H264Context *h, int mb_type){
1701 MpegEncContext * const s = &h->s;
1702 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1703 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// Mark list 0 unused so the skip-cache neighbour logic sees LIST_NOT_USED.
1706 if(!USES_LIST(mb_type, 0))
1707 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1709 for(list=0; list<2; list++){
1711 if(!USES_LIST(mb_type, list))
// Copy 4 rows of 4 mvs per row, two 64-bit stores per row.
1715 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1716 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1718 if( h->pps.cabac ) {
1719 if(IS_SKIP(mb_type))
1720 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1723 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1724 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One ref index per 8x8 partition.
1729 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1730 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1731 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1732 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1733 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B slices with CABAC also persist per-8x8 direct-mode flags.
1737 if(h->slice_type == B_TYPE && h->pps.cabac){
1738 if(IS_8X8(mb_type)){
1739 uint8_t *direct_table = &h->direct_table[b8_xy];
1740 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1741 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1742 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
1748 * Decodes a network abstraction layer unit.
1749 * @param consumed is the number of bytes used as input
1750 * @param length is the length of the array
1751 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1752 * @returns decoded bytes, might be src+1 if no escapes
// Parses the one-byte NAL header, then removes 0x000003 emulation-prevention
// escapes to recover the raw RBSP; escape-free payloads are returned
// in place without copying.
1754 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1758 // src[0]&0x80; //forbidden bit
1759 h->nal_ref_idc= src[0]>>5;
1760 h->nal_unit_type= src[0]&0x1F;
1764 for(i=0; i<length; i++)
1765 printf("%2X ", src[i]);
// Scan for 0x000003 / 0x000000..02 sequences; stepping by 2 is safe because
// an escape needs two consecutive zero bytes.
1767 for(i=0; i+1<length; i+=2){
1768 if(src[i]) continue;
1769 if(i>0 && src[i-1]==0) i--;
1770 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1772 /* startcode, so we must be past the end */
1779 if(i>=length-1){ //no escaped 0
1780 *dst_length= length;
1781 *consumed= length+1; //+1 for the header
// Escapes present: copy into the (reallocated) rbsp buffer without them.
1785 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1786 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
// NOTE(review): excerpt elides several original lines; code kept byte-identical.
1813 * @param src the data which should be escaped
1814 * @param dst the target buffer, dst+1 == src is allowed as a special case
1815 * @param length the length of the src data
1816 * @param dst_length the length of the dst array
1817 * @returns length of escaped data in bytes or -1 if an error occured
// Inverse of decode_nal(): writes the NAL header byte, then inserts
// 0x03 emulation-prevention bytes before any 0x000000..03 sequence.
1819 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1820 int i, escape_count, si, di;
1824 assert(dst_length>0);
// dst[0]: NAL header (ref idc in the top 3 bits, unit type in the low 5).
1826 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1828 if(length==0) return 1;
// First pass: count required escapes (same stride-2 scan as decode_nal).
1831 for(i=0; i<length; i+=2){
1832 if(src[i]) continue;
1833 if(i>0 && src[i-1]==0)
1835 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
// Fast path: nothing to escape, plain copy after the header.
1841 if(escape_count==0){
1843 memcpy(dst+1, src, length);
1847 if(length + escape_count + 1> dst_length)
1850 //this should be damn rare (hopefully)
1852 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1853 temp= h->rbsp_buffer;
1854 //printf("encoding esc\n");
// Second pass: copy, inserting the 0x03 byte (elided) after each 00 00 pair.
1859 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1860 temp[di++]= 0; si++;
1861 temp[di++]= 0; si++;
1863 temp[di++]= src[si++];
1866 temp[di++]= src[si++];
1868 memcpy(dst+1, temp, length+escape_count);
1870 assert(di == length+escape_count);
// NOTE(review): excerpt elides some original lines (the stop-bit write and
// closing brace); code kept byte-identical.
1876 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1878 static void encode_rbsp_trailing(PutBitContext *pb){
// Number of zero bits needed to byte-align after the (elided) stop bit.
1881 length= (-put_bits_count(pb))&7;
1882 if(length) put_bits(pb, length, 0);
// NOTE(review): excerpt elides some original lines (the bit scan and return);
// code kept byte-identical.
1887 * identifies the exact end of the bitstream
1888 * @return the length of the trailing, or 0 if damaged
1890 static int decode_rbsp_trailing(uint8_t *src){
1894 tprintf("rbsp trailing %X\n", v);
// NOTE(review): excerpt elides several original lines (loop headers, the
// temp[] stores of the row pass); code kept byte-identical.
1904 * idct tranforms the 16 dc values and dequantize them.
1905 * @param qp quantization parameter
// 4x4 inverse Hadamard-style transform over the 16 luma DC coefficients,
// which live at strided positions inside the 16x16 block array, followed by
// dequantization with qmul (rounded >>8).
1907 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1910 int temp[16]; //FIXME check if this is a good idea
// x_offset/y_offset: positions of the DC terms within the strided layout.
1911 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1912 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1914 //memset(block, 64, 2*256);
// Row pass: butterflies into temp[] (stores elided in this excerpt).
1917 const int offset= y_offset[i];
1918 const int z0= block[offset+stride*0] + block[offset+stride*4];
1919 const int z1= block[offset+stride*0] - block[offset+stride*4];
1920 const int z2= block[offset+stride*1] - block[offset+stride*5];
1921 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Column pass: butterflies over temp[], dequantize, write back.
1930 const int offset= x_offset[i];
1931 const int z0= temp[4*0+i] + temp[4*2+i];
1932 const int z1= temp[4*0+i] - temp[4*2+i];
1933 const int z2= temp[4*1+i] - temp[4*3+i];
1934 const int z3= temp[4*1+i] + temp[4*3+i];
1936 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1937 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1938 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1939 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
// NOTE(review): excerpt elides several original lines (loop headers, the
// temp[] stores of the row pass); code kept byte-identical.
1945 * dct tranforms the 16 dc values.
1946 * @param qp quantization parameter ??? FIXME
// Forward counterpart of h264_luma_dc_dequant_idct_c: 4x4 Hadamard-style
// transform of the 16 luma DC values (output halved, no quantization here).
1948 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1949 // const int qmul= dequant_coeff[qp][0];
1951 int temp[16]; //FIXME check if this is a good idea
1952 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1953 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Row pass into temp[] (stores elided in this excerpt).
1956 const int offset= y_offset[i];
1957 const int z0= block[offset+stride*0] + block[offset+stride*4];
1958 const int z1= block[offset+stride*0] - block[offset+stride*4];
1959 const int z2= block[offset+stride*1] - block[offset+stride*5];
1960 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Column pass with the final >>1 scaling.
1969 const int offset= x_offset[i];
1970 const int z0= temp[4*0+i] + temp[4*2+i];
1971 const int z1= temp[4*0+i] - temp[4*2+i];
1972 const int z2= temp[4*1+i] - temp[4*3+i];
1973 const int z3= temp[4*1+i] + temp[4*3+i];
1975 block[stride*0 +offset]= (z0 + z3)>>1;
1976 block[stride*2 +offset]= (z1 + z2)>>1;
1977 block[stride*8 +offset]= (z1 - z2)>>1;
1978 block[stride*10+offset]= (z0 - z3)>>1;
// NOTE(review): excerpt elides some original lines (variable declarations and
// the e = a-b style intermediates); code kept byte-identical.
// 2x2 inverse transform + dequantization of the chroma DC coefficients,
// stored at xStride/stride-spaced positions.
1986 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1987 const int stride= 16*2;
1988 const int xStride= 16;
1991 a= block[stride*0 + xStride*0];
1992 b= block[stride*0 + xStride*1];
1993 c= block[stride*1 + xStride*0];
1994 d= block[stride*1 + xStride*1];
// 2x2 butterfly result, dequantized with qmul (>>7).
// NOTE(review): e is computed in elided lines — presumably a combination of
// a/b/d; confirm against the full source.
2001 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
2002 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
2003 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
2004 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// NOTE(review): excerpt elides some original lines (variable declarations and
// intermediates); code kept byte-identical.
// Forward 2x2 transform of the chroma DC coefficients (no quantization).
2008 static void chroma_dc_dct_c(DCTELEM *block){
2009 const int stride= 16*2;
2010 const int xStride= 16;
2013 a= block[stride*0 + xStride*0];
2014 b= block[stride*0 + xStride*1];
2015 c= block[stride*1 + xStride*0];
2016 d= block[stride*1 + xStride*1];
// NOTE(review): e is computed in elided lines; confirm against full source.
2023 block[stride*0 + xStride*0]= (a+c);
2024 block[stride*0 + xStride*1]= (e+b);
2025 block[stride*1 + xStride*0]= (a-c);
2026 block[stride*1 + xStride*1]= (e-b);
2031 * gets the chroma qp.
// Maps luma qscale (+ PPS chroma offset, clipped to 0..51) through the
// chroma_qp[] lookup table.
2033 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2035 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
// NOTE(review): excerpt elides loop headers and closing braces; code kept
// byte-identical.
// Computes the 4x4 H.264 forward transform of the difference src1 - src2
// directly into block[] (row pass, then column pass).
2040 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2042 //FIXME try int temp instead of block
// Row pass over the pixel differences.
2045 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2046 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2047 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2048 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2049 const int z0= d0 + d3;
2050 const int z3= d0 - d3;
2051 const int z1= d1 + d2;
2052 const int z2= d1 - d2;
2054 block[0 + 4*i]= z0 + z1;
2055 block[1 + 4*i]= 2*z3 + z2;
2056 block[2 + 4*i]= z0 - z1;
2057 block[3 + 4*i]= z3 - 2*z2;
// Column pass over the row-transformed coefficients.
2061 const int z0= block[0*4 + i] + block[3*4 + i];
2062 const int z3= block[0*4 + i] - block[3*4 + i];
2063 const int z1= block[1*4 + i] + block[2*4 + i];
2064 const int z2= block[1*4 + i] - block[2*4 + i];
2066 block[0*4 + i]= z0 + z1;
2067 block[1*4 + i]= 2*z3 + z2;
2068 block[2*4 + i]= z0 - z1;
2069 block[3*4 + i]= z3 - 2*z2;
// NOTE(review): excerpt elides several original lines (loop headers, the
// negative-level branches, last_non_zero bookkeeping); code kept byte-identical.
2074 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2075 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
// Quantizes a 4x4 coefficient block in scantable order; intra blocks get a
// larger rounding bias (1/3 vs 1/6). seperate_dc selects special DC handling
// with shifted quantizer precision; returns the last non-zero index.
2076 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2078 const int * const quant_table= quant_coeff[qscale];
2079 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2: range test so near-zero levels quantize to 0 cheaply.
2080 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2081 const unsigned int threshold2= (threshold1<<1);
// DC path A: luma DC (coarser shift, dedicated quant_coeff row qscale+18).
2087 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2088 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2089 const unsigned int dc_threshold2= (dc_threshold1<<1);
2091 int level= block[0]*quant_coeff[qscale+18][0];
2092 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2094 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2097 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2100 // last_non_zero = i;
// DC path B: finer shift variant.
2105 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2106 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2107 const unsigned int dc_threshold2= (dc_threshold1<<1);
2109 int level= block[0]*quant_table[0];
2110 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2112 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2115 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2118 // last_non_zero = i;
// AC coefficients in scan order.
2131 const int j= scantable[i];
2132 int level= block[j]*quant_table[j];
2134 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2135 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2136 if(((unsigned)(level+threshold1))>threshold2){
2138 level= (bias + level)>>QUANT_SHIFT;
2141 level= (bias - level)>>QUANT_SHIFT;
2150 return last_non_zero;
// NOTE(review): closing brace elided in this excerpt; code kept byte-identical.
// 4x4 vertical intra prediction: replicate the 4 pixels above into every row
// (one 32-bit load/store per row; assumes 4-byte-accessible src rows).
2153 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2154 const uint32_t a= ((uint32_t*)(src-stride))[0];
2155 ((uint32_t*)(src+0*stride))[0]= a;
2156 ((uint32_t*)(src+1*stride))[0]= a;
2157 ((uint32_t*)(src+2*stride))[0]= a;
2158 ((uint32_t*)(src+3*stride))[0]= a;
// NOTE(review): closing brace elided in this excerpt; code kept byte-identical.
// 4x4 horizontal intra prediction: each row is filled with its left
// neighbour pixel (x * 0x01010101 broadcasts a byte to 4 lanes).
2161 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2162 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2163 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2164 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2165 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
// NOTE(review): closing brace elided in this excerpt; code kept byte-identical.
// 4x4 DC intra prediction: average of the 4 top and 4 left neighbour pixels
// (rounded), broadcast to all 16 pixels.
2168 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2169 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2170 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2172 ((uint32_t*)(src+0*stride))[0]=
2173 ((uint32_t*)(src+1*stride))[0]=
2174 ((uint32_t*)(src+2*stride))[0]=
2175 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/**
 * 4x4 left-DC intra prediction: used when the top neighbors are not
 * available; averages only the four left samples.
 * @param topright unused for this mode
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i;
    int sum= 2;          /* rounding offset for the >>2 average */
    uint32_t fill;

    for(i=0; i<4; i++)
        sum+= src[-1+i*stride];

    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 top-DC intra prediction: used when the left neighbors are not
 * available; averages only the four samples above the block.
 * @param topright unused for this mode
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i;
    int sum= 2;          /* rounding offset for the >>2 average */
    uint32_t fill;

    for(i=0; i<4; i++)
        sum+= src[i-stride];

    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 flat-DC intra prediction: no neighbors are available, so the
 * block is filled with the mid-gray value 128.
 * @param topright unused for this mode
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t gray= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= gray;
}
// Helpers that load the reconstructed neighbor samples of a 4x4 block into
// named locals: t4..t7 from the block passed as topright, l0..l3 from the
// column left of src, t0..t3 from the row above src.  Each macro ends with a
// line continuation so further code can follow on the invocation line.
2204 #define LOAD_TOP_RIGHT_EDGE\
2205     const int t4= topright[0];\
2206     const int t5= topright[1];\
2207     const int t6= topright[2];\
2208     const int t7= topright[3];\
2210 #define LOAD_LEFT_EDGE\
2211     const int l0= src[-1+0*stride];\
2212     const int l1= src[-1+1*stride];\
2213     const int l2= src[-1+2*stride];\
2214     const int l3= src[-1+3*stride];\
2216 #define LOAD_TOP_EDGE\
2217     const int t0= src[ 0-1*stride];\
2218     const int t1= src[ 1-1*stride];\
2219     const int t2= src[ 2-1*stride];\
2220     const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal-down-right intra prediction.  Each down-right diagonal of
 * the block is a 3-tap filtered value of the edge running from the bottom
 * of the left column through the top-left corner to the end of the top row.
 * @param topright unused for this mode
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    int edge[9];
    int x, y, i;

    /* gather the edge: left column bottom-to-top, corner, then top row */
    for(i=0; i<4; i++)
        edge[3-i]= src[-1+i*stride];
    edge[4]= src[-1-1*stride];
    for(i=0; i<4; i++)
        edge[5+i]= src[i-stride];

    /* pixel (x,y) lies on diagonal x-y; filter the matching edge triple */
    for(y=0; y<4; y++)
        for(x=0; x<4; x++){
            const int c= 4 + x - y;
            src[x+y*stride]= (edge[c-1] + 2*edge[c] + edge[c+1] + 2) >> 2;
        }
}
/**
 * 4x4 diagonal-down-left intra prediction.  Each down-left diagonal is a
 * 3-tap filtered value of the 8 samples above and above-right of the block
 * (the last diagonal repeats the final top-right sample).
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[9];
    int x, y;

    for(x=0; x<4; x++){
        t[x]  = src[x-stride];
        t[4+x]= topright[x];
    }
    t[8]= topright[3];   /* pad so the 3-tap filter stays in range at (3,3) */

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x+y*stride]= (t[x+y] + 2*t[x+y+1] + t[x+y+2] + 2) >> 2;
}
/**
 * 4x4 vertical-right intra prediction.  Even rows use 2-tap half-pel
 * averages of the top edge, odd rows use 3-tap values; rows 2/3 repeat
 * rows 0/1 shifted right by one, and column 0 of the lower rows comes
 * from the left edge.
 * @param topright unused for this mode
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[0-stride], t1= src[1-stride];
    const int t2= src[2-stride], t3= src[3-stride];
    const int l0= src[-1+0*stride], l1= src[-1+1*stride], l2= src[-1+2*stride];
    const int a= (lt + t0 + 1) >> 1;
    const int b= (t0 + t1 + 1) >> 1;
    const int c= (t1 + t2 + 1) >> 1;
    const int d= (t2 + t3 + 1) >> 1;
    const int e= (l0 + 2*lt + t0 + 2) >> 2;
    const int f= (lt + 2*t0 + t1 + 2) >> 2;
    const int g= (t0 + 2*t1 + t2 + 2) >> 2;
    const int k= (t1 + 2*t2 + t3 + 2) >> 2;

    src[0+0*stride]= a; src[1+0*stride]= b; src[2+0*stride]= c; src[3+0*stride]= d;
    src[0+1*stride]= e; src[1+1*stride]= f; src[2+1*stride]= g; src[3+1*stride]= k;
    src[0+2*stride]= (lt + 2*l0 + l1 + 2) >> 2;
    src[1+2*stride]= a; src[2+2*stride]= b; src[3+2*stride]= c;
    src[0+3*stride]= (l0 + 2*l1 + l2 + 2) >> 2;
    src[1+3*stride]= e; src[2+3*stride]= f; src[3+3*stride]= g;
}
/**
 * 4x4 vertical-left intra prediction.  Even rows are 2-tap half-pel
 * averages of the top/top-right edge, odd rows are 3-tap values; rows
 * 2/3 repeat rows 0/1 shifted left by one edge position.
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[7];
    int x;

    for(x=0; x<4; x++)
        t[x]= src[x-stride];
    for(x=0; x<3; x++)
        t[4+x]= topright[x];

    for(x=0; x<4; x++){
        src[x+0*stride]= (t[x  ] + t[x+1] + 1) >> 1;
        src[x+2*stride]= (t[x+1] + t[x+2] + 1) >> 1;
        src[x+1*stride]= (t[x  ] + 2*t[x+1] + t[x+2] + 2) >> 2;
        src[x+3*stride]= (t[x+1] + 2*t[x+2] + t[x+3] + 2) >> 2;
    }
}
/**
 * 4x4 horizontal-up intra prediction.  Interpolates up-going diagonals
 * from the left edge only; positions past the last left sample are
 * filled with that sample (l3).
 * @param topright unused for this mode
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+0*stride]= (l0 + l1 + 1) >> 1;
    src[1+0*stride]= (l0 + 2*l1 + l2 + 2) >> 2;
    src[2+0*stride]=
    src[0+1*stride]= (l1 + l2 + 1) >> 1;
    src[3+0*stride]=
    src[1+1*stride]= (l1 + 2*l2 + l3 + 2) >> 2;
    src[2+1*stride]=
    src[0+2*stride]= (l2 + l3 + 1) >> 1;
    src[3+1*stride]=
    src[1+2*stride]= (l2 + 3*l3 + 2) >> 2;
    /* everything below/right of the last interpolated diagonal is l3 */
    src[2+2*stride]=
    src[3+2*stride]=
    src[0+3*stride]=
    src[1+3*stride]=
    src[2+3*stride]=
    src[3+3*stride]= l3;
}
/**
 * 4x4 horizontal-down intra prediction.  Mixes 2-tap and 3-tap filtered
 * values of the left/top-left/top edge; each row repeats the previous
 * row shifted right by two.
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[0-stride], t1= src[1-stride], t2= src[2-stride];
    const int l0= src[-1+0*stride], l1= src[-1+1*stride];
    const int l2= src[-1+2*stride], l3= src[-1+3*stride];
    const int p0= (lt + l0 + 1) >> 1;
    const int p1= (l0 + 2*lt + t0 + 2) >> 2;
    const int p2= (l0 + l1 + 1) >> 1;
    const int p3= (lt + 2*l0 + l1 + 2) >> 2;
    const int p4= (l1 + l2 + 1) >> 1;
    const int p5= (l0 + 2*l1 + l2 + 2) >> 2;

    src[0+0*stride]= p0;
    src[1+0*stride]= p1;
    src[2+0*stride]= (lt + 2*t0 + t1 + 2) >> 2;
    src[3+0*stride]= (t0 + 2*t1 + t2 + 2) >> 2;
    src[0+1*stride]= p2; src[1+1*stride]= p3; src[2+1*stride]= p0; src[3+1*stride]= p1;
    src[0+2*stride]= p4; src[1+2*stride]= p5; src[2+2*stride]= p2; src[3+2*stride]= p3;
    src[0+3*stride]= (l2 + l3 + 1) >> 1;
    src[1+3*stride]= (l1 + 2*l2 + l3 + 2) >> 2;
    src[2+3*stride]= p4; src[3+3*stride]= p5;
}
/**
 * 16x16 vertical intra prediction: copy the reconstructed row above the
 * macroblock into all 16 rows (4 aligned 32-bit words per row).
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int y, x;

    for(y=0; y<16; y++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= ((uint32_t*)(src-stride))[x];
}
/**
 * 16x16 horizontal intra prediction: each row is filled with its left
 * neighbor sample, replicated into four 32-bit words.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y, x;

    for(y=0; y<16; y++){
        const uint32_t row= src[-1+y*stride]*0x01010101U;
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= row;
    }
}
/**
 * 16x16 DC intra prediction: fill the macroblock with the rounded
 * average of the 16 left and 16 top neighbor samples.
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, x;
    int sum= 16;         /* rounding offset for the >>5 average */
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[-1+i*stride] + src[i-stride];

    fill= (uint32_t)(sum>>5) * 0x01010101U;
    for(i=0; i<16; i++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+i*stride))[x]= fill;
}
/**
 * 16x16 left-DC intra prediction: top neighbors unavailable, so the
 * fill value averages only the 16 left samples.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, x;
    int sum= 8;          /* rounding offset for the >>4 average */
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[-1+i*stride];

    fill= (uint32_t)(sum>>4) * 0x01010101U;
    for(i=0; i<16; i++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+i*stride))[x]= fill;
}
/**
 * 16x16 top-DC intra prediction: left neighbors unavailable, so the
 * fill value averages only the 16 samples above the macroblock.
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, x;
    int sum= 8;          /* rounding offset for the >>4 average */
    uint32_t fill;

    for(i=0; i<16; i++)
        sum+= src[i-stride];

    fill= (uint32_t)(sum>>4) * 0x01010101U;
    for(i=0; i<16; i++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+i*stride))[x]= fill;
}
/**
 * 16x16 flat-DC intra prediction: no neighbors available, fill the
 * whole macroblock with mid-gray (128).
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t gray= 0x01010101U*128U;
    int y, x;

    for(y=0; y<16; y++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= gray;
}
// 16x16 plane intra prediction shared between H.264 and SVQ3: fits a linear
// gradient through the top and left neighbor samples.  H and V are the
// horizontal/vertical gradient estimates; cm clips the results to 0..255 via
// the cropTbl lookup table (declared elsewhere in this file).
// NOTE(review): this extraction is missing some interior lines (loop setup of
// the per-row accumulator) — do not use this text as a complete reference.
2451 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2454   uint8_t *cm = cropTbl + MAX_NEG_CROP;
2455   const uint8_t * const src0 = src+7-stride;
2456   const uint8_t *src1 = src+8*stride-1;
2457   const uint8_t *src2 = src1-2*stride;    // == src+6*stride-1;
2458   int H = src0[1] - src0[-1];
2459   int V = src1[0] - src2[ 0];
// weighted sum of symmetric sample differences around the block center
2460   for(k=2; k<=8; ++k) {
2461     src1 += stride; src2 -= stride;
2462     H += k*(src0[k] - src0[-k]);
2463     V += k*(src1[0] - src2[ 0]);
// SVQ3 branch: different gradient scaling, and H/V are swapped below
2466   H = ( 5*(H/4) ) / 16;
2467   V = ( 5*(V/4) ) / 16;
2469   /* required for 100% accuracy */
2470   i = H; H = V; V = i;
// H.264 branch: standard (5*x+32)>>6 gradient scaling
2472   H = ( 5*H+32 ) >> 6;
2473   V = ( 5*V+32 ) >> 6;
// a = predicted value at the bottom-right anchor, walked back per pixel
2476   a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2477   for(j=16; j>0; --j) {
2480     for(i=-16; i<0; i+=4) {
2481       src[16+i] = cm[ (b    ) >> 5 ];
2482       src[17+i] = cm[ (b+  H) >> 5 ];
2483       src[18+i] = cm[ (b+2*H) >> 5 ];
2484       src[19+i] = cm[ (b+3*H) >> 5 ];
// H.264 16x16 plane prediction: the shared helper with svq3 mode disabled.
2491 static void pred16x16_plane_c(uint8_t *src, int stride){
2492     pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 (chroma) vertical intra prediction: replicate the row above the
 * block into all 8 rows, two aligned 32-bit words per row.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t left_half = ((uint32_t*)(src-stride))[0];
    const uint32_t right_half= ((uint32_t*)(src-stride))[1];
    int y;

    for(y=0; y<8; y++){
        ((uint32_t*)(src+y*stride))[0]= left_half;
        ((uint32_t*)(src+y*stride))[1]= right_half;
    }
}
/**
 * 8x8 (chroma) horizontal intra prediction: fill each row with its left
 * neighbor sample.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<8; y++){
        const uint32_t row= src[-1+y*stride]*0x01010101U;
        ((uint32_t*)(src+y*stride))[0]= row;
        ((uint32_t*)(src+y*stride))[1]= row;
    }
}
/**
 * 8x8 (chroma) flat-DC intra prediction: no neighbors available, fill
 * the block with mid-gray (128).
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t gray= 0x01010101U*128U;
    int y;

    for(y=0; y<8; y++){
        ((uint32_t*)(src+y*stride))[0]= gray;
        ((uint32_t*)(src+y*stride))[1]= gray;
    }
}
/**
 * 8x8 (chroma) left-DC intra prediction: the top half of the block uses
 * the average of the upper four left samples, the bottom half uses the
 * lower four.
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int top_sum= 0, bot_sum= 0;
    uint32_t top_fill, bot_fill;

    for(i=0; i<4; i++){
        top_sum+= src[-1+ i   *stride];
        bot_sum+= src[-1+(i+4)*stride];
    }
    top_fill= 0x01010101U*(uint32_t)((top_sum + 2)>>2);
    bot_fill= 0x01010101U*(uint32_t)((bot_sum + 2)>>2);

    for(i=0; i<8; i++){
        const uint32_t v= i<4 ? top_fill : bot_fill;
        ((uint32_t*)(src+i*stride))[0]= v;
        ((uint32_t*)(src+i*stride))[1]= v;
    }
}
/**
 * 8x8 (chroma) top-DC intra prediction: the left half of every row uses
 * the average of the first four top samples, the right half the last
 * four.
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int left_sum= 0, right_sum= 0;
    uint32_t left_fill, right_fill;

    for(i=0; i<4; i++){
        left_sum += src[i  -stride];
        right_sum+= src[4+i-stride];
    }
    left_fill = 0x01010101U*(uint32_t)((left_sum  + 2)>>2);
    right_fill= 0x01010101U*(uint32_t)((right_sum + 2)>>2);

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= left_fill;
        ((uint32_t*)(src+i*stride))[1]= right_fill;
    }
}
/**
 * 8x8 (chroma) DC intra prediction with all neighbors available.  The
 * block is split into four 4x4 quadrants: top-left averages its left and
 * top neighbors, top-right its top neighbors, bottom-left its left
 * neighbors, bottom-right the combination of the latter two.
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum_tl= 0, sum_tr= 0, sum_bl= 0;
    uint32_t quad[4];

    for(i=0; i<4; i++){
        sum_tl+= src[-1+i*stride] + src[i-stride];
        sum_tr+= src[4+i-stride];
        sum_bl+= src[-1+(i+4)*stride];
    }
    quad[0]= 0x01010101U*(uint32_t)((sum_tl + 4)>>3);          /* top-left */
    quad[1]= 0x01010101U*(uint32_t)((sum_tr + 2)>>2);          /* top-right */
    quad[2]= 0x01010101U*(uint32_t)((sum_bl + 2)>>2);          /* bottom-left */
    quad[3]= 0x01010101U*(uint32_t)((sum_tr + sum_bl + 4)>>3); /* bottom-right */

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= quad[i<4 ? 0 : 2];
        ((uint32_t*)(src+i*stride))[1]= quad[i<4 ? 1 : 3];
    }
}
// 8x8 (chroma) plane intra prediction: fits a linear gradient through the
// top and left neighbor samples, clipped to 0..255 via the cropTbl lookup
// table (declared elsewhere in this file).
// NOTE(review): this extraction is missing some interior lines (the per-row
// accumulator setup inside the fill loop) — not a complete reference.
2594 static void pred8x8_plane_c(uint8_t *src, int stride){
2597   uint8_t *cm = cropTbl + MAX_NEG_CROP;
2598   const uint8_t * const src0 = src+3-stride;
2599   const uint8_t *src1 = src+4*stride-1;
2600   const uint8_t *src2 = src1-2*stride;    // == src+2*stride-1;
2601   int H = src0[1] - src0[-1];
2602   int V = src1[0] - src2[ 0];
// weighted sum of symmetric sample differences around the block center
2603   for(k=2; k<=4; ++k) {
2604     src1 += stride; src2 -= stride;
2605     H += k*(src0[k] - src0[-k]);
2606     V += k*(src1[0] - src2[ 0]);
2608   H = ( 17*H+16 ) >> 5;
2609   V = ( 17*V+16 ) >> 5;
// a = anchor value at the block corner, adjusted per row/column below
2611   a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2612   for(j=8; j>0; --j) {
2615     src[0] = cm[ (b    ) >> 5 ];
2616     src[1] = cm[ (b+  H) >> 5 ];
2617     src[2] = cm[ (b+2*H) >> 5 ];
2618     src[3] = cm[ (b+3*H) >> 5 ];
2619     src[4] = cm[ (b+4*H) >> 5 ];
2620     src[5] = cm[ (b+5*H) >> 5 ];
2621     src[6] = cm[ (b+6*H) >> 5 ];
2622     src[7] = cm[ (b+7*H) >> 5 ];
// Helper macros for the 8x8 luma (high-profile) prediction functions below.
// SRC addresses a sample relative to the block origin.
2627 #define SRC(x,y) src[(x)+(y)*stride]
// PL body: each left sample l1..l6 is a 3-tap filtered value of its vertical
// neighbors (part of the H.264 8x8 reference-sample filtering).
2629     const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
// Load and filter the 8 left edge samples into l0..l7; l0/l7 handle the
// missing outside neighbors (top-left availability, bottom clamp).
2630 #define PREDICT_8x8_LOAD_LEFT \
2631     const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2632                      + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2633     PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2634     const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
// PT body: horizontal 3-tap filtering of the top edge samples t1..t6.
2637     const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// Load and filter the 8 top edge samples into t0..t7, respecting
// top-left/top-right availability at the ends.
2638 #define PREDICT_8x8_LOAD_TOP \
2639     const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2640                      + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2641     PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2642     const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2643                      + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
// PTR body: filtering for the top-right extension samples t8..t14.
2646     t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// Load t8..t15; when the top-right block is unavailable they are all
// replicated from the last top sample.
2647 #define PREDICT_8x8_LOAD_TOPRIGHT \
2648     int t8, t9, t10, t11, t12, t13, t14, t15; \
2649     if(has_topright) { \
2650         PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2651         t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2652     } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
// Filtered top-left corner sample.
2654 #define PREDICT_8x8_LOAD_TOPLEFT \
2655     const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
// Fill the whole 8x8 block with the 32-bit replicated DC value v.
2657 #define PREDICT_8x8_DC(v) \
2659     for( y = 0; y < 8; y++ ) { \
2660         ((uint32_t*)src)[0] = \
2661         ((uint32_t*)src)[1] = v; \
// 8x8 luma flat-DC prediction: no neighbors available, fill with mid-gray.
2665 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2667     PREDICT_8x8_DC(0x80808080);
// 8x8 luma left-DC prediction: average of the eight filtered left samples.
2669 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2671     PREDICT_8x8_LOAD_LEFT;
2672     const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
// 8x8 luma top-DC prediction: average of the eight filtered top samples.
2675 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677     PREDICT_8x8_LOAD_TOP;
2678     const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction: average of all sixteen filtered left+top samples.
2681 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2683     PREDICT_8x8_LOAD_LEFT;
2684     PREDICT_8x8_LOAD_TOP;
2685     const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2686                           +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
// 8x8 luma horizontal prediction: each row is filled with its filtered left
// sample; ROW expands to the two 32-bit stores for one row.
2689 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2691     PREDICT_8x8_LOAD_LEFT;
2692 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2693                ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2694     ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
// 8x8 luma vertical prediction: the filtered top row (written back to the
// line above by code missing from this extraction) is copied into all
// eight rows with 64-bit stores.
2697 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2700     PREDICT_8x8_LOAD_TOP;
2709     for( y = 1; y < 8; y++ )
2710         *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
// 8x8 luma diagonal-down-left prediction: every down-left diagonal is a
// 3-tap filter of consecutive filtered top/top-right samples t0..t15.
2712 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2714     PREDICT_8x8_LOAD_TOP;
2715     PREDICT_8x8_LOAD_TOPRIGHT;
2716     SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2717     SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2718     SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2719     SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2720     SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2721     SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2722     SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2723     SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2724     SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2725     SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2726     SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2727     SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2728     SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2729     SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
// the last corner repeats the final extension sample
2730     SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
// 8x8 luma diagonal-down-right prediction: each down-right diagonal is a
// 3-tap filter over the edge running bottom-left -> top-left corner ->
// top-right (l7..l0, lt, t0..t7).
2732 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2734     PREDICT_8x8_LOAD_TOP;
2735     PREDICT_8x8_LOAD_LEFT;
2736     PREDICT_8x8_LOAD_TOPLEFT;
2737     SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2738     SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2739     SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2740     SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2741     SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2742     SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2743     SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
// the main diagonal pivots on the filtered top-left corner sample
2744     SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2745     SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2746     SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2747     SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2748     SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2749     SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2750     SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2751     SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
// 8x8 luma vertical-right prediction: alternating 2-tap (half-pel) and
// 3-tap filtered values of the top edge, with the left edge feeding the
// lower-left corner; each value is replicated down-right along its diagonal.
2754 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2756     PREDICT_8x8_LOAD_TOP;
2757     PREDICT_8x8_LOAD_LEFT;
2758     PREDICT_8x8_LOAD_TOPLEFT;
2759     SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2760     SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2761     SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2762     SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2763     SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2764     SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2765     SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2766     SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2767     SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2768     SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2769     SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2770     SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2771     SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2772     SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2773     SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2774     SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2775     SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2776     SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2777     SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2778     SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2779     SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2780     SRC(7,0)= (t6 + t7 + 1) >> 1;
// 8x8 luma horizontal-down prediction: alternating 2-tap and 3-tap filtered
// values of the left edge, pivoting through the top-left corner into the
// top edge for the first row; values repeat down-left along diagonals.
2782 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2784     PREDICT_8x8_LOAD_TOP;
2785     PREDICT_8x8_LOAD_LEFT;
2786     PREDICT_8x8_LOAD_TOPLEFT;
2787     SRC(0,7)= (l6 + l7 + 1) >> 1;
2788     SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2789     SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2790     SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2791     SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2792     SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2793     SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2794     SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2795     SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2796     SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2797     SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2798     SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2799     SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2800     SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2801     SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2802     SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2803     SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2804     SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2805     SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2806     SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2807     SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2808     SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
// 8x8 luma vertical-left prediction: even rows use 2-tap half-pel averages
// of the top/top-right edge, odd rows use 3-tap values; lower rows repeat
// upper rows shifted along the edge.
2810 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2812     PREDICT_8x8_LOAD_TOP;
2813     PREDICT_8x8_LOAD_TOPRIGHT;
2814     SRC(0,0)= (t0 + t1 + 1) >> 1;
2815     SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2816     SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2817     SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2818     SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2819     SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2820     SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2821     SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2822     SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2823     SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2824     SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2825     SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2826     SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2827     SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2828     SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2829     SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2830     SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2831     SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2832     SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2833     SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2834     SRC(7,6)= (t10 + t11 + 1) >> 1;
2835     SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
// 8x8 luma horizontal-up prediction: interpolates up-going diagonals from
// the filtered left edge only; everything past the last left sample is
// filled with that sample (l7).
2837 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2839     PREDICT_8x8_LOAD_LEFT;
2840     SRC(0,0)= (l0 + l1 + 1) >> 1;
2841     SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2842     SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2843     SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2844     SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2845     SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2846     SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2847     SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2848     SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2849     SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2850     SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2851     SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2852     SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2853     SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
// remaining bottom-right region replicates the last left sample
2854     SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2855     SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2856     SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2857     SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2859 #undef PREDICT_8x8_LOAD_LEFT
2860 #undef PREDICT_8x8_LOAD_TOP
2861 #undef PREDICT_8x8_LOAD_TOPLEFT
2862 #undef PREDICT_8x8_LOAD_TOPRIGHT
2863 #undef PREDICT_8x8_DC
// Motion compensation for one partition in one prediction direction (list):
// fetches the luma and chroma reference blocks at quarter/eighth-pel motion
// vector precision, using the edge emulation buffer when the reference
// window crosses the picture border.
// NOTE(review): this extraction is missing some interior lines (the emu flag
// setup and a few conditionals) — not a complete reference.
2869 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2870                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2871                            int src_x_offset, int src_y_offset,
2872                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2873     MpegEncContext * const s = &h->s;
// motion vector in quarter-pel units, offset to this partition's position
2874     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2875     int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2876     const int luma_xy= (mx&3) + ((my&3)<<2);
2877     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2878     uint8_t * src_cb, * src_cr;
2879     int extra_width= h->emu_edge_width;
2880     int extra_height= h->emu_edge_height;
2882     const int full_mx= mx>>2;
2883     const int full_my= my>>2;
2884     const int pic_width = 16*s->mb_width;
2885     const int pic_height = 16*s->mb_height >> MB_MBAFF;
// sub-pel interpolation reads 3 extra samples around the block
2890     if(mx&7) extra_width -= 3;
2891     if(my&7) extra_height -= 3;
// reference window outside the picture: interpolate from the emu buffer
2893     if(   full_mx < 0-extra_width
2894        || full_my < 0-extra_height
2895        || full_mx + 16/*FIXME*/ > pic_width + extra_width
2896        || full_my + 16/*FIXME*/ > pic_height + extra_height){
2897         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2898             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2902     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2904         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2907     if(s->flags&CODEC_FLAG_GRAY) return;
2910         // chroma offset when predicting from a field of opposite parity
2911         my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2912         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2914     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2915     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2918         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919             src_cb= s->edge_emu_buffer;
2921     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2924         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2925             src_cr= s->edge_emu_buffer;
2927     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
// Unweighted motion compensation for one partition: predicts from list 0
// and/or list 1.  When both lists are used, the second prediction is
// blended in with the averaging (avg) operators instead of put.
2930 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2931                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2932                            int x_offset, int y_offset,
2933                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2934                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2935                            int list0, int list1){
2936     MpegEncContext * const s = &h->s;
2937     qpel_mc_func *qpix_op=  qpix_put;
2938     h264_chroma_mc_func chroma_op= chroma_put;
// advance destinations to this partition; x/y offsets are in chroma units
2940     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
2941     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
2942     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
2943     x_offset += 8*s->mb_x;
2944     y_offset += 8*(s->mb_y >> MB_MBAFF);
2947         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2948         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2949                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
2950                    qpix_op, chroma_op);
// after the first direction, switch to averaging for the second
2953         chroma_op= chroma_avg;
2957         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2958         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2959                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
2960                    qpix_op, chroma_op);
// Weighted motion compensation for one partition.  Bi-directional
// partitions predict both references (the second into a scratchpad) and
// blend them with implicit (use_weight==2) or explicit bi-weights;
// uni-directional partitions apply explicit luma/chroma weights in place.
2964 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2965                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2966                            int x_offset, int y_offset,
2967                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2968                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2969                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2970                            int list0, int list1){
2971     MpegEncContext * const s = &h->s;
2973     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
2974     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
2975     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
2976     x_offset += 8*s->mb_x;
2977     y_offset += 8*(s->mb_y >> MB_MBAFF);
2980         /* don't optimize for luma-only case, since B-frames usually
2981          * use implicit weights => chroma too. */
2982         uint8_t *tmp_cb = s->obmc_scratchpad;
2983         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2984         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2985         int refn0 = h->ref_cache[0][ scan8[n] ];
2986         int refn1 = h->ref_cache[1][ scan8[n] ];
2988         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2989                     dest_y, dest_cb, dest_cr,
2990                     x_offset, y_offset, qpix_put, chroma_put);
2991         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2992                     tmp_y, tmp_cb, tmp_cr,
2993                     x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: per-reference-pair weights summing to 64
2995         if(h->use_weight == 2){
2996             int weight0 = h->implicit_weight[refn0][refn1];
2997             int weight1 = 64 - weight0;
2998             luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
2999             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
3000             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit bi-prediction weights/offsets signalled in the slice header
3002             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3003                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
3004                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
3005             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3006                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3007                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3008             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3009                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3010                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: predict once, then apply explicit weight in place
3013         int list = list1 ? 1 : 0;
3014         int refn = h->ref_cache[list][ scan8[n] ];
3015         Picture *ref= &h->ref_list[list][refn];
3016         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3017                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
3018                     qpix_put, chroma_put);
3020         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3021                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
3022         if(h->use_weight_chroma){
3023             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3024                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3025             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3026                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatch motion compensation for one partition: weighted path when
// explicit weighting is on, or when implicit bi-prediction weights differ
// from the plain 32/32 average; otherwise the standard put/avg path.
3031 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3032                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3033                            int x_offset, int y_offset,
3034                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3035                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3036                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3037                            int list0, int list1){
3038     if((h->use_weight==2 && list0 && list1
3039         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3040        || h->use_weight==1)
3041         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3042                          x_offset, y_offset, qpix_put, chroma_put,
3043                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3045         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3046                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
3049 static inline void prefetch_motion(H264Context *h, int list){
3050     /* fetch pixels for estimated mv 4 macroblocks ahead
3051      * optimized for 64byte cache lines */
3052     MpegEncContext * const s = &h->s;
3053     const int refn = h->ref_cache[list][scan8[0]];
// rough luma source address 4 MBs ahead along the current row
3055         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3056         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3057         uint8_t **src= h->ref_list[list][refn].data;
3058         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3059         s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma planes are contiguous, so one prefetch call covers cb and cr
3060         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3061         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
// Inter-macroblock motion compensation: walks the partition tree
// (16x16 / 16x8 / 8x16 / 8x8 with 8x8, 8x4, 4x8, 4x4 sub-partitions) and
// calls mc_part for each with the matching qpel/chroma operator sizes and
// weight function slots; prefetches both reference lists around the work.
3065 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3066                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3067                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3068                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3069     MpegEncContext * const s = &h->s;
3070     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3071     const int mb_type= s->current_picture.mb_type[mb_xy];
3073     assert(IS_INTER(mb_type));
3075     prefetch_motion(h, 0);
3077     if(IS_16X16(mb_type)){
3078         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3079                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3080                 &weight_op[0], &weight_avg[0],
3081                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3082     }else if(IS_16X8(mb_type)){
3083         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3084                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3085                 &weight_op[1], &weight_avg[1],
3086                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3087         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3088                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3089                 &weight_op[1], &weight_avg[1],
3090                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3091     }else if(IS_8X16(mb_type)){
3092         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3093                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3094                 &weight_op[2], &weight_avg[2],
3095                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3096         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3097                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3098                 &weight_op[2], &weight_avg[2],
3099                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3103         assert(IS_8X8(mb_type));
// 8x8 mode: each of the four 8x8 blocks has its own sub-partitioning
3106             const int sub_mb_type= h->sub_mb_type[i];
3108             int x_offset= (i&1)<<2;
3109             int y_offset= (i&2)<<1;
3111             if(IS_SUB_8X8(sub_mb_type)){
3112                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3113                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3114                     &weight_op[3], &weight_avg[3],
3115                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3116             }else if(IS_SUB_8X4(sub_mb_type)){
3117                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3118                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3119                     &weight_op[4], &weight_avg[4],
3120                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3121                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3122                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3123                     &weight_op[4], &weight_avg[4],
3124                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3125             }else if(IS_SUB_4X8(sub_mb_type)){
3126                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3127                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3128                     &weight_op[5], &weight_avg[5],
3129                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3130                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3131                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3132                     &weight_op[5], &weight_avg[5],
3133                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3136                 assert(IS_SUB_4X4(sub_mb_type));
3138                     int sub_x_offset= x_offset + 2*(j&1);
3139                     int sub_y_offset= y_offset +   (j&2);
3140                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3141                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3142                         &weight_op[6], &weight_avg[6],
3143                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3149     prefetch_motion(h, 1);
/**
 * One-time construction of all CAVLC VLC decoding tables:
 * coeff_token, total_zeros, run_before, and their chroma-DC variants.
 * NOTE(review): this excerpt omits several original lines (the embedded
 * line numbers jump) — presumably the if(!done) guard and the for-loop
 * headers/braces around the indexed init_vlc() calls; confirm against the
 * full file before editing.
 */
3152 static void decode_init_vlc(H264Context *h){
3153     static int done = 0;
3159     init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3160                &chroma_dc_coeff_token_len [0], 1, 1,
3161                &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3164             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3165                &coeff_token_len [i][0], 1, 1,
3166                &coeff_token_bits[i][0], 1, 1, 1);
3170             init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3171                &chroma_dc_total_zeros_len [i][0], 1, 1,
3172                &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3174         for(i=0; i<15; i++){
3175             init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3176                &total_zeros_len [i][0], 1, 1,
3177                &total_zeros_bits[i][0], 1, 1, 1);
3181             init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3182                &run_len [i][0], 1, 1,
3183                &run_bits[i][0], 1, 1, 1);
          // run_before for total_coeff>6 uses a single dedicated 16-entry table
3185         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3186            &run_len [6][0], 1, 1,
3187            &run_bits[6][0], 1, 1, 1);
3192  * Sets the intra prediction function pointers.
/**
 * Fills the 4x4-luma, 8x8-luma, 8x8-chroma and 16x16-luma intra prediction
 * dispatch tables with the C reference implementations, indexed by the
 * prediction-mode enums used by the parser.
 */
3194 static void init_pred_ptrs(H264Context *h){
3195 //    MpegEncContext * const s = &h->s;
     // 4x4 luma intra prediction (9 directional modes + DC fallbacks)
3197     h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
3198     h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
3199     h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
3200     h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3201     h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3202     h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
3203     h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
3204     h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
3205     h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
3206     h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
3207     h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
3208     h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
     // 8x8 luma intra prediction (High profile, transform_8x8_mode)
3210     h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
3211     h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
3212     h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
3213     h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3214     h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3215     h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
3216     h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
3217     h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
3218     h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
3219     h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
3220     h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
3221     h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
     // 8x8 chroma intra prediction
3223     h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
3224     h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
3225     h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
3226     h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
3227     h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3228     h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3229     h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
     // 16x16 luma intra prediction (shares the 8x8 chroma mode enums)
3231     h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
3232     h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
3233     h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
3234     h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
3235     h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3236     h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3237     h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Frees all per-stream tables allocated by alloc_tables() (av_freep NULLs
 * each pointer, so a later re-allocation is safe). Counterpart of
 * alloc_tables(); also resets the derived slice_table pointer, which
 * aliases into slice_table_base and must not be freed itself.
 */
3240 static void free_tables(H264Context *h){
3241     av_freep(&h->intra4x4_pred_mode);
3242     av_freep(&h->chroma_pred_mode_table);
3243     av_freep(&h->cbp_table);
3244     av_freep(&h->mvd_table[0]);
3245     av_freep(&h->mvd_table[1]);
3246     av_freep(&h->direct_table);
3247     av_freep(&h->non_zero_count);
3248     av_freep(&h->slice_table_base);
3249     av_freep(&h->top_borders[1]);
3250     av_freep(&h->top_borders[0]);
     // slice_table points into slice_table_base (offset), so just clear it
3251     h->slice_table= NULL;
3253     av_freep(&h->mb2b_xy);
3254     av_freep(&h->mb2b8_xy);
3256     av_freep(&h->s.obmc_scratchpad);
/**
 * Builds the 8x8 dequantization tables (one per QP 0..51) from the PPS
 * scaling matrices. If the intra and inter 8x8 scaling matrices are equal,
 * table 1 aliases table 0 to save work. `transpose` compensates for IDCT
 * implementations whose coefficient layout differs from the C reference.
 * NOTE(review): some loop lines are missing from this excerpt (the embedded
 * numbers jump) — e.g. the inner x-loop header; confirm against the full file.
 */
3259 static void init_dequant8_coeff_table(H264Context *h){
3261     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3262     h->dequant8_coeff[0] = h->dequant8_buffer[0];
3263     h->dequant8_coeff[1] = h->dequant8_buffer[1];
3265     for(i=0; i<2; i++ ){
     // identical intra/inter matrices -> share one buffer
3266         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3267             h->dequant8_coeff[1] = h->dequant8_buffer[0];
3271         for(q=0; q<52; q++){
3272             int shift = div6[q];
3275                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3276                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3277                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Builds the six 4x4 dequantization tables (Y/Cb/Cr, intra and inter) for
 * all QP 0..51 from the PPS scaling matrices, aliasing buffers of any
 * earlier list whose scaling matrix is identical.
 * NOTE(review): loop/brace lines are missing from this excerpt (embedded
 * line numbers jump); confirm structure against the full file.
 */
3282 static void init_dequant4_coeff_table(H264Context *h){
3284     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3285     for(i=0; i<6; i++ ){
3286         h->dequant4_coeff[i] = h->dequant4_buffer[i];
     // reuse the table of an earlier list with the same scaling matrix
3288             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3289                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3296         for(q=0; q<52; q++){
3297             int shift = div6[q] + 2;
3300                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3301                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3302                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)builds all dequantization tables for the current PPS/SPS: always the
 * 4x4 tables, the 8x8 tables only when transform_8x8_mode is enabled, and
 * for transform-bypass streams forces QP0 entries to the identity scale
 * (1<<6, i.e. unit gain under the 6-bit fixed-point convention).
 * NOTE(review): loop headers around the bypass assignments are missing
 * from this excerpt.
 */
3307 static void init_dequant_tables(H264Context *h){
3309     init_dequant4_coeff_table(h);
3310     if(h->pps.transform_8x8_mode)
3311         init_dequant8_coeff_table(h);
3312     if(h->sps.transform_bypass){
3315                 h->dequant4_coeff[i][0][x] = 1<<6;
3316         if(h->pps.transform_8x8_mode)
3319                     h->dequant8_coeff[i][0][x] = 1<<6;
3326  * needs width/height
/**
 * Allocates all per-stream tables sized from mb_width/mb_height/mb_stride
 * (so the picture dimensions must be known), plus the mb->b/b8 coordinate
 * remap tables. CABAC-only tables are allocated only when the PPS enables
 * CABAC. Returns nonzero on allocation failure via CHECKED_ALLOCZ's hidden
 * goto (the fail label is outside this excerpt).
 */
3328 static int alloc_tables(H264Context *h){
3329     MpegEncContext * const s = &h->s;
     // +1 row so edge macroblocks can read one row beyond the picture
3330     const int big_mb_num= s->mb_stride * (s->mb_height+1);
3333     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
3335     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3336     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3337     CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3338     CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3339     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3341     if( h->pps.cabac ) {
3342         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3343         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3344         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3345         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     // 0xFF (= -1 per byte) marks "no slice"; slice_table is offset into the base
3348     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3349     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
3351     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3352     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
     // precompute macroblock-index -> 4x4-block / 8x8-block index maps
3353     for(y=0; y<s->mb_height; y++){
3354         for(x=0; x<s->mb_width; x++){
3355             const int mb_xy= x + y*s->mb_stride;
3356             const int b_xy = 4*x + 4*y*h->b_stride;
3357             const int b8_xy= 2*x + 2*y*h->b8_stride;
3359             h->mb2b_xy [mb_xy]= b_xy;
3360             h->mb2b8_xy[mb_xy]= b8_xy;
     // allocated lazily in frame_start() once linesize is known
3364     s->obmc_scratchpad = NULL;
3366     if(!h->dequant4_coeff[0])
3367         init_dequant_tables(h);
/**
 * Initialization shared by decoder (and encoder) init paths: copies the
 * picture dimensions and codec id from the AVCodecContext, marks the
 * dequant tables as stale (dequant_coeff_pps = -1 forces a rebuild), and
 * seeds both PPS scaling matrices with the flat default value 16
 * (= unity scale before a real PPS overrides them).
 */
3375 static void common_init(H264Context *h){
3376     MpegEncContext * const s = &h->s;
3378     s->width = s->avctx->width;
3379     s->height = s->avctx->height;
3380     s->codec_id= s->avctx->codec->id;
3384     h->dequant_coeff_pps= -1;
3385     s->unrestricted_mv=1;
3386     s->decode=1; //FIXME
     // flat (all-16) default scaling lists until a PPS supplies real ones
3388     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3389     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback: sets MPV defaults, output format and pixel format,
 * then detects AVCC-style extradata (first byte == 1 means length-prefixed
 * NAL units rather than Annex-B start codes).
 * NOTE(review): the function body continues past this excerpt (the
 * extradata-handling branch and return are not visible here).
 */
3392 static int decode_init(AVCodecContext *avctx){
3393     H264Context *h= avctx->priv_data;
3394     MpegEncContext * const s = &h->s;
3396     MPV_decode_defaults(s);
3401     s->out_format = FMT_H264;
3402     s->workaround_bugs= avctx->workaround_bugs;
3405 //    s->decode_mb= ff_h263_decode_mb;
3407     avctx->pix_fmt= PIX_FMT_YUV420P;
     // extradata starting with byte 1 => AVCC (MP4-style) configuration record
3411     if(avctx->extradata_size > 0 && avctx->extradata &&
3412        *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the MPV frame and error resilience, precomputes
 * the per-block pixel offsets used during reconstruction (entries 0..23
 * for frame/progressive strides, 24..47 for the doubled MBAFF strides),
 * lazily allocates the bipred scratch buffer, and resets slice_table.
 */
3422 static int frame_start(H264Context *h){
3423     MpegEncContext * const s = &h->s;
3426     if(MPV_frame_start(s, s->avctx) < 0)
3428     ff_er_frame_start(s);
3430     assert(s->linesize && s->uvlinesize);
     // luma offsets: [i] normal stride, [24+i] doubled stride for field MBs
3432     for(i=0; i<16; i++){
3433         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3434         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     // chroma offsets (Cb at 16.., Cr at 20..; +24 variants use doubled stride)
3437         h->block_offset[16+i]=
3438         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3439         h->block_offset[24+16+i]=
3440         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3443     /* can't be in alloc_tables because linesize isn't known there.
3444      * FIXME: redo bipred weight to not require extra buffer? */
3445     if(!s->obmc_scratchpad)
3446         s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3448     /* some macroblocks will be accessed before they're available */
3450     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3452 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column (into left_border) and the bottom row (into
 * top_borders[0]) of the just-reconstructed macroblock, so the deblocking
 * filter can still see the unfiltered neighbor samples when processing
 * the next macroblock. Chroma planes are skipped in grayscale mode.
 */
3456 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3457     MpegEncContext * const s = &h->s;
3461     src_cb -= uvlinesize;
3462     src_cr -= uvlinesize;
3464     // There are two lines saved, the line above the top macroblock of a pair,
3465     // and the line above the bottom macroblock
3466     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3467     for(i=1; i<17; i++){
3468         h->left_border[i]= src_y[15+i*  linesize];
     // bottom luma row saved as two 64-bit stores
3471     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3472     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3474     if(!(s->flags&CODEC_FLAG_GRAY)){
3475         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3476         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3478             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3479             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3481         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3482         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border samples
 * with the current macroblock's top/left edges, so intra prediction sees
 * the *unfiltered* neighbors while the frame buffer keeps the deblocked
 * ones. Edges at the picture border (no left/top neighbor) are skipped.
 * NOTE(review): the XCHG macro body continues on lines missing from this
 * excerpt (the #define ends with a continuation backslash).
 */
3486 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3487     MpegEncContext * const s = &h->s;
3490     int deblock_left = (s->mb_x > 0);
3491     int deblock_top =  (s->mb_y > 0);
     // step back to the row/column just outside the macroblock
3493     src_y  -=   linesize + 1;
3494     src_cb -= uvlinesize + 1;
3495     src_cr -= uvlinesize + 1;
3497 #define XCHG(a,b,t,xchg)\
3504         for(i = !deblock_top; i<17; i++){
3505             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
3510         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3511         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3512         if(s->mb_x+1 < s->mb_width){
3513             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3517     if(!(s->flags&CODEC_FLAG_GRAY)){
3519             for(i = !deblock_top; i<9; i++){
3520                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3521                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3525             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3526             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock *pair* — a 34-sample left column and two bottom rows (one
 * per field, in top_borders[0] and top_borders[1]).
 */
3531 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3532     MpegEncContext * const s = &h->s;
3535     src_y  -= 2 *   linesize;
3536     src_cb -= 2 * uvlinesize;
3537     src_cr -= 2 * uvlinesize;
3539     // There are two lines saved, the line above the top macroblock of a pair,
3540     // and the line above the bottom macroblock
3541     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3542     h->left_border[1]= h->top_borders[1][s->mb_x][15];
3543     for(i=2; i<34; i++){
3544         h->left_border[i]= src_y[15+i*  linesize];
     // bottom two luma rows of the pair, one per top_borders slot
3547     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3548     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3549     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3550     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3552     if(!(s->flags&CODEC_FLAG_GRAY)){
3553         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3554         h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
3555         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
3556         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3557         for(i=2; i<18; i++){
3558             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3559             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3561         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3562         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3563         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3564         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps/restores the saved unfiltered
 * border samples for a macroblock pair (two rows of top_borders, a
 * 34-sample left column). deblock_top requires mb_y > 1 because the pair
 * above occupies two macroblock rows.
 * NOTE(review): the XCHG macro body continues on lines missing from this
 * excerpt (the #define ends with a continuation backslash).
 */
3568 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3569     MpegEncContext * const s = &h->s;
3572     int deblock_left = (s->mb_x > 0);
3573     int deblock_top =  (s->mb_y > 1);
3575     tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3577     src_y  -= 2 *   linesize + 1;
3578     src_cb -= 2 * uvlinesize + 1;
3579     src_cr -= 2 * uvlinesize + 1;
3581 #define XCHG(a,b,t,xchg)\
3588         for(i = (!deblock_top)<<1; i<34; i++){
3589             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
3594         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3595         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3596         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3597         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3598         if(s->mb_x+1 < s->mb_width){
3599             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3600             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3604     if(!(s->flags&CODEC_FLAG_GRAY)){
3606             for(i = (!deblock_top) << 1; i<18; i++){
3607                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3608                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3612             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3613             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3614             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3615             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level reconstruction of one macroblock: selects strides/offsets
 * (doubled for MBAFF field macroblocks), performs intra prediction or
 * inter motion compensation, adds the residual via the appropriate IDCT
 * (4x4 / 8x8 / DC-only / bypass), handles IPCM passthrough and the SVQ3
 * code path, then runs the deblocking filter (pair-wise under MBAFF).
 * NOTE(review): many original lines are elided in this excerpt (embedded
 * line numbers jump) — braces/conditions between sections are not all
 * visible; confirm control flow against the full file before editing.
 */
3620 static void hl_decode_mb(H264Context *h){
3621     MpegEncContext * const s = &h->s;
3622     const int mb_x= s->mb_x;
3623     const int mb_y= s->mb_y;
3624     const int mb_xy= mb_x + mb_y*s->mb_stride;
3625     const int mb_type= s->current_picture.mb_type[mb_xy];
3626     uint8_t  *dest_y, *dest_cb, *dest_cr;
3627     int linesize, uvlinesize /*dct_offset*/;
3629     int *block_offset = &h->block_offset[0];
3630     const unsigned int bottom = mb_y & 1;
3631     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3632     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3633     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
     // destination pointers into the current picture planes
3638     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3639     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3640     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3642     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3643     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     // field macroblock in an MBAFF pair: double strides, use the +24 offsets
3646         linesize   = h->mb_linesize   = s->linesize * 2;
3647         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3648         block_offset = &h->block_offset[24];
3649         if(mb_y&1){ //FIXME move out of this func?
3650             dest_y -= s->linesize*15;
3651             dest_cb-= s->uvlinesize*7;
3652             dest_cr-= s->uvlinesize*7;
     // remap ref indices to the per-field reference list (16+2*ref scheme)
3656         for(list=0; list<2; list++){
3657             if(!USES_LIST(mb_type, list))
3659             if(IS_16X16(mb_type)){
3660                 int8_t *ref = &h->ref_cache[list][scan8[0]];
3661                 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3663                 for(i=0; i<16; i+=4){
3664                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3665                     int ref = h->ref_cache[list][scan8[i]];
3667                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
     // frame (non-field) macroblock: plain strides
3673         linesize   = h->mb_linesize   = s->linesize;
3674         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3675 //        dct_offset = s->linesize * 16;
     // choose residual-add functions once for the whole macroblock
3678     if(transform_bypass){
3680         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3681     }else if(IS_8x8DCT(mb_type)){
3682         idct_dc_add = s->dsp.h264_idct8_dc_add;
3683         idct_add = s->dsp.h264_idct8_add;
3685         idct_dc_add = s->dsp.h264_idct_dc_add;
3686         idct_add = s->dsp.h264_idct_add;
     // MBAFF intra: swap in the unfiltered pair borders before prediction
3689     if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3690        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3691         int mbt_y = mb_y&~1;
3692         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3693         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3694         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3695         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3698     if (IS_INTRA_PCM(mb_type)) {
3701         // The pixels are stored in h->mb array in the same order as levels,
3702         // copy them in output in the correct order.
3703         for(i=0; i<16; i++) {
3704             for (y=0; y<4; y++) {
3705                 for (x=0; x<4; x++) {
3706                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3710         for(i=16; i<16+4; i++) {
3711             for (y=0; y<4; y++) {
3712                 for (x=0; x<4; x++) {
3713                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3717         for(i=20; i<20+4; i++) {
3718             for (y=0; y<4; y++) {
3719                 for (x=0; x<4; x++) {
3720                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3725         if(IS_INTRA(mb_type)){
3726             if(h->deblocking_filter && !FRAME_MBAFF)
3727                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3729             if(!(s->flags&CODEC_FLAG_GRAY)){
3730                 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3731                 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3734             if(IS_INTRA4x4(mb_type)){
3736                     if(IS_8x8DCT(mb_type)){
3737                         for(i=0; i<16; i+=4){
3738                             uint8_t * const ptr= dest_y + block_offset[i];
3739                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3740                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
3741                             h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3742                                                    (h->topright_samples_available<<(i+1))&0x8000, linesize);
     // nnz==1 with only the DC coeff set: cheap DC-only IDCT path
3744                                 if(nnz == 1 && h->mb[i*16])
3745                                     idct_dc_add(ptr, h->mb + i*16, linesize);
3747                                     idct_add(ptr, h->mb + i*16, linesize);
3751                         for(i=0; i<16; i++){
3752                             uint8_t * const ptr= dest_y + block_offset[i];
3754                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3757                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3758                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3759                                 assert(mb_y || linesize <= block_offset[i]);
     // unavailable top-right: replicate the last available top sample
3760                                 if(!topright_avail){
3761                                     tr= ptr[3 - linesize]*0x01010101;
3762                                     topright= (uint8_t*) &tr;
3764                                     topright= ptr + 4 - linesize;
3768                             h->pred4x4[ dir ](ptr, topright, linesize);
3769                             nnz = h->non_zero_count_cache[ scan8[i] ];
3771                                 if(s->codec_id == CODEC_ID_H264){
3772                                     if(nnz == 1 && h->mb[i*16])
3773                                         idct_dc_add(ptr, h->mb + i*16, linesize);
3775                                         idct_add(ptr, h->mb + i*16, linesize);
3777                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
     // intra 16x16: predict the whole macroblock, then dequant the DC plane
3782                 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3783                 if(s->codec_id == CODEC_ID_H264){
3784                     if(!transform_bypass)
3785                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3787                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3789             if(h->deblocking_filter && !FRAME_MBAFF)
3790                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3791         }else if(s->codec_id == CODEC_ID_H264){
     // inter macroblock: full motion compensation
3792             hl_motion(h, dest_y, dest_cb, dest_cr,
3793                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3794                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3795                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     // add the luma residual (except intra4x4, which added it per-block above)
3799         if(!IS_INTRA4x4(mb_type)){
3800             if(s->codec_id == CODEC_ID_H264){
3801                 if(IS_INTRA16x16(mb_type)){
3802                     for(i=0; i<16; i++){
3803                         if(h->non_zero_count_cache[ scan8[i] ])
3804                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3805                         else if(h->mb[i*16])
3806                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3809                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3810                     for(i=0; i<16; i+=di){
3811                         int nnz = h->non_zero_count_cache[ scan8[i] ];
3813                             if(nnz==1 && h->mb[i*16])
3814                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3816                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3821                 for(i=0; i<16; i++){
3822                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3823                         uint8_t * const ptr= dest_y + block_offset[i];
3824                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     // chroma residual (skipped entirely in grayscale mode)
3830         if(!(s->flags&CODEC_FLAG_GRAY)){
3831             uint8_t *dest[2] = {dest_cb, dest_cr};
3832             if(transform_bypass){
3833                 idct_add = idct_dc_add = s->dsp.add_pixels4;
3835                 idct_add = s->dsp.h264_idct_add;
3836                 idct_dc_add = s->dsp.h264_idct_dc_add;
3837                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3838                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3840             if(s->codec_id == CODEC_ID_H264){
3841                 for(i=16; i<16+8; i++){
3842                     if(h->non_zero_count_cache[ scan8[i] ])
3843                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3844                     else if(h->mb[i*16])
3845                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3848                 for(i=16; i<16+8; i++){
3849                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3850                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3851                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3857     if(h->deblocking_filter) {
     // MBAFF: filter the whole pair at once, once the bottom MB is done
3859             //FIXME try deblocking one mb at a time?
3860             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3861             const int mb_y= s->mb_y - 1;
3862             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3863             const int mb_xy= mb_x + mb_y*s->mb_stride;
3864             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3865             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3866             if (!bottom) return;
3867             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3868             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3869             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3871             if(IS_INTRA(mb_type_top | mb_type_bottom))
3872                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3874             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3878             tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3879             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3880             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3881             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3884             tprintf("call mbaff filter_mb\n");
3885             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3886             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3887             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3889             tprintf("call filter_mb\n");
3890             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3891             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3892             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3898  * fills the default_ref_list.
/**
 * Builds the default (unreordered) reference picture lists per the spec:
 * for B slices, short-term refs are sorted by POC and split around the
 * current picture's POC (L0 descends below, L1 ascends above), followed
 * by long-term refs; for P slices, short-term refs in decoding order then
 * long-term refs. L1's first two entries are swapped if L0 == L1.
 * NOTE(review): some loop/branch lines are elided in this excerpt.
 */
3900 static int fill_default_ref_list(H264Context *h){
3901     MpegEncContext * const s = &h->s;
3903     int smallest_poc_greater_than_current = -1;
3904     Picture sorted_short_ref[32];
3906     if(h->slice_type==B_TYPE){
3910         /* sort frame according to poc in B slice */
3911         for(out_i=0; out_i<h->short_ref_count; out_i++){
3913             int best_poc=INT_MAX;
3915             for(i=0; i<h->short_ref_count; i++){
3916                 const int poc= h->short_ref[i]->poc;
3917                 if(poc > limit && poc < best_poc){
3923             assert(best_i != INT_MIN);
3926             sorted_short_ref[out_i]= *h->short_ref[best_i];
3927             tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
     // remember where the POC ordering crosses the current picture
3928             if (-1 == smallest_poc_greater_than_current) {
3929                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3930                     smallest_poc_greater_than_current = out_i;
3936     if(s->picture_structure == PICT_FRAME){
3937         if(h->slice_type==B_TYPE){
3939             tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3941             // find the largest poc
3942             for(list=0; list<2; list++){
     // L0 walks down in POC from the current picture, L1 walks up
3945                 int step= list ? -1 : 1;
3947                 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3948                     while(j<0 || j>= h->short_ref_count){
3949                         if(j != -99 && step == (list ? -1 : 1))
3952                         j= smallest_poc_greater_than_current + (step>>1);
3954                     if(sorted_short_ref[j].reference != 3) continue;
3955                     h->default_ref_list[list][index  ]= sorted_short_ref[j];
3956                     h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
     // long-term references follow the short-term ones
3959                 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3960                     if(h->long_ref[i] == NULL) continue;
3961                     if(h->long_ref[i]->reference != 3) continue;
3963                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
3964                     h->default_ref_list[ list ][index++].pic_id= i;;
3967                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3968                     // swap the two first elements of L1 when
3969                     // L0 and L1 are identical
3970                     Picture temp= h->default_ref_list[1][0];
3971                     h->default_ref_list[1][0] = h->default_ref_list[1][1];
3972                     h->default_ref_list[1][1] = temp;
3975                 if(index < h->ref_count[ list ])
3976                     memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
     // P slice: short-term refs in decoding order, then long-term refs
3980             for(i=0; i<h->short_ref_count; i++){
3981                 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3982                 h->default_ref_list[0][index  ]= *h->short_ref[i];
3983                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3985             for(i = 0; i < 16; i++){
3986                 if(h->long_ref[i] == NULL) continue;
3987                 if(h->long_ref[i]->reference != 3) continue;
3988                 h->default_ref_list[0][index  ]= *h->long_ref[i];
3989                 h->default_ref_list[0][index++].pic_id= i;;
3991             if(index < h->ref_count[0])
3992                 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3995     if(h->slice_type==B_TYPE){
3997         //FIXME second field balh
4001     for (i=0; i<h->ref_count[0]; i++) {
4002         tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
4004     if(h->slice_type==B_TYPE){
4005         for (i=0; i<h->ref_count[1]; i++) {
4006             tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
/* Forward declarations for the debug dump helpers defined later in the file. */
4013 static void print_short_term(H264Context *h);
4014 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts each list from the default list, then for each reordering command
 * (idc 0/1: relative frame_num; idc 2: long-term index; idc 3: stop) moves
 * the selected picture to the current position, shifting duplicates out.
 * Missing pictures are zero-filled with an error log. Finally fills holes
 * with the current picture and initializes direct-mode tables. Returns 0
 * on success, -1 on bitstream errors.
 * NOTE(review): some loop/brace lines are elided in this excerpt.
 */
4016 static int decode_ref_pic_list_reordering(H264Context *h){
4017     MpegEncContext * const s = &h->s;
4020     print_short_term(h);
4022     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4024     for(list=0; list<2; list++){
4025         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
4027         if(get_bits1(&s->gb)){
     // pred tracks picNumLXPred for the relative frame_num arithmetic
4028             int pred= h->curr_pic_num;
4030             for(index=0; ; index++){
4031                 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4034                 Picture *ref = NULL;
4036                 if(reordering_of_pic_nums_idc==3)
4039                 if(index >= h->ref_count[list]){
4040                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4044                 if(reordering_of_pic_nums_idc<3){
4045                     if(reordering_of_pic_nums_idc<2){
4046                         const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4048                         if(abs_diff_pic_num >= h->max_pic_num){
4049                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
     // idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num
4053                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4054                         else                                pred+= abs_diff_pic_num;
4055                         pred &= h->max_pic_num - 1;
4057                         for(i= h->short_ref_count-1; i>=0; i--){
4058                             ref = h->short_ref[i];
4059                             assert(ref->reference == 3);
4060                             assert(!ref->long_ref);
4061                             if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4065                             ref->pic_id= ref->frame_num;
4067                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4068                         ref = h->long_ref[pic_id];
4069                         ref->pic_id= pic_id;
4070                         assert(ref->reference == 3);
4071                         assert(ref->long_ref);
4076                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4077                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
     // shift out any later duplicate of ref, then insert it at index
4079                         for(i=index; i+1<h->ref_count[list]; i++){
4080                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4083                         for(; i > index; i--){
4084                             h->ref_list[list][i]= h->ref_list[list][i-1];
4086                         h->ref_list[list][index]= *ref;
4089                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4095         if(h->slice_type!=B_TYPE) break;
     // replace any empty entries with the current picture (error concealment)
4097     for(list=0; list<2; list++){
4098         for(index= 0; index < h->ref_count[list]; index++){
4099             if(!h->ref_list[list][index].data[0])
4100                 h->ref_list[list][index]= s->current_picture;
4102         if(h->slice_type!=B_TYPE) break;
4105     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4106         direct_dist_scale_factor(h);
4107     direct_ref_list_init(h);
/**
 * Derives the per-field reference entries for MBAFF: for each frame ref i,
 * builds field refs at positions 16+2*i (top field) and 16+2*i+1 (bottom
 * field, offset by one line) with doubled linesizes, and duplicates the
 * corresponding explicit and implicit weights/offsets.
 * NOTE(review): inner loop headers over the data/linesize planes are
 * elided in this excerpt.
 */
4111 static void fill_mbaff_ref_list(H264Context *h){
4113     for(list=0; list<2; list++){
4114         for(i=0; i<h->ref_count[list]; i++){
4115             Picture *frame = &h->ref_list[list][i];
4116             Picture *field = &h->ref_list[list][16+2*i];
4119                 field[0].linesize[j] <<= 1;
4120             field[1] = field[0];
4122                 field[1].data[j] += frame->linesize[j];
4124             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4125             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4127                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4128                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
4132     for(j=0; j<h->ref_count[1]; j++){
4133         for(i=0; i<h->ref_count[0]; i++)
4134             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4135         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4136         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() (explicit weighted prediction) from the slice
 * header: the luma/chroma log2 weight denominators, then per reference and
 * per list an optional weight/offset pair; absent entries get the default
 * weight (1 << denom) and zero offset. Sets h->use_weight(_chroma) when
 * any entry differs from the defaults. List 1 is parsed only for B slices.
 */
4140 static int pred_weight_table(H264Context *h){
4141     MpegEncContext * const s = &h->s;
4143     int luma_def, chroma_def;
4146     h->use_weight_chroma= 0;
4147     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4148     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4149     luma_def = 1<<h->luma_log2_weight_denom;
4150     chroma_def = 1<<h->chroma_log2_weight_denom;
4152     for(list=0; list<2; list++){
4153         for(i=0; i<h->ref_count[list]; i++){
4154             int luma_weight_flag, chroma_weight_flag;
4156             luma_weight_flag= get_bits1(&s->gb);
4157             if(luma_weight_flag){
4158                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4159                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4160                 if(   h->luma_weight[list][i] != luma_def
4161                    || h->luma_offset[list][i] != 0)
4164                 h->luma_weight[list][i]= luma_def;
4165                 h->luma_offset[list][i]= 0;
4168             chroma_weight_flag= get_bits1(&s->gb);
4169             if(chroma_weight_flag){
4172                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4173                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4174                     if(   h->chroma_weight[list][i][j] != chroma_def
4175                        || h->chroma_offset[list][i][j] != 0)
4176                         h->use_weight_chroma= 1;
4181                     h->chroma_weight[list][i][j]= chroma_def;
4182                     h->chroma_offset[list][i][j]= 0;
4186         if(h->slice_type != B_TYPE) break;
4188     h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute the implicit (POC-distance based) bi-prediction weight table used
 * when pps.weighted_bipred_idc == 2. Each (ref0, ref1) pair gets a weight
 * derived from the temporal distances td/tb, clipped and defaulting to the
 * equal weight 32 for degenerate cases.
 * NOTE(review): listing is gapped — declarations of `ref0`/`ref1`, the
 * early-return branch around line 4200, and the td==0/long_ref guard between
 * 4213 and 4215 are not visible here. */
4192 static void implicit_weight_table(H264Context *h){
4193     MpegEncContext * const s = &h->s;
4195     int cur_poc = s->current_picture_ptr->poc;
/* Special case: single ref each way, symmetric around the current POC —
 * implicit weighting degenerates to the unweighted path. */
4197     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
4198        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4200         h->use_weight_chroma= 0;
/* use_weight==2 marks "implicit" mode for the motion-compensation code. */
4205     h->use_weight_chroma= 2;
4206     h->luma_log2_weight_denom= 5;
4207     h->chroma_log2_weight_denom= 5;
4209     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4210         int poc0 = h->ref_list[0][ref0].poc;
4211         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4212             int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances per the implicit-weight derivation. */
4213             int td = clip(poc1 - poc0, -128, 127);
4215                 int tb = clip(cur_poc - poc0, -128, 127);
4216                 int tx = (16384 + (FFABS(td) >> 1)) / td;
4217                 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the equal weight 32. */
4218                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4219                     h->implicit_weight[ref0][ref1] = 32;
4221                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4223                 h->implicit_weight[ref0][ref1] = 32;
/* Drop the reference flag of a picture that is being removed from the
 * reference lists, while keeping pictures that are still pending output in
 * the delayed-picture queue referenced.
 * NOTE(review): body is truncated in this listing — the actual flag
 * assignments and loop body are not visible. */
4228 static inline void unreference_pic(H264Context *h, Picture *pic){
4231     if(pic == h->delayed_output_pic)
4234     for(i = 0; h->delayed_pic[i]; i++)
4235         if(pic == h->delayed_pic[i]){
4243 * instantaneous decoder refresh.
/* Handle an IDR (instantaneous decoder refresh): unreference and clear every
 * long-term and short-term reference picture, resetting both counts, so
 * decoding restarts from a clean reference state. */
4245 static void idr(H264Context *h){
4248     for(i=0; i<16; i++){
4249         if (h->long_ref[i] != NULL) {
4250             unreference_pic(h, h->long_ref[i]);
4251             h->long_ref[i]= NULL;
4254     h->long_ref_count=0;
4256     for(i=0; i<h->short_ref_count; i++){
4257         unreference_pic(h, h->short_ref[i]);
4258         h->short_ref[i]= NULL;
4260     h->short_ref_count=0;
4263 /* forget old pics after a seek */
/* Forget old pictures after a seek: clear the reference flag of every
 * delayed picture, the delayed-output picture, and the current picture, and
 * drop the delayed-picture pointers. Installed as the codec's flush
 * callback (takes AVCodecContext, not H264Context).
 * NOTE(review): listing is gapped — the tail (e.g. a likely call to clear
 * the reference lists) is not visible. */
4264 static void flush_dpb(AVCodecContext *avctx){
4265     H264Context *h= avctx->priv_data;
4267     for(i=0; i<16; i++) {
4268         if(h->delayed_pic[i])
4269             h->delayed_pic[i]->reference= 0;
4270         h->delayed_pic[i]= NULL;
4272     if(h->delayed_output_pic)
4273         h->delayed_output_pic->reference= 0;
4274     h->delayed_output_pic= NULL;
4276     if(h->s.current_picture_ptr)
4277         h->s.current_picture_ptr->reference= 0;
4282 * @return the removed picture or NULL if an error occurs
/* Remove the short-term reference picture with the given frame_num from
 * h->short_ref, compacting the array.
 * @return the removed picture, or NULL if no such frame_num is present
 * (return statements fall outside the visible lines of this listing). */
4284 static Picture * remove_short(H264Context *h, int frame_num){
4285     MpegEncContext * const s = &h->s;
4288     if(s->avctx->debug&FF_DEBUG_MMCO)
4289         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4291     for(i=0; i<h->short_ref_count; i++){
4292         Picture *pic= h->short_ref[i];
4293         if(s->avctx->debug&FF_DEBUG_MMCO)
4294             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4295         if(pic->frame_num == frame_num){
4296             h->short_ref[i]= NULL;
/* Shift the remaining entries down to close the gap. */
4297             memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4298             h->short_ref_count--;
4307 * @return the removed picture or NULL if an error occurs
/* Remove (and return) the long-term reference at slot i, decrementing the
 * long-term count only when the slot was occupied. Returns NULL if the slot
 * was empty (return statement falls outside the visible lines). */
4309 static Picture * remove_long(H264Context *h, int i){
4312     pic= h->long_ref[i];
4313     h->long_ref[i]= NULL;
4314     if(pic) h->long_ref_count--;
4320 * print short term list
/* Debug helper: dump the short-term reference list (index, frame_num, poc,
 * data pointer) when FF_DEBUG_MMCO is enabled; otherwise a no-op. */
4322 static void print_short_term(H264Context *h) {
4324     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4325         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4326         for(i=0; i<h->short_ref_count; i++){
4327             Picture *pic= h->short_ref[i];
4328             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4334 * print long term list
/* Debug helper: dump all 16 long-term reference slots when FF_DEBUG_MMCO is
 * enabled; otherwise a no-op. (The NULL-slot guard between lines 4341 and
 * 4343 is not visible in this listing.) */
4336 static void print_long_term(H264Context *h) {
4338     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4339         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4340         for(i = 0; i < 16; i++){
4341             Picture *pic= h->long_ref[i];
4343                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4350 * Executes the reference picture marking (memory management control operations).
/* Execute the decoded reference picture marking operations (MMCO list) for
 * the current picture, then insert the current picture into the short-term
 * list unless an MMCO made it long-term. Mirrors H.264 clause 8.2.5.
 * NOTE(review): listing is gapped — `pic`/`j` declarations, several `break`
 * statements between cases, the MMCO_RESET case label, and the sliding-window
 * path are partially missing from view. */
4352 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4353     MpegEncContext * const s = &h->s;
4355     int current_is_long=0;
4358     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4359         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4361     for(i=0; i<mmco_count; i++){
4362         if(s->avctx->debug&FF_DEBUG_MMCO)
4363             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4365         switch(mmco[i].opcode){
/* Mark a short-term picture unused for reference. */
4366         case MMCO_SHORT2UNUSED:
4367             pic= remove_short(h, mmco[i].short_frame_num);
4369                 unreference_pic(h, pic);
4370             else if(s->avctx->debug&FF_DEBUG_MMCO)
4371                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
/* Move a short-term picture into a long-term slot (evicting any occupant). */
4373         case MMCO_SHORT2LONG:
4374             pic= remove_long(h, mmco[i].long_index);
4375             if(pic) unreference_pic(h, pic);
4377             h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4378             h->long_ref[ mmco[i].long_index ]->long_ref=1;
4379             h->long_ref_count++;
/* Mark a long-term picture unused for reference. */
4381         case MMCO_LONG2UNUSED:
4382             pic= remove_long(h, mmco[i].long_index);
4384                 unreference_pic(h, pic);
4385             else if(s->avctx->debug&FF_DEBUG_MMCO)
4386                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* Presumably the MMCO_LONG case: store the current picture as long-term —
 * the case label itself is not visible in this listing. */
4389             pic= remove_long(h, mmco[i].long_index);
4390             if(pic) unreference_pic(h, pic);
4392             h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4393             h->long_ref[ mmco[i].long_index ]->long_ref=1;
4394             h->long_ref_count++;
/* Truncate the long-term list to the new maximum index. */
4398         case MMCO_SET_MAX_LONG:
4399             assert(mmco[i].long_index <= 16);
4400             // just remove the long term which index is greater than new max
4401             for(j = mmco[i].long_index; j<16; j++){
4402                 pic = remove_long(h, j);
4403                 if (pic) unreference_pic(h, pic);
/* Presumably the MMCO_RESET case: drop every reference (case label not
 * visible here). */
4407             while(h->short_ref_count){
4408                 pic= remove_short(h, h->short_ref[0]->frame_num);
4409                 unreference_pic(h, pic);
4411             for(j = 0; j < 16; j++) {
4412                 pic= remove_long(h, j);
4413                 if(pic) unreference_pic(h, pic);
/* Unless an MMCO made the current picture long-term, push it onto the front
 * of the short-term list. */
4420     if(!current_is_long){
/* A duplicate frame_num in the short list indicates a corrupt state. */
4421         pic= remove_short(h, s->current_picture_ptr->frame_num);
4423             unreference_pic(h, pic);
4424             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4427         if(h->short_ref_count)
4428             memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4430         h->short_ref[0]= s->current_picture_ptr;
4431         h->short_ref[0]->long_ref=0;
4432         h->short_ref_count++;
4435     print_short_term(h);
/* Parse the dec_ref_pic_marking() slice-header syntax into h->mmco[]. For
 * IDR slices this reads no_output_of_prior_pics / long_term_reference flags;
 * otherwise it reads the adaptive MMCO list, or synthesizes a sliding-window
 * MMCO_SHORT2UNUSED when the reference buffer is full.
 * NOTE(review): listing is gapped — mmco-count bookkeeping and some closing
 * braces are not visible. */
4440 static int decode_ref_pic_marking(H264Context *h){
4441     MpegEncContext * const s = &h->s;
4444     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, stored as 0/-1 in broken_link. */
4445         s->broken_link= get_bits1(&s->gb) -1;
4446         h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4447         if(h->mmco[0].long_index == -1)
4450             h->mmco[0].opcode= MMCO_LONG;
4454         if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4455             for(i= 0; i<MAX_MMCO_COUNT; i++) {
4456                 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4458                 h->mmco[i].opcode= opcode;
4459                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 relative to the current frame_num, wrapped
 * to the frame_num range. */
4460                     h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4461 /*                    if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4462                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4466                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4467                     h->mmco[i].long_index= get_ue_golomb(&s->gb);
/* Only the 0..15 long-term slot range is accepted. */
4468                     if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4469                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4474                 if(opcode > MMCO_LONG){
4475                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4478                 if(opcode == MMCO_END)
/* Sliding-window path: when the buffer is at capacity, evict the oldest
 * short-term reference. */
4483             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4485             if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4486                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4487                 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Compute the picture order count (POC) of the current picture for all three
 * poc_type modes of H.264 clause 8.2.1, and store the per-field and frame
 * POC values into the current Picture.
 * NOTE(review): listing is gapped — `field_poc`, `i`, `poc` declarations,
 * field_poc[0] assignments for poc_type 0/2, and prev_* bookkeeping lines
 * are not visible here. */
4497 static int init_poc(H264Context *h){
4498     MpegEncContext * const s = &h->s;
4499     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* frame_num_offset accumulates each frame_num wraparound. */
4502     if(h->nal_unit_type == NAL_IDR_SLICE){
4503         h->frame_num_offset= 0;
4505         if(h->frame_num < h->prev_frame_num)
4506             h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4508             h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: explicit LSB in the bitstream; MSB inferred from wraparound. */
4511     if(h->sps.poc_type==0){
4512         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4514         if(h->nal_unit_type == NAL_IDR_SLICE){
4519         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4520             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4521         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4522             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4524             h->poc_msb = h->prev_poc_msb;
4525 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4527         field_poc[1] = h->poc_msb + h->poc_lsb;
4528         if(s->picture_structure == PICT_FRAME)
4529             field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num and the SPS offset cycle. */
4530     }else if(h->sps.poc_type==1){
4531         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4534         if(h->sps.poc_cycle_length != 0)
4535             abs_frame_num = h->frame_num_offset + h->frame_num;
4539         if(h->nal_ref_idc==0 && abs_frame_num > 0)
4542         expected_delta_per_poc_cycle = 0;
4543         for(i=0; i < h->sps.poc_cycle_length; i++)
4544             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4546         if(abs_frame_num > 0){
4547             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4548             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4550             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4551             for(i = 0; i <= frame_num_in_poc_cycle; i++)
4552                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4556         if(h->nal_ref_idc == 0)
4557             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4559         field_poc[0] = expectedpoc + h->delta_poc[0];
4560         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4562         if(s->picture_structure == PICT_FRAME)
4563             field_poc[1] += h->delta_poc[1];
/* poc_type 2 (presumably — the else branch header is not visible):
 * POC follows decoding order, with non-reference pictures offset by -1. */
4566         if(h->nal_unit_type == NAL_IDR_SLICE){
4569             if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4570             else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* Publish the computed POCs on the current picture. */
4576     if(s->picture_structure != PICT_BOTTOM_FIELD)
4577         s->current_picture_ptr->field_poc[0]= field_poc[0];
4578     if(s->picture_structure != PICT_TOP_FIELD)
4579         s->current_picture_ptr->field_poc[1]= field_poc[1];
4580     if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4581         s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4587 * decodes a slice header.
4588 * this will allso call MPV_common_init() and frame_start() as needed
/* Decode a slice header (H.264 clause 7.3.3): slice type, PPS/SPS selection,
 * geometry (re)initialization, frame_num, picture structure, POC fields,
 * reference counts and list reordering, weighted prediction tables,
 * reference marking, QP, and deblocking parameters. Also performs lazy
 * MPV_common_init() and frame_start() on the first slice of a picture.
 * Returns 0 on success, negative on error (per the visible error paths).
 * NOTE(review): this listing is heavily gapped — many declarations
 * (`slice_type`, `i`, `tmp`), `return -1` statements after the av_log error
 * lines, and various closing braces are missing from view; line 4872 even
 * contains a literal `?` placeholder, so this text is not compilable as-is. */
4590 static int decode_slice_header(H264Context *h){
4591     MpegEncContext * const s = &h->s;
4592     int first_mb_in_slice, pps_id;
4593     int num_ref_idx_active_override_flag;
4594     static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4596     int default_ref_list_done = 0;
/* Only NAL units with nal_ref_idc != 0 are reference pictures. */
4598     s->current_picture.reference= h->nal_ref_idc != 0;
4599     s->dropable= h->nal_ref_idc == 0;
4601     first_mb_in_slice= get_ue_golomb(&s->gb);
/* --- slice_type parsing; values >= 5 mean "fixed for the whole picture". */
4603     slice_type= get_ue_golomb(&s->gb);
4605         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4610         h->slice_type_fixed=1;
4612         h->slice_type_fixed=0;
4614     slice_type= slice_type_map[ slice_type ];
/* For I slices (or a repeat of the previous slice type) the default
 * reference list built by fill_default_ref_list() can be reused. */
4615     if (slice_type == I_TYPE
4616         || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4617         default_ref_list_done = 1;
4619     h->slice_type= slice_type;
4621     s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
/* --- PPS/SPS activation. */
4623     pps_id= get_ue_golomb(&s->gb);
4625         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4628     h->pps= h->pps_buffer[pps_id];
4629     if(h->pps.slice_group_count == 0){
4630         av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4634     h->sps= h->sps_buffer[ h->pps.sps_id ];
4635     if(h->sps.log2_max_frame_num == 0){
4636         av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
/* Rebuild dequant tables only when the active PPS changes. */
4640     if(h->dequant_coeff_pps != pps_id){
4641         h->dequant_coeff_pps = pps_id;
4642         init_dequant_tables(h);
/* --- picture geometry from the SPS (mb_height doubled for field coding). */
4645     s->mb_width= h->sps.mb_width;
4646     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4648     h->b_stride= s->mb_width*4;
4649     h->b8_stride= s->mb_width*2;
4651     s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4652     if(h->sps.frame_mbs_only_flag)
4653         s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4655         s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
/* Mid-stream dimension changes are handled here (body not visible). */
4657     if (s->context_initialized
4658         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
4662     if (!s->context_initialized) {
4663         if (MPV_common_init(s) < 0)
/* The permutated scan tables must match the IDCT implementation in use:
 * the C IDCT uses the spec order, SIMD variants use a transposed order. */
4666         if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4667             memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4668             memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
4671             for(i=0; i<16; i++){
4672 #define T(x) (x>>2) | ((x<<2) & 0xF)
4673                 h->zigzag_scan[i] = T(zigzag_scan[i]);
4674                 h-> field_scan[i] = T( field_scan[i]);
4678         if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4679             memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
4680             memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4681             memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
4682             memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
4685             for(i=0; i<64; i++){
4686 #define T(x) (x>>3) | ((x&7)<<3)
4687                 h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
4688                 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4689                 h->field_scan8x8[i]        = T(field_scan8x8[i]);
4690                 h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
/* With transform bypass (lossless), qp==0 blocks use the unpermutated
 * spec-order scans. */
4694         if(h->sps.transform_bypass){ //FIXME same ugly
4695             h->zigzag_scan_q0          = zigzag_scan;
4696             h->zigzag_scan8x8_q0       = zigzag_scan8x8;
4697             h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4698             h->field_scan_q0           = field_scan;
4699             h->field_scan8x8_q0        = field_scan8x8;
4700             h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
4702             h->zigzag_scan_q0          = h->zigzag_scan;
4703             h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
4704             h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4705             h->field_scan_q0           = h->field_scan;
4706             h->field_scan8x8_q0       = h->field_scan8x8;
4707             h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
/* Propagate geometry/aspect/timing to the public AVCodecContext. */
4712         s->avctx->width = s->width;
4713         s->avctx->height = s->height;
4714         s->avctx->sample_aspect_ratio= h->sps.sar;
4715         if(!s->avctx->sample_aspect_ratio.den)
4716             s->avctx->sample_aspect_ratio.den = 1;
4718         if(h->sps.timing_info_present_flag){
4719             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Old x264 builds (< 44) wrote a time_scale halved vs the spec. */
4720             if(h->x264_build > 0 && h->x264_build < 44)
4721                 s->avctx->time_base.den *= 2;
4722             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4723                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* First slice of the picture: allocate/start the frame. */
4727     if(h->slice_num == 0){
4728         if(frame_start(h) < 0)
4732     s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4733     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- picture structure: frame, field (unsupported PAFF), or MBAFF frame. */
4736     h->mb_aff_frame = 0;
4737     if(h->sps.frame_mbs_only_flag){
4738         s->picture_structure= PICT_FRAME;
4740         if(get_bits1(&s->gb)) { //field_pic_flag
4741             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4742             av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4744             s->picture_structure= PICT_FRAME;
4745             h->mb_aff_frame = h->sps.mb_aff;
/* Resync position from first_mb_in_slice (row doubled in MBAFF frames). */
4749     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4750     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4751     if(s->mb_y >= s->mb_height){
4755     if(s->picture_structure==PICT_FRAME){
4756         h->curr_pic_num=   h->frame_num;
4757         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4759         h->curr_pic_num= 2*h->frame_num;
4760         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4763     if(h->nal_unit_type == NAL_IDR_SLICE){
4764         get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC-related slice-header fields, depending on poc_type. */
4767     if(h->sps.poc_type==0){
4768         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4770         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4771             h->delta_poc_bottom= get_se_golomb(&s->gb);
4775     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4776         h->delta_poc[0]= get_se_golomb(&s->gb);
4778         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4779             h->delta_poc[1]= get_se_golomb(&s->gb);
4784     if(h->pps.redundant_pic_cnt_present){
4785         h->redundant_pic_count= get_ue_golomb(&s->gb);
4788     //set defaults, might be overriden a few line later
4789     h->ref_count[0]= h->pps.ref_count[0];
4790     h->ref_count[1]= h->pps.ref_count[1];
/* --- reference counts and prediction-mode flags for P/SP/B slices. */
4792     if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4793         if(h->slice_type == B_TYPE){
4794             h->direct_spatial_mv_pred= get_bits1(&s->gb);
4795             if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4796                 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4798         num_ref_idx_active_override_flag= get_bits1(&s->gb);
4800         if(num_ref_idx_active_override_flag){
4801             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4802             if(h->slice_type==B_TYPE)
4803                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4805             if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4806                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
/* --- reference list construction, reordering, and weighting. */
4812     if(!default_ref_list_done){
4813         fill_default_ref_list(h);
4816     if(decode_ref_pic_list_reordering(h) < 0)
4819     if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4820        || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4821         pred_weight_table(h);
4822     else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4823         implicit_weight_table(h);
4827     if(s->current_picture.reference)
4828         decode_ref_pic_marking(h);
4831         fill_mbaff_ref_list(h);
4833     if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4834         h->cabac_init_idc = get_ue_golomb(&s->gb);
/* --- QP and deblocking parameters. */
4836     h->last_qscale_diff = 0;
4837     s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4838     if(s->qscale<0 || s->qscale>51){
4839         av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4842     h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4843     //FIXME qscale / qp ... stuff
4844     if(h->slice_type == SP_TYPE){
4845         get_bits1(&s->gb); /* sp_for_switch_flag */
4847     if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4848         get_se_golomb(&s->gb); /* slice_qs_delta */
4851     h->deblocking_filter = 1;
4852     h->slice_alpha_c0_offset = 0;
4853     h->slice_beta_offset = 0;
4854     if( h->pps.deblocking_filter_parameters_present ) {
4855         h->deblocking_filter= get_ue_golomb(&s->gb);
/* Bitstream encodes 0=on, 1=off; internal convention is the inverse. */
4856         if(h->deblocking_filter < 2)
4857             h->deblocking_filter^= 1; // 1<->0
4859         if( h->deblocking_filter ) {
4860             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4861             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* User-requested loop-filter skipping overrides the bitstream setting. */
4864     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
4865        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4866        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type == B_TYPE)
4867        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4868         h->deblocking_filter= 0;
/* NOTE(review): the `?` below is a placeholder in this listing — the real
 * source computes the bit width from pic_size_in_map_units; not compilable
 * as shown. */
4871     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4872         slice_group_change_cycle= get_bits(&s->gb, ?);
4877     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4878     h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4880     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4881         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4883                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4885                av_get_pict_type_char(h->slice_type),
4886                pps_id, h->frame_num,
4887                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4888                h->ref_count[0], h->ref_count[1],
4890                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4892                h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* Non-reference frames may use the faster 2-tap qpel when FAST is set. */
4896     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4897         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4898         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4900         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4901         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/* Read the CAVLC level_prefix: the number of leading zero bits before the
 * first 1 in the bitstream, using the open-reader macros for speed. The
 * prefix value (log-based) is computed from the cached bits; the return
 * statement falls outside the visible lines of this listing. */
4910 static inline int get_level_prefix(GetBitContext *gb){
4914     OPEN_READER(re, gb);
4915     UPDATE_CACHE(re, gb);
4916     buf=GET_CACHE(re, gb);
/* Position of the first set bit from the MSB side. */
4918     log= 32 - av_log2(buf);
4920     print_bin(buf>>(32-log), log);
4921     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4924     LAST_SKIP_BITS(re, gb, log);
4925     CLOSE_READER(re, gb);
/* Check whether the 8x8 transform may be used for the current macroblock:
 * disallowed if any sub-macroblock partition is smaller than 8x8, or is
 * DIRECT without direct_8x8_inference. Loop header and return statements
 * are not visible in this gapped listing. */
4930 static inline int get_dct8x8_allowed(H264Context *h){
4933         if(!IS_SUB_8X8(h->sub_mb_type[i])
4934            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4941 * decodes a residual block.
4942 * @param n block index
4943 * @param scantable scantable
4944 * @param max_coeff number of coefficients in the block
4945 * @return <0 if an error occured
/* Decode one CAVLC residual block (H.264 clause 9.2): coeff_token, trailing
 * ones, levels with adaptive suffix length, total_zeros, and run_before,
 * writing dequantized coefficients into `block` via `scantable`.
 * @param n         block index (special-cased for luma/chroma DC)
 * @param scantable zigzag/field scan order for coefficient placement
 * @param qmul      dequant multipliers, or the qp==0/DC path when bypassed
 * @param max_coeff number of coefficients in the block
 * @return <0 on error (illegal prefix or negative zeros_left)
 * NOTE(review): listing is gapped — the `level[16]` declaration, several
 * early returns, suffix_length update lines, and closing braces are missing
 * from view. */
4947 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4948     MpegEncContext * const s = &h->s;
/* Maps predicted nnz to one of 4 coeff_token VLC tables. */
4949     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4951     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4953     //FIXME put trailing_onex into the context
/* coeff_token selection: chroma DC uses its own VLC; luma DC and AC use the
 * table chosen by the predicted non-zero count of neighbours. */
4955     if(n == CHROMA_DC_BLOCK_INDEX){
4956         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4957         total_coeff= coeff_token>>2;
4959         if(n == LUMA_DC_BLOCK_INDEX){
4960             total_coeff= pred_non_zero_count(h, 0);
4961             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4962             total_coeff= coeff_token>>2;
4964             total_coeff= pred_non_zero_count(h, n);
4965             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4966             total_coeff= coeff_token>>2;
4967             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4971     //FIXME set last_non_zero?
/* Trailing ones: up to 3 coefficients of magnitude 1, sign-only coded. */
4976     trailing_ones= coeff_token&3;
4977     tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4978     assert(total_coeff<=16);
4980     for(i=0; i<trailing_ones; i++){
4981         level[i]= 1 - 2*get_bits1(gb);
4985         int level_code, mask;
/* First non-trailing level: suffix length 1 only for many-coeff blocks. */
4986         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4987         int prefix= get_level_prefix(gb);
4989         //first coefficient has suffix_length equal to 0 or 1
4990         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4992             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4994             level_code= (prefix<<suffix_length); //part
4995         }else if(prefix==14){
4997             level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4999             level_code= prefix + get_bits(gb, 4); //part
5000         }else if(prefix==15){
5001             level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
5002             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
5004             av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* With <3 trailing ones, |level|==1 codes were consumed above, so shift. */
5008         if(trailing_ones < 3) level_code += 2;
/* Branchless map of level_code to signed level. */
5013         mask= -(level_code&1);
5014         level[i]= (((2+level_code)>>1) ^ mask) - mask;
5017         //remaining coefficients have suffix_length > 0
5018         for(;i<total_coeff;i++) {
5019             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5020             prefix = get_level_prefix(gb);
5022                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5023             }else if(prefix==15){
5024                 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5026                 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5029             mask= -(level_code&1);
5030             level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* Adaptive suffix length grows as level magnitudes grow. */
5031             if(level_code > suffix_limit[suffix_length])
/* total_zeros: number of zeros interleaved before the last coefficient. */
5036     if(total_coeff == max_coeff)
5039         if(n == CHROMA_DC_BLOCK_INDEX)
5040             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5042             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5045     coeff_num = zeros_left + total_coeff - 1;
5046     j = scantable[coeff_num];
/* Two placement loops: raw levels (qmul-bypassed path), then the dequant
 * path below; the branch between them is not visible in this listing. */
5048         block[j] = level[0];
5049         for(i=1;i<total_coeff;i++) {
5052             else if(zeros_left < 7){
5053                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5055                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5057             zeros_left -= run_before;
5058             coeff_num -= 1 + run_before;
5059             j= scantable[ coeff_num ];
/* Dequant path: (level * qmul + 32) >> 6 per coefficient. */
5064         block[j] = (level[0] * qmul[j] + 32)>>6;
5065         for(i=1;i<total_coeff;i++) {
5068             else if(zeros_left < 7){
5069                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5071                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5073             zeros_left -= run_before;
5074             coeff_num -= 1 + run_before;
5075             j= scantable[ coeff_num ];
5077             block[j]= (level[i] * qmul[j] + 32)>>6;
/* A negative zeros_left means the bitstream was inconsistent. */
5082         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For a skipped MBAFF pair with no explicit mb_field_decoding_flag, predict
 * the field/frame decoding mode from the left neighbour if it belongs to
 * this slice, else from the top neighbour. (The fallback value when neither
 * neighbour qualifies sits on a line not visible in this listing.) */
5089 static void predict_field_decoding_flag(H264Context *h){
5090     MpegEncContext * const s = &h->s;
5091     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5092     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5093                 ? s->current_picture.mb_type[mb_xy-1]
5094                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5095                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5097     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5101 * decodes a P_SKIP or B_SKIP macroblock
/* Decode a P_SKIP or B_SKIP macroblock: no residual, motion derived either
 * from direct prediction (B) or the P-skip motion predictor, then write back
 * motion vectors and macroblock metadata.
 * NOTE(review): listing is gapped — the initial mb_type value, `mx`/`my`
 * declarations, and several braces are missing from view. */
5103 static void decode_mb_skip(H264Context *h){
5104     MpegEncContext * const s = &h->s;
5105     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Skipped MBs carry no coded coefficients. */
5108     memset(h->non_zero_count[mb_xy], 0, 16);
5109     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5112         mb_type|= MB_TYPE_INTERLACED;
5114     if( h->slice_type == B_TYPE )
5116         // just for fill_caches. pred_direct_motion will set the real mb_type
5117         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5119         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5120         pred_direct_motion(h, &mb_type);
5121         mb_type|= MB_TYPE_SKIP;
/* P_SKIP path (presumably the else branch — its header is not visible). */
5126         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5128         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5129         pred_pskip_motion(h, &mx, &my);
5130         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5131         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5134     write_back_motion(h, mb_type);
5135     s->current_picture.mb_type[mb_xy]= mb_type;
5136     s->current_picture.qscale_table[mb_xy]= s->qscale;
5137     h->slice_table[ mb_xy ]= h->slice_num;
5138     h->prev_mb_skipped= 1;
5142 * decodes a macroblock
5143 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5145 static int decode_mb_cavlc(H264Context *h){
5146 MpegEncContext * const s = &h->s;
5147 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5148 int mb_type, partition_count, cbp;
5149 int dct8x8_allowed= h->pps.transform_8x8_mode;
5151 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5153 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5154 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5156 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5157 if(s->mb_skip_run==-1)
5158 s->mb_skip_run= get_ue_golomb(&s->gb);
5160 if (s->mb_skip_run--) {
5161 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5162 if(s->mb_skip_run==0)
5163 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5165 predict_field_decoding_flag(h);
5172 if( (s->mb_y&1) == 0 )
5173 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5175 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5177 h->prev_mb_skipped= 0;
5179 mb_type= get_ue_golomb(&s->gb);
5180 if(h->slice_type == B_TYPE){
5182 partition_count= b_mb_type_info[mb_type].partition_count;
5183 mb_type= b_mb_type_info[mb_type].type;
5186 goto decode_intra_mb;
5188 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5190 partition_count= p_mb_type_info[mb_type].partition_count;
5191 mb_type= p_mb_type_info[mb_type].type;
5194 goto decode_intra_mb;
5197 assert(h->slice_type == I_TYPE);
5200 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5204 cbp= i_mb_type_info[mb_type].cbp;
5205 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5206 mb_type= i_mb_type_info[mb_type].type;
5210 mb_type |= MB_TYPE_INTERLACED;
5212 h->slice_table[ mb_xy ]= h->slice_num;
5214 if(IS_INTRA_PCM(mb_type)){
5217 // we assume these blocks are very rare so we dont optimize it
5218 align_get_bits(&s->gb);
5220 // The pixels are stored in the same order as levels in h->mb array.
5221 for(y=0; y<16; y++){
5222 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5223 for(x=0; x<16; x++){
5224 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5225 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5229 const int index= 256 + 4*(y&3) + 32*(y>>2);
5231 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5232 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5236 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5238 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5239 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5243 // In deblocking, the quantizer is 0
5244 s->current_picture.qscale_table[mb_xy]= 0;
5245 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5246 // All coeffs are present
5247 memset(h->non_zero_count[mb_xy], 16, 16);
5249 s->current_picture.mb_type[mb_xy]= mb_type;
5254 h->ref_count[0] <<= 1;
5255 h->ref_count[1] <<= 1;
5258 fill_caches(h, mb_type, 0);
5261 if(IS_INTRA(mb_type)){
5262 // init_top_left_availability(h);
5263 if(IS_INTRA4x4(mb_type)){
5266 if(dct8x8_allowed && get_bits1(&s->gb)){
5267 mb_type |= MB_TYPE_8x8DCT;
5271 // fill_intra4x4_pred_table(h);
5272 for(i=0; i<16; i+=di){
5273 int mode= pred_intra_mode(h, i);
5275 if(!get_bits1(&s->gb)){
5276 const int rem_mode= get_bits(&s->gb, 3);
5277 mode = rem_mode + (rem_mode >= mode);
5281 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5283 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5285 write_back_intra_pred_mode(h);
5286 if( check_intra4x4_pred_mode(h) < 0)
5289 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5290 if(h->intra16x16_pred_mode < 0)
5293 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5295 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5296 if(h->chroma_pred_mode < 0)
5298 }else if(partition_count==4){
5299 int i, j, sub_partition_count[4], list, ref[2][4];
5301 if(h->slice_type == B_TYPE){
5303 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5304 if(h->sub_mb_type[i] >=13){
5305 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5308 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5309 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5311 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5312 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5313 pred_direct_motion(h, &mb_type);
5314 h->ref_cache[0][scan8[4]] =
5315 h->ref_cache[1][scan8[4]] =
5316 h->ref_cache[0][scan8[12]] =
5317 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5320 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5322 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5323 if(h->sub_mb_type[i] >=4){
5324 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5327 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5328 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5332 for(list=0; list<2; list++){
5333 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5334 if(ref_count == 0) continue;
5336 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5337 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5338 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5347 dct8x8_allowed = get_dct8x8_allowed(h);
5349 for(list=0; list<2; list++){
5350 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5351 if(ref_count == 0) continue;
5354 if(IS_DIRECT(h->sub_mb_type[i])) {
5355 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5358 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5359 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5361 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5362 const int sub_mb_type= h->sub_mb_type[i];
5363 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5364 for(j=0; j<sub_partition_count[i]; j++){
5366 const int index= 4*i + block_width*j;
5367 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5368 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5369 mx += get_se_golomb(&s->gb);
5370 my += get_se_golomb(&s->gb);
5371 tprintf("final mv:%d %d\n", mx, my);
5373 if(IS_SUB_8X8(sub_mb_type)){
5374 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5375 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5376 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5377 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5378 }else if(IS_SUB_8X4(sub_mb_type)){
5379 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5380 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5381 }else if(IS_SUB_4X8(sub_mb_type)){
5382 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5383 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5385 assert(IS_SUB_4X4(sub_mb_type));
5386 mv_cache[ 0 ][0]= mx;
5387 mv_cache[ 0 ][1]= my;
5391 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5397 }else if(IS_DIRECT(mb_type)){
5398 pred_direct_motion(h, &mb_type);
5399 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5401 int list, mx, my, i;
5402 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5403 if(IS_16X16(mb_type)){
5404 for(list=0; list<2; list++){
5405 if(h->ref_count[list]>0){
5406 if(IS_DIR(mb_type, 0, list)){
5407 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5408 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5410 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5413 for(list=0; list<2; list++){
5414 if(IS_DIR(mb_type, 0, list)){
5415 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5416 mx += get_se_golomb(&s->gb);
5417 my += get_se_golomb(&s->gb);
5418 tprintf("final mv:%d %d\n", mx, my);
5420 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5422 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5425 else if(IS_16X8(mb_type)){
5426 for(list=0; list<2; list++){
5427 if(h->ref_count[list]>0){
5429 if(IS_DIR(mb_type, i, list)){
5430 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5431 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5433 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5437 for(list=0; list<2; list++){
5439 if(IS_DIR(mb_type, i, list)){
5440 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5441 mx += get_se_golomb(&s->gb);
5442 my += get_se_golomb(&s->gb);
5443 tprintf("final mv:%d %d\n", mx, my);
5445 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5447 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5451 assert(IS_8X16(mb_type));
5452 for(list=0; list<2; list++){
5453 if(h->ref_count[list]>0){
5455 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5456 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5457 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5459 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5463 for(list=0; list<2; list++){
5465 if(IS_DIR(mb_type, i, list)){
5466 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5467 mx += get_se_golomb(&s->gb);
5468 my += get_se_golomb(&s->gb);
5469 tprintf("final mv:%d %d\n", mx, my);
5471 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5473 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5479 if(IS_INTER(mb_type))
5480 write_back_motion(h, mb_type);
5482 if(!IS_INTRA16x16(mb_type)){
5483 cbp= get_ue_golomb(&s->gb);
5485 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5489 if(IS_INTRA4x4(mb_type))
5490 cbp= golomb_to_intra4x4_cbp[cbp];
5492 cbp= golomb_to_inter_cbp[cbp];
5496 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5497 if(get_bits1(&s->gb))
5498 mb_type |= MB_TYPE_8x8DCT;
5500 s->current_picture.mb_type[mb_xy]= mb_type;
5502 if(cbp || IS_INTRA16x16(mb_type)){
5503 int i8x8, i4x4, chroma_idx;
5504 int chroma_qp, dquant;
5505 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5506 const uint8_t *scan, *scan8x8, *dc_scan;
5508 // fill_non_zero_count_cache(h);
5510 if(IS_INTERLACED(mb_type)){
5511 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5512 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5513 dc_scan= luma_dc_field_scan;
5515 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5516 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5517 dc_scan= luma_dc_zigzag_scan;
5520 dquant= get_se_golomb(&s->gb);
5522 if( dquant > 25 || dquant < -26 ){
5523 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5527 s->qscale += dquant;
5528 if(((unsigned)s->qscale) > 51){
5529 if(s->qscale<0) s->qscale+= 52;
5530 else s->qscale-= 52;
5533 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5534 if(IS_INTRA16x16(mb_type)){
5535 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5536 return -1; //FIXME continue if partitioned and other return -1 too
5539 assert((cbp&15) == 0 || (cbp&15) == 15);
5542 for(i8x8=0; i8x8<4; i8x8++){
5543 for(i4x4=0; i4x4<4; i4x4++){
5544 const int index= i4x4 + 4*i8x8;
5545 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5551 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5554 for(i8x8=0; i8x8<4; i8x8++){
5555 if(cbp & (1<<i8x8)){
5556 if(IS_8x8DCT(mb_type)){
5557 DCTELEM *buf = &h->mb[64*i8x8];
5559 for(i4x4=0; i4x4<4; i4x4++){
5560 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5561 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5564 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5565 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5567 for(i4x4=0; i4x4<4; i4x4++){
5568 const int index= i4x4 + 4*i8x8;
5570 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5576 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5577 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5583 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5584 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5590 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5591 for(i4x4=0; i4x4<4; i4x4++){
5592 const int index= 16 + 4*chroma_idx + i4x4;
5593 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5599 uint8_t * const nnz= &h->non_zero_count_cache[0];
5600 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5601 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5604 uint8_t * const nnz= &h->non_zero_count_cache[0];
5605 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5606 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5607 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5609 s->current_picture.qscale_table[mb_xy]= s->qscale;
5610 write_back_non_zero_count(h);
5613 h->ref_count[0] >>= 1;
5614 h->ref_count[1] >>= 1;
5620 static int decode_cabac_field_decoding_flag(H264Context *h) {
5621 MpegEncContext * const s = &h->s;
5622 const int mb_x = s->mb_x;
5623 const int mb_y = s->mb_y & ~1;
5624 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5625 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5627 unsigned int ctx = 0;
5629 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5636 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5640 uint8_t *state= &h->cabac_state[ctx_base];
5644 MpegEncContext * const s = &h->s;
5645 const int mba_xy = h->left_mb_xy[0];
5646 const int mbb_xy = h->top_mb_xy;
5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5652 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5653 return 0; /* I4x4 */
5656 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5657 return 0; /* I4x4 */
5660 if( get_cabac_terminate( &h->cabac ) )
5661 return 25; /* PCM */
5663 mb_type = 1; /* I16x16 */
5664 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5665 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5666 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5667 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5668 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5672 static int decode_cabac_mb_type( H264Context *h ) {
5673 MpegEncContext * const s = &h->s;
5675 if( h->slice_type == I_TYPE ) {
5676 return decode_cabac_intra_mb_type(h, 3, 1);
5677 } else if( h->slice_type == P_TYPE ) {
5678 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5680 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5681 /* P_L0_D16x16, P_8x8 */
5682 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5684 /* P_L0_D8x16, P_L0_D16x8 */
5685 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5690 } else if( h->slice_type == B_TYPE ) {
5691 const int mba_xy = h->left_mb_xy[0];
5692 const int mbb_xy = h->top_mb_xy;
5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5701 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5702 return 0; /* B_Direct_16x16 */
5704 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5705 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5708 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5709 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5710 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5711 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5714 else if( bits == 13 ) {
5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5716 } else if( bits == 14 )
5717 return 11; /* B_L1_L0_8x16 */
5718 else if( bits == 15 )
5719 return 22; /* B_8x8 */
5721 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5724 /* TODO SI/SP frames? */
5729 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5730 MpegEncContext * const s = &h->s;
5734 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5735 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5738 && h->slice_table[mba_xy] == h->slice_num
5739 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5740 mba_xy += s->mb_stride;
5742 mbb_xy = mb_xy - s->mb_stride;
5744 && h->slice_table[mbb_xy] == h->slice_num
5745 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5746 mbb_xy -= s->mb_stride;
5748 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5750 int mb_xy = mb_x + mb_y*s->mb_stride;
5752 mbb_xy = mb_xy - s->mb_stride;
5755 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5760 if( h->slice_type == B_TYPE )
5762 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5768 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5771 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5772 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5773 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5775 if( mode >= pred_mode )
5781 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5782 const int mba_xy = h->left_mb_xy[0];
5783 const int mbb_xy = h->top_mb_xy;
5787 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5788 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5794 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5805 static const uint8_t block_idx_x[16] = {
5806 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5808 static const uint8_t block_idx_y[16] = {
5809 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5811 static const uint8_t block_idx_xy[4][4] = {
5818 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5823 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5825 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5828 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5833 x = block_idx_x[4*i8x8];
5834 y = block_idx_y[4*i8x8];
5838 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5839 cbp_a = h->left_cbp;
5840 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5846 /* No need to test for skip as we put 0 for skip block */
5847 /* No need to test for IPCM as we put 1 for IPCM block */
5849 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5850 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5855 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5856 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5860 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5866 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5870 cbp_a = (h->left_cbp>>4)&0x03;
5871 cbp_b = (h-> top_cbp>>4)&0x03;
5874 if( cbp_a > 0 ) ctx++;
5875 if( cbp_b > 0 ) ctx += 2;
5876 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5880 if( cbp_a == 2 ) ctx++;
5881 if( cbp_b == 2 ) ctx += 2;
5882 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5884 static int decode_cabac_mb_dqp( H264Context *h) {
5885 MpegEncContext * const s = &h->s;
5891 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5895 if( h->last_qscale_diff != 0 )
5898 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5904 if(val > 102) //prevent infinite loop
5911 return -(val + 1)/2;
5913 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5914 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5916 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5918 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5922 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5924 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5925 return 0; /* B_Direct_8x8 */
5926 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5927 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5929 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5930 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5931 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5934 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5935 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5940 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5944 int refa = h->ref_cache[list][scan8[n] - 1];
5945 int refb = h->ref_cache[list][scan8[n] - 8];
5949 if( h->slice_type == B_TYPE) {
5950 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5952 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5961 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5971 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5972 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5973 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5974 int ctxbase = (l == 0) ? 40 : 47;
5979 else if( amvd > 32 )
5984 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5989 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5997 while( get_cabac_bypass( &h->cabac ) ) {
6002 if( get_cabac_bypass( &h->cabac ) )
6006 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
6010 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6015 nza = h->left_cbp&0x100;
6016 nzb = h-> top_cbp&0x100;
6017 } else if( cat == 1 || cat == 2 ) {
6018 nza = h->non_zero_count_cache[scan8[idx] - 1];
6019 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6020 } else if( cat == 3 ) {
6021 nza = (h->left_cbp>>(6+idx))&0x01;
6022 nzb = (h-> top_cbp>>(6+idx))&0x01;
6025 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6026 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6035 return ctx + 4 * cat;
6038 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6039 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6040 static const int significant_coeff_flag_offset[2][6] = {
6041 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6042 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6044 static const int last_coeff_flag_offset[2][6] = {
6045 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6046 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6048 static const int coeff_abs_level_m1_offset[6] = {
6049 227+0, 227+10, 227+20, 227+30, 227+39, 426
6051 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6052 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6053 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6054 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6055 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6056 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6057 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6058 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6059 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6061 static const uint8_t last_coeff_flag_offset_8x8[63] = {
6062 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6063 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6064 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6065 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6071 int coeff_count = 0;
6074 int abslevelgt1 = 0;
6076 uint8_t *significant_coeff_ctx_base;
6077 uint8_t *last_coeff_ctx_base;
6078 uint8_t *abs_level_m1_ctx_base;
6080 /* cat: 0-> DC 16x16 n = 0
6081 * 1-> AC 16x16 n = luma4x4idx
6082 * 2-> Luma4x4 n = luma4x4idx
6083 * 3-> DC Chroma n = iCbCr
6084 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6085 * 5-> Luma8x8 n = 4 * luma8x8idx
6088 /* read coded block flag */
6090 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6091 if( cat == 1 || cat == 2 )
6092 h->non_zero_count_cache[scan8[n]] = 0;
6094 h->non_zero_count_cache[scan8[16+n]] = 0;
6100 significant_coeff_ctx_base = h->cabac_state
6101 + significant_coeff_flag_offset[MB_FIELD][cat];
6102 last_coeff_ctx_base = h->cabac_state
6103 + last_coeff_flag_offset[MB_FIELD][cat];
6104 abs_level_m1_ctx_base = h->cabac_state
6105 + coeff_abs_level_m1_offset[cat];
6108 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6109 for(last= 0; last < coefs; last++) { \
6110 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6111 if( get_cabac( &h->cabac, sig_ctx )) { \
6112 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6113 index[coeff_count++] = last; \
6114 if( get_cabac( &h->cabac, last_ctx ) ) { \
6120 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6121 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6123 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6125 if( last == max_coeff -1 ) {
6126 index[coeff_count++] = last;
6128 assert(coeff_count > 0);
6131 h->cbp_table[mb_xy] |= 0x100;
6132 else if( cat == 1 || cat == 2 )
6133 h->non_zero_count_cache[scan8[n]] = coeff_count;
6135 h->cbp_table[mb_xy] |= 0x40 << n;
6137 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6140 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6143 for( i = coeff_count - 1; i >= 0; i-- ) {
6144 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6145 int j= scantable[index[i]];
6147 if( get_cabac( &h->cabac, ctx ) == 0 ) {
6149 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6152 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6153 else block[j] = ( qmul[j] + 32) >> 6;
6159 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6160 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6164 if( coeff_abs >= 15 ) {
6166 while( get_cabac_bypass( &h->cabac ) ) {
6167 coeff_abs += 1 << j;
6172 if( get_cabac_bypass( &h->cabac ) )
6173 coeff_abs += 1 << j ;
6178 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6179 else block[j] = coeff_abs;
6181 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6182 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6191 static void inline compute_mb_neighbors(H264Context *h)
6193 MpegEncContext * const s = &h->s;
6194 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6195 h->top_mb_xy = mb_xy - s->mb_stride;
6196 h->left_mb_xy[0] = mb_xy - 1;
6198 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6199 const int top_pair_xy = pair_xy - s->mb_stride;
6200 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6201 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6202 const int curr_mb_frame_flag = !MB_FIELD;
6203 const int bottom = (s->mb_y & 1);
6205 ? !curr_mb_frame_flag // bottom macroblock
6206 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6208 h->top_mb_xy -= s->mb_stride;
6210 if (left_mb_frame_flag != curr_mb_frame_flag) {
6211 h->left_mb_xy[0] = pair_xy - 1;
6218 * decodes a macroblock
6219 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6221 static int decode_mb_cabac(H264Context *h) {
6222 MpegEncContext * const s = &h->s;
6223 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6224 int mb_type, partition_count, cbp = 0;
6225 int dct8x8_allowed= h->pps.transform_8x8_mode;
6227 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6229 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6230 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6232 /* a skipped mb needs the aff flag from the following mb */
6233 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6234 predict_field_decoding_flag(h);
6235 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6236 skip = h->next_mb_skipped;
6238 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6239 /* read skip flags */
6241 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6242 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6243 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6244 if(h->next_mb_skipped)
6245 predict_field_decoding_flag(h);
6247 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6252 h->cbp_table[mb_xy] = 0;
6253 h->chroma_pred_mode_table[mb_xy] = 0;
6254 h->last_qscale_diff = 0;
6261 if( (s->mb_y&1) == 0 )
6263 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6265 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6267 h->prev_mb_skipped = 0;
6269 compute_mb_neighbors(h);
6270 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6271 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6275 if( h->slice_type == B_TYPE ) {
6277 partition_count= b_mb_type_info[mb_type].partition_count;
6278 mb_type= b_mb_type_info[mb_type].type;
6281 goto decode_intra_mb;
6283 } else if( h->slice_type == P_TYPE ) {
6285 partition_count= p_mb_type_info[mb_type].partition_count;
6286 mb_type= p_mb_type_info[mb_type].type;
6289 goto decode_intra_mb;
6292 assert(h->slice_type == I_TYPE);
6294 partition_count = 0;
6295 cbp= i_mb_type_info[mb_type].cbp;
6296 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6297 mb_type= i_mb_type_info[mb_type].type;
6300 mb_type |= MB_TYPE_INTERLACED;
6302 h->slice_table[ mb_xy ]= h->slice_num;
6304 if(IS_INTRA_PCM(mb_type)) {
6308 // We assume these blocks are very rare so we dont optimize it.
6309 // FIXME The two following lines get the bitstream position in the cabac
6310 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6311 ptr= h->cabac.bytestream;
6312 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6314 // The pixels are stored in the same order as levels in h->mb array.
6315 for(y=0; y<16; y++){
6316 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6317 for(x=0; x<16; x++){
6318 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6319 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6323 const int index= 256 + 4*(y&3) + 32*(y>>2);
6325 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6326 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6330 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6332 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6333 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6337 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6339 // All blocks are present
6340 h->cbp_table[mb_xy] = 0x1ef;
6341 h->chroma_pred_mode_table[mb_xy] = 0;
6342 // In deblocking, the quantizer is 0
6343 s->current_picture.qscale_table[mb_xy]= 0;
6344 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6345 // All coeffs are present
6346 memset(h->non_zero_count[mb_xy], 16, 16);
6347 s->current_picture.mb_type[mb_xy]= mb_type;
6352 h->ref_count[0] <<= 1;
6353 h->ref_count[1] <<= 1;
6356 fill_caches(h, mb_type, 0);
6358 if( IS_INTRA( mb_type ) ) {
6360 if( IS_INTRA4x4( mb_type ) ) {
6361 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6362 mb_type |= MB_TYPE_8x8DCT;
6363 for( i = 0; i < 16; i+=4 ) {
6364 int pred = pred_intra_mode( h, i );
6365 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6366 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6369 for( i = 0; i < 16; i++ ) {
6370 int pred = pred_intra_mode( h, i );
6371 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6373 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6376 write_back_intra_pred_mode(h);
6377 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6379 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6380 if( h->intra16x16_pred_mode < 0 ) return -1;
6382 h->chroma_pred_mode_table[mb_xy] =
6383 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6385 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6386 if( h->chroma_pred_mode < 0 ) return -1;
6387 } else if( partition_count == 4 ) {
6388 int i, j, sub_partition_count[4], list, ref[2][4];
6390 if( h->slice_type == B_TYPE ) {
6391 for( i = 0; i < 4; i++ ) {
6392 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6393 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6394 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6396 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6397 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6398 pred_direct_motion(h, &mb_type);
6399 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6400 for( i = 0; i < 4; i++ )
6401 if( IS_DIRECT(h->sub_mb_type[i]) )
6402 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6406 for( i = 0; i < 4; i++ ) {
6407 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6408 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6409 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6413 for( list = 0; list < 2; list++ ) {
6414 if( h->ref_count[list] > 0 ) {
6415 for( i = 0; i < 4; i++ ) {
6416 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6417 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6418 if( h->ref_count[list] > 1 )
6419 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6425 h->ref_cache[list][ scan8[4*i]+1 ]=
6426 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6432 dct8x8_allowed = get_dct8x8_allowed(h);
6434 for(list=0; list<2; list++){
6436 if(IS_DIRECT(h->sub_mb_type[i])){
6437 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6440 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6442 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6443 const int sub_mb_type= h->sub_mb_type[i];
6444 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6445 for(j=0; j<sub_partition_count[i]; j++){
6448 const int index= 4*i + block_width*j;
6449 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6450 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6451 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6453 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6454 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6455 tprintf("final mv:%d %d\n", mx, my);
6457 if(IS_SUB_8X8(sub_mb_type)){
6458 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6459 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6460 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6461 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6463 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6464 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6465 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6466 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6467 }else if(IS_SUB_8X4(sub_mb_type)){
6468 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6469 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6471 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6472 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6473 }else if(IS_SUB_4X8(sub_mb_type)){
6474 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6475 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6477 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6478 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6480 assert(IS_SUB_4X4(sub_mb_type));
6481 mv_cache[ 0 ][0]= mx;
6482 mv_cache[ 0 ][1]= my;
6484 mvd_cache[ 0 ][0]= mx - mpx;
6485 mvd_cache[ 0 ][1]= my - mpy;
6489 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6490 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6491 p[0] = p[1] = p[8] = p[9] = 0;
6492 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6496 } else if( IS_DIRECT(mb_type) ) {
6497 pred_direct_motion(h, &mb_type);
6498 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6499 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6500 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6502 int list, mx, my, i, mpx, mpy;
6503 if(IS_16X16(mb_type)){
6504 for(list=0; list<2; list++){
6505 if(IS_DIR(mb_type, 0, list)){
6506 if(h->ref_count[list] > 0 ){
6507 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6508 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6511 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6513 for(list=0; list<2; list++){
6514 if(IS_DIR(mb_type, 0, list)){
6515 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6517 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6518 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6519 tprintf("final mv:%d %d\n", mx, my);
6521 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6522 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6524 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6527 else if(IS_16X8(mb_type)){
6528 for(list=0; list<2; list++){
6529 if(h->ref_count[list]>0){
6531 if(IS_DIR(mb_type, i, list)){
6532 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6533 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6535 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6539 for(list=0; list<2; list++){
6541 if(IS_DIR(mb_type, i, list)){
6542 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6543 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6544 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6545 tprintf("final mv:%d %d\n", mx, my);
6547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6548 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6550 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6551 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6556 assert(IS_8X16(mb_type));
6557 for(list=0; list<2; list++){
6558 if(h->ref_count[list]>0){
6560 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6561 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6562 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6564 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6568 for(list=0; list<2; list++){
6570 if(IS_DIR(mb_type, i, list)){
6571 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6572 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6573 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6575 tprintf("final mv:%d %d\n", mx, my);
6576 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6577 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6579 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6580 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6587 if( IS_INTER( mb_type ) ) {
6588 h->chroma_pred_mode_table[mb_xy] = 0;
6589 write_back_motion( h, mb_type );
6592 if( !IS_INTRA16x16( mb_type ) ) {
6593 cbp = decode_cabac_mb_cbp_luma( h );
6594 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6597 h->cbp_table[mb_xy] = h->cbp = cbp;
6599 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6600 if( decode_cabac_mb_transform_size( h ) )
6601 mb_type |= MB_TYPE_8x8DCT;
6603 s->current_picture.mb_type[mb_xy]= mb_type;
6605 if( cbp || IS_INTRA16x16( mb_type ) ) {
6606 const uint8_t *scan, *scan8x8, *dc_scan;
6609 if(IS_INTERLACED(mb_type)){
6610 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6611 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6612 dc_scan= luma_dc_field_scan;
6614 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6615 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6616 dc_scan= luma_dc_zigzag_scan;
6619 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6620 if( dqp == INT_MIN ){
6621 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6625 if(((unsigned)s->qscale) > 51){
6626 if(s->qscale<0) s->qscale+= 52;
6627 else s->qscale-= 52;
6629 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6631 if( IS_INTRA16x16( mb_type ) ) {
6633 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6634 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6637 for( i = 0; i < 16; i++ ) {
6638 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6639 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6643 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6647 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6648 if( cbp & (1<<i8x8) ) {
6649 if( IS_8x8DCT(mb_type) ) {
6650 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6651 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6654 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6655 const int index = 4*i8x8 + i4x4;
6656 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6658 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6660 //STOP_TIMER("decode_residual")
6663 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6664 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6671 for( c = 0; c < 2; c++ ) {
6672 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6673 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6680 for( c = 0; c < 2; c++ ) {
6681 for( i = 0; i < 4; i++ ) {
6682 const int index = 16 + 4 * c + i;
6683 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6684 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6689 uint8_t * const nnz= &h->non_zero_count_cache[0];
6690 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6691 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6694 uint8_t * const nnz= &h->non_zero_count_cache[0];
6695 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6696 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6697 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6698 h->last_qscale_diff = 0;
6701 s->current_picture.qscale_table[mb_xy]= s->qscale;
6702 write_back_non_zero_count(h);
6705 h->ref_count[0] >>= 1;
6706 h->ref_count[1] >>= 1;
6713 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6715 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6716 const int alpha = alpha_table[index_a];
6717 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6722 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6723 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6725 /* 16px edge length, because bS=4 is triggered by being at
6726 * the edge of an intra MB, so all 4 bS are the same */
6727 for( d = 0; d < 16; d++ ) {
6728 const int p0 = pix[-1];
6729 const int p1 = pix[-2];
6730 const int p2 = pix[-3];
6732 const int q0 = pix[0];
6733 const int q1 = pix[1];
6734 const int q2 = pix[2];
6736 if( FFABS( p0 - q0 ) < alpha &&
6737 FFABS( p1 - p0 ) < beta &&
6738 FFABS( q1 - q0 ) < beta ) {
6740 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6741 if( FFABS( p2 - p0 ) < beta)
6743 const int p3 = pix[-4];
6745 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6746 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6747 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6750 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6752 if( FFABS( q2 - q0 ) < beta)
6754 const int q3 = pix[3];
6756 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6757 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6758 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6761 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6765 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6766 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6768 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge. Same threshold setup as the luma
 * version, but chroma always uses the DSP routines: a tc-clipped filter
 * for bS < 4 and the "intra" strong filter otherwise. */
6774 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6776 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6777 const int alpha = alpha_table[index_a];
6778 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Chroma tc is tc0 + 1 (spec); 0 marks an unfiltered group. */
6783 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6784 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong chroma filter (no tc clipping). */
6786 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * Unlike filter_mb_edgev there are 8 bS values and 2 QPs (one per
 * neighboring field/frame MB), so everything is done per pixel row in C. */
6790 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6792 for( i = 0; i < 16; i++, pix += stride) {
/* Map the pixel row to its bS entry; the mapping depends on whether the
 * current MB is field-coded (rows interleave between the two neighbors). */
6798 int bS_index = (i >> 1);
6801 bS_index |= (i & 1);
6804 if( bS[bS_index] == 0 ) {
/* Pick which of the two neighbor QPs applies to this row and derive
 * the alpha/beta thresholds from it. */
6808 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6809 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6810 alpha = alpha_table[index_a];
6811 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Normal filtering (bS 1..3): tc0-clipped adjustment of p0/q0, with
 * optional p1/q1 updates when p2/q2 are smooth. */
6813 if( bS[bS_index] < 4 ) {
6814 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6815 const int p0 = pix[-1];
6816 const int p1 = pix[-2];
6817 const int p2 = pix[-3];
6818 const int q0 = pix[0];
6819 const int q1 = pix[1];
6820 const int q2 = pix[2];
6822 if( FFABS( p0 - q0 ) < alpha &&
6823 FFABS( p1 - p0 ) < beta &&
6824 FFABS( q1 - q0 ) < beta ) {
6828 if( FFABS( p2 - p0 ) < beta ) {
6829 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6832 if( FFABS( q2 - q0 ) < beta ) {
6833 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* Core delta applied symmetrically to p0 and q0, clipped to +-tc. */
6837 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6838 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6839 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6840 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filtering (bS == 4): same spec formulas as filter_mb_edgev,
 * applied to a single pixel row here. */
6843 const int p0 = pix[-1];
6844 const int p1 = pix[-2];
6845 const int p2 = pix[-3];
6847 const int q0 = pix[0];
6848 const int q1 = pix[1];
6849 const int q2 = pix[2];
6851 if( FFABS( p0 - q0 ) < alpha &&
6852 FFABS( p1 - p0 ) < beta &&
6853 FFABS( q1 - q0 ) < beta ) {
6855 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6856 if( FFABS( p2 - p0 ) < beta)
6858 const int p3 = pix[-4];
6860 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6861 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6862 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6865 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6867 if( FFABS( q2 - q0 ) < beta)
6869 const int q3 = pix[3];
6871 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6872 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6873 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6876 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6880 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6881 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6883 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: first vertical chroma edge
 * of an MBAFF pair, 8 rows tall, 8 bS values and 2 candidate QPs. */
6888 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6890 for( i = 0; i < 8; i++, pix += stride) {
6898 if( bS[bS_index] == 0 ) {
/* Select the neighbor QP for this row and derive thresholds. */
6902 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6903 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6904 alpha = alpha_table[index_a];
6905 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Normal chroma filtering: only p0/q0 are modified, clip is tc0 + 1. */
6907 if( bS[bS_index] < 4 ) {
6908 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6909 const int p0 = pix[-1];
6910 const int p1 = pix[-2];
6911 const int q0 = pix[0];
6912 const int q1 = pix[1];
6914 if( FFABS( p0 - q0 ) < alpha &&
6915 FFABS( p1 - p0 ) < beta &&
6916 FFABS( q1 - q0 ) < beta ) {
6917 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6919 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6920 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6921 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong chroma filtering (bS == 4): untclipped 2-tap smoothing. */
6924 const int p0 = pix[-1];
6925 const int p1 = pix[-2];
6926 const int q0 = pix[0];
6927 const int q1 = pix[1];
6929 if( FFABS( p0 - q0 ) < alpha &&
6930 FFABS( p1 - p0 ) < beta &&
6931 FFABS( q1 - q0 ) < beta ) {
6933 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6934 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6935 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: same algorithm as filter_mb_edgev but
 * the p/q samples are addressed vertically (multiples of the stride). */
6941 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6943 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6944 const int alpha = alpha_table[index_a];
6945 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6946 const int pix_next = stride;
/* Normal (bS < 4) path: tc0 per group (-1 = skip), DSP vertical filter. */
6951 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6952 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6954 /* 16px edge length, see filter_mb_edgev */
6955 for( d = 0; d < 16; d++ ) {
/* p0..p2 above the edge, q0..q2 below it. */
6956 const int p0 = pix[-1*pix_next];
6957 const int p1 = pix[-2*pix_next];
6958 const int p2 = pix[-3*pix_next];
6959 const int q0 = pix[0];
6960 const int q1 = pix[1*pix_next];
6961 const int q2 = pix[2*pix_next];
6963 if( FFABS( p0 - q0 ) < alpha &&
6964 FFABS( p1 - p0 ) < beta &&
6965 FFABS( q1 - q0 ) < beta ) {
6967 const int p3 = pix[-4*pix_next];
6968 const int q3 = pix[ 3*pix_next];
/* Strong filter (spec formulas), p side then q side. */
6970 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6971 if( FFABS( p2 - p0 ) < beta) {
6973 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6974 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6975 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6978 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6980 if( FFABS( q2 - q0 ) < beta) {
6982 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6983 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6984 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6987 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak variant: only p0/q0 are modified. */
6991 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6992 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6994 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge; mirror of filter_mb_edgecv using the
 * vertical (v_loop) DSP routines. */
7001 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
7003 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
7004 const int alpha = alpha_table[index_a];
7005 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Chroma tc = tc0 + 1; 0 disables filtering for that group. */
7010 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7011 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong chroma filter. */
7013 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: computes all boundary strengths
 * up front (via the DSP h264_loop_filter_strength helper or fixed values
 * for intra MBs) instead of the per-edge logic in filter_mb().
 * Falls back to filter_mb() at picture borders or when no DSP helper is
 * available; only valid for non-MBAFF frames (see the assert below). */
7017 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7018 MpegEncContext * const s = &h->s;
7020 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
7022 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7023 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7026 assert(!FRAME_MBAFF);
/* Gather the QPs of this MB and its left/top neighbors; edge QPs are the
 * rounded averages of the two adjacent MBs, for luma and chroma. */
7028 mb_xy = mb_x + mb_y*s->mb_stride;
7029 mb_type = s->current_picture.mb_type[mb_xy];
7030 qp = s->current_picture.qscale_table[mb_xy];
7031 qp0 = s->current_picture.qscale_table[mb_xy-1];
7032 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7033 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7034 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7035 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7036 qp0 = (qp + qp0 + 1) >> 1;
7037 qp1 = (qp + qp1 + 1) >> 1;
7038 qpc0 = (qpc + qpc0 + 1) >> 1;
7039 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this QP threshold, alpha/beta become 0 and filtering is a no-op,
 * so the whole MB can be skipped. */
7040 qp_thresh = 15 - h->slice_alpha_c0_offset;
7041 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7042 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: boundary strengths are fixed (4 on the MB border edges,
 * 3 on the internal edges), so filter directly without bS computation. */
7045 if( IS_INTRA(mb_type) ) {
7046 int16_t bS4[4] = {4,4,4,4};
7047 int16_t bS3[4] = {3,3,3,3};
/* With an 8x8 transform, only every other internal edge exists. */
7048 if( IS_8x8DCT(mb_type) ) {
7049 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7050 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7051 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7052 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7054 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7055 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7056 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7057 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7058 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7059 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7060 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7061 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: MB border plus the single internal edge. */
7063 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7064 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7065 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7066 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7067 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7068 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7069 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7070 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: bS[dir][edge][group] is filled in bulk; bSv aliases it as
 * 64-bit words so one whole edge can be tested/assigned at once. */
7073 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7074 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with all luma 8x8 blocks coded: internal edges get bS=2. */
7076 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7078 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the DSP helper which edges can skip the mv-based bS
 * check because the partitioning guarantees identical mvs across them. */
7080 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7081 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7082 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7083 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7085 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7086 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7087 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7088 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Border edges next to an intra neighbor are forced to bS=4. */
7090 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7091 bSv[0][0] = 0x0004000400040004ULL;
7092 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7093 bSv[1][0] = 0x0004000400040004ULL;
/* FILTER applies luma (and, for even edges, chroma) deblocking for one
 * edge/direction, choosing the border QP for edge 0. Expanded below
 * (expansions fall in an elided part of this view). */
7095 #define FILTER(hv,dir,edge)\
7096 if(bSv[dir][edge]) {\
7097 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7099 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7100 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7106 } else if( IS_8x8DCT(mb_type) ) {
/* Full (slow-path) deblocking of one macroblock: derives the boundary
 * strength bS for every vertical and horizontal 4-pixel edge segment from
 * intra flags, coded coefficients (nnz), reference indices and motion
 * vectors, then calls the edge filters. Handles MBAFF and the special
 * frame/field neighbor cases that filter_mb_fast() cannot. */
7125 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7126 MpegEncContext * const s = &h->s;
7127 const int mb_xy= mb_x + mb_y*s->mb_stride;
7128 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a smaller vertical mv difference threshold (2 in
 * quarter-pel units) for the bS=1 test. */
7129 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7130 int first_vertical_edge_done = 0;
7132 /* FIXME: A given frame may occupy more than one position in
7133 * the reference list. So ref2frm should be populated with
7134 * frame numbers, not indices. */
/* Maps ref_cache values (which start at -2 for unused/PCM) to comparable
 * ids; indexed with ref+2. */
7135 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7136 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7138 //for sufficiently low qp, filtering wouldn't do anything
7139 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7141 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7142 int qp = s->current_picture.qscale_table[mb_xy];
7144 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7145 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left neighbor pair has the opposite
 * frame/field coding, so the first vertical edge needs 8 bS values and
 * two QPs and is handled here instead of in the main loop below. */
7151 // left mb is in picture
7152 && h->slice_table[mb_xy-1] != 255
7153 // and current and left pair do not have the same interlaced type
7154 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7155 // and left mb is in the same slice if deblocking_filter == 2
7156 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7157 /* First vertical edge is different in MBAFF frames
7158 * There are 8 different bS to compute and 2 different Qp
7160 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7161 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7165 int mb_qp, mbn0_qp, mbn1_qp;
7167 first_vertical_edge_done = 1;
7169 if( IS_INTRA(mb_type) )
7170 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* Otherwise derive each of the 8 bS values from the matching neighbor
 * MB of the pair (selection depends on field/frame coding of this MB). */
7172 for( i = 0; i < 8; i++ ) {
7173 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7175 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7177 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7178 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7179 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* One averaged QP pair per neighbor MB, for luma and chroma. */
7186 mb_qp = s->current_picture.qscale_table[mb_xy];
7187 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7188 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7189 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7190 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7191 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7192 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7193 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7194 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7197 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7198 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7199 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7200 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7201 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7203 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7204 for( dir = 0; dir < 2; dir++ )
/* mbm = the neighbor across the MB border in this direction; start=1
 * skips the border edge when that neighbor is outside the slice. */
7207 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7208 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7209 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7211 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7212 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7213 // how often to recheck mv-based bS when iterating between edges
7214 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7215 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7216 // how often to recheck mv-based bS when iterating along each edge
7217 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* The MBAFF first vertical edge was already filtered above. */
7219 if (first_vertical_edge_done) {
7221 first_vertical_edge_done = 0;
7224 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* Frame MB with a field-coded MB pair above: the spec requires filtering
 * the top edge twice, once against each field of the pair. */
7227 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7228 && !IS_INTERLACED(mb_type)
7229 && IS_INTERLACED(mbm_type)
7231 // This is a special case in the norm where the filtering must
7232 // be done twice (one each of the field) even if we are in a
7233 // frame macroblock.
7235 static const int nnz_idx[4] = {4,5,6,3};
7236 unsigned int tmp_linesize = 2 * linesize;
7237 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7238 int mbn_xy = mb_xy - 2 * s->mb_stride;
7243 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7244 if( IS_INTRA(mb_type) ||
7245 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7246 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7248 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7249 for( i = 0; i < 4; i++ ) {
7250 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7251 mbn_nnz[nnz_idx[i]] != 0 )
7257 // Do not use s->qscale as luma quantizer because it has not the same
7258 // value in IPCM macroblocks.
7259 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7260 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7261 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7262 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7263 chroma_qp = ( h->chroma_qp +
7264 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7265 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7266 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Main per-edge loop: edge 0 is the MB border (neighbor = mbm), the
 * rest are internal edges (neighbor = this MB itself). */
7273 for( edge = start; edge < edges; edge++ ) {
7274 /* mbn_xy: neighbor macroblock */
7275 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7276 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges do not exist, skip them. */
7280 if( (edge&1) && IS_8x8DCT(mb_type) )
/* bS derivation in spec priority order: intra -> 4 (or 3 for internal
 * edges; exact value selection is in the elided lines), then coded
 * coefficients, then mv/ref mismatch. */
7283 if( IS_INTRA(mb_type) ||
7284 IS_INTRA(mbn_type) ) {
7287 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7288 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7297 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter/inter edge: mask_edge lets edges inside one partition skip the
 * mv comparison entirely (identical mvs by construction). */
7302 if( edge & mask_edge ) {
7303 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7306 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7307 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut: when the partitioning makes all 4 groups share
 * the same mvs/refs, compare once instead of per group. */
7310 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7311 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7312 int bn_idx= b_idx - (dir ? 8:1);
7314 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7315 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7316 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7317 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7319 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: per-4x4-group bS from nnz, then from mv/ref mismatch. */
7325 for( i = 0; i < 4; i++ ) {
7326 int x = dir == 0 ? edge : i;
7327 int y = dir == 0 ? i : edge;
7328 int b_idx= 8 + 4 + x + 8*y;
7329 int bn_idx= b_idx - (dir ? 8:1);
7331 if( h->non_zero_count_cache[b_idx] != 0 ||
7332 h->non_zero_count_cache[bn_idx] != 0 ) {
7338 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7339 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7340 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7341 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7349 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7354 // Do not use s->qscale as luma quantizer because it has not the same
7355 // value in IPCM macroblocks.
7356 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7357 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7358 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7359 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Apply the filters: vertical edges first branch, horizontal second;
 * chroma is filtered only on even edges (half resolution). */
7361 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7362 if( (edge&1) == 0 ) {
7363 int chroma_qp = ( h->chroma_qp +
7364 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7365 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7366 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7369 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7370 if( (edge&1) == 0 ) {
7371 int chroma_qp = ( h->chroma_qp +
7372 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7373 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7374 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decodes the macroblocks of one slice, using either the CABAC or the
 * CAVLC entropy path (selected by pps.cabac), and reports the decoded
 * region to the error-resilience layer via ff_er_add_slice().
 * Returns 0 on normal slice termination, -1 on error.
 * Fix: the dead data-partitioning branch near the end contained a
 * mis-encoded condition ("s->?gb" / "s->gb?.size_in_bits"); restored to
 * match the identical test two lines below. */
7381 static int decode_slice(H264Context *h){
7382 MpegEncContext * const s = &h->s;
7383 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7387 if( h->pps.cabac ) {
7391 align_get_bits( &s->gb );
7394 ff_init_cabac_states( &h->cabac);
7395 ff_init_cabac_decoder( &h->cabac,
7396 s->gb.buffer + get_bits_count(&s->gb)/8,
7397 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7398 /* calculate pre-state */
7399 for( i= 0; i < 460; i++ ) {
7401 if( h->slice_type == I_TYPE )
7402 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7404 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7407 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7409 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7414 int ret = decode_mb_cabac(h);
7416 //STOP_TIMER("decode_mb_cabac")
7418 if(ret>=0) hl_decode_mb(h);
7420 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7423 if(ret>=0) ret = decode_mb_cabac(h);
7425 if(ret>=0) hl_decode_mb(h);
7428 eos = get_cabac_terminate( &h->cabac );
7430 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7431 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7432 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7436 if( ++s->mb_x >= s->mb_width ) {
7438 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7445 if( eos || s->mb_y >= s->mb_height ) {
7446 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7447 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7454 int ret = decode_mb_cavlc(h);
7456 if(ret>=0) hl_decode_mb(h);
7458 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7460 ret = decode_mb_cavlc(h);
7462 if(ret>=0) hl_decode_mb(h);
7467 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7468 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7473 if(++s->mb_x >= s->mb_width){
7475 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7480 if(s->mb_y >= s->mb_height){
7481 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7483 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7484 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7488 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7495 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7496 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7497 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7498 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7502 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7511 for(;s->mb_y < s->mb_height; s->mb_y++){
7512 for(;s->mb_x < s->mb_width; s->mb_x++){
7513 int ret= decode_mb(h);
7518 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7519 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7524 if(++s->mb_x >= s->mb_width){
7526 if(++s->mb_y >= s->mb_height){
7527 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7528 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* repaired mis-encoded line: was "get_bits_count(s->?gb) >= s->gb?.size_in_bits" */
7539 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7540 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7541 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7545 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7552 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7555 return -1; //not reached
/* Parses an SEI "user data unregistered" payload: a 16-byte UUID
 * followed by free-form text. Recognizes x264's version banner so
 * encoder-specific bug workarounds can key off h->x264_build.
 * Bytes beyond the local buffer are drained from the bitstream. */
7558 static int decode_unregistered_user_data(H264Context *h, int size){
7559 MpegEncContext * const s = &h->s;
7560 uint8_t user_data[16+256];
/* copy at most sizeof(user_data)-1 bytes; the rest is skipped below */
7566 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7567 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID; only the build number is kept */
7571 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7572 if(e==1 && build>=0)
7573 h->x264_build= build;
7575 if(s->avctx->debug & FF_DEBUG_BUGS)
7576 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* drain any payload bytes that did not fit in user_data[] */
7579 skip_bits(&s->gb, 8);
/* Parses an SEI NAL unit. Each SEI message is a (payload_type,
 * payload_size) pair where both fields use the 255-prefixed byte
 * escape coding of the spec (accumulate while bytes read 255).
 * Only "unregistered user data" is interpreted; other payloads are
 * skipped wholesale. */
7584 static int decode_sei(H264Context *h){
7585 MpegEncContext * const s = &h->s;
7587 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: sum of bytes, terminated by the first byte != 255 */
7592 type+= show_bits(&s->gb, 8);
7593 }while(get_bits(&s->gb, 8) == 255);
/* payload size: same escape coding as the type */
7597 size+= show_bits(&s->gb, 8);
7598 }while(get_bits(&s->gb, 8) == 255);
7602 if(decode_unregistered_user_data(h, size) < 0)
7606 skip_bits(&s->gb, 8*size);
7609 //FIXME check bits here
7610 align_get_bits(&s->gb);
/* Parses hrd_parameters() (spec Annex E). All fields are read to keep
 * the bitstream position correct but none are retained — the HRD
 * model is not used by this decoder. */
7616 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7617 MpegEncContext * const s = &h->s;
7619 cpb_count = get_ue_golomb(&s->gb) + 1;
7620 get_bits(&s->gb, 4); /* bit_rate_scale */
7621 get_bits(&s->gb, 4); /* cpb_size_scale */
7622 for(i=0; i<cpb_count; i++){
7623 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7624 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7625 get_bits1(&s->gb); /* cbr_flag */
7627 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7628 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7629 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7630 get_bits(&s->gb, 5); /* time_offset_length */
/* Parses vui_parameters() (spec Annex E). Retains only the fields the
 * decoder uses: sample aspect ratio, timing info, and the bitstream
 * restriction data (num_reorder_frames drives display reordering);
 * every other syntax element is read and discarded. */
7633 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7634 MpegEncContext * const s = &h->s;
7635 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7636 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7638 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7640 if( aspect_ratio_info_present_flag ) {
7641 aspect_ratio_idc= get_bits(&s->gb, 8);
7642 if( aspect_ratio_idc == EXTENDED_SAR ) {
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7643 sps->sar.num= get_bits(&s->gb, 16);
7644 sps->sar.den= get_bits(&s->gb, 16);
7645 }else if(aspect_ratio_idc < 14){
/* table-driven SAR from the spec's predefined aspect_ratio_idc list */
7646 sps->sar= pixel_aspect[aspect_ratio_idc];
7648 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7655 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7657 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7658 get_bits1(&s->gb); /* overscan_appropriate_flag */
7661 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7662 get_bits(&s->gb, 3); /* video_format */
7663 get_bits1(&s->gb); /* video_full_range_flag */
7664 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7665 get_bits(&s->gb, 8); /* colour_primaries */
7666 get_bits(&s->gb, 8); /* transfer_characteristics */
7667 get_bits(&s->gb, 8); /* matrix_coefficients */
7671 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7672 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7673 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7676 sps->timing_info_present_flag = get_bits1(&s->gb);
7677 if(sps->timing_info_present_flag){
7678 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7679 sps->time_scale = get_bits_long(&s->gb, 32);
7680 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7683 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7684 if(nal_hrd_parameters_present_flag)
7685 decode_hrd_parameters(h, sps);
7686 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7687 if(vcl_hrd_parameters_present_flag)
7688 decode_hrd_parameters(h, sps);
7689 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7690 get_bits1(&s->gb); /* low_delay_hrd_flag */
7691 get_bits1(&s->gb); /* pic_struct_present_flag */
7693 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7694 if(sps->bitstream_restriction_flag){
7695 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7696 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7697 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7698 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7699 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames bounds the display-reorder delay (see decode_frame) */
7700 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7701 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parses one scaling list (4x4 or 8x8) in zigzag order.
 * Three outcomes per the spec:
 *  - list-present flag is 0: copy the predicted fallback list;
 *  - first delta makes the first coefficient 0: use the JVT default;
 *  - otherwise: delta-decode, repeating the last value once next==0. */
7707 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7708 const uint8_t *jvt_list, const uint8_t *fallback_list){
7709 MpegEncContext * const s = &h->s;
7710 int i, last = 8, next = 8;
7711 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7712 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7713 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7715 for(i=0;i<size;i++){
/* deltas are signed Exp-Golomb, coefficients wrap modulo 256 */
7717 next = (last + get_se_golomb(&s->gb)) & 0xff;
7718 if(!i && !next){ /* matrix not written, we use the preset one */
7719 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the rest of the list" */
7722 last = factors[scan[i]] = next ? next : last;
/* Parses the full set of scaling matrices for an SPS or PPS.
 * Fallback rules follow the spec's inference table: the first list of
 * each class falls back to the SPS matrix (when parsing a PPS that has
 * one) or to the flat default, while subsequent lists fall back to the
 * previously decoded list of the same class. */
7726 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7727 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7728 MpegEncContext * const s = &h->s;
/* PPS parsing may inherit matrices from an SPS that carried some */
7729 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7730 const uint8_t *fallback[4] = {
7731 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7732 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7733 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7734 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7736 if(get_bits1(&s->gb)){
7737 sps->scaling_matrix_present |= is_sps;
7738 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7739 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7740 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7741 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7742 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7743 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7744 if(is_sps || pps->transform_8x8_mode){
7745 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7746 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7748 } else if(fallback_sps) {
/* no PPS matrices present: inherit the SPS matrices wholesale */
7749 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7750 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Decodes a sequence parameter set NAL unit into h->sps_buffer[sps_id].
 * Returns -1 on error (handled by the elided error paths).
 * Fix: sps_id comes straight from the bitstream and was used unchecked
 * to index the fixed-size sps_buffer[] (MAX_SPS_COUNT entries) — a
 * malicious stream could write out of bounds. Reject out-of-range ids. */
7754 static inline int decode_seq_parameter_set(H264Context *h){
7755 MpegEncContext * const s = &h->s;
7756 int profile_idc, level_idc;
7760 profile_idc= get_bits(&s->gb, 8);
7761 get_bits1(&s->gb); //constraint_set0_flag
7762 get_bits1(&s->gb); //constraint_set1_flag
7763 get_bits1(&s->gb); //constraint_set2_flag
7764 get_bits1(&s->gb); //constraint_set3_flag
7765 get_bits(&s->gb, 4); // reserved
7766 level_idc= get_bits(&s->gb, 8);
7767 sps_id= get_ue_golomb(&s->gb);
/* bounds-check before indexing the fixed-size SPS table */
if((unsigned)sps_id >= MAX_SPS_COUNT){
    av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
    return -1;
}
7769 sps= &h->sps_buffer[ sps_id ];
7770 sps->profile_idc= profile_idc;
7771 sps->level_idc= level_idc;
7773 if(sps->profile_idc >= 100){ //high profile
7774 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7775 get_bits1(&s->gb); //residual_color_transform_flag
7776 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7777 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7778 sps->transform_bypass = get_bits1(&s->gb);
7779 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7781 sps->scaling_matrix_present = 0;
7783 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7784 sps->poc_type= get_ue_golomb(&s->gb);
7786 if(sps->poc_type == 0){ //FIXME #define
7787 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7788 } else if(sps->poc_type == 1){//FIXME #define
7789 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7790 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7791 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7792 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7794 for(i=0; i<sps->poc_cycle_length; i++)
7795 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7797 if(sps->poc_type > 2){
7798 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7802 sps->ref_frame_count= get_ue_golomb(&s->gb);
7803 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7804 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7806 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7807 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7808 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7809 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7810 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7813 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7814 if(!sps->frame_mbs_only_flag)
7815 sps->mb_aff= get_bits1(&s->gb);
7819 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7821 #ifndef ALLOW_INTERLACE
7823 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7825 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7826 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7828 sps->crop= get_bits1(&s->gb);
7830 sps->crop_left = get_ue_golomb(&s->gb);
7831 sps->crop_right = get_ue_golomb(&s->gb);
7832 sps->crop_top = get_ue_golomb(&s->gb);
7833 sps->crop_bottom= get_ue_golomb(&s->gb);
7834 if(sps->crop_left || sps->crop_top){
7835 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7841 sps->crop_bottom= 0;
7844 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7845 if( sps->vui_parameters_present_flag )
7846 decode_vui_parameters(h, sps);
7848 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7849 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7850 sps_id, sps->profile_idc, sps->level_idc,
7852 sps->ref_frame_count,
7853 sps->mb_width, sps->mb_height,
7854 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7855 sps->direct_8x8_inference_flag ? "8B8" : "",
7856 sps->crop_left, sps->crop_right,
7857 sps->crop_top, sps->crop_bottom,
7858 sps->vui_parameters_present_flag ? "VUI" : ""
/* Decodes a picture parameter set NAL unit into h->pps_buffer[pps_id].
 * Returns -1 on error (handled by the elided error paths).
 * Fix: pps_id and pps->sps_id come straight from the bitstream and were
 * used unchecked to index the fixed-size pps_buffer[]/sps_buffer[]
 * tables — a malicious stream could read/write out of bounds. Reject
 * out-of-range ids (MAX_PPS_COUNT / MAX_SPS_COUNT entries). */
7864 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7865 MpegEncContext * const s = &h->s;
7866 int pps_id= get_ue_golomb(&s->gb);
/* bounds-check before indexing the fixed-size PPS table */
if((unsigned)pps_id >= MAX_PPS_COUNT){
    av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
    return -1;
}
7867 PPS *pps= &h->pps_buffer[pps_id];
7869 pps->sps_id= get_ue_golomb(&s->gb);
/* sps_id is later used to index h->sps_buffer[] — validate it here */
if((unsigned)pps->sps_id >= MAX_SPS_COUNT){
    av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", pps->sps_id);
    return -1;
}
7870 pps->cabac= get_bits1(&s->gb);
7871 pps->pic_order_present= get_bits1(&s->gb);
7872 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7873 if(pps->slice_group_count > 1 ){
7874 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7875 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7876 switch(pps->mb_slice_group_map_type){
7879 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7880 | run_length[ i ] |1 |ue(v) |
7885 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7887 | top_left_mb[ i ] |1 |ue(v) |
7888 | bottom_right_mb[ i ] |1 |ue(v) |
7896 | slice_group_change_direction_flag |1 |u(1) |
7897 | slice_group_change_rate_minus1 |1 |ue(v) |
7902 | slice_group_id_cnt_minus1 |1 |ue(v) |
7903 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7905 | slice_group_id[ i ] |1 |u(v) |
7910 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7911 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7912 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7913 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7917 pps->weighted_pred= get_bits1(&s->gb);
7918 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7919 pps->init_qp= get_se_golomb(&s->gb) + 26;
7920 pps->init_qs= get_se_golomb(&s->gb) + 26;
7921 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7922 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7923 pps->constrained_intra_pred= get_bits1(&s->gb);
7924 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7926 pps->transform_8x8_mode= 0;
7927 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7928 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7929 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7931 if(get_bits_count(&s->gb) < bit_length){
7932 pps->transform_8x8_mode= get_bits1(&s->gb);
7933 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7934 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7937 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7938 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7939 pps_id, pps->sps_id,
7940 pps->cabac ? "CABAC" : "CAVLC",
7941 pps->slice_group_count,
7942 pps->ref_count[0], pps->ref_count[1],
7943 pps->weighted_pred ? "weighted" : "",
7944 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7945 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7946 pps->constrained_intra_pred ? "CONSTR" : "",
7947 pps->redundant_pic_cnt_present ? "REDU" : "",
7948 pps->transform_8x8_mode ? "8x8DCT" : ""
7956 * finds the end of the current frame in the bitstream.
7957 * @return the position of the first byte of the next frame, or -1
/* Scans for Annex-B start codes; NAL types 1/2/5 (slices) mark frame
 * starts, and a slice whose first_mb_in_slice==0 (high bit of the first
 * payload byte set, ue(v)==0) signals the start of a NEW frame. */
7959 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7962 ParseContext *pc = &(h->s.parse_context);
7963 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7964 // mb_addr= pc->mb_addr - 1;
7966 for(i=0; i<=buf_size; i++){
/* mask 0xFFFFFF1F: match "00 00 01" + nal_unit_type, ignoring nal_ref_idc */
7967 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7968 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7969 if(pc->frame_start_found){
7970 // If there isn't one more byte in the buffer
7971 // the test on first_mb_in_slice cannot be done yet
7972 // do it at next call.
7973 if (i >= buf_size) break;
7974 if (buf[i] & 0x80) {
7975 // first_mb_in_slice is 0, probably the first nal of a new
7977 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7979 pc->frame_start_found= 0;
7983 pc->frame_start_found = 1;
/* NAL types 7/8/9 (SPS/PPS/AUD) always terminate the current frame */
7985 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7986 if(pc->frame_start_found){
7988 pc->frame_start_found= 0;
7993 state= (state<<8) | buf[i];
7997 return END_NOT_FOUND;
8000 #ifdef CONFIG_H264_PARSER
/* AVCodecParser callback: accumulates input until find_frame_end()
 * locates a complete frame, then hands the combined buffer back via
 * poutbuf/poutbuf_size. */
8001 static int h264_parse(AVCodecParserContext *s,
8002 AVCodecContext *avctx,
8003 uint8_t **poutbuf, int *poutbuf_size,
8004 const uint8_t *buf, int buf_size)
8006 H264Context *h = s->priv_data;
8007 ParseContext *pc = &h->s.parse_context;
8010 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame may swap buf for the parser's internal buffer */
8012 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8018 *poutbuf = (uint8_t *)buf;
8019 *poutbuf_size = buf_size;
/* AVCodecParser split callback: returns the size of the leading
 * extradata region (SPS/PPS headers) by scanning for the first
 * slice-type start code after an SPS (type 7). */
8023 static int h264_split(AVCodecContext *avctx,
8024 const uint8_t *buf, int buf_size)
8027 uint32_t state = -1;
8030 for(i=0; i<=buf_size; i++){
8031 if((state&0xFFFFFF1F) == 0x107)
8033 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
8035 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes preceding the start code */
8037 while(i>4 && buf[i-5]==0) i--;
8042 state= (state<<8) | buf[i];
8046 #endif /* CONFIG_H264_PARSER */
/* Iterates over the NAL units in buf (Annex-B start codes, or
 * length-prefixed when h->is_avc) and dispatches each to the proper
 * decoder: slices, DPA/DPB/DPC partitions, SEI, SPS, PPS.
 * Returns the number of bytes consumed, or a negative value on error. */
8048 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8049 MpegEncContext * const s = &h->s;
8050 AVCodecContext * const avctx= s->avctx;
8054 for(i=0; i<50; i++){
8055 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8059 s->current_picture_ptr= NULL;
8068 if(buf_index >= buf_size) break;
/* AVC/mp4 mode: big-endian NAL size prefix of nal_length_size bytes */
8070 for(i = 0; i < h->nal_length_size; i++)
8071 nalsize = (nalsize << 8) | buf[buf_index++];
8077 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8082 // start code prefix search
8083 for(; buf_index + 3 < buf_size; buf_index++){
8084 // this should allways succeed in the first iteration
8085 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8089 if(buf_index+3 >= buf_size) break;
/* unescape emulation-prevention bytes into h->rbsp_buffer */
8094 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8095 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8097 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8099 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8100 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8103 if (h->is_avc && (nalsize != consumed))
8104 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8106 buf_index += consumed;
8108 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8109 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8112 switch(h->nal_unit_type){
8114 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8116 init_get_bits(&s->gb, ptr, bit_length);
8118 h->inter_gb_ptr= &s->gb;
8119 s->data_partitioning = 0;
8121 if(decode_slice_header(h) < 0){
8122 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8125 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* decode the slice only if it survives all of the skip policies */
8126 if(h->redundant_pic_count==0 && s->hurry_up < 5
8127 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8128 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8129 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8130 && avctx->skip_frame < AVDISCARD_ALL)
8134 init_get_bits(&s->gb, ptr, bit_length)
8136 h->inter_gb_ptr= NULL;
8137 s->data_partitioning = 1;
8139 if(decode_slice_header(h) < 0){
8140 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8144 init_get_bits(&h->intra_gb, ptr, bit_length);
8145 h->intra_gb_ptr= &h->intra_gb;
8148 init_get_bits(&h->inter_gb, ptr, bit_length);
8149 h->inter_gb_ptr= &h->inter_gb;
8151 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8153 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8154 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8155 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8156 && avctx->skip_frame < AVDISCARD_ALL)
8160 init_get_bits(&s->gb, ptr, bit_length);
8164 init_get_bits(&s->gb, ptr, bit_length);
8165 decode_seq_parameter_set(h);
8167 if(s->flags& CODEC_FLAG_LOW_DELAY)
8170 if(avctx->has_b_frames < 2)
8171 avctx->has_b_frames= !s->low_delay;
8174 init_get_bits(&s->gb, ptr, bit_length);
8176 decode_picture_parameter_set(h, bit_length);
8180 case NAL_END_SEQUENCE:
8181 case NAL_END_STREAM:
8182 case NAL_FILLER_DATA:
8184 case NAL_AUXILIARY_SLICE:
8187 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8191 if(!s->current_picture_ptr) return buf_index; //no frame
8193 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8194 s->current_picture_ptr->pict_type= s->pict_type;
/* roll POC/frame_num state forward for the next picture */
8196 h->prev_frame_num_offset= h->frame_num_offset;
8197 h->prev_frame_num= h->frame_num;
8198 if(s->current_picture_ptr->reference){
8199 h->prev_poc_msb= h->poc_msb;
8200 h->prev_poc_lsb= h->poc_lsb;
8202 if(s->current_picture_ptr->reference)
8203 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8213 * returns the number of bytes consumed for building the current frame
8215 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8216 if(s->flags&CODEC_FLAG_TRUNCATED){
/* truncated mode: the parse context already buffered part of the frame */
8217 pos -= s->parse_context.last_index;
8218 if(pos<0) pos=0; // FIXME remove (unneeded?)
8222 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8223 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* AVCodec decode callback. Handles truncated-input reassembly, one-time
 * avcC extradata parsing (AVC/mp4 mode), decodes the NAL units of one
 * frame, then sorts decoded pictures into display order using POC and
 * the delayed-picture queue before returning one frame in *data. */
8229 static int decode_frame(AVCodecContext *avctx,
8230 void *data, int *data_size,
8231 uint8_t *buf, int buf_size)
8233 H264Context *h = avctx->priv_data;
8234 MpegEncContext *s = &h->s;
8235 AVFrame *pict = data;
8238 s->flags= avctx->flags;
8239 s->flags2= avctx->flags2;
8241 /* no supplementary picture */
8242 if (buf_size == 0) {
8246 if(s->flags&CODEC_FLAG_TRUNCATED){
8247 int next= find_frame_end(h, buf, buf_size);
8249 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8251 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* first call in AVC mode: parse SPS/PPS out of the avcC extradata */
8254 if(h->is_avc && !h->got_avcC) {
8255 int i, cnt, nalsize;
8256 unsigned char *p = avctx->extradata;
8257 if(avctx->extradata_size < 7) {
8258 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8262 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8265 /* sps and pps in the avcC always have length coded with 2 bytes,
8266 so put a fake nal_length_size = 2 while parsing them */
8267 h->nal_length_size = 2;
8268 // Decode sps from avcC
8269 cnt = *(p+5) & 0x1f; // Number of sps
8271 for (i = 0; i < cnt; i++) {
8272 nalsize = BE_16(p) + 2;
8273 if(decode_nal_units(h, p, nalsize) < 0) {
8274 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8279 // Decode pps from avcC
8280 cnt = *(p++); // Number of pps
8281 for (i = 0; i < cnt; i++) {
8282 nalsize = BE_16(p) + 2;
8283 if(decode_nal_units(h, p, nalsize) != nalsize) {
8284 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8289 // Now store right nal length size, that will be use to parse all other nals
8290 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8291 // Do not reparse avcC
8295 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8296 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8300 buf_index=decode_nal_units(h, buf, buf_size);
8304 //FIXME do something with unavailable reference frames
8306 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8307 if(!s->current_picture_ptr){
8308 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8313 Picture *out = s->current_picture_ptr;
8314 #if 0 //decode order
8315 *data_size = sizeof(AVFrame);
8317 /* Sort B-frames into display order */
8318 Picture *cur = s->current_picture_ptr;
8319 Picture *prev = h->delayed_output_pic;
8320 int i, pics, cross_idr, out_of_order, out_idx;
8322 if(h->sps.bitstream_restriction_flag
8323 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8324 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8329 while(h->delayed_pic[pics]) pics++;
8330 h->delayed_pic[pics++] = cur;
8331 if(cur->reference == 0)
8335 for(i=0; h->delayed_pic[i]; i++)
8336 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the queued picture with the smallest POC (next in display order) */
8339 out = h->delayed_pic[0];
8341 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8342 if(h->delayed_pic[i]->poc < out->poc){
8343 out = h->delayed_pic[i];
8347 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8348 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8350 else if(prev && pics <= s->avctx->has_b_frames)
8352 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8354 ((!cross_idr && prev && out->poc > prev->poc + 2)
8355 || cur->pict_type == B_TYPE)))
/* reorder depth was underestimated: grow the delay heuristically */
8358 s->avctx->has_b_frames++;
8361 else if(out_of_order)
8364 if(out_of_order || pics > s->avctx->has_b_frames){
8365 for(i=out_idx; h->delayed_pic[i]; i++)
8366 h->delayed_pic[i] = h->delayed_pic[i+1];
8372 *data_size = sizeof(AVFrame);
8373 if(prev && prev != out && prev->reference == 1)
8374 prev->reference = 0;
8375 h->delayed_output_pic = out;
8379 *pict= *(AVFrame*)out;
8381 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8384 assert(pict->data[0] || !*data_size);
8385 ff_print_debug_info(s, pict);
8386 //printf("out %d\n", (int)pict->data[0]);
8389 /* Return the Picture timestamp as the frame number */
8390 /* we substract 1 because it is added on utils.c */
8391 avctx->frame_number = s->picture_number - 1;
8393 return get_consumed_bytes(s, buf_index, buf_size);
/* Fills h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor is available only if it lies inside the
 * picture and belongs to the same slice (slice_table match). Indices
 * cover top-left, top, top-right, left, current, and right. */
8396 static inline void fill_mb_avail(H264Context *h){
8397 MpegEncContext * const s = &h->s;
8398 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8401 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8402 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8403 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8409 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8410 h->mb_avail[4]= 1; //FIXME move out
8411 h->mb_avail[5]= 0; //FIXME move out
/* Stand-alone self-test harness (compiled under #ifdef TEST; the entry
 * point line is elided here). Exercises, in order: unsigned and signed
 * Exp-Golomb round-trips, the 4x4 (I)DCT, the quantizer, and the NAL
 * escaping layer (encode_nal/decode_nal round-trip with injected zero
 * runs). Prints failures to stdout. */
8417 #define SIZE (COUNT*40)
8423 // int int_temp[10000];
8425 AVCodecContext avctx;
8427 dsputil_init(&dsp, &avctx);
8429 init_put_bits(&pb, temp, SIZE);
8430 printf("testing unsigned exp golomb\n");
8431 for(i=0; i<COUNT; i++){
8433 set_ue_golomb(&pb, i);
8434 STOP_TIMER("set_ue_golomb");
8436 flush_put_bits(&pb);
8438 init_get_bits(&gb, temp, 8*SIZE);
8439 for(i=0; i<COUNT; i++){
8442 s= show_bits(&gb, 24);
8445 j= get_ue_golomb(&gb);
8447 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8450 STOP_TIMER("get_ue_golomb");
8454 init_put_bits(&pb, temp, SIZE);
8455 printf("testing signed exp golomb\n");
8456 for(i=0; i<COUNT; i++){
8458 set_se_golomb(&pb, i - COUNT/2);
8459 STOP_TIMER("set_se_golomb");
8461 flush_put_bits(&pb);
8463 init_get_bits(&gb, temp, 8*SIZE);
8464 for(i=0; i<COUNT; i++){
8467 s= show_bits(&gb, 24);
8470 j= get_se_golomb(&gb);
8471 if(j != i - COUNT/2){
8472 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8475 STOP_TIMER("get_se_golomb");
8478 printf("testing 4x4 (I)DCT\n");
8481 uint8_t src[16], ref[16];
8482 uint64_t error= 0, max_error=0;
8484 for(i=0; i<COUNT; i++){
8486 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8487 for(j=0; j<16; j++){
8488 ref[j]= random()%255;
8489 src[j]= random()%255;
8492 h264_diff_dct_c(block, src, ref, 4);
/* approximate quantize/dequantize before the IDCT round-trip */
8495 for(j=0; j<16; j++){
8496 // printf("%d ", block[j]);
8497 block[j]= block[j]*4;
8498 if(j&1) block[j]= (block[j]*4 + 2)/5;
8499 if(j&4) block[j]= (block[j]*4 + 2)/5;
8503 s->dsp.h264_idct_add(ref, block, 4);
8504 /* for(j=0; j<16; j++){
8505 printf("%d ", ref[j]);
8509 for(j=0; j<16; j++){
8510 int diff= FFABS(src[j] - ref[j]);
8513 max_error= FFMAX(max_error, diff);
8516 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8518 printf("testing quantizer\n");
8519 for(qp=0; qp<52; qp++){
8521 src1_block[i]= src2_block[i]= random()%255;
8525 printf("Testing NAL layer\n");
8527 uint8_t bitstream[COUNT];
8528 uint8_t nal[COUNT*2];
8530 memset(&h, 0, sizeof(H264Context));
8532 for(i=0; i<COUNT; i++){
8540 for(j=0; j<COUNT; j++){
8541 bitstream[j]= (random() % 255) + 1;
/* inject runs of zero bytes to exercise emulation-prevention escaping */
8544 for(j=0; j<zeros; j++){
8545 int pos= random() % COUNT;
8546 while(bitstream[pos] == 0){
8555 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8557 printf("encoding failed\n");
8561 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8565 if(out_length != COUNT){
8566 printf("incorrect length %d %d\n", out_length, COUNT);
8570 if(consumed != nal_length){
8571 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8575 if(memcmp(bitstream, out, COUNT)){
8576 printf("missmatch\n");
8581 printf("Testing RBSP\n");
/* AVCodec close callback: frees the RBSP unescape buffer and the
 * per-context tables allocated during init. */
8589 static int decode_end(AVCodecContext *avctx)
8591 H264Context *h = avctx->priv_data;
8592 MpegEncContext *s = &h->s;
8594 av_freep(&h->rbsp_buffer);
8595 free_tables(h); //FIXME cleanup init stuff perhaps
8598 // memset(h, 0, sizeof(H264Context));
/* Registration tables: the H.264 decoder and (when CONFIG_H264_PARSER)
 * its bitstream parser. Field initializers are positional AVCodec /
 * AVCodecParser members (callbacks elided in this chunk). */
8604 AVCodec h264_decoder = {
8608 sizeof(H264Context),
8613 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8617 #ifdef CONFIG_H264_PARSER
8618 AVCodecParser h264_parser = {
8620 sizeof(H264Context),