2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
// Deliberately rename two MpegEncContext fields to poison identifiers:
// h264 code must not touch them directly (mb_intra is not initialized here;
// the information lives in mb_type instead — see the macro names).
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
// Synthetic block indices for the luma/chroma DC coefficient blocks,
// placed after the 16 luma + 8 chroma AC blocks (0..24).
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
// Lookup-table bit widths used when building the CAVLC VLC tables below
// (coeff_token_vlc, total_zeros_vlc, run_vlc, ...).
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
// Capacity of the per-context SPS/PPS buffers and of the MMCO op list.
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
// With interlace support enabled these map to live per-context state;
// in the disabled build they collapse to constants so the compiler can
// dead-strip the interlaced code paths.
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
// NOTE(review): the line below appears to belong to the !ALLOW_INTERLACE
// fallback branch (intervening lines not shown in this excerpt) — confirm
// against the full file before relying on it.
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
// Fields mirror SPS syntax elements from the H.264 bitstream; fields without
// a ///< note reuse the spec's syntax-element name verbatim.
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
// VUI (Video Usability Information) timing fields.
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
// Scaling lists transmitted in the SPS (used as defaults by the PPS).
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16]; ///< six 4x4 scaling lists
113 uint8_t scaling_matrix8[2][64]; ///< two 8x8 scaling lists
117 * Picture parameter set
// Fields mirror PPS syntax elements from the H.264 bitstream.
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
// Effective scaling lists for this PPS (PPS overrides SPS lists).
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
164 typedef struct H264Context{
// NAL unit type codes (subset shown in this excerpt).
172 #define NAL_IDR_SLICE 5
177 #define NAL_END_SEQUENCE 10
178 #define NAL_END_STREAM 11
179 #define NAL_FILLER_DATA 12
180 #define NAL_SPS_EXT 13
181 #define NAL_AUXILIARY_SLICE 19
// Scratch buffer holding the RBSP after emulation-prevention-byte removal.
182 uint8_t *rbsp_buffer;
183 unsigned int rbsp_buffer_size;
186 * Used to parse AVC variant of h264
188 int is_avc; ///< this flag is != 0 if codec is avc1
189 int got_avcC; ///< flag used to parse avcC data only once
190 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
// Prediction modes of the current macroblock.
198 int chroma_pred_mode;
199 int intra16x16_pred_mode;
// Per-MB cache of intra4x4 modes (5x8 layout, see scan8) and the per-MB
// table it is read from / written back to.
204 int8_t intra4x4_pred_mode_cache[5*8];
205 int8_t (*intra4x4_pred_mode)[8];
// Intra prediction function tables, indexed by prediction mode.
206 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
207 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
208 void (*pred8x8 [4+3])(uint8_t *src, int stride);
209 void (*pred16x16[4+3])(uint8_t *src, int stride);
// Per-4x4-block availability bitmasks of neighboring samples for intra
// prediction (set up in fill_caches()).
210 unsigned int topleft_samples_available;
211 unsigned int top_samples_available;
212 unsigned int topright_samples_available;
213 unsigned int left_samples_available;
// Saved edge pixels of neighboring macroblock rows/columns.
214 uint8_t (*top_borders[2])[16+2*8];
215 uint8_t left_border[2*(17+2*9)];
218 * non zero coeff count cache.
219 * is 64 if not available.
221 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
222 uint8_t (*non_zero_count)[16];
225 * Motion vector cache.
227 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
228 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
// Sentinel reference indices stored in ref_cache.
229 #define LIST_NOT_USED -1 //FIXME rename?
230 #define PART_NOT_AVAILABLE -2
233 * is 1 if the specific list MV&references are set to 0,0,-2.
235 int mv_cache_clean[2];
238 * number of neighbors (top and/or left) that used 8x8 dct
240 int neighbor_transform_size;
243 * block_offset[ 0..23] for frame macroblocks
244 * block_offset[24..47] for field macroblocks
246 int block_offset[2*(16+8)];
// Macroblock-address -> motion-vector/ref-index array offsets.
248 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
250 int b_stride; //FIXME use s->b4_stride
253 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
262 int unknown_svq3_flag;
263 int next_slice_index;
// Parameter-set storage: all received sets plus the currently active ones.
265 SPS sps_buffer[MAX_SPS_COUNT];
266 SPS sps; ///< current sps
268 PPS pps_buffer[MAX_PPS_COUNT];
272 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
// Dequantization tables, one row per QP (0..51); the _coeff pointers
// select into the buffers (or alias rows that are identical).
274 uint32_t dequant4_buffer[6][52][16];
275 uint32_t dequant8_buffer[2][52][64];
276 uint32_t (*dequant4_coeff[6])[16];
277 uint32_t (*dequant8_coeff[2])[64];
278 int dequant_coeff_pps; ///< reinit tables when pps changes
// Per-MB slice number, used for slice-boundary checks in fill_caches().
281 uint8_t *slice_table_base;
282 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
284 int slice_type_fixed;
286 //interlacing specific flags
288 int mb_field_decoding_flag;
289 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
// Picture order count (POC) state, per H.264 POC derivation.
296 int delta_poc_bottom;
299 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
300 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
301 int frame_num_offset; ///< for POC type 2
302 int prev_frame_num_offset; ///< for POC type 2
303 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
306 * frame_num for frames or 2*frame_num for field pics.
311 * max_frame_num or 2*max_frame_num for field pics.
315 //Weighted pred stuff
317 int use_weight_chroma;
318 int luma_log2_weight_denom;
319 int chroma_log2_weight_denom;
// Explicit weighted-prediction tables, indexed [list][ref] (48 entries:
// 16 frame refs + 32 mbaff field refs, matching ref_list below).
320 int luma_weight[2][48];
321 int luma_offset[2][48];
322 int chroma_weight[2][48][2];
323 int chroma_offset[2][48][2];
324 int implicit_weight[48][48];
//deblocking
327 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
328 int slice_alpha_c0_offset;
329 int slice_beta_offset;
331 int redundant_pic_count;
// B-frame direct-mode state (spatial/temporal) and the temporal-direct
// scaling/mapping tables (separate variants for field decoding).
333 int direct_spatial_mv_pred;
334 int dist_scale_factor[16];
335 int dist_scale_factor_field[32];
336 int map_col_to_list0[2][16];
337 int map_col_to_list0_field[2][32];
340 * num_ref_idx_l0/1_active_minus1 + 1
342 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
// Reference picture lists and the decoded-picture output delay queue.
343 Picture *short_ref[32];
344 Picture *long_ref[32];
345 Picture default_ref_list[2][32];
346 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
347 Picture *delayed_pic[16]; //FIXME size?
348 Picture *delayed_output_pic;
351 * memory management control operations buffer.
353 MMCO mmco[MAX_MMCO_COUNT];
356 int long_ref_count; ///< number of actual long term references
357 int short_ref_count; ///< number of actual short term references
// Separate bitstream readers for intra/inter partitions (data partitioning).
360 GetBitContext intra_gb;
361 GetBitContext inter_gb;
362 GetBitContext *intra_gb_ptr;
363 GetBitContext *inter_gb_ptr;
// Residual coefficients of the current macroblock (24 4x4 blocks).
365 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
//CABAC
371 uint8_t cabac_state[460];
374 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
379 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
380 uint8_t *chroma_pred_mode_table;
381 int last_qscale_diff;
// CABAC motion-vector-difference table/cache and direct-mode flags.
382 int16_t (*mvd_table[2])[2];
383 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
384 uint8_t *direct_table;
385 uint8_t direct_cache[5*8];
// Coefficient scan orders; the _q0 pointers select the variant to use
// when no scaling matrix reordering applies (qp-dependent selection).
387 uint8_t zigzag_scan[16];
388 uint8_t zigzag_scan8x8[64];
389 uint8_t zigzag_scan8x8_cavlc[64];
390 uint8_t field_scan[16];
391 uint8_t field_scan8x8[64];
392 uint8_t field_scan8x8_cavlc[64];
393 const uint8_t *zigzag_scan_q0;
394 const uint8_t *zigzag_scan8x8_q0;
395 const uint8_t *zigzag_scan8x8_cavlc_q0;
396 const uint8_t *field_scan_q0;
397 const uint8_t *field_scan8x8_q0;
398 const uint8_t *field_scan8x8_cavlc_q0;
// CAVLC decoding tables, built once at init time.
403 static VLC coeff_token_vlc[4];
404 static VLC chroma_dc_coeff_token_vlc;
406 static VLC total_zeros_vlc[15];
407 static VLC chroma_dc_total_zeros_vlc[3];
409 static VLC run_vlc[6];
// Forward declarations: SVQ3 dequant/idct variants and the deblocking
// filter entry points (full and fast paths).
412 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
413 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
414 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
415 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Packs two 16-bit values into one uint32_t such that, when the result is
// stored and reinterpreted as two consecutive int16_t, 'a' occupies the
// first slot and 'b' the second regardless of host endianness: on big
// endian 'a' goes into the high half (stored first), on little endian into
// the low half (stored first). Used for 32-bit stores of (mvx,mvy) pairs.
417 static always_inline uint32_t pack16to32(int a, int b){
418 #ifdef WORDS_BIGENDIAN
419 return (b&0xFFFF) + (a<<16);
// NOTE(review): the #else separating the two returns is not shown in this
// excerpt; the line below is the little-endian variant.
421 return (a&0xFFFF) + (b<<16);
427 * @param h height of the rectangle, should be a constant
428 * @param w width of the rectangle, should be a constant
429 * @param size the size of val (1 or 4), should be a constant
// Fills a w x h rectangle of 'size'-byte elements with 'val', using the
// widest aligned stores available. The branch headers selecting on w/size
// are not all visible in this excerpt; comments below mark which case each
// group of stores appears to serve.
431 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
432 uint8_t *p= (uint8_t*)vp;
433 assert(size==1 || size==4);
// The destination must be aligned to min(w, STRIDE_ALIGN) and the stride a
// multiple of the row width, so every multi-byte store below is aligned.
439 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
440 assert((stride&(w-1))==0);
// w*size == 2: replicate the byte value and store one uint16_t per row.
442 const uint16_t v= size==4 ? val : val*0x0101;
443 *(uint16_t*)(p + 0*stride)= v;
445 *(uint16_t*)(p + 1*stride)= v;
447 *(uint16_t*)(p + 2*stride)=
448 *(uint16_t*)(p + 3*stride)= v;
// w*size == 4: one uint32_t per row.
450 const uint32_t v= size==4 ? val : val*0x01010101;
451 *(uint32_t*)(p + 0*stride)= v;
453 *(uint32_t*)(p + 1*stride)= v;
455 *(uint32_t*)(p + 2*stride)=
456 *(uint32_t*)(p + 3*stride)= v;
// w*size == 8: use 64-bit stores only where they are efficient.
458 //gcc can't optimize 64bit math on x86_32
459 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
460 const uint64_t v= val*0x0100000001ULL;
461 *(uint64_t*)(p + 0*stride)= v;
463 *(uint64_t*)(p + 1*stride)= v;
465 *(uint64_t*)(p + 2*stride)=
466 *(uint64_t*)(p + 3*stride)= v;
// w*size == 16, 64-bit capable: two uint64_t stores per row.
468 const uint64_t v= val*0x0100000001ULL;
469 *(uint64_t*)(p + 0+0*stride)=
470 *(uint64_t*)(p + 8+0*stride)=
471 *(uint64_t*)(p + 0+1*stride)=
472 *(uint64_t*)(p + 8+1*stride)= v;
474 *(uint64_t*)(p + 0+2*stride)=
475 *(uint64_t*)(p + 8+2*stride)=
476 *(uint64_t*)(p + 0+3*stride)=
477 *(uint64_t*)(p + 8+3*stride)= v;
// 32-bit fallback, w*size == 8: two uint32_t stores per row.
479 *(uint32_t*)(p + 0+0*stride)=
480 *(uint32_t*)(p + 4+0*stride)= val;
482 *(uint32_t*)(p + 0+1*stride)=
483 *(uint32_t*)(p + 4+1*stride)= val;
485 *(uint32_t*)(p + 0+2*stride)=
486 *(uint32_t*)(p + 4+2*stride)=
487 *(uint32_t*)(p + 0+3*stride)=
488 *(uint32_t*)(p + 4+3*stride)= val;
// 32-bit fallback, w*size == 16: four uint32_t stores per row.
490 *(uint32_t*)(p + 0+0*stride)=
491 *(uint32_t*)(p + 4+0*stride)=
492 *(uint32_t*)(p + 8+0*stride)=
493 *(uint32_t*)(p +12+0*stride)=
494 *(uint32_t*)(p + 0+1*stride)=
495 *(uint32_t*)(p + 4+1*stride)=
496 *(uint32_t*)(p + 8+1*stride)=
497 *(uint32_t*)(p +12+1*stride)= val;
499 *(uint32_t*)(p + 0+2*stride)=
500 *(uint32_t*)(p + 4+2*stride)=
501 *(uint32_t*)(p + 8+2*stride)=
502 *(uint32_t*)(p +12+2*stride)=
503 *(uint32_t*)(p + 0+3*stride)=
504 *(uint32_t*)(p + 4+3*stride)=
505 *(uint32_t*)(p + 8+3*stride)=
506 *(uint32_t*)(p +12+3*stride)= val;
// Fills the per-macroblock caches (intra modes, nnz, cbp, mv/ref/mvd,
// direct flags, sample availability) from the neighboring macroblocks.
// Called both for decoding and (with for_deblock set) for the deblocking
// filter, which needs a slightly different neighbor derivation.
513 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
514 MpegEncContext * const s = &h->s;
515 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
516 int topleft_xy, top_xy, topright_xy, left_xy[2];
517 int topleft_type, top_type, topright_type, left_type[2];
521 //FIXME deblocking could skip the intra and nnz parts.
// Fast path: for deblocking within one slice and no MBAFF, part of the
// work can be skipped (body of this early-out not shown in this excerpt).
522 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
525 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default (non-MBAFF) neighbor addresses.
527 top_xy = mb_xy - s->mb_stride;
528 topleft_xy = top_xy - 1;
529 topright_xy= top_xy + 1;
530 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: neighbors are derived from the macroblock *pair* addresses and
// the frame/field coding of each neighboring pair (spec 6.4.8).
540 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
541 const int top_pair_xy = pair_xy - s->mb_stride;
542 const int topleft_pair_xy = top_pair_xy - 1;
543 const int topright_pair_xy = top_pair_xy + 1;
544 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
545 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
546 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
547 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
548 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
549 const int bottom = (s->mb_y & 1);
550 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// The surrounding conditionals for these ?: fragments are not shown; each
// adjusts one neighbor address by a row when frame/field coding differs.
552 ? !curr_mb_frame_flag // bottom macroblock
553 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
555 top_xy -= s->mb_stride;
558 ? !curr_mb_frame_flag // bottom macroblock
559 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
561 topleft_xy -= s->mb_stride;
564 ? !curr_mb_frame_flag // bottom macroblock
565 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
567 topright_xy -= s->mb_stride;
569 if (left_mb_frame_flag != curr_mb_frame_flag) {
570 left_xy[1] = left_xy[0] = pair_xy - 1;
571 if (curr_mb_frame_flag) {
592 left_xy[1] += s->mb_stride;
// Remember the derived neighbor addresses for later use.
605 h->top_mb_xy = top_xy;
606 h->left_mb_xy[0] = left_xy[0];
607 h->left_mb_xy[1] = left_xy[1];
// for_deblock variant: a neighbor counts if its slice_table entry is valid
// (< 255), i.e. across slice boundaries too.
611 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
612 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
613 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// MBAFF deblocking of an inter MB: load this MB's own nnz bits and mv/ref
// into the caches (needed because neighbors may be coded in the other mode).
615 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
617 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
619 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
620 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
621 if(USES_LIST(mb_type,list)){
622 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
623 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
624 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
625 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
// Replicate the four 8x8 ref indices into the 4x4 cache grid.
631 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
632 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
634 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
635 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
637 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
638 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Decoding variant: a neighbor counts only if it belongs to this slice.
643 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
644 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
645 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
646 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
647 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra MB: compute per-4x4-block sample-availability bitmasks, masking
// out neighbors that are missing or excluded by constrained_intra_pred.
650 if(IS_INTRA(mb_type)){
651 h->topleft_samples_available=
652 h->top_samples_available=
653 h->left_samples_available= 0xFFFF;
654 h->topright_samples_available= 0xEEEA;
656 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
657 h->topleft_samples_available= 0xB3FF;
658 h->top_samples_available= 0x33FF;
659 h->topright_samples_available= 0x26EA;
662 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
663 h->topleft_samples_available&= 0xDF5F;
664 h->left_samples_available&= 0x5F5F;
668 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
669 h->topleft_samples_available&= 0x7FFF;
671 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
672 h->topright_samples_available&= 0xFBFF;
// Intra4x4 MB: load neighbors' intra4x4 modes into the cache edges, or a
// predicted/unavailable value when the neighbor cannot be used.
674 if(IS_INTRA4x4(mb_type)){
675 if(IS_INTRA4x4(top_type)){
676 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
677 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
678 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
679 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
682 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
687 h->intra4x4_pred_mode_cache[4+8*0]=
688 h->intra4x4_pred_mode_cache[5+8*0]=
689 h->intra4x4_pred_mode_cache[6+8*0]=
690 h->intra4x4_pred_mode_cache[7+8*0]= pred;
693 if(IS_INTRA4x4(left_type[i])){
694 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
695 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
698 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
703 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
704 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Load neighbors' non-zero-coefficient counts into the cache edges;
// 64 marks "not available", 0 is used for CABAC on inter neighbors.
719 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
721 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
722 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
723 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
724 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
726 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
727 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
729 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
730 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
733 h->non_zero_count_cache[4+8*0]=
734 h->non_zero_count_cache[5+8*0]=
735 h->non_zero_count_cache[6+8*0]=
736 h->non_zero_count_cache[7+8*0]=
738 h->non_zero_count_cache[1+8*0]=
739 h->non_zero_count_cache[2+8*0]=
741 h->non_zero_count_cache[1+8*3]=
742 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
746 for (i=0; i<2; i++) {
748 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
749 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
750 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
751 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
753 h->non_zero_count_cache[3+8*1 + 2*8*i]=
754 h->non_zero_count_cache[3+8*2 + 2*8*i]=
755 h->non_zero_count_cache[0+8*1 + 8*i]=
756 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Neighbor coded-block-pattern bits (for CABAC context derivation).
763 h->top_cbp = h->cbp_table[top_xy];
764 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
772 } else if(IS_INTRA(mb_type)) {
778 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
781 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter/direct MB: fill the motion-vector and reference-index caches from
// each available neighbor, per prediction list.
786 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
788 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
789 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
790 /*if(!h->mv_cache_clean[list]){
791 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
792 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
793 h->mv_cache_clean[list]= 1;
797 h->mv_cache_clean[list]= 0;
// Top neighbor row of the mv/ref caches.
799 if(USES_LIST(top_type, list)){
800 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
801 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
805 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
806 h->ref_cache[list][scan8[0] + 0 - 1*8]=
807 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
808 h->ref_cache[list][scan8[0] + 2 - 1*8]=
809 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
814 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
815 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
818 //FIXME unify cleanup or sth
// Left neighbor column (two halves, for MBAFF pairs).
819 if(USES_LIST(left_type[0], list)){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
823 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
824 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
825 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
828 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
829 h->ref_cache[list][scan8[0] - 1 + 0*8]=
830 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
833 if(USES_LIST(left_type[1], list)){
834 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
835 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
837 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
838 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
839 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
842 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
843 h->ref_cache[list][scan8[0] - 1 + 2*8]=
844 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
845 assert((!left_type[0]) == (!left_type[1]));
// Topleft/topright corners are only needed for deblocking or temporal
// direct prediction (and not in MBAFF here).
848 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
851 if(USES_LIST(topleft_type, list)){
852 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
853 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
854 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
855 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
857 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
858 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
861 if(USES_LIST(topright_type, list)){
862 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
863 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
864 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
865 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
867 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
868 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Initialize the padding cache entries that prediction may read.
874 h->ref_cache[list][scan8[5 ]+1] =
875 h->ref_cache[list][scan8[7 ]+1] =
876 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
877 h->ref_cache[list][scan8[4 ]] =
878 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
879 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
880 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
881 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
882 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
883 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC only: load neighbors' motion vector differences the same way.
886 /* XXX beurk, Load mvd */
887 if(USES_LIST(top_type, list)){
888 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
891 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
892 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
896 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
897 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
899 if(USES_LIST(left_type[0], list)){
900 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
901 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
902 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
904 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
905 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
907 if(USES_LIST(left_type[1], list)){
908 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
909 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
910 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
912 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
913 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
915 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
916 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
917 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
918 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
919 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: load the per-8x8 direct-mode flags of the neighbors.
921 if(h->slice_type == B_TYPE){
922 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
924 if(IS_DIRECT(top_type)){
925 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
926 }else if(IS_8X8(top_type)){
927 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
928 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
929 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
931 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
934 if(IS_DIRECT(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
936 else if(IS_8X8(left_type[0]))
937 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
939 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
941 if(IS_DIRECT(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
943 else if(IS_8X8(left_type[1]))
944 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
946 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF: rescale neighbor MVs/refs between frame and field units.
// MAP_MVS applies MAP_F2F to every neighbor cache slot; MAP_F2F is then
// defined once per direction (frame->field scales ref up / mv.y down,
// field->frame the inverse).
952 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
953 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
956 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
957 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
958 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
959 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
960 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
961 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
963 #define MAP_F2F(idx, mb_type)\
964 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
965 h->ref_cache[list][idx] <<= 1;\
966 h->mv_cache[list][idx][1] /= 2;\
967 h->mvd_cache[list][idx][1] /= 2;\
972 #define MAP_F2F(idx, mb_type)\
973 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
974 h->ref_cache[list][idx] >>= 1;\
975 h->mv_cache[list][idx][1] <<= 1;\
976 h->mvd_cache[list][idx][1] <<= 1;\
// Count how many of top/left neighbors used the 8x8 transform (for CABAC).
986 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Copies the bottom row and right column of the intra4x4 mode cache back
// into the per-MB intra4x4_pred_mode table, so the next macroblocks can
// read them as their top/left neighbors.
989 static inline void write_back_intra_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
993 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
994 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
995 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
996 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
997 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
998 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
999 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1003 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Returns 0 on success; a negative value (error path not fully shown in
// this excerpt) when a mode requires unavailable neighbor samples and has
// no valid substitute (table entry -1).
1005 static inline int check_intra4x4_pred_mode(H264Context *h){
1006 MpegEncContext * const s = &h->s;
// Substitution tables indexed by the decoded mode: -1 = invalid without
// that neighbor, otherwise the mode to fall back to.
1007 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1008 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1011 if(!(h->top_samples_available&0x8000)){
1013 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1015 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1018 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1023 if(!(h->left_samples_available&0x8000)){
1025 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1027 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1030 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1036 } //FIXME cleanup like next
1039 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Same idea as check_intra4x4_pred_mode() but for the 16x16/8x8-chroma
// prediction modes; returns the (possibly substituted) mode, or a negative
// value on error (tail of the function not shown in this excerpt).
1041 static inline int check_intra_pred_mode(H264Context *h, int mode){
1042 MpegEncContext * const s = &h->s;
1043 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1044 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1046 if(mode < 0 || mode > 6) {
1047 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1051 if(!(h->top_samples_available&0x8000)){
1054 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1059 if(!(h->left_samples_available&0x8000)){
1062 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1071 * gets the predicted intra4x4 prediction mode.
// Prediction is the minimum of the left and top neighbouring modes;
// unavailable neighbours are cached as negative values and force DC_PRED.
1073 static inline int pred_intra_mode(H264Context *h, int n){
1074 const int index8= scan8[n];
1075 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1076 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1077 const int min= FFMIN(left, top);
1079 tprintf("mode:%d %d min:%d\n", left ,top, min);
// Negative min means at least one neighbour is missing -> fall back to DC.
1081 if(min<0) return DC_PRED;
/**
 * writes the per-4x4-block non-zero coefficient counts of the current MB
 * from the decode-time cache back into the frame-wide non_zero_count[] array.
 */
1085 static inline void write_back_non_zero_count(H264Context *h){
1086 MpegEncContext * const s = &h->s;
1087 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Luma: right column and bottom row of the cache, i.e. the context the MBs
// to the right and below will read as their left/top neighbours.
1089 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1090 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1091 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1092 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1093 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1094 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1095 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Chroma blocks (first plane ...
1097 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1098 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1099 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
// ... then second plane).
1101 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1102 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1103 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1106 // store all luma nnzs, for deblocking
// One bit per 4x4 luma block, packed into 16 bits at offset 14.
1109 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1110 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1115 * gets the predicted number of non zero coefficients.
1116 * @param n block index
// Averages the left and top neighbour counts (rounded up); values >= 64
// flag an unavailable neighbour, in which case no averaging is applied.
// NOTE(review): the line computing i (presumably left+top) and the final
// return are elided in this excerpt.
1118 static inline int pred_non_zero_count(H264Context *h, int n){
1119 const int index8= scan8[n];
1120 const int left= h->non_zero_count_cache[index8 - 1];
1121 const int top = h->non_zero_count_cache[index8 - 8];
1124 if(i<64) i= (i+1)>>1;
1126 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * fetches the "C" motion vector (top-right neighbour, falling back to the
 * top-left neighbour) used by median MV prediction, handling the MBAFF
 * cross-field rescaling special cases.
 * @return the reference index belonging to *C
 * NOTE(review): several MBAFF condition lines are elided in this excerpt.
 */
1131 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1132 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1134 /* there is no consistent mapping of mvs to neighboring locations that will
1135 * make mbaff happy, so we can't move all this logic to fill_caches */
1137 MpegEncContext *s = &h->s;
1138 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// scan8[0]-2 is a scratch cache slot used to return a synthesized MV.
1140 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1141 *C = h->mv_cache[list][scan8[0]-2];
1144 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1145 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1146 if(IS_INTERLACED(mb_types[topright_xy])){
// Reads an MV from the neighbouring MB and rescales it across the
// field/frame boundary: MV_OP scales the y component, REF_OP the ref index.
1147 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1148 const int x4 = X4, y4 = Y4;\
1149 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1150 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1151 return LIST_NOT_USED;\
1152 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1153 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1154 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1155 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
// Frame MB reading from a field neighbour: double the MV, halve the ref.
1157 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1160 if(topright_ref == PART_NOT_AVAILABLE
1161 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1162 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1164 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1165 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1168 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1169 && i >= scan8[0]+8){
1170 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1171 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: use the top-right neighbour when available,
// otherwise fall back to the top-left neighbour.
1177 if(topright_ref != PART_NOT_AVAILABLE){
1178 *C= h->mv_cache[list][ i - 8 + part_width ];
1179 return topright_ref;
1181 tprintf("topright MV not available\n");
1183 *C= h->mv_cache[list][ i - 8 - 1 ];
1184 return h->ref_cache[list][ i - 8 - 1 ];
1189 * gets the predicted MV.
1190 * @param n the block index
1191 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1192 * @param mx the x component of the predicted motion vector
1193 * @param my the y component of the predicted motion vector
// Median MV prediction from the left (A), top (B) and diagonal (C)
// neighbours; when exactly one neighbour shares the target reference its MV
// is used directly.
// NOTE(review): several single-match / fallback branch lines are elided.
1195 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1196 const int index8= scan8[n];
1197 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1198 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1199 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1200 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1202 int diagonal_ref, match_count;
1204 assert(part_width==1 || part_width==2 || part_width==4);
// C comes from the top-right (or top-left fallback) neighbour.
1214 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1215 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1216 tprintf("pred_motion match_count=%d\n", match_count);
1217 if(match_count > 1){ //most common
1218 *mx= mid_pred(A[0], B[0], C[0]);
1219 *my= mid_pred(A[1], B[1], C[1]);
1220 }else if(match_count==1){
// Exactly one neighbour uses this reference: take its MV verbatim.
1224 }else if(top_ref==ref){
// No neighbour matches: median unless only the left block is available.
1232 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1236 *mx= mid_pred(A[0], B[0], C[0]);
1237 *my= mid_pred(A[1], B[1], C[1]);
1241 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1245 * gets the directionally predicted 16x8 MV.
1246 * @param n the block index
1247 * @param mx the x component of the predicted motion vector
1248 * @param my the y component of the predicted motion vector
// 16x8 partitions prefer a single directional neighbour: the top MB for the
// upper partition, the left MB for the lower one; otherwise fall back to
// the generic median prediction.
1250 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Upper partition: try the top neighbour first.
1252 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1253 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1255 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Lower partition: try the left neighbour first.
1263 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1264 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1266 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1268 if(left_ref == ref){
// Directional neighbour did not match: use the median predictor.
1276 pred_motion(h, n, 4, list, ref, mx, my);
1280 * gets the directionally predicted 8x16 MV.
1281 * @param n the block index
1282 * @param mx the x component of the predicted motion vector
1283 * @param my the y component of the predicted motion vector
// 8x16 partitions prefer a single directional neighbour: the left MB for
// the left partition, the diagonal (top-right) for the right one; otherwise
// fall back to the generic median prediction.
1285 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Left partition: try the left neighbour first.
1287 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1288 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1290 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1292 if(left_ref == ref){
// Right partition: try the diagonal neighbour of the right half.
1301 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1303 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1305 if(diagonal_ref == ref){
// Directional neighbour did not match: use the median predictor.
1313 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * predicts the MV of a P-skip macroblock: (0,0) when either neighbour is
 * unavailable or is a zero-MV ref-0 block, otherwise the median prediction.
 */
1316 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1317 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1318 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1320 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
// Spec condition for forcing a zero MV (neighbour missing or zero/ref0).
1322 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1323 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1324 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
// Otherwise, predict like a regular 16x16 list-0 ref-0 block.
1330 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * precomputes the temporal-direct-mode distance scale factors
 * (H.264 8.4.1.2.3): dist_scale_factor[i] = clip((tb*tx+32)>>6,-1024,1023),
 * with tb/td the clipped POC distances and tx = (16384+|td|/2)/td.
 */
1335 static inline void direct_dist_scale_factor(H264Context * const h){
1336 const int poc = h->s.current_picture_ptr->poc;
1337 const int poc1 = h->ref_list[1][0].poc;
1339 for(i=0; i<h->ref_count[0]; i++){
1340 int poc0 = h->ref_list[0][i].poc;
1341 int td = clip(poc1 - poc0, -128, 127);
1342 if(td == 0 /* FIXME || pic0 is a long-term ref */){
// Equal POCs (or long-term refs, unhandled) use the neutral factor 256.
1343 h->dist_scale_factor[i] = 256;
1345 int tb = clip(poc - poc0, -128, 127);
1346 int tx = (16384 + (FFABS(td) >> 1)) / td;
1347 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
// Field variant: each frame entry is duplicated for the two fields.
1351 for(i=0; i<h->ref_count[0]; i++){
1352 h->dist_scale_factor_field[2*i] =
1353 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * records the current reference lists in the picture and builds
 * map_col_to_list0[], mapping each reference of the colocated picture
 * (ref_list[1][0]) to the current list0 index with the same POC.
 */
1357 static inline void direct_ref_list_init(H264Context * const h){
1358 MpegEncContext * const s = &h->s;
1359 Picture * const ref1 = &h->ref_list[1][0];
1360 Picture * const cur = s->current_picture_ptr;
// I pictures have no list0, non-B pictures no list1.
1362 if(cur->pict_type == I_TYPE)
1363 cur->ref_count[0] = 0;
1364 if(cur->pict_type != B_TYPE)
1365 cur->ref_count[1] = 0;
// Store the active ref counts/POCs so future pictures can map against them.
1366 for(list=0; list<2; list++){
1367 cur->ref_count[list] = h->ref_count[list];
1368 for(j=0; j<h->ref_count[list]; j++)
1369 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// The mapping is only needed for temporal direct B prediction.
1371 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1373 for(list=0; list<2; list++){
1374 for(i=0; i<ref1->ref_count[list]; i++){
1375 const int poc = ref1->ref_poc[list][i];
1376 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1377 for(j=0; j<h->ref_count[list]; j++)
1378 if(h->ref_list[list][j].poc == poc){
1379 h->map_col_to_list0[list][i] = j;
// Field variant: map each colocated field ref to the matching field pair.
1385 for(list=0; list<2; list++){
1386 for(i=0; i<ref1->ref_count[list]; i++){
1387 j = h->map_col_to_list0[list][i];
1388 h->map_col_to_list0_field[list][2*i] = 2*j;
1389 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * predicts the motion of a B-direct macroblock (or of its direct 8x8
 * sub-blocks when is_b8x8), filling mv_cache/ref_cache for both lists and
 * updating *mb_type / sub_mb_type.  Handles both spatial and temporal
 * direct prediction, including the MBAFF frame/field scaling cases.
 * NOTE(review): many lines are elided in this excerpt (loop headers,
 * else-branches, closing braces); comments below describe only the
 * visible code.
 */
1395 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1396 MpegEncContext * const s = &h->s;
1397 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1398 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1399 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// Colocated MB data from the first list-1 reference.
1400 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1401 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1402 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1403 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1404 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1405 const int is_b8x8 = IS_8X8(*mb_type);
1409 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Choose the partitioning implied by the colocated MB type.
1410 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1411 /* FIXME save sub mb types from previous frames (or derive from MVs)
1412 * so we know exactly what block size to use */
1413 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1414 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1415 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1419 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1420 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1423 *mb_type |= MB_TYPE_DIRECT2;
1425 *mb_type |= MB_TYPE_INTERLACED;
1427 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- spatial direct prediction -------------------------------------
1429 if(h->direct_spatial_mv_pred){
1434 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1436 /* ref = min(neighbors) */
1437 for(list=0; list<2; list++){
1438 int refa = h->ref_cache[list][scan8[0] - 1];
1439 int refb = h->ref_cache[list][scan8[0] - 8];
1440 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
// Top-right unavailable: use the top-left neighbour instead.
1442 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1444 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1446 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// No valid neighbour ref in either list: zero refs and MVs.
1452 if(ref[0] < 0 && ref[1] < 0){
1453 ref[0] = ref[1] = 0;
1454 mv[0][0] = mv[0][1] =
1455 mv[1][0] = mv[1][1] = 0;
1457 for(list=0; list<2; list++){
1459 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1461 mv[list][0] = mv[list][1] = 0;
// Drop the unused list from the MB/sub-MB types.
1466 *mb_type &= ~MB_TYPE_P0L1;
1467 sub_mb_type &= ~MB_TYPE_P0L1;
1468 }else if(ref[0] < 0){
1469 *mb_type &= ~MB_TYPE_P0L0;
1470 sub_mb_type &= ~MB_TYPE_P0L0;
// Spatial, whole-MB 16x16 case.
1473 if(IS_16X16(*mb_type)){
1474 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1475 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
// Colocated block is (near-)stationary with ref 0: force zero MVs
// where the neighbour ref is 0 (spec condition; x264<=33 workaround).
1476 if(!IS_INTRA(mb_type_col)
1477 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1478 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1479 && (h->x264_build>33 || !h->x264_build)))){
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1483 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1489 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1490 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
// Spatial, per-8x8 case.
1493 for(i8=0; i8<4; i8++){
1494 const int x8 = i8&1;
1495 const int y8 = i8>>1;
// In b8x8 mode, only the sub-blocks coded as direct are predicted here.
1497 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1499 h->sub_mb_type[i8] = sub_mb_type;
1501 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1502 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1503 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1504 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Same stationary-colocated zeroing as above, per 8x8 block.
1507 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1508 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1509 && (h->x264_build>33 || !h->x264_build)))){
1510 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1511 if(IS_SUB_8X8(sub_mb_type)){
1512 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1513 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1515 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1517 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1520 for(i4=0; i4<4; i4++){
1521 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1522 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1524 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1526 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1532 }else{ /* direct temporal mv pred */
1533 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1534 const int *dist_scale_factor = h->dist_scale_factor;
// Field MBs use the per-field mapping/scale tables.
1537 if(IS_INTERLACED(*mb_type)){
1538 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1539 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1540 dist_scale_factor = h->dist_scale_factor_field;
// Cross frame/field prediction: rebase the colocated pointers onto the
// matching field/frame pair and pick the partitioning accordingly.
1542 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1543 /* FIXME assumes direct_8x8_inference == 1 */
1544 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1545 int mb_types_col[2];
1548 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1549 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1550 | (*mb_type & MB_TYPE_INTERLACED);
1551 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1553 if(IS_INTERLACED(*mb_type)){
1554 /* frame to field scaling */
1555 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1556 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1558 l1ref0 -= 2*h->b8_stride;
1559 l1ref1 -= 2*h->b8_stride;
1560 l1mv0 -= 4*h->b_stride;
1561 l1mv1 -= 4*h->b_stride;
1565 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1566 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1568 *mb_type |= MB_TYPE_16x8;
1570 *mb_type |= MB_TYPE_8x8;
1572 /* field to frame scaling */
1573 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1574 * but in MBAFF, top and bottom POC are equal */
1575 int dy = (s->mb_y&1) ? 1 : 2;
1577 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1578 l1ref0 += dy*h->b8_stride;
1579 l1ref1 += dy*h->b8_stride;
1580 l1mv0 += 2*dy*h->b_stride;
1581 l1mv1 += 2*dy*h->b_stride;
1584 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1586 *mb_type |= MB_TYPE_16x16;
1588 *mb_type |= MB_TYPE_8x8;
// Cross frame/field per-8x8 loop: scale colocated MVs by y_shift and the
// POC-distance factor, l0 MV scaled, l1 MV = l0 - colocated.
1591 for(i8=0; i8<4; i8++){
1592 const int x8 = i8&1;
1593 const int y8 = i8>>1;
1595 const int16_t (*l1mv)[2]= l1mv0;
1597 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1599 h->sub_mb_type[i8] = sub_mb_type;
1601 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra colocated: zero refs and MVs.
1602 if(IS_INTRA(mb_types_col[y8])){
1603 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1604 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1609 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1611 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1613 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1616 scale = dist_scale_factor[ref0];
1617 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1620 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1621 int my_col = (mv_col[1]<<y_shift)/2;
1622 int mx = (scale * mv_col[0] + 128) >> 8;
1623 int my = (scale * my_col + 128) >> 8;
1624 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1625 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1632 /* one-to-one mv scaling */
// Same-parity temporal direct, whole-MB 16x16 case.
1634 if(IS_16X16(*mb_type)){
1635 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1636 if(IS_INTRA(mb_type_col)){
1637 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1638 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1639 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1641 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1642 : map_col_to_list0[1][l1ref1[0]];
1643 const int scale = dist_scale_factor[ref0];
1644 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
// l0 MV = scaled colocated MV, l1 MV = l0 - colocated (8.4.1.2.3).
1646 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1647 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1648 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1649 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1650 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
// Same-parity temporal direct, per-8x8 case.
1653 for(i8=0; i8<4; i8++){
1654 const int x8 = i8&1;
1655 const int y8 = i8>>1;
1657 const int16_t (*l1mv)[2]= l1mv0;
1659 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1661 h->sub_mb_type[i8] = sub_mb_type;
1662 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 if(IS_INTRA(mb_type_col)){
1664 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1665 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1670 ref0 = l1ref0[x8 + y8*h->b8_stride];
1672 ref0 = map_col_to_list0[0][ref0];
1674 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1677 scale = dist_scale_factor[ref0];
1679 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1680 if(IS_SUB_8X8(sub_mb_type)){
1681 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1682 int mx = (scale * mv_col[0] + 128) >> 8;
1683 int my = (scale * mv_col[1] + 128) >> 8;
1684 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1685 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1687 for(i4=0; i4<4; i4++){
1688 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1689 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1690 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1691 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1692 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1693 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * copies the cached motion vectors, reference indices, MVDs and direct
 * flags of the current MB back into the frame-wide picture arrays.
 */
1700 static inline void write_back_motion(H264Context *h, int mb_type){
1701 MpegEncContext * const s = &h->s;
1702 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1703 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// List 0 unused: mark all four 8x8 ref indices as LIST_NOT_USED.
1706 if(!USES_LIST(mb_type, 0))
1707 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1709 for(list=0; list<2; list++){
1711 if(!USES_LIST(mb_type, list))
// Copy the 4x4 MV grid, two MVs (one uint64) at a time per row.
1715 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1716 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the MV differences (zeroed for skipped MBs).
1718 if( h->pps.cabac ) {
1719 if(IS_SKIP(mb_type))
1720 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1723 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1724 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// Reference indices at 8x8 granularity.
1729 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1730 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1731 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1732 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1733 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B-slice CABAC: record which 8x8 sub-blocks used direct prediction.
1737 if(h->slice_type == B_TYPE && h->pps.cabac){
1738 if(IS_8X8(mb_type)){
1739 uint8_t *direct_table = &h->direct_table[b8_xy];
1740 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1741 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1742 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1748 * Decodes a network abstraction layer unit.
1749 * @param consumed is the number of bytes used as input
1750 * @param length is the length of the array
1751 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1752 * @returns decoded bytes, might be src+1 if no escapes
// Strips the NAL header byte and removes 0x03 emulation-prevention bytes
// (H.264 Annex B).  Returns the input buffer directly when no escape
// sequences are present, avoiding a copy.
1754 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
// Parse the one-byte NAL header.
1758 // src[0]&0x80; //forbidden bit
1759 h->nal_ref_idc= src[0]>>5;
1760 h->nal_unit_type= src[0]&0x1F;
1764 for(i=0; i<length; i++)
1765 printf("%2X ", src[i]);
// Scan for the first 00 00 0x (escape or start code); zero bytes only
// matter at even offsets, hence the step of 2 with back-off.
1767 for(i=0; i+1<length; i+=2){
1768 if(src[i]) continue;
1769 if(i>0 && src[i-1]==0) i--;
1770 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1772 /* startcode, so we must be past the end */
// No escapes found: return the payload in place, no copy needed.
1779 if(i>=length-1){ //no escaped 0
1780 *dst_length= length;
1781 *consumed= length+1; //+1 for the header
1785 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1786 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
// Copy the payload, dropping each 0x03 after a 00 00 pair.
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1813 * @param src the data which should be escaped
1814 * @param dst the target buffer, dst+1 == src is allowed as a special case
1815 * @param length the length of the src data
1816 * @param dst_length the length of the dst array
1817 * @returns length of escaped data in bytes or -1 if an error occured
// Inverse of decode_nal(): writes the NAL header byte and inserts 0x03
// emulation-prevention bytes after every 00 00 pair followed by 0..3.
1819 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1820 int i, escape_count, si, di;
1824 assert(dst_length>0);
// One-byte NAL header: nal_ref_idc (bits 5-6) + nal_unit_type (bits 0-4).
1826 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1828 if(length==0) return 1;
// First pass: count the escapes needed (same even-offset scan as decoding).
1831 for(i=0; i<length; i+=2){
1832 if(src[i]) continue;
1833 if(i>0 && src[i-1]==0)
1835 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
// No escapes: plain copy after the header byte.
1841 if(escape_count==0){
1843 memcpy(dst+1, src, length);
1847 if(length + escape_count + 1> dst_length)
1850 //this should be damn rare (hopefully)
// Build the escaped payload in a scratch buffer, then copy it out
// (needed because dst+1 may alias src).
1852 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1853 temp= h->rbsp_buffer;
1854 //printf("encoding esc\n");
1859 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1860 temp[di++]= 0; si++;
1861 temp[di++]= 0; si++;
// Emulation-prevention byte inserted here (elided line), then the payload
// byte that triggered it.
1863 temp[di++]= src[si++];
1866 temp[di++]= src[si++];
1868 memcpy(dst+1, temp, length+escape_count);
1870 assert(di == length+escape_count);
1876 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1878 static void encode_rbsp_trailing(PutBitContext *pb){
// Stop bit (written on an elided line), then zero-pad to a byte boundary.
1881 length= (-put_bits_count(pb))&7;
1882 if(length) put_bits(pb, length, 0);
1887 * identifies the exact end of the bitstream
1888 * @return the length of the trailing, or 0 if damaged
// Scans the final byte for the rbsp_stop_one_bit that marks the payload end.
1890 static int decode_rbsp_trailing(uint8_t *src){
1894 tprintf("rbsp trailing %X\n", v);
1904 * idct tranforms the 16 dc values and dequantize them.
1905 * @param qp quantization parameter
// 4x4 inverse Hadamard over the 16 luma DC coefficients (stored sparsely in
// block[] via the x/y offset tables), followed by (x*qmul+128)>>8 dequant.
// NOTE(review): the declarations of stride and the temp-store lines of the
// first pass are elided in this excerpt.
1907 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1910 int temp[16]; //FIXME check if this is a good idea
1911 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1912 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1914 //memset(block, 64, 2*256);
// Pass 1: butterflies along one axis into temp[].
1917 const int offset= y_offset[i];
1918 const int z0= block[offset+stride*0] + block[offset+stride*4];
1919 const int z1= block[offset+stride*0] - block[offset+stride*4];
1920 const int z2= block[offset+stride*1] - block[offset+stride*5];
1921 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Pass 2: butterflies along the other axis, then dequantize and store.
1930 const int offset= x_offset[i];
1931 const int z0= temp[4*0+i] + temp[4*2+i];
1932 const int z1= temp[4*0+i] - temp[4*2+i];
1933 const int z2= temp[4*1+i] - temp[4*3+i];
1934 const int z3= temp[4*1+i] + temp[4*3+i];
1936 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1937 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1938 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1939 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1945 * dct tranforms the 16 dc values.
1946 * @param qp quantization parameter ??? FIXME
// Forward counterpart of the function above: 4x4 Hadamard over the 16 luma
// DC values with a final >>1 (no quantization here).
1948 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1949 // const int qmul= dequant_coeff[qp][0];
1951 int temp[16]; //FIXME check if this is a good idea
1952 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1953 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Pass 1: butterflies along one axis into temp[].
1956 const int offset= y_offset[i];
1957 const int z0= block[offset+stride*0] + block[offset+stride*4];
1958 const int z1= block[offset+stride*0] - block[offset+stride*4];
1959 const int z2= block[offset+stride*1] - block[offset+stride*5];
1960 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Pass 2: butterflies along the other axis, halved on store.
1969 const int offset= x_offset[i];
1970 const int z0= temp[4*0+i] + temp[4*2+i];
1971 const int z1= temp[4*0+i] - temp[4*2+i];
1972 const int z2= temp[4*1+i] - temp[4*3+i];
1973 const int z3= temp[4*1+i] + temp[4*3+i];
1975 block[stride*0 +offset]= (z0 + z3)>>1;
1976 block[stride*2 +offset]= (z1 + z2)>>1;
1977 block[stride*8 +offset]= (z1 - z2)>>1;
1978 block[stride*10+offset]= (z0 - z3)>>1;
// 2x2 inverse Hadamard over the chroma DC values plus (x*qmul)>>7 dequant.
// NOTE(review): the variable declarations and the line computing e
// (presumably e = a + d or similar butterfly) are elided in this excerpt.
1986 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1987 const int stride= 16*2;
1988 const int xStride= 16;
1991 a= block[stride*0 + xStride*0];
1992 b= block[stride*0 + xStride*1];
1993 c= block[stride*1 + xStride*0];
1994 d= block[stride*1 + xStride*1];
2001 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
2002 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
2003 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
2004 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 Hadamard over the chroma DC values (no scaling).
// NOTE(review): the variable declarations and the line computing e are
// elided in this excerpt, mirroring the dequant variant above.
2008 static void chroma_dc_dct_c(DCTELEM *block){
2009 const int stride= 16*2;
2010 const int xStride= 16;
2013 a= block[stride*0 + xStride*0];
2014 b= block[stride*0 + xStride*1];
2015 c= block[stride*1 + xStride*0];
2016 d= block[stride*1 + xStride*1];
2023 block[stride*0 + xStride*0]= (a+c);
2024 block[stride*0 + xStride*1]= (e+b);
2025 block[stride*1 + xStride*0]= (a-c);
2026 block[stride*1 + xStride*1]= (e-b);
2031 * gets the chroma qp.
// Table lookup of the chroma QP for a luma qscale, with the PPS offset
// applied and the index clipped to the legal 0..51 range.
2033 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2035 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
// Computes the 4x4 H.264 forward transform of the difference src1 - src2,
// writing the coefficients into block[] (row pass, then column pass).
2040 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2042 //FIXME try int temp instead of block
// Row pass: butterfly each row of pixel differences.
2045 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2046 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2047 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2048 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2049 const int z0= d0 + d3;
2050 const int z3= d0 - d3;
2051 const int z1= d1 + d2;
2052 const int z2= d1 - d2;
2054 block[0 + 4*i]= z0 + z1;
2055 block[1 + 4*i]= 2*z3 + z2;
2056 block[2 + 4*i]= z0 - z1;
2057 block[3 + 4*i]= z3 - 2*z2;
// Column pass: same butterfly down each column of the intermediate block.
2061 const int z0= block[0*4 + i] + block[3*4 + i];
2062 const int z3= block[0*4 + i] - block[3*4 + i];
2063 const int z1= block[1*4 + i] + block[2*4 + i];
2064 const int z2= block[1*4 + i] - block[2*4 + i];
2066 block[0*4 + i]= z0 + z1;
2067 block[1*4 + i]= 2*z3 + z2;
2068 block[2*4 + i]= z0 - z1;
2069 block[3*4 + i]= z3 - 2*z2;
2074 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2075 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
// Quantizes a 4x4 coefficient block with intra/inter dead-zone bias;
// seperate_dc selects a special DC path with a different shift.
// Returns the index of the last non-zero coefficient.
// NOTE(review): loop headers, else-branches and several store lines are
// elided in this excerpt.
2076 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2078 const int * const quant_table= quant_coeff[qscale];
// Dead zone: intra gets the larger bias (1/3 vs 1/6 of a quant step).
2079 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
2080 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2081 const unsigned int threshold2= (threshold1<<1);
// Separate-DC path, variant with QUANT_SHIFT-2 (luma DC, presumably —
// the selecting condition is elided; confirm against full source).
2087 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2088 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2089 const unsigned int dc_threshold2= (dc_threshold1<<1);
2091 int level= block[0]*quant_coeff[qscale+18][0];
// The unsigned-range trick tests |level| > threshold in one compare.
2092 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2094 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2097 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2100 // last_non_zero = i;
// Separate-DC path, variant with QUANT_SHIFT+1.
2105 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2106 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2107 const unsigned int dc_threshold2= (dc_threshold1<<1);
2109 int level= block[0]*quant_table[0];
2110 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2112 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2115 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2118 // last_non_zero = i;
// AC coefficients, walked in scan order.
2131 const int j= scantable[i];
2132 int level= block[j]*quant_table[j];
2134 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2135 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2136 if(((unsigned)(level+threshold1))>threshold2){
2138 level= (bias + level)>>QUANT_SHIFT;
2141 level= (bias - level)>>QUANT_SHIFT;
2150 return last_non_zero;
// 4x4 vertical prediction: replicate the 4 samples above into every row,
// copied as one 32-bit word per row.
2153 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2154 const uint32_t a= ((uint32_t*)(src-stride))[0];
2155 ((uint32_t*)(src+0*stride))[0]= a;
2156 ((uint32_t*)(src+1*stride))[0]= a;
2157 ((uint32_t*)(src+2*stride))[0]= a;
2158 ((uint32_t*)(src+3*stride))[0]= a;
// 4x4 horizontal prediction: fill each row with its left-neighbour sample
// (byte replicated across the 32-bit word via *0x01010101).
2161 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2162 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2163 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2164 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2165 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
// 4x4 DC prediction: average of the 4 top and 4 left samples (rounded),
// replicated across the whole block.
2168 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2169 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2170 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2172 ((uint32_t*)(src+0*stride))[0]=
2173 ((uint32_t*)(src+1*stride))[0]=
2174 ((uint32_t*)(src+2*stride))[0]=
2175 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the left neighbours (top edge unavailable). */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the top neighbours (left edge unavailable). */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction when no neighbours exist: fill with mid-grey (128). */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros for the directional 4x4 predictors below: load the
 * top-right, left and top neighbour pixels into local const ints
 * (t4..t7, l0..l3, t0..t3 respectively). */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];\
#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];\
#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];\
/* 4x4 diagonal-down-right prediction: 3-tap filtered diagonals from the
 * top-left corner. l0..l3/t0..t3 come from the LOAD_*_EDGE macros above. */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal-down-left prediction: 3-tap filtered diagonals from the
 * top and top-right edges (t0..t7 from the LOAD_*_EDGE macros). */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right prediction: mixes 2-tap averages and 3-tap filtered
 * values from the top/left edges (lt is the top-left corner sample). */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const __attribute__((unused)) int unu= l3;  /* l3 is loaded but not used by this mode */
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left prediction from the top/top-right edges. */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    const __attribute__((unused)) int unu= t7;  /* t7 is loaded but not used by this mode */
    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up prediction from the left edge only; lower-right pixels
 * (not visible in this excerpt) saturate to l3. */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down prediction: mixes 2-tap and 3-tap filtered values
 * from the left/top edges and the top-left corner sample lt. */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const __attribute__((unused)) int unu= t3;  /* t3 is loaded but not used by this mode */
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical prediction: copy the 16 pixels above (four 32-bit words)
 * into every row. */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal prediction: splat each left-neighbour pixel across its row. */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC prediction: average of the 16 top and 16 left neighbours
 * ((dc+16)>>5), byte-replicated and written to every row. */
static void pred16x16_dc_c(uint8_t *src, int stride){
        dc+= src[-1+i*stride];
    dc= 0x01010101*((dc + 16)>>5);
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 left neighbours ((dc+8)>>4). */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
        dc+= src[-1+i*stride];
    dc= 0x01010101*((dc + 8)>>4);
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 top neighbours ((dc+8)>>4). */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    dc= 0x01010101*((dc + 8)>>4);
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours available: fill with mid-grey (128). */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane prediction, shared between H.264 and SVQ3 (svq3 flag selects
 * the SVQ3 gradient scaling and the H/V swap SVQ3 requires).  Computes
 * horizontal (H) and vertical (V) gradients from the edge pixels, then
 * fills the block with the clipped linear ramp a + x*H + y*V. */
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
  uint8_t *cm = cropTbl + MAX_NEG_CROP;      /* clip-to-[0,255] lookup table */
  const uint8_t * const src0 = src+7-stride; /* centre of the top edge */
  const uint8_t *src1 = src+8*stride-1;      /* left edge, below centre */
  const uint8_t *src2 = src1-2*stride;       // == src+6*stride-1;
  int H = src0[1] - src0[-1];
  int V = src1[0] - src2[ 0];
  /* weighted sums of symmetric edge differences, per the spec */
  for(k=2; k<=8; ++k) {
    src1 += stride; src2 -= stride;
    H += k*(src0[k] - src0[-k]);
    V += k*(src1[0] - src2[ 0]);
    /* SVQ3 variant: different rounding and swapped H/V */
    H = ( 5*(H/4) ) / 16;
    V = ( 5*(V/4) ) / 16;

    /* required for 100% accuracy */
    i = H; H = V; V = i;
    /* H.264 variant: (5*x+32)>>6 scaling */
    H = ( 5*H+32 ) >> 6;
    V = ( 5*V+32 ) >> 6;

  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
  for(j=16; j>0; --j) {
    for(i=-16; i<0; i+=4) {
      src[16+i] = cm[ (b    ) >> 5 ];
      src[17+i] = cm[ (b+  H) >> 5 ];
      src[18+i] = cm[ (b+2*H) >> 5 ];
      src[19+i] = cm[ (b+3*H) >> 5 ];
/* H.264 16x16 plane prediction: the compat routine with svq3 disabled. */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical prediction: copy the 8 pixels above into every row. */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 (chroma) horizontal prediction: splat each left-neighbour pixel across its row. */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 (chroma) DC prediction with no neighbours: fill with mid-grey (128). */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 (chroma) left-only DC prediction: separate averages for the top
 * four (dc0) and bottom four (dc2) left neighbours, each filling its half. */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 (chroma) top-only DC prediction: separate averages for the left
 * (dc0) and right (dc1) halves of the top edge; each fills its column pair
 * for all 8 rows. */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc1= 0x01010101*((dc1 + 2)>>2);
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 (chroma) DC prediction: one DC per 4x4 quadrant, per the spec —
 * top-left from both edges (dc0), top-right from the top edge (dc1),
 * bottom-left from the left edge (dc2), bottom-right from both (dc3). */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int dc0, dc1, dc2, dc3;
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
    dc0= 0x01010101*((dc0 + 4)>>3);
    dc1= 0x01010101*((dc1 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 (chroma) plane prediction: gradient fit over the block edges, then
 * a clipped linear ramp a + x*H + y*V, analogous to the 16x16 version. */
static void pred8x8_plane_c(uint8_t *src, int stride){
  uint8_t *cm = cropTbl + MAX_NEG_CROP;      /* clip-to-[0,255] lookup table */
  const uint8_t * const src0 = src+3-stride; /* centre of the top edge */
  const uint8_t *src1 = src+4*stride-1;      /* left edge, below centre */
  const uint8_t *src2 = src1-2*stride;       // == src+2*stride-1;
  int H = src0[1] - src0[-1];
  int V = src1[0] - src2[ 0];
  for(k=2; k<=4; ++k) {
    src1 += stride; src2 -= stride;
    H += k*(src0[k] - src0[-k]);
    V += k*(src1[0] - src2[ 0]);
  H = ( 17*H+16 ) >> 5;
  V = ( 17*V+16 ) >> 5;

  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
  for(j=8; j>0; --j) {
    src[0] = cm[ (b    ) >> 5 ];
    src[1] = cm[ (b+  H) >> 5 ];
    src[2] = cm[ (b+2*H) >> 5 ];
    src[3] = cm[ (b+3*H) >> 5 ];
    src[4] = cm[ (b+4*H) >> 5 ];
    src[5] = cm[ (b+5*H) >> 5 ];
    src[6] = cm[ (b+6*H) >> 5 ];
    src[7] = cm[ (b+7*H) >> 5 ];
/* SRC(x,y): pixel addressing helper for the 8x8 luma predictors below. */
#define SRC(x,y) src[(x)+(y)*stride]
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Load the 3-tap low-pass-filtered left edge as l0..l7; l0 substitutes the
 * left sample itself when the top-left neighbour is unavailable. */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load the filtered top edge as t0..t7, with availability fallbacks at
 * both ends (top-left for t0, top-right for t7). */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load the filtered top-right edge as t8..t15, or replicate SRC(7,-1)
 * when the top-right block is unavailable. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill all 8 rows of the block with the byte-replicated DC word v. */
#define PREDICT_8x8_DC(v) \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with mid-grey (0x80). */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_DC(0x80808080);
/* 8x8 luma left-only DC prediction: average of the filtered left edge. */
static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_LEFT;
    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma top-only DC prediction: average of the filtered top edge. */
static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction: average of the filtered left and top edges. */
static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOP;
    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
                          +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: splat each filtered left-edge sample
 * across its row. */
static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_LEFT;
#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
               ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: write the filtered top edge to row 0
 * (not visible in this excerpt), then copy row 0 into rows 1..7. */
static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    for( y = 1; y < 8; y++ )
        *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: each anti-diagonal gets one
 * 3-tap filtered value from the top/top-right samples t0..t15. */
static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: one filtered value per
 * diagonal, walking from the bottom-left corner (l7) through the
 * top-left (lt) to the top-right (t7). */
static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternating rows of 2-tap averages
 * and 3-tap filtered values along a half-pel-slope diagonal. */
static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: column pairs of 2-tap averages and
 * 3-tap filtered values from the left edge, corner and top edge. */
static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: even rows use 2-tap averages and odd
 * rows 3-tap filtered values from the top/top-right samples t0..t12. */
static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates down the left edge;
 * all samples below the last interpolated diagonal saturate to l7. */
static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
    PREDICT_8x8_LOAD_LEFT;
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2859 #undef PREDICT_8x8_LOAD_LEFT
2860 #undef PREDICT_8x8_LOAD_TOP
2861 #undef PREDICT_8x8_LOAD_TOPLEFT
2862 #undef PREDICT_8x8_LOAD_TOPRIGHT
2863 #undef PREDICT_8x8_DC
/* Motion-compensate one partition from one reference list: qpel luma plus
 * two chroma planes, falling back to ff_emulated_edge_mc when the motion
 * vector reaches (partly) outside the reference picture. */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    /* motion vector in quarter-pel units, relative to the block position */
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);   /* quarter-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    const int full_mx= mx>>2;   /* integer-pel position */
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_MBAFF;

    /* sub-pel filtering needs 3 extra border pixels on that axis */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if(s->flags&CODEC_FLAG_GRAY) return;   /* luma-only decoding requested */

        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);

    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Standard (unweighted) motion compensation for one partition: put from
 * list 0 and/or list 1; when both lists are used the second pass averages
 * into the already-written destination. */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op= qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance destinations to the sub-block position (luma is 2x chroma) */
    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_MBAFF);

        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);

        /* after the list-0 put, a list-1 pass averages instead of overwriting */
        chroma_op= chroma_avg;

        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);
/* Weighted motion compensation for one partition.  Bi-directional case:
 * predict both lists (list 1 into a scratch buffer) and combine with
 * implicit (use_weight==2) or explicit bi-weights.  Uni-directional case:
 * predict the single list and apply explicit luma/chroma weights in place. */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    /* advance destinations to the sub-block position (luma is 2x chroma) */
    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_MBAFF);

        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: weight pair sums to 64, log2_denom 5 */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
            /* explicit bi-prediction weights from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        /* uni-directional: predict and weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to weighted or standard MC.  Weighted MC is used
 * for explicit weighting (use_weight==1) and for implicit bi-prediction
 * whose weight differs from 32 (an implicit weight of 32 is a plain
 * average, which the standard path already does). */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Issue prefetches for the reference pixels of the estimated MV of a
 * macroblock ahead, to hide memory latency during MC. */
static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* cb and cr are assumed contiguous: one prefetch covers both planes */
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Top-level inter prediction for one macroblock: split it according to
 * mb_type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and run MC for
 * each partition with the matching qpel/chroma function size and weight
 * table index. */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* two 16x8 halves; partition 1 starts at n=8, y_offset 4 */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* two 8x16 halves; delta is a line offset for the second luma 8x8 */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
        assert(IS_8X8(mb_type));
            /* 8x8: each quadrant has its own sub_mb_type */
            const int sub_mb_type= h->sub_mb_type[i];
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                assert(IS_SUB_4X4(sub_mb_type));
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));

    prefetch_motion(h, 1);
// Builds (once per process, guarded by the static 'done' flag) the CAVLC
// lookup tables: coeff_token, total_zeros and run_before VLCs, plus their
// chroma-DC variants.  NOTE(review): this listing is incomplete — the
// 'if(!done)' guard and several loop headers are elided from the visible text.
3152 static void decode_init_vlc(H264Context *h){
3153 static int done = 0;
// 4*5 entries: coeff_token for chroma DC (nnz context is fixed).
3159 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3160 &chroma_dc_coeff_token_len [0], 1, 1,
3161 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// One coeff_token table per nC context class (loop header elided).
3164 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3165 &coeff_token_len [i][0], 1, 1,
3166 &coeff_token_bits[i][0], 1, 1, 1);
// Chroma-DC total_zeros tables (loop header elided).
3170 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3171 &chroma_dc_total_zeros_len [i][0], 1, 1,
3172 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// 15 total_zeros tables, one per total-coefficient count.
3174 for(i=0; i<15; i++){
3175 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3176 &total_zeros_len [i][0], 1, 1,
3177 &total_zeros_bits[i][0], 1, 1, 1);
// run_before tables for zeros-left 1..6 (loop header elided) ...
3181 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3182 &run_len [i][0], 1, 1,
3183 &run_bits[i][0], 1, 1, 1);
// ... and the wider table used when more than 6 zeros are left.
3185 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3186 &run_len [6][0], 1, 1,
3187 &run_bits[6][0], 1, 1, 1);
3192 * Sets the intra prediction function pointers.
// Fills the intra-prediction function-pointer tables with the C reference
// implementations, one entry per prediction mode, for each block size:
// 4x4 luma, 8x8 luma (High profile), 8x8 chroma, and 16x16 luma.
3194 static void init_pred_ptrs(H264Context *h){
3195 // MpegEncContext * const s = &h->s;
// 4x4 luma intra prediction modes (9 directional + DC fallbacks).
3197 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3198 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3199 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3200 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3201 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3202 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3203 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3204 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3205 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
// DC variants used when left/top neighbours are unavailable.
3206 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3207 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3208 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
// 8x8 luma intra prediction (same mode set as 4x4).
3210 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3211 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3212 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3213 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3214 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3215 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3216 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3217 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3218 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3219 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3220 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3221 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
// 8x8 chroma intra prediction (4 modes + DC fallbacks).
3223 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3224 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3225 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3226 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3227 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3228 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3229 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
// 16x16 luma intra prediction (reuses the 8x8 mode indices).
3231 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3232 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3233 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3234 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3235 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3236 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3237 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
// Releases every per-stream table allocated by alloc_tables().  av_freep()
// also NULLs the pointers, so calling this twice is safe; slice_table is
// reset manually because it aliases into slice_table_base.
3240 static void free_tables(H264Context *h){
3241 av_freep(&h->intra4x4_pred_mode);
3242 av_freep(&h->chroma_pred_mode_table);
3243 av_freep(&h->cbp_table);
3244 av_freep(&h->mvd_table[0]);
3245 av_freep(&h->mvd_table[1]);
3246 av_freep(&h->direct_table);
3247 av_freep(&h->non_zero_count);
3248 av_freep(&h->slice_table_base);
3249 av_freep(&h->top_borders[1]);
3250 av_freep(&h->top_borders[0]);
// Alias into slice_table_base — must not be freed itself.
3251 h->slice_table= NULL;
3253 av_freep(&h->mb2b_xy);
3254 av_freep(&h->mb2b8_xy);
3256 av_freep(&h->s.obmc_scratchpad);
// Precomputes the 8x8 dequantization tables for all 52 QP values from the
// PPS scaling matrices.  If both 8x8 scaling matrices are identical, table 1
// aliases table 0 to save work.  'transpose' reorders coefficients when a
// non-reference (SIMD) IDCT with a different scan layout is in use.
3259 static void init_dequant8_coeff_table(H264Context *h){
3261 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3262 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3263 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3265 for(i=0; i<2; i++ ){
// Share table 0 when the intra and inter scaling matrices match.
3266 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3267 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3271 for(q=0; q<52; q++){
3272 int shift = div6[q];
// Per-coefficient: base dequant value * scaling-matrix entry, scaled by QP/6.
3275 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3276 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3277 h->pps.scaling_matrix8[i][x]) << shift;
// Precomputes the 4x4 dequantization tables (6 matrices: intra/inter for
// Y/Cb/Cr) for all 52 QP values.  Matrices with identical scaling lists
// share one buffer; 'transpose' matches the coefficient layout of a
// non-reference IDCT implementation.
3282 static void init_dequant4_coeff_table(H264Context *h){
3284 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3285 for(i=0; i<6; i++ ){
3286 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// Reuse an earlier buffer when the scaling lists are identical.
3288 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3289 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3296 for(q=0; q<52; q++){
3297 int shift = div6[q] + 2;
// Per-coefficient dequant value, scaled by the matrix entry and QP/6.
3300 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3301 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3302 h->pps.scaling_matrix4[i][x]) << shift;
// Builds all dequant tables: 4x4 always, 8x8 only when the PPS enables the
// 8x8 transform.  With lossless transform-bypass, QP 0 entries are forced to
// the identity scale (1<<6) so bypassed blocks pass through unscaled.
3307 static void init_dequant_tables(H264Context *h){
3309 init_dequant4_coeff_table(h);
3310 if(h->pps.transform_8x8_mode)
3311 init_dequant8_coeff_table(h);
3312 if(h->sps.transform_bypass){
// Identity scaling for QP 0 in bypass mode (loop headers elided).
3315 h->dequant4_coeff[i][0][x] = 1<<6;
3316 if(h->pps.transform_8x8_mode)
3319 h->dequant8_coeff[i][0][x] = 1<<6;
3326 * needs width/height
// Allocates the per-stream decoding tables once the frame dimensions are
// known (needs s->mb_width / mb_height / mb_stride).  Returns 0 on success;
// CHECKED_ALLOCZ presumably jumps to a cleanup label on failure (elided here).
3328 static int alloc_tables(H264Context *h){
3329 MpegEncContext * const s = &h->s;
// One extra row so that "above" neighbour accesses stay in bounds.
3330 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3333 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3335 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3336 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3338 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3339 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC needs extra context tables (mvd, direct, chroma pred mode).
3341 if( h->pps.cabac ) {
3342 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3343 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3344 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3345 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset so index 0 is a valid MB.
3348 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3349 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// MB-address -> motion-vector-grid index lookup tables.
3351 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3352 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3353 for(y=0; y<s->mb_height; y++){
3354 for(x=0; x<s->mb_width; x++){
3355 const int mb_xy= x + y*s->mb_stride;
3356 const int b_xy = 4*x + 4*y*h->b_stride;
3357 const int b8_xy= 2*x + 2*y*h->b8_stride;
3359 h->mb2b_xy [mb_xy]= b_xy;
3360 h->mb2b8_xy[mb_xy]= b8_xy;
// Allocated later in frame_start() because it needs the linesize.
3364 s->obmc_scratchpad = NULL;
3366 if(!h->dequant4_coeff[0])
3367 init_dequant_tables(h);
// Initialization shared by decoder (and presumably encoder) setup: copies
// dimensions/codec id from the AVCodecContext and seeds the scaling matrices
// with the spec's flat default of 16 (i.e. no custom scaling).
3375 static void common_init(H264Context *h){
3376 MpegEncContext * const s = &h->s;
3378 s->width = s->avctx->width;
3379 s->height = s->avctx->height;
3380 s->codec_id= s->avctx->codec->id;
// -1 = "no PPS-derived dequant tables computed yet".
3384 h->dequant_coeff_pps= -1;
3385 s->unrestricted_mv=1;
3386 s->decode=1; //FIXME
// Flat default scaling lists (all 16) until an SPS/PPS overrides them.
3388 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3389 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// AVCodec init callback: sets up MpegEncContext defaults, output format and
// pixel format, and detects AVC ("avcC") extradata by its leading version
// byte 1.  NOTE(review): the body after the extradata check is elided in
// this listing.
3392 static int decode_init(AVCodecContext *avctx){
3393 H264Context *h= avctx->priv_data;
3394 MpegEncContext * const s = &h->s;
3396 MPV_decode_defaults(s);
3401 s->out_format = FMT_H264;
3402 s->workaround_bugs= avctx->workaround_bugs;
3405 // s->decode_mb= ff_h263_decode_mb;
3407 avctx->pix_fmt= PIX_FMT_YUV420P;
// First extradata byte == 1 identifies length-prefixed AVC (avcC) format
// as opposed to Annex-B start codes.
3411 if(avctx->extradata_size > 0 && avctx->extradata &&
3412 *(char *)avctx->extradata == 1){
// Per-frame setup: starts the MPV frame and error resilience, precomputes
// the block -> pixel offsets for the current linesizes (both progressive,
// entries 0..23, and field/MBAFF, entries 24..47), and lazily allocates the
// bipred scratchpad.
3422 static int frame_start(H264Context *h){
3423 MpegEncContext * const s = &h->s;
3426 if(MPV_frame_start(s, s->avctx) < 0)
3428 ff_er_frame_start(s);
3430 assert(s->linesize && s->uvlinesize);
// Luma offsets: [0..15] frame, [24..39] field (doubled row step).
3432 for(i=0; i<16; i++){
3433 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3434 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// Chroma offsets, Cb and Cr sharing the same values (loop header elided).
3437 h->block_offset[16+i]=
3438 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3439 h->block_offset[24+16+i]=
3440 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3443 /* can't be in alloc_tables because linesize isn't known there.
3444 * FIXME: redo bipred weight to not require extra buffer? */
3445 if(!s->obmc_scratchpad)
3446 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3448 /* some macroblocks will be accessed before they're available */
3450 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3452 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
// Saves the right column (left_border for the next MB) and bottom row
// (top_borders for the next MB row) of the just-decoded macroblock, so the
// deblocking filter can later restore the unfiltered pixels.  Chroma is
// skipped in grayscale (CODEC_FLAG_GRAY) mode.
3456 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3457 MpegEncContext * const s = &h->s;
3461 src_cb -= uvlinesize;
3462 src_cr -= uvlinesize;
3464 // There are two lines saved, the line above the top macroblock of a pair,
3465 // and the line above the bottom macroblock
3466 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3467 for(i=1; i<17; i++){
3468 h->left_border[i]= src_y[15+i* linesize];
// Bottom luma row saved as two 8-byte chunks.
3471 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3472 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3474 if(!(s->flags&CODEC_FLAG_GRAY)){
3475 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3476 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3478 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3479 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
// Bottom chroma rows (Cb at +16, Cr at +24 within the border row).
3481 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3482 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border pixels
// with the current macroblock's edges, so intra prediction sees unfiltered
// neighbours while the deblocked picture keeps the filtered ones.
// Neighbour edges only exist when not on the left/top picture border.
3486 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3487 MpegEncContext * const s = &h->s;
3490 int deblock_left = (s->mb_x > 0);
3491 int deblock_top = (s->mb_y > 0);
// Step back to the top-left neighbour pixel.
3493 src_y -= linesize + 1;
3494 src_cb -= uvlinesize + 1;
3495 src_cr -= uvlinesize + 1;
// Swap a and b through t when xchg, otherwise one-way copy (body elided).
3497 #define XCHG(a,b,t,xchg)\
3504 for(i = !deblock_top; i<17; i++){
3505 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Top luma row; the second chunk and the top-right MB are always swapped.
3510 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3511 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3512 if(s->mb_x+1 < s->mb_width){
3513 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3517 if(!(s->flags&CODEC_FLAG_GRAY)){
3519 for(i = !deblock_top; i<9; i++){
3520 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3521 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3525 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3526 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border(): saves the borders for a whole
// macroblock pair — 34 left-column luma pixels (2 top rows + 32 rows) and
// TWO bottom rows (top_borders[0] and [1]), one per field parity.
3531 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3532 MpegEncContext * const s = &h->s;
3535 src_y -= 2 * linesize;
3536 src_cb -= 2 * uvlinesize;
3537 src_cr -= 2 * uvlinesize;
3539 // There are two lines saved, the line above the top macroblock of a pair,
3540 // and the line above the bottom macroblock
3541 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3542 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3543 for(i=2; i<34; i++){
3544 h->left_border[i]= src_y[15+i* linesize];
// Two bottom luma rows (rows 32 and 33 of the pair).
3547 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3548 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3549 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3550 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3552 if(!(s->flags&CODEC_FLAG_GRAY)){
3553 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3554 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3555 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3556 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3557 for(i=2; i<18; i++){
3558 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3559 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
// Two bottom chroma rows, Cb at +16 and Cr at +24 in the border rows.
3561 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3562 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3563 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3564 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF variant of xchg_mb_border(): swaps/restores the unfiltered border
// pixels for a macroblock pair (two top rows, 34-pixel left columns).
// Note deblock_top requires mb_y > 1 since a pair spans two MB rows.
3568 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3569 MpegEncContext * const s = &h->s;
3572 int deblock_left = (s->mb_x > 0);
3573 int deblock_top = (s->mb_y > 1);
3575 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3577 src_y -= 2 * linesize + 1;
3578 src_cb -= 2 * uvlinesize + 1;
3579 src_cr -= 2 * uvlinesize + 1;
// Swap a and b through t when xchg, otherwise one-way copy (body elided).
3581 #define XCHG(a,b,t,xchg)\
3588 for(i = (!deblock_top)<<1; i<34; i++){
3589 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Two top luma rows, one per top_borders parity.
3594 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3595 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3596 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3597 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3598 if(s->mb_x+1 < s->mb_width){
3599 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3600 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3604 if(!(s->flags&CODEC_FLAG_GRAY)){
3606 for(i = (!deblock_top) << 1; i<18; i++){
3607 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3608 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3612 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3613 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3614 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3615 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level per-macroblock reconstruction: computes destination pointers,
// selects IDCT functions (4x4/8x8/bypass), then either copies PCM samples,
// runs intra prediction + residual add, or runs motion compensation
// (hl_motion) + residual add, handles chroma, and finally drives the
// deblocking filter (with MBAFF pair handling).  NOTE(review): many interior
// lines (loop headers, closing braces, MBAFF branches) are elided from this
// listing — do not restructure without the complete function.
3620 static void hl_decode_mb(H264Context *h){
3621 MpegEncContext * const s = &h->s;
3622 const int mb_x= s->mb_x;
3623 const int mb_y= s->mb_y;
3624 const int mb_xy= mb_x + mb_y*s->mb_stride;
3625 const int mb_type= s->current_picture.mb_type[mb_xy];
3626 uint8_t *dest_y, *dest_cb, *dest_cr;
3627 int linesize, uvlinesize /*dct_offset*/;
3629 int *block_offset = &h->block_offset[0];
3630 const unsigned int bottom = mb_y & 1;
// Lossless bypass is only active at qscale 0 with the SPS flag set.
3631 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3632 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3633 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for the current MB in the output picture.
3638 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3639 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3640 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3642 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3643 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblock: double the row stride and use the field block offsets.
3646 linesize = h->mb_linesize = s->linesize * 2;
3647 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3648 block_offset = &h->block_offset[24];
3649 if(mb_y&1){ //FIXME move out of this func?
3650 dest_y -= s->linesize*15;
3651 dest_cb-= s->uvlinesize*7;
3652 dest_cr-= s->uvlinesize*7;
// Remap reference indices to field references for MBAFF field MBs.
3656 for(list=0; list<2; list++){
3657 if(!USES_LIST(mb_type, list))
3659 if(IS_16X16(mb_type)){
3660 int8_t *ref = &h->ref_cache[list][scan8[0]];
3661 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3663 for(i=0; i<16; i+=4){
3664 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3665 int ref = h->ref_cache[list][scan8[i]];
3667 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// Frame macroblock: normal strides.
3673 linesize = h->mb_linesize = s->linesize;
3674 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3675 // dct_offset = s->linesize * 16;
// Select the residual-add functions for this MB's transform size.
3678 if(transform_bypass){
3680 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3681 }else if(IS_8x8DCT(mb_type)){
3682 idct_dc_add = s->dsp.h264_idct8_dc_add;
3683 idct_add = s->dsp.h264_idct8_add;
3685 idct_dc_add = s->dsp.h264_idct_dc_add;
3686 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: swap in the unfiltered borders of the MB pair above.
3689 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3690 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3691 int mbt_y = mb_y&~1;
3692 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3693 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3694 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3695 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples were parsed into h->mb; copy them straight out.
3698 if (IS_INTRA_PCM(mb_type)) {
3701 // The pixels are stored in h->mb array in the same order as levels,
3702 // copy them in output in the correct order.
3703 for(i=0; i<16; i++) {
3704 for (y=0; y<4; y++) {
3705 for (x=0; x<4; x++) {
3706 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3710 for(i=16; i<16+4; i++) {
3711 for (y=0; y<4; y++) {
3712 for (x=0; x<4; x++) {
3713 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3717 for(i=20; i<20+4; i++) {
3718 for (y=0; y<4; y++) {
3719 for (x=0; x<4; x++) {
3720 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: chroma prediction, then 4x4/8x8/16x16 luma prediction.
3725 if(IS_INTRA(mb_type)){
3726 if(h->deblocking_filter && !FRAME_MBAFF)
3727 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3729 if(!(s->flags&CODEC_FLAG_GRAY)){
3730 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3731 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3734 if(IS_INTRA4x4(mb_type)){
3736 if(IS_8x8DCT(mb_type)){
3737 for(i=0; i<16; i+=4){
3738 uint8_t * const ptr= dest_y + block_offset[i];
3739 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3740 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3741 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3742 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// DC-only shortcut when exactly one nonzero coeff and it's the DC.
3744 if(nnz == 1 && h->mb[i*16])
3745 idct_dc_add(ptr, h->mb + i*16, linesize);
3747 idct_add(ptr, h->mb + i*16, linesize);
3751 for(i=0; i<16; i++){
3752 uint8_t * const ptr= dest_y + block_offset[i];
3754 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Down-left modes need top-right samples; synthesize them if missing.
3757 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3758 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3759 assert(mb_y || linesize <= block_offset[i]);
3760 if(!topright_avail){
3761 tr= ptr[3 - linesize]*0x01010101;
3762 topright= (uint8_t*) &tr;
3764 topright= ptr + 4 - linesize;
3768 h->pred4x4[ dir ](ptr, topright, linesize);
3769 nnz = h->non_zero_count_cache[ scan8[i] ];
3771 if(s->codec_id == CODEC_ID_H264){
3772 if(nnz == 1 && h->mb[i*16])
3773 idct_dc_add(ptr, h->mb + i*16, linesize);
3775 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 shares this code path but uses its own IDCT.
3777 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: full-MB prediction plus the separate luma DC transform.
3782 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3783 if(s->codec_id == CODEC_ID_H264){
3784 if(!transform_bypass)
3785 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3787 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3789 if(h->deblocking_filter && !FRAME_MBAFF)
3790 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock: motion compensation via hl_motion().
3791 }else if(s->codec_id == CODEC_ID_H264){
3792 hl_motion(h, dest_y, dest_cb, dest_cr,
3793 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3794 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3795 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add the luma residual (intra-16x16 and inter paths).
3799 if(!IS_INTRA4x4(mb_type)){
3800 if(s->codec_id == CODEC_ID_H264){
3801 if(IS_INTRA16x16(mb_type)){
3802 for(i=0; i<16; i++){
3803 if(h->non_zero_count_cache[ scan8[i] ])
3804 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3805 else if(h->mb[i*16])
3806 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3809 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3810 for(i=0; i<16; i+=di){
3811 int nnz = h->non_zero_count_cache[ scan8[i] ];
3813 if(nnz==1 && h->mb[i*16])
3814 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3816 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3821 for(i=0; i<16; i++){
3822 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3823 uint8_t * const ptr= dest_y + block_offset[i];
3824 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: DC dequant/transform, then per-4x4-block add.
3830 if(!(s->flags&CODEC_FLAG_GRAY)){
3831 uint8_t *dest[2] = {dest_cb, dest_cr};
3832 if(transform_bypass){
3833 idct_add = idct_dc_add = s->dsp.add_pixels4;
3835 idct_add = s->dsp.h264_idct_add;
3836 idct_dc_add = s->dsp.h264_idct_dc_add;
3837 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3838 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3840 if(s->codec_id == CODEC_ID_H264){
3841 for(i=16; i<16+8; i++){
3842 if(h->non_zero_count_cache[ scan8[i] ])
3843 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3844 else if(h->mb[i*16])
3845 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3848 for(i=16; i<16+8; i++){
3849 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3850 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3851 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking: for MBAFF, filter the whole pair once the bottom MB is done.
3857 if(h->deblocking_filter) {
3859 //FIXME try deblocking one mb at a time?
3860 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3861 const int mb_y = s->mb_y - 1;
3862 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3863 const int mb_xy= mb_x + mb_y*s->mb_stride;
3864 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3865 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3866 if (!bottom) return;
3867 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3868 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3869 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3871 if(IS_INTRA(mb_type_top | mb_type_bottom))
3872 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3874 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
// Filter top then bottom MB of the pair with their own QPs.
3878 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3879 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3880 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3881 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3884 tprintf("call mbaff filter_mb\n");
3885 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3886 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3887 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// Non-MBAFF: back up borders and filter this single MB (fast path).
3889 tprintf("call filter_mb\n");
3890 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3891 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3892 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3898 * fills the default_ref_list.
// Builds the default reference picture lists.  For B slices the short-term
// refs are first sorted by POC; list0 then runs from pictures before the
// current POC forward, list1 the other way, and the first two list1 entries
// are swapped if the lists would otherwise be identical.  For P slices the
// list is simply short-term refs (most recent first) followed by long-term.
3900 static int fill_default_ref_list(H264Context *h){
3901 MpegEncContext * const s = &h->s;
3903 int smallest_poc_greater_than_current = -1;
3904 Picture sorted_short_ref[32];
3906 if(h->slice_type==B_TYPE){
3910 /* sort frame according to poc in B slice */
// Selection sort of short-term refs by ascending POC.
3911 for(out_i=0; out_i<h->short_ref_count; out_i++){
3913 int best_poc=INT_MAX;
3915 for(i=0; i<h->short_ref_count; i++){
3916 const int poc= h->short_ref[i]->poc;
3917 if(poc > limit && poc < best_poc){
3923 assert(best_i != INT_MIN);
3926 sorted_short_ref[out_i]= *h->short_ref[best_i];
3927 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember where the "future" pictures start in the sorted order.
3928 if (-1 == smallest_poc_greater_than_current) {
3929 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3930 smallest_poc_greater_than_current = out_i;
3936 if(s->picture_structure == PICT_FRAME){
3937 if(h->slice_type==B_TYPE){
3939 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3941 // find the largest poc
// Walk outward from the current POC: list0 backwards, list1 forwards.
3942 for(list=0; list<2; list++){
3945 int step= list ? -1 : 1;
3947 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3948 while(j<0 || j>= h->short_ref_count){
3949 if(j != -99 && step == (list ? -1 : 1))
3952 j= smallest_poc_greater_than_current + (step>>1);
3954 if(sorted_short_ref[j].reference != 3) continue;
3955 h->default_ref_list[list][index ]= sorted_short_ref[j];
3956 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term refs are appended after the short-term ones.
3959 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3960 if(h->long_ref[i] == NULL) continue;
3961 if(h->long_ref[i]->reference != 3) continue;
3963 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3964 h->default_ref_list[ list ][index++].pic_id= i;;
3967 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3968 // swap the two first elements of L1 when
3969 // L0 and L1 are identical
3970 Picture temp= h->default_ref_list[1][0];
3971 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3972 h->default_ref_list[1][1] = temp;
3975 if(index < h->ref_count[ list ])
3976 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slice: short-term refs in stored order, then long-term.
3980 for(i=0; i<h->short_ref_count; i++){
3981 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3982 h->default_ref_list[0][index ]= *h->short_ref[i];
3983 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3985 for(i = 0; i < 16; i++){
3986 if(h->long_ref[i] == NULL) continue;
3987 if(h->long_ref[i]->reference != 3) continue;
3988 h->default_ref_list[0][index ]= *h->long_ref[i];
// NOTE(review): stray double semicolon above/below ( `i;;` ) — harmless but should be cleaned up.
3989 h->default_ref_list[0][index++].pic_id= i;;
3991 if(index < h->ref_count[0])
3992 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3995 if(h->slice_type==B_TYPE){
3997 //FIXME second field balh
4001 for (i=0; i<h->ref_count[0]; i++) {
4002 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
4004 if(h->slice_type==B_TYPE){
4005 for (i=0; i<h->ref_count[1]; i++) {
4006 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4013 static void print_short_term(H264Context *h);
4014 static void print_long_term(H264Context *h);
// Parses the ref_pic_list_reordering syntax and applies it on top of the
// default reference lists: for each reordering command, locate the target
// picture (by frame_num delta for short-term, by index for long-term),
// then rotate it to the current position in ref_list.  Returns 0 on
// success, -1 on syntax errors.
4016 static int decode_ref_pic_list_reordering(H264Context *h){
4017 MpegEncContext * const s = &h->s;
4020 print_short_term(h);
// I/SI slices carry no reference lists.
4022 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4024 for(list=0; list<2; list++){
4025 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
4027 if(get_bits1(&s->gb)){
4028 int pred= h->curr_pic_num;
4030 for(index=0; ; index++){
4031 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4034 Picture *ref = NULL;
// idc==3 terminates the reordering command list.
4036 if(reordering_of_pic_nums_idc==3)
4039 if(index >= h->ref_count[list]){
4040 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4044 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term ref addressed by a +/- frame_num difference.
4045 if(reordering_of_pic_nums_idc<2){
4046 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4048 if(abs_diff_pic_num >= h->max_pic_num){
4049 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4053 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4054 else pred+= abs_diff_pic_num;
// Wrap modulo max_pic_num (power of two).
4055 pred &= h->max_pic_num - 1;
4057 for(i= h->short_ref_count-1; i>=0; i--){
4058 ref = h->short_ref[i];
4059 assert(ref->reference == 3);
4060 assert(!ref->long_ref);
4061 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4065 ref->pic_id= ref->frame_num;
// idc 2: long-term ref addressed directly by index.
4067 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4068 ref = h->long_ref[pic_id];
4069 ref->pic_id= pic_id;
4070 assert(ref->reference == 3);
4071 assert(ref->long_ref);
4076 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4077 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift entries down and insert the found ref at 'index'.
4079 for(i=index; i+1<h->ref_count[list]; i++){
4080 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4083 for(; i > index; i--){
4084 h->ref_list[list][i]= h->ref_list[list][i-1];
4086 h->ref_list[list][index]= *ref;
4089 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4095 if(h->slice_type!=B_TYPE) break;
// Replace any still-empty slots with the current picture (error recovery).
4097 for(list=0; list<2; list++){
4098 for(index= 0; index < h->ref_count[list]; index++){
4099 if(!h->ref_list[list][index].data[0])
4100 h->ref_list[list][index]= s->current_picture;
4102 if(h->slice_type!=B_TYPE) break;
4105 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4106 direct_dist_scale_factor(h);
4107 direct_ref_list_init(h);
// Derives per-field reference entries for MBAFF: for each frame reference i,
// builds two field Pictures at slots 16+2*i (top) and 16+2*i+1 (bottom) with
// doubled linesize and the bottom field's data offset by one row, and
// duplicates the corresponding weighted-prediction parameters.
4111 static void fill_mbaff_ref_list(H264Context *h){
4113 for(list=0; list<2; list++){
4114 for(i=0; i<h->ref_count[list]; i++){
4115 Picture *frame = &h->ref_list[list][i];
4116 Picture *field = &h->ref_list[list][16+2*i];
// Field views: double the stride; bottom field starts one frame-row later.
4119 field[0].linesize[j] <<= 1;
4120 field[1] = field[0];
4122 field[1].data[j] += frame->linesize[j];
// Copy explicit weights/offsets to both field entries.
4124 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4125 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4127 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4128 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Duplicate implicit weights across both field indices of each list-1 ref.
4132 for(j=0; j<h->ref_count[1]; j++){
4133 for(i=0; i<h->ref_count[0]; i++)
4134 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4135 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4136 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses the explicit weighted-prediction table (pred_weight_table) from
// the slice header: log2 denominators, then per-reference luma and chroma
// weights/offsets, with defaults (weight = 1<<denom, offset = 0) when the
// per-ref flag is absent.  Sets use_weight/use_weight_chroma if any entry
// differs from the defaults.  List 1 is parsed only for B slices.
4140 static int pred_weight_table(H264Context *h){
4141 MpegEncContext * const s = &h->s;
4143 int luma_def, chroma_def;
4146 h->use_weight_chroma= 0;
4147 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4148 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is the identity at the chosen denominator.
4149 luma_def = 1<<h->luma_log2_weight_denom;
4150 chroma_def = 1<<h->chroma_log2_weight_denom;
4152 for(list=0; list<2; list++){
4153 for(i=0; i<h->ref_count[list]; i++){
4154 int luma_weight_flag, chroma_weight_flag;
4156 luma_weight_flag= get_bits1(&s->gb);
4157 if(luma_weight_flag){
4158 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4159 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only a non-default weight/offset activates weighting.
4160 if( h->luma_weight[list][i] != luma_def
4161 || h->luma_offset[list][i] != 0)
4164 h->luma_weight[list][i]= luma_def;
4165 h->luma_offset[list][i]= 0;
4168 chroma_weight_flag= get_bits1(&s->gb);
4169 if(chroma_weight_flag){
// Cb and Cr weights/offsets (component loop header elided).
4172 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4173 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4174 if( h->chroma_weight[list][i][j] != chroma_def
4175 || h->chroma_offset[list][i][j] != 0)
4176 h->use_weight_chroma= 1;
4181 h->chroma_weight[list][i][j]= chroma_def;
4182 h->chroma_offset[list][i][j]= 0;
4186 if(h->slice_type != B_TYPE) break;
4188 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initializes the implicit bipred weight table (weighted_bipred_idc == 2):
 * for every (ref0, ref1) pair the weight is derived from the POC distances
 * between the current picture and the two references (H.264 clause 8.4.2.3.2).
 * Falls back to equal weights (32/32) when the scaling would be degenerate.
 */
4192 static void implicit_weight_table(H264Context *h){
4193 MpegEncContext * const s = &h->s;
4195 int cur_poc = s->current_picture_ptr->poc;
// special case: single ref each side, current POC exactly midway ->
// implicit weighting reduces to plain averaging, so disable it entirely
4197 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4198 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4200 h->use_weight_chroma= 0;
// use_weight==2 / use_weight_chroma==2 marks "implicit" mode elsewhere
4205 h->use_weight_chroma= 2;
4206 h->luma_log2_weight_denom= 5;
4207 h->chroma_log2_weight_denom= 5;
4209 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4210 int poc0 = h->ref_list[0][ref0].poc;
4211 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4212 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/DistScaleFactor follow the spec's temporal scaling derivation
4213 int td = clip(poc1 - poc0, -128, 127);
4215 int tb = clip(cur_poc - poc0, -128, 127);
4216 int tx = (16384 + (FFABS(td) >> 1)) / td;
4217 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factors degrade to the equal-weight case
4218 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4219 h->implicit_weight[ref0][ref1] = 32;
4221 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4223 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the decoder's reference claim on @p pic. A picture still pending
 * output (delayed_output_pic or an entry in delayed_pic[]) must keep a
 * reference count; only the non-delayed case fully unreferences it.
 * NOTE(review): the branch bodies are elided in this view — confirm the
 * exact reference-count values assigned in each case against the full file.
 */
4228 static inline void unreference_pic(H264Context *h, Picture *pic){
4231 if(pic == h->delayed_output_pic)
4234 for(i = 0; h->delayed_pic[i]; i++)
4235 if(pic == h->delayed_pic[i]){
4243 * instantaneous decoder refresh.
/**
 * Handles an IDR: empties both the long-term and short-term reference
 * picture lists, unreferencing every entry, so decoding restarts from a
 * clean DPB state (H.264 instantaneous decoder refresh semantics).
 */
4245 static void idr(H264Context *h){
// release and clear all 16 long-term reference slots
4248 for(i=0; i<16; i++){
4249 if (h->long_ref[i] != NULL) {
4250 unreference_pic(h, h->long_ref[i]);
4251 h->long_ref[i]= NULL;
4254 h->long_ref_count=0;
// release and clear the short-term reference list
4256 for(i=0; i<h->short_ref_count; i++){
4257 unreference_pic(h, h->short_ref[i]);
4258 h->short_ref[i]= NULL;
4260 h->short_ref_count=0;
4263 /* forget old pics after a seek */
/**
 * AVCodec flush callback: discards all delayed-output pictures and the
 * current picture after a seek by clearing their reference flags, so stale
 * frames are neither output nor used for prediction.
 */
4264 static void flush_dpb(AVCodecContext *avctx){
4265 H264Context *h= avctx->priv_data;
4267 for(i=0; i<16; i++) {
4268 if(h->delayed_pic[i])
4269 h->delayed_pic[i]->reference= 0;
4270 h->delayed_pic[i]= NULL;
4272 if(h->delayed_output_pic)
4273 h->delayed_output_pic->reference= 0;
4274 h->delayed_output_pic= NULL;
4276 if(h->s.current_picture_ptr)
4277 h->s.current_picture_ptr->reference= 0;
4282 * @return the removed picture or NULL if an error occurs
/**
 * Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array. Does NOT unreference the picture —
 * ownership passes to the caller.
 * @return the removed picture, or NULL if no entry matches frame_num
 */
4284 static Picture * remove_short(H264Context *h, int frame_num){
4285 MpegEncContext * const s = &h->s;
4288 if(s->avctx->debug&FF_DEBUG_MMCO)
4289 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4291 for(i=0; i<h->short_ref_count; i++){
4292 Picture *pic= h->short_ref[i];
4293 if(s->avctx->debug&FF_DEBUG_MMCO)
4294 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4295 if(pic->frame_num == frame_num){
4296 h->short_ref[i]= NULL;
// shift the remaining entries down to fill the hole
4297 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4298 h->short_ref_count--;
4307 * @return the removed picture or NULL if an error occurs
/**
 * Clears long-term reference slot @p i and decrements the long-term count
 * if the slot was occupied. Does NOT unreference the picture — ownership
 * passes to the caller.
 * @return the picture that occupied the slot, or NULL if it was empty
 */
4309 static Picture * remove_long(H264Context *h, int i){
4312 pic= h->long_ref[i];
4313 h->long_ref[i]= NULL;
4314 if(pic) h->long_ref_count--;
4320 * print short term list
/**
 * Debug helper: dumps the short-term reference list (index, frame_num, POC,
 * data pointer) when FF_DEBUG_MMCO is enabled; no-op otherwise.
 */
4322 static void print_short_term(H264Context *h) {
4324 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4325 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4326 for(i=0; i<h->short_ref_count; i++){
4327 Picture *pic= h->short_ref[i];
4328 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4334 * print long term list
/**
 * Debug helper: dumps all 16 long-term reference slots (index, frame_num,
 * POC, data pointer) when FF_DEBUG_MMCO is enabled; no-op otherwise.
 */
4336 static void print_long_term(H264Context *h) {
4338 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4339 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4340 for(i = 0; i < 16; i++){
4341 Picture *pic= h->long_ref[i];
4343 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4350 * Executes the reference picture marking (memory management control operations).
/**
 * Applies the parsed MMCO operations (H.264 clause 8.2.5) to the reference
 * lists: moving pictures between short-term and long-term, unreferencing
 * them, trimming the long-term list, or resetting everything. Finally the
 * current picture is inserted as a reference (short-term unless an MMCO
 * made it long-term).
 * @param mmco       array of decoded memory management control operations
 * @param mmco_count number of valid entries in @p mmco
 */
4352 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4353 MpegEncContext * const s = &h->s;
4355 int current_is_long=0;
4358 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4359 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4361 for(i=0; i<mmco_count; i++){
4362 if(s->avctx->debug&FF_DEBUG_MMCO)
4363 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4365 switch(mmco[i].opcode){
// mark a short-term picture as unused for reference
4366 case MMCO_SHORT2UNUSED:
4367 pic= remove_short(h, mmco[i].short_frame_num);
4369 unreference_pic(h, pic);
4370 else if(s->avctx->debug&FF_DEBUG_MMCO)
4371 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// promote a short-term picture to the given long-term index,
// evicting whatever previously occupied that slot
4373 case MMCO_SHORT2LONG:
4374 pic= remove_long(h, mmco[i].long_index);
4375 if(pic) unreference_pic(h, pic);
4377 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4378 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4379 h->long_ref_count++;
// mark a long-term picture as unused for reference
4381 case MMCO_LONG2UNUSED:
4382 pic= remove_long(h, mmco[i].long_index);
4384 unreference_pic(h, pic);
4385 else if(s->avctx->debug&FF_DEBUG_MMCO)
4386 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (MMCO_LONG, case label elided) make the CURRENT picture long-term
4389 pic= remove_long(h, mmco[i].long_index);
4390 if(pic) unreference_pic(h, pic);
4392 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4393 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4394 h->long_ref_count++;
4398 case MMCO_SET_MAX_LONG:
4399 assert(mmco[i].long_index <= 16);
// remove long-term refs whose index is >= the new maximum
4401 for(j = mmco[i].long_index; j<16; j++){
4402 pic = remove_long(h, j);
4403 if (pic) unreference_pic(h, pic);
// (MMCO_RESET, case label elided) empty both reference lists
4407 while(h->short_ref_count){
4408 pic= remove_short(h, h->short_ref[0]->frame_num);
4409 unreference_pic(h, pic);
4411 for(j = 0; j < 16; j++) {
4412 pic= remove_long(h, j);
4413 if(pic) unreference_pic(h, pic);
// insert the current picture as a short-term reference at the head,
// unless an MMCO_LONG already made it long-term
4420 if(!current_is_long){
// a stale entry with the same frame_num indicates a broken stream state
4421 pic= remove_short(h, s->current_picture_ptr->frame_num);
4423 unreference_pic(h, pic);
4424 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4427 if(h->short_ref_count)
4428 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4430 h->short_ref[0]= s->current_picture_ptr;
4431 h->short_ref[0]->long_ref=0;
4432 h->short_ref_count++;
4435 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header (H.264 clause 7.3.3.3)
 * into h->mmco[]. For IDR slices this reads no_output_of_prior_pics /
 * long_term_reference flags; otherwise it reads the adaptive MMCO list or,
 * in sliding-window mode, synthesizes a SHORT2UNUSED op once the DPB is full.
 */
4440 static int decode_ref_pic_marking(H264Context *h){
4441 MpegEncContext * const s = &h->s;
4444 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4445 s->broken_link= get_bits1(&s->gb) -1;
// long_term_reference_flag: -1 here means "not long-term"
4446 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4447 if(h->mmco[0].long_index == -1)
4450 h->mmco[0].opcode= MMCO_LONG;
4454 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4455 for(i= 0; i<MAX_MMCO_COUNT; i++) {
// NOTE(review): stray second ';' below is harmless
4456 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4458 h->mmco[i].opcode= opcode;
4459 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped
// to the max_frame_num range
4460 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4461 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4462 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4466 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4467 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4468 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4469 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4474 if(opcode > MMCO_LONG){
4475 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4478 if(opcode == MMCO_END)
// sliding window: once the DPB is at capacity, evict the oldest
// short-term reference (last entry in short_ref[])
4483 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4485 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4486 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4487 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count of the current picture (H.264 clause
 * 8.2.1) for all three poc_type modes, then stores the per-field and frame
 * POC values into the current Picture.
 */
4497 static int init_poc(H264Context *h){
4498 MpegEncContext * const s = &h->s;
4499 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// track frame_num wraparound via frame_num_offset
4502 if(h->nal_unit_type == NAL_IDR_SLICE){
4503 h->frame_num_offset= 0;
4505 if(h->frame_num < h->prev_frame_num)
4506 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4508 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: explicit poc_lsb in the bitstream, MSB inferred from wrap
4511 if(h->sps.poc_type==0){
4512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4514 if(h->nal_unit_type == NAL_IDR_SLICE){
4519 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4520 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4521 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4522 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4524 h->poc_msb = h->prev_poc_msb;
4525 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4527 field_poc[1] = h->poc_msb + h->poc_lsb;
4528 if(s->picture_structure == PICT_FRAME)
4529 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC derived from frame_num and the SPS offset cycle
4530 }else if(h->sps.poc_type==1){
4531 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4534 if(h->sps.poc_cycle_length != 0)
4535 abs_frame_num = h->frame_num_offset + h->frame_num;
4539 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4542 expected_delta_per_poc_cycle = 0;
4543 for(i=0; i < h->sps.poc_cycle_length; i++)
4544 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4546 if(abs_frame_num > 0){
4547 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4548 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4550 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4551 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4552 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4556 if(h->nal_ref_idc == 0)
4557 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4559 field_poc[0] = expectedpoc + h->delta_poc[0];
4560 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4562 if(s->picture_structure == PICT_FRAME)
4563 field_poc[1] += h->delta_poc[1];
// (poc_type 2, else-branch elided): POC follows decoding order,
// with non-reference pictures offset by -1
4566 if(h->nal_unit_type == NAL_IDR_SLICE){
4569 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4570 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store the computed POCs on the current picture
4576 if(s->picture_structure != PICT_BOTTOM_FIELD)
4577 s->current_picture_ptr->field_poc[0]= field_poc[0];
4578 if(s->picture_structure != PICT_TOP_FIELD)
4579 s->current_picture_ptr->field_poc[1]= field_poc[1];
4580 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4581 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4587 * decodes a slice header.
4588 * this will allso call MPV_common_init() and frame_start() as needed
/**
 * Decodes a slice header (H.264 clause 7.3.3): slice type, PPS/SPS lookup,
 * picture geometry and (first slice only) context/frame initialization,
 * frame_num, field/MBAFF structure, POC syntax, reference counts and list
 * reordering, weighted prediction tables, reference picture marking, QP and
 * deblocking parameters. Also calls MPV_common_init()/frame_start() as
 * needed, per the comment in the original doxygen block.
 */
4590 static int decode_slice_header(H264Context *h){
4591 MpegEncContext * const s = &h->s;
4592 int first_mb_in_slice, pps_id;
4593 int num_ref_idx_active_override_flag;
4594 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4596 int default_ref_list_done = 0;
4598 s->current_picture.reference= h->nal_ref_idc != 0;
4599 s->dropable= h->nal_ref_idc == 0;
4601 first_mb_in_slice= get_ue_golomb(&s->gb);
// slice_type 5..9 means "fixed for the whole picture" (value - 5)
4603 slice_type= get_ue_golomb(&s->gb);
4605 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4610 h->slice_type_fixed=1;
4612 h->slice_type_fixed=0;
4614 slice_type= slice_type_map[ slice_type ];
// I slices (and repeated slice types) can keep the default ref list
4615 if (slice_type == I_TYPE
4616 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4617 default_ref_list_done = 1;
4619 h->slice_type= slice_type;
4621 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// resolve PPS and its SPS; zeroed entries mean "never parsed"
4623 pps_id= get_ue_golomb(&s->gb);
4625 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4628 h->pps= h->pps_buffer[pps_id];
4629 if(h->pps.slice_group_count == 0){
4630 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4634 h->sps= h->sps_buffer[ h->pps.sps_id ];
4635 if(h->sps.log2_max_frame_num == 0){
4636 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4640 if(h->dequant_coeff_pps != pps_id){
4641 h->dequant_coeff_pps = pps_id;
4642 init_dequant_tables(h);
// picture geometry from the SPS (MBAFF doubles mb_height)
4645 s->mb_width= h->sps.mb_width;
4646 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4648 h->b_stride= s->mb_width*4;
4649 h->b8_stride= s->mb_width*2;
4651 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4652 if(h->sps.frame_mbs_only_flag)
4653 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4655 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4657 if (s->context_initialized
4658 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
// first slice ever: init MpegEncContext and the scan tables. If the
// IDCT permutes coefficients, the scan tables must be permuted to match.
4662 if (!s->context_initialized) {
4663 if (MPV_common_init(s) < 0)
4666 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4667 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4668 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4671 for(i=0; i<16; i++){
4672 #define T(x) (x>>2) | ((x<<2) & 0xF)
4673 h->zigzag_scan[i] = T(zigzag_scan[i]);
4674 h-> field_scan[i] = T( field_scan[i]);
4678 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4679 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4680 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4681 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4682 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4685 for(i=0; i<64; i++){
4686 #define T(x) (x>>3) | ((x&7)<<3)
4687 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4688 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4689 h->field_scan8x8[i] = T(field_scan8x8[i]);
4690 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// lossless (transform bypass) blocks at qp 0 use unpermuted scans
4694 if(h->sps.transform_bypass){ //FIXME same ugly
4695 h->zigzag_scan_q0 = zigzag_scan;
4696 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4697 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4698 h->field_scan_q0 = field_scan;
4699 h->field_scan8x8_q0 = field_scan8x8;
4700 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4702 h->zigzag_scan_q0 = h->zigzag_scan;
4703 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4704 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4705 h->field_scan_q0 = h->field_scan;
4706 h->field_scan8x8_q0 = h->field_scan8x8;
4707 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
// export dimensions/SAR/timebase to the AVCodecContext
4712 s->avctx->width = s->width;
4713 s->avctx->height = s->height;
4714 s->avctx->sample_aspect_ratio= h->sps.sar;
4715 if(!s->avctx->sample_aspect_ratio.den)
4716 s->avctx->sample_aspect_ratio.den = 1;
4718 if(h->sps.timing_info_present_flag){
4719 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// workaround for old x264 builds that wrote a wrong time_scale
4720 if(h->x264_build > 0 && h->x264_build < 44)
4721 s->avctx->time_base.den *= 2;
4722 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4723 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4727 if(h->slice_num == 0){
4728 if(frame_start(h) < 0)
4732 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4733 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// picture structure: frame, field (PAFF, unimplemented) or MBAFF frame
4736 h->mb_aff_frame = 0;
4737 if(h->sps.frame_mbs_only_flag){
4738 s->picture_structure= PICT_FRAME;
4740 if(get_bits1(&s->gb)) { //field_pic_flag
4741 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4742 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4744 s->picture_structure= PICT_FRAME;
4745 h->mb_aff_frame = h->sps.mb_aff;
4749 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4750 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4751 if(s->mb_y >= s->mb_height){
4755 if(s->picture_structure==PICT_FRAME){
4756 h->curr_pic_num= h->frame_num;
4757 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4759 h->curr_pic_num= 2*h->frame_num;
4760 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4763 if(h->nal_unit_type == NAL_IDR_SLICE){
4764 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC-related slice-header syntax, consumed later by init_poc()
4767 if(h->sps.poc_type==0){
4768 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4770 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4771 h->delta_poc_bottom= get_se_golomb(&s->gb);
4775 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4776 h->delta_poc[0]= get_se_golomb(&s->gb);
4778 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4779 h->delta_poc[1]= get_se_golomb(&s->gb);
4784 if(h->pps.redundant_pic_cnt_present){
4785 h->redundant_pic_count= get_ue_golomb(&s->gb);
4788 //set defaults, might be overridden a few lines later
4789 h->ref_count[0]= h->pps.ref_count[0];
4790 h->ref_count[1]= h->pps.ref_count[1];
4792 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4793 if(h->slice_type == B_TYPE){
4794 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4795 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4796 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4798 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4800 if(num_ref_idx_active_override_flag){
4801 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4802 if(h->slice_type==B_TYPE)
4803 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4805 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4806 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
// reference list construction, reordering and weighting
4812 if(!default_ref_list_done){
4813 fill_default_ref_list(h);
4816 if(decode_ref_pic_list_reordering(h) < 0)
4819 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4820 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4821 pred_weight_table(h);
4822 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4823 implicit_weight_table(h);
4827 if(s->current_picture.reference)
4828 decode_ref_pic_marking(h);
4831 fill_mbaff_ref_list(h);
4833 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4834 h->cabac_init_idc = get_ue_golomb(&s->gb);
// slice QP and (ignored) SP/SI switching syntax
4836 h->last_qscale_diff = 0;
4837 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4838 if(s->qscale<0 || s->qscale>51){
4839 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4842 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4843 //FIXME qscale / qp ... stuff
4844 if(h->slice_type == SP_TYPE){
4845 get_bits1(&s->gb); /* sp_for_switch_flag */
4847 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4848 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking filter control; disable_deblocking_filter_idc values 0/1
// are swapped into a boolean enable flag here
4851 h->deblocking_filter = 1;
4852 h->slice_alpha_c0_offset = 0;
4853 h->slice_beta_offset = 0;
4854 if( h->pps.deblocking_filter_parameters_present ) {
4855 h->deblocking_filter= get_ue_golomb(&s->gb);
4856 if(h->deblocking_filter < 2)
4857 h->deblocking_filter^= 1; // 1<->0
4859 if( h->deblocking_filter ) {
4860 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4861 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4864 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4865 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4866 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4867 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4868 h->deblocking_filter= 0;
// NOTE(review): the '?' bit-count below is a placeholder — presumably
// this line sits inside a disabled (#if 0) region not visible here;
// confirm against the full file before touching it
4871 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4872 slice_group_change_cycle= get_bits(&s->gb, ?);
4877 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4878 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4880 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4881 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4883 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4885 av_get_pict_type_char(h->slice_type),
4886 pps_id, h->frame_num,
4887 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4888 h->ref_count[0], h->ref_count[1],
4890 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4892 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// fast mode: cheaper 2-tap qpel interpolation for non-reference frames
4896 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4897 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4898 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4900 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4901 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads the CAVLC level_prefix: counts leading zero bits before the first
 * set bit in the bit cache and consumes them (plus the terminating one).
 * NOTE(review): the return statement is elided in this view; presumably it
 * returns log-1 (the number of leading zeros) — confirm in the full file.
 */
4910 static inline int get_level_prefix(GetBitContext *gb){
4914 OPEN_READER(re, gb);
4915 UPDATE_CACHE(re, gb);
4916 buf=GET_CACHE(re, gb);
// position of the first set bit from the MSB side of the 32-bit cache
4918 log= 32 - av_log2(buf);
4920 print_bin(buf>>(32-log), log);
4921 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4924 LAST_SKIP_BITS(re, gb, log);
4925 CLOSE_READER(re, gb);
/**
 * Returns whether the 8x8 transform may be used for the current macroblock:
 * every 8x8 sub-partition must be a full SUB_8X8, and direct sub-blocks
 * additionally require direct_8x8_inference_flag.
 * NOTE(review): the loop header and return are elided in this view.
 */
4930 static inline int get_dct8x8_allowed(H264Context *h){
4933 if(!IS_SUB_8X8(h->sub_mb_type[i])
4934 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4941 * decodes a residual block.
4942 * @param n block index
4943 * @param scantable scantable
4944 * @param max_coeff number of coefficients in the block
4945 * @return <0 if an error occured
/**
 * Decodes one CAVLC residual block (H.264 clause 9.2): coeff_token
 * (total_coeff + trailing_ones), trailing one signs, level codes with
 * adaptive suffix length, total_zeros and run_before, then writes the
 * (optionally dequantized via @p qmul) levels into @p block following
 * @p scantable.
 * @param n         block index (CHROMA_DC/LUMA_DC use dedicated VLC paths)
 * @param max_coeff number of coefficients in the block
 * @return <0 on bitstream error
 */
4947 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4948 MpegEncContext * const s = &h->s;
// selects one of 4 coeff_token VLC tables by the predicted nnz of neighbors
4949 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4951 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4953 //FIXME put trailing_onex into the context
4955 if(n == CHROMA_DC_BLOCK_INDEX){
4956 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4957 total_coeff= coeff_token>>2;
4959 if(n == LUMA_DC_BLOCK_INDEX){
4960 total_coeff= pred_non_zero_count(h, 0);
4961 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4962 total_coeff= coeff_token>>2;
4964 total_coeff= pred_non_zero_count(h, n);
4965 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4966 total_coeff= coeff_token>>2;
4967 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4971 //FIXME set last_non_zero?
// coeff_token packs trailing_ones in the low 2 bits
4976 trailing_ones= coeff_token&3;
4977 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4978 assert(total_coeff<=16);
// trailing ones: one sign bit each, magnitude 1
4980 for(i=0; i<trailing_ones; i++){
4981 level[i]= 1 - 2*get_bits1(gb);
// first non-trailing level: suffix_length starts at 0 or 1
4985 int level_code, mask;
4986 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4987 int prefix= get_level_prefix(gb);
4989 //first coefficient has suffix_length equal to 0 or 1
4990 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4992 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4994 level_code= (prefix<<suffix_length); //part
4995 }else if(prefix==14){
4997 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4999 level_code= prefix + get_bits(gb, 4); //part
5000 }else if(prefix==15){
5001 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
5002 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
5004 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// with <3 trailing ones, levels +-1 are impossible, shift the code
5008 if(trailing_ones < 3) level_code += 2;
// map even/odd level_code to signed level without a branch
5013 mask= -(level_code&1);
5014 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5017 //remaining coefficients have suffix_length > 0
5018 for(;i<total_coeff;i++) {
5019 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5020 prefix = get_level_prefix(gb);
5022 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5023 }else if(prefix==15){
5024 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5026 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5029 mask= -(level_code&1);
5030 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// grow suffix_length adaptively as level magnitudes increase
5031 if(level_code > suffix_limit[suffix_length])
// a full block has no zeros to distribute
5036 if(total_coeff == max_coeff)
5039 if(n == CHROMA_DC_BLOCK_INDEX)
5040 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5042 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// place levels back-to-front, inserting run_before zeros between them;
// this branch writes raw levels (no dequant, qmul presumably NULL here)
5045 coeff_num = zeros_left + total_coeff - 1;
5046 j = scantable[coeff_num];
5048 block[j] = level[0];
5049 for(i=1;i<total_coeff;i++) {
5052 else if(zeros_left < 7){
5053 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5055 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5057 zeros_left -= run_before;
5058 coeff_num -= 1 + run_before;
5059 j= scantable[ coeff_num ];
// same placement loop, but with dequantization: (level * qmul + 32) >> 6
5064 block[j] = (level[0] * qmul[j] + 32)>>6;
5065 for(i=1;i<total_coeff;i++) {
5068 else if(zeros_left < 7){
5069 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5071 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5073 zeros_left -= run_before;
5074 coeff_num -= 1 + run_before;
5075 j= scantable[ coeff_num ];
5077 block[j]= (level[i] * qmul[j] + 32)>>6;
5082 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * For a skipped MBAFF macroblock pair, infers the field decoding flag from
 * the left neighbor (if in the same slice), else the top neighbor, per the
 * MBAFF inference rule for mb_field_decoding_flag.
 */
5089 static void predict_field_decoding_flag(H264Context *h){
5090 MpegEncContext * const s = &h->s;
5091 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// neighbor mb_type is only usable if it belongs to the current slice
5092 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5093 ? s->current_picture.mb_type[mb_xy-1]
5094 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5095 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5097 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5101 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears the coefficient caches,
 * derives the skip motion (direct prediction for B, pskip median prediction
 * for P), and writes motion/type/qscale back to the current picture.
 */
5103 static void decode_mb_skip(H264Context *h){
5104 MpegEncContext * const s = &h->s;
5105 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// a skipped MB has no residual: zero all non-zero-count state
5108 memset(h->non_zero_count[mb_xy], 0, 16);
5109 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5112 mb_type|= MB_TYPE_INTERLACED;
5114 if( h->slice_type == B_TYPE )
5116 // just for fill_caches. pred_direct_motion will set the real mb_type
5117 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5119 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5120 pred_direct_motion(h, &mb_type);
5121 mb_type|= MB_TYPE_SKIP;
// (P_SKIP path, else-branch boundary elided): 16x16 list-0 prediction
// with ref 0 and the P-skip predicted motion vector
5126 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5128 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5129 pred_pskip_motion(h, &mx, &my);
5130 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5131 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// commit decoded state for this macroblock
5134 write_back_motion(h, mb_type);
5135 s->current_picture.mb_type[mb_xy]= mb_type;
5136 s->current_picture.qscale_table[mb_xy]= s->qscale;
5137 h->slice_table[ mb_xy ]= h->slice_num;
5138 h->prev_mb_skipped= 1;
5142 * decodes a macroblock
5143 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5145 static int decode_mb_cavlc(H264Context *h){
5146 MpegEncContext * const s = &h->s;
5147 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5148 int mb_type, partition_count, cbp;
5149 int dct8x8_allowed= h->pps.transform_8x8_mode;
5151 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5153 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5154 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5156 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5157 if(s->mb_skip_run==-1)
5158 s->mb_skip_run= get_ue_golomb(&s->gb);
5160 if (s->mb_skip_run--) {
5161 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5162 if(s->mb_skip_run==0)
5163 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5165 predict_field_decoding_flag(h);
5172 if( (s->mb_y&1) == 0 )
5173 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5175 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5177 h->prev_mb_skipped= 0;
5179 mb_type= get_ue_golomb(&s->gb);
5180 if(h->slice_type == B_TYPE){
5182 partition_count= b_mb_type_info[mb_type].partition_count;
5183 mb_type= b_mb_type_info[mb_type].type;
5186 goto decode_intra_mb;
5188 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5190 partition_count= p_mb_type_info[mb_type].partition_count;
5191 mb_type= p_mb_type_info[mb_type].type;
5194 goto decode_intra_mb;
5197 assert(h->slice_type == I_TYPE);
5200 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5204 cbp= i_mb_type_info[mb_type].cbp;
5205 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5206 mb_type= i_mb_type_info[mb_type].type;
5210 mb_type |= MB_TYPE_INTERLACED;
5212 h->slice_table[ mb_xy ]= h->slice_num;
5214 if(IS_INTRA_PCM(mb_type)){
5217 // we assume these blocks are very rare so we dont optimize it
5218 align_get_bits(&s->gb);
5220 // The pixels are stored in the same order as levels in h->mb array.
5221 for(y=0; y<16; y++){
5222 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5223 for(x=0; x<16; x++){
5224 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5225 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5229 const int index= 256 + 4*(y&3) + 32*(y>>2);
5231 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5232 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5236 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5238 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5239 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5243 // In deblocking, the quantizer is 0
5244 s->current_picture.qscale_table[mb_xy]= 0;
5245 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5246 // All coeffs are present
5247 memset(h->non_zero_count[mb_xy], 16, 16);
5249 s->current_picture.mb_type[mb_xy]= mb_type;
5254 h->ref_count[0] <<= 1;
5255 h->ref_count[1] <<= 1;
5258 fill_caches(h, mb_type, 0);
5261 if(IS_INTRA(mb_type)){
5262 // init_top_left_availability(h);
5263 if(IS_INTRA4x4(mb_type)){
5266 if(dct8x8_allowed && get_bits1(&s->gb)){
5267 mb_type |= MB_TYPE_8x8DCT;
5271 // fill_intra4x4_pred_table(h);
5272 for(i=0; i<16; i+=di){
5273 int mode= pred_intra_mode(h, i);
5275 if(!get_bits1(&s->gb)){
5276 const int rem_mode= get_bits(&s->gb, 3);
5277 mode = rem_mode + (rem_mode >= mode);
5281 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5283 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5285 write_back_intra_pred_mode(h);
5286 if( check_intra4x4_pred_mode(h) < 0)
5289 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5290 if(h->intra16x16_pred_mode < 0)
5293 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5295 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5296 if(h->chroma_pred_mode < 0)
5298 }else if(partition_count==4){
5299 int i, j, sub_partition_count[4], list, ref[2][4];
5301 if(h->slice_type == B_TYPE){
5303 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5304 if(h->sub_mb_type[i] >=13){
5305 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5308 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5309 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5311 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5312 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5313 pred_direct_motion(h, &mb_type);
5314 h->ref_cache[0][scan8[4]] =
5315 h->ref_cache[1][scan8[4]] =
5316 h->ref_cache[0][scan8[12]] =
5317 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5320 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5322 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5323 if(h->sub_mb_type[i] >=4){
5324 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5327 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5328 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5332 for(list=0; list<2; list++){
5333 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5334 if(ref_count == 0) continue;
5336 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5337 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5338 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5347 dct8x8_allowed = get_dct8x8_allowed(h);
5349 for(list=0; list<2; list++){
5350 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5351 if(ref_count == 0) continue;
5354 if(IS_DIRECT(h->sub_mb_type[i])) {
5355 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5358 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5359 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5361 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5362 const int sub_mb_type= h->sub_mb_type[i];
5363 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5364 for(j=0; j<sub_partition_count[i]; j++){
5366 const int index= 4*i + block_width*j;
5367 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5368 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5369 mx += get_se_golomb(&s->gb);
5370 my += get_se_golomb(&s->gb);
5371 tprintf("final mv:%d %d\n", mx, my);
5373 if(IS_SUB_8X8(sub_mb_type)){
5374 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5375 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5376 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5377 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5378 }else if(IS_SUB_8X4(sub_mb_type)){
5379 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5380 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5381 }else if(IS_SUB_4X8(sub_mb_type)){
5382 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5383 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5385 assert(IS_SUB_4X4(sub_mb_type));
5386 mv_cache[ 0 ][0]= mx;
5387 mv_cache[ 0 ][1]= my;
5391 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5397 }else if(IS_DIRECT(mb_type)){
5398 pred_direct_motion(h, &mb_type);
5399 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5401 int list, mx, my, i;
5402 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5403 if(IS_16X16(mb_type)){
5404 for(list=0; list<2; list++){
5405 if(h->ref_count[list]>0){
5406 if(IS_DIR(mb_type, 0, list)){
5407 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5408 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5410 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5413 for(list=0; list<2; list++){
5414 if(IS_DIR(mb_type, 0, list)){
5415 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5416 mx += get_se_golomb(&s->gb);
5417 my += get_se_golomb(&s->gb);
5418 tprintf("final mv:%d %d\n", mx, my);
5420 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5422 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5425 else if(IS_16X8(mb_type)){
5426 for(list=0; list<2; list++){
5427 if(h->ref_count[list]>0){
5429 if(IS_DIR(mb_type, i, list)){
5430 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5431 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5433 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5437 for(list=0; list<2; list++){
5439 if(IS_DIR(mb_type, i, list)){
5440 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5441 mx += get_se_golomb(&s->gb);
5442 my += get_se_golomb(&s->gb);
5443 tprintf("final mv:%d %d\n", mx, my);
5445 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5447 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5451 assert(IS_8X16(mb_type));
5452 for(list=0; list<2; list++){
5453 if(h->ref_count[list]>0){
5455 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5456 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5457 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5459 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5463 for(list=0; list<2; list++){
5465 if(IS_DIR(mb_type, i, list)){
5466 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5467 mx += get_se_golomb(&s->gb);
5468 my += get_se_golomb(&s->gb);
5469 tprintf("final mv:%d %d\n", mx, my);
5471 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5473 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5479 if(IS_INTER(mb_type))
5480 write_back_motion(h, mb_type);
5482 if(!IS_INTRA16x16(mb_type)){
5483 cbp= get_ue_golomb(&s->gb);
5485 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5489 if(IS_INTRA4x4(mb_type))
5490 cbp= golomb_to_intra4x4_cbp[cbp];
5492 cbp= golomb_to_inter_cbp[cbp];
5496 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5497 if(get_bits1(&s->gb))
5498 mb_type |= MB_TYPE_8x8DCT;
5500 s->current_picture.mb_type[mb_xy]= mb_type;
5502 if(cbp || IS_INTRA16x16(mb_type)){
5503 int i8x8, i4x4, chroma_idx;
5504 int chroma_qp, dquant;
5505 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5506 const uint8_t *scan, *scan8x8, *dc_scan;
5508 // fill_non_zero_count_cache(h);
5510 if(IS_INTERLACED(mb_type)){
5511 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5512 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5513 dc_scan= luma_dc_field_scan;
5515 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5516 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5517 dc_scan= luma_dc_zigzag_scan;
5520 dquant= get_se_golomb(&s->gb);
5522 if( dquant > 25 || dquant < -26 ){
5523 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5527 s->qscale += dquant;
5528 if(((unsigned)s->qscale) > 51){
5529 if(s->qscale<0) s->qscale+= 52;
5530 else s->qscale-= 52;
5533 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5534 if(IS_INTRA16x16(mb_type)){
5535 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5536 return -1; //FIXME continue if partitioned and other return -1 too
5539 assert((cbp&15) == 0 || (cbp&15) == 15);
5542 for(i8x8=0; i8x8<4; i8x8++){
5543 for(i4x4=0; i4x4<4; i4x4++){
5544 const int index= i4x4 + 4*i8x8;
5545 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5551 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5554 for(i8x8=0; i8x8<4; i8x8++){
5555 if(cbp & (1<<i8x8)){
5556 if(IS_8x8DCT(mb_type)){
5557 DCTELEM *buf = &h->mb[64*i8x8];
5559 for(i4x4=0; i4x4<4; i4x4++){
5560 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5561 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5564 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5565 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5567 for(i4x4=0; i4x4<4; i4x4++){
5568 const int index= i4x4 + 4*i8x8;
5570 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5576 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5577 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5583 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5584 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5590 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5591 for(i4x4=0; i4x4<4; i4x4++){
5592 const int index= 16 + 4*chroma_idx + i4x4;
5593 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5599 uint8_t * const nnz= &h->non_zero_count_cache[0];
5600 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5601 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5604 uint8_t * const nnz= &h->non_zero_count_cache[0];
5605 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5606 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5607 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5609 s->current_picture.qscale_table[mb_xy]= s->qscale;
5610 write_back_non_zero_count(h);
5613 h->ref_count[0] >>= 1;
5614 h->ref_count[1] >>= 1;
5620 static int decode_cabac_field_decoding_flag(H264Context *h) {
5621 MpegEncContext * const s = &h->s;
5622 const int mb_x = s->mb_x;
5623 const int mb_y = s->mb_y & ~1;
5624 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5625 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5627 unsigned int ctx = 0;
5629 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5636 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5640 uint8_t *state= &h->cabac_state[ctx_base];
5644 MpegEncContext * const s = &h->s;
5645 const int mba_xy = h->left_mb_xy[0];
5646 const int mbb_xy = h->top_mb_xy;
5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5652 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5653 return 0; /* I4x4 */
5656 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5657 return 0; /* I4x4 */
5660 if( get_cabac_terminate( &h->cabac ) )
5661 return 25; /* PCM */
5663 mb_type = 1; /* I16x16 */
5664 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5665 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5666 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5667 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5668 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5672 static int decode_cabac_mb_type( H264Context *h ) {
5673 MpegEncContext * const s = &h->s;
5675 if( h->slice_type == I_TYPE ) {
5676 return decode_cabac_intra_mb_type(h, 3, 1);
5677 } else if( h->slice_type == P_TYPE ) {
5678 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5680 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5681 /* P_L0_D16x16, P_8x8 */
5682 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5684 /* P_L0_D8x16, P_L0_D16x8 */
5685 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5690 } else if( h->slice_type == B_TYPE ) {
5691 const int mba_xy = h->left_mb_xy[0];
5692 const int mbb_xy = h->top_mb_xy;
5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5701 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5702 return 0; /* B_Direct_16x16 */
5704 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5705 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5708 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5709 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5710 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5711 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5714 else if( bits == 13 ) {
5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5716 } else if( bits == 14 )
5717 return 11; /* B_L1_L0_8x16 */
5718 else if( bits == 15 )
5719 return 22; /* B_8x8 */
5721 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5724 /* TODO SI/SP frames? */
5729 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5730 MpegEncContext * const s = &h->s;
5734 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5735 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5738 && h->slice_table[mba_xy] == h->slice_num
5739 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5740 mba_xy += s->mb_stride;
5742 mbb_xy = mb_xy - s->mb_stride;
5744 && h->slice_table[mbb_xy] == h->slice_num
5745 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5746 mbb_xy -= s->mb_stride;
5748 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5750 int mb_xy = mb_x + mb_y*s->mb_stride;
5752 mbb_xy = mb_xy - s->mb_stride;
5755 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5760 if( h->slice_type == B_TYPE )
5762 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5768 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5771 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5772 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5773 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5775 if( mode >= pred_mode )
5781 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5782 const int mba_xy = h->left_mb_xy[0];
5783 const int mbb_xy = h->top_mb_xy;
5787 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5788 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5794 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5805 static const uint8_t block_idx_x[16] = {
5806 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5808 static const uint8_t block_idx_y[16] = {
5809 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5811 static const uint8_t block_idx_xy[4][4] = {
5818 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5823 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5825 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5828 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5833 x = block_idx_x[4*i8x8];
5834 y = block_idx_y[4*i8x8];
5838 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5839 cbp_a = h->left_cbp;
5840 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5846 /* No need to test for skip as we put 0 for skip block */
5847 /* No need to test for IPCM as we put 1 for IPCM block */
5849 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5850 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5855 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5856 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5860 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5866 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5870 cbp_a = (h->left_cbp>>4)&0x03;
5871 cbp_b = (h-> top_cbp>>4)&0x03;
5874 if( cbp_a > 0 ) ctx++;
5875 if( cbp_b > 0 ) ctx += 2;
5876 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5880 if( cbp_a == 2 ) ctx++;
5881 if( cbp_b == 2 ) ctx += 2;
5882 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5884 static int decode_cabac_mb_dqp( H264Context *h) {
5885 MpegEncContext * const s = &h->s;
5891 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5895 if( h->last_qscale_diff != 0 )
5898 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5904 if(val > 102) //prevent infinite loop
5911 return -(val + 1)/2;
5913 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5914 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5916 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5918 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5922 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5924 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5925 return 0; /* B_Direct_8x8 */
5926 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5927 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5929 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5930 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5931 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5934 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5935 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5940 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5944 int refa = h->ref_cache[list][scan8[n] - 1];
5945 int refb = h->ref_cache[list][scan8[n] - 8];
5949 if( h->slice_type == B_TYPE) {
5950 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5952 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5961 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5971 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5972 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5973 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5974 int ctxbase = (l == 0) ? 40 : 47;
5979 else if( amvd > 32 )
5984 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5989 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5997 while( get_cabac_bypass( &h->cabac ) ) {
6002 if( get_cabac_bypass( &h->cabac ) )
6006 return get_cabac_bypass_sign( &h->cabac, -mvd );
6009 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6014 nza = h->left_cbp&0x100;
6015 nzb = h-> top_cbp&0x100;
6016 } else if( cat == 1 || cat == 2 ) {
6017 nza = h->non_zero_count_cache[scan8[idx] - 1];
6018 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6019 } else if( cat == 3 ) {
6020 nza = (h->left_cbp>>(6+idx))&0x01;
6021 nzb = (h-> top_cbp>>(6+idx))&0x01;
6024 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6025 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6034 return ctx + 4 * cat;
6037 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6038 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6039 static const int significant_coeff_flag_offset[2][6] = {
6040 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6041 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6043 static const int last_coeff_flag_offset[2][6] = {
6044 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6045 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6047 static const int coeff_abs_level_m1_offset[6] = {
6048 227+0, 227+10, 227+20, 227+30, 227+39, 426
6050 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6051 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6052 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6053 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6054 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6055 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6056 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6057 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6058 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6060 static const uint8_t last_coeff_flag_offset_8x8[63] = {
6061 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6062 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6063 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6064 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6070 int coeff_count = 0;
6073 int abslevelgt1 = 0;
6075 uint8_t *significant_coeff_ctx_base;
6076 uint8_t *last_coeff_ctx_base;
6077 uint8_t *abs_level_m1_ctx_base;
6080 #define CABAC_ON_STACK
6082 #ifdef CABAC_ON_STACK
6085 cc.range = h->cabac.range;
6086 cc.low = h->cabac.low;
6087 cc.bytestream= h->cabac.bytestream;
6089 #define CC &h->cabac
6093 /* cat: 0-> DC 16x16 n = 0
6094 * 1-> AC 16x16 n = luma4x4idx
6095 * 2-> Luma4x4 n = luma4x4idx
6096 * 3-> DC Chroma n = iCbCr
6097 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6098 * 5-> Luma8x8 n = 4 * luma8x8idx
6101 /* read coded block flag */
6103 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6104 if( cat == 1 || cat == 2 )
6105 h->non_zero_count_cache[scan8[n]] = 0;
6107 h->non_zero_count_cache[scan8[16+n]] = 0;
6108 #ifdef CABAC_ON_STACK
6109 h->cabac.range = cc.range ;
6110 h->cabac.low = cc.low ;
6111 h->cabac.bytestream= cc.bytestream;
6117 significant_coeff_ctx_base = h->cabac_state
6118 + significant_coeff_flag_offset[MB_FIELD][cat];
6119 last_coeff_ctx_base = h->cabac_state
6120 + last_coeff_flag_offset[MB_FIELD][cat];
6121 abs_level_m1_ctx_base = h->cabac_state
6122 + coeff_abs_level_m1_offset[cat];
6125 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6126 for(last= 0; last < coefs; last++) { \
6127 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6128 if( get_cabac( CC, sig_ctx )) { \
6129 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6130 index[coeff_count++] = last; \
6131 if( get_cabac( CC, last_ctx ) ) { \
6137 if( last == max_coeff -1 ) {\
6138 index[coeff_count++] = last;\
6140 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6141 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6144 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6146 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6149 assert(coeff_count > 0);
6152 h->cbp_table[mb_xy] |= 0x100;
6153 else if( cat == 1 || cat == 2 )
6154 h->non_zero_count_cache[scan8[n]] = coeff_count;
6156 h->cbp_table[mb_xy] |= 0x40 << n;
6158 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6161 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6164 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6165 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6166 int j= scantable[index[coeff_count]];
6168 if( get_cabac( CC, ctx ) == 0 ) {
6170 block[j] = get_cabac_bypass_sign( CC, -1);
6172 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6178 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6179 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
6183 if( coeff_abs >= 15 ) {
6185 while( get_cabac_bypass( CC ) ) {
6191 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6197 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6198 else block[j] = coeff_abs;
6200 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6201 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6207 #ifdef CABAC_ON_STACK
6208 h->cabac.range = cc.range ;
6209 h->cabac.low = cc.low ;
6210 h->cabac.bytestream= cc.bytestream;
6215 static void inline compute_mb_neighbors(H264Context *h)
6217 MpegEncContext * const s = &h->s;
6218 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6219 h->top_mb_xy = mb_xy - s->mb_stride;
6220 h->left_mb_xy[0] = mb_xy - 1;
6222 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6223 const int top_pair_xy = pair_xy - s->mb_stride;
6224 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6225 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6226 const int curr_mb_frame_flag = !MB_FIELD;
6227 const int bottom = (s->mb_y & 1);
6229 ? !curr_mb_frame_flag // bottom macroblock
6230 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6232 h->top_mb_xy -= s->mb_stride;
6234 if (left_mb_frame_flag != curr_mb_frame_flag) {
6235 h->left_mb_xy[0] = pair_xy - 1;
6242 * decodes a macroblock
6243 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6245 static int decode_mb_cabac(H264Context *h) {
6246 MpegEncContext * const s = &h->s;
6247 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6248 int mb_type, partition_count, cbp = 0;
6249 int dct8x8_allowed= h->pps.transform_8x8_mode;
6251 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6253 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6254 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6256 /* a skipped mb needs the aff flag from the following mb */
6257 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6258 predict_field_decoding_flag(h);
6259 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6260 skip = h->next_mb_skipped;
6262 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6263 /* read skip flags */
6265 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6266 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6267 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6268 if(h->next_mb_skipped)
6269 predict_field_decoding_flag(h);
6271 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6276 h->cbp_table[mb_xy] = 0;
6277 h->chroma_pred_mode_table[mb_xy] = 0;
6278 h->last_qscale_diff = 0;
6285 if( (s->mb_y&1) == 0 )
6287 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6289 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6291 h->prev_mb_skipped = 0;
6293 compute_mb_neighbors(h);
6294 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6295 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6299 if( h->slice_type == B_TYPE ) {
6301 partition_count= b_mb_type_info[mb_type].partition_count;
6302 mb_type= b_mb_type_info[mb_type].type;
6305 goto decode_intra_mb;
6307 } else if( h->slice_type == P_TYPE ) {
6309 partition_count= p_mb_type_info[mb_type].partition_count;
6310 mb_type= p_mb_type_info[mb_type].type;
6313 goto decode_intra_mb;
6316 assert(h->slice_type == I_TYPE);
6318 partition_count = 0;
6319 cbp= i_mb_type_info[mb_type].cbp;
6320 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6321 mb_type= i_mb_type_info[mb_type].type;
6324 mb_type |= MB_TYPE_INTERLACED;
6326 h->slice_table[ mb_xy ]= h->slice_num;
6328 if(IS_INTRA_PCM(mb_type)) {
6332 // We assume these blocks are very rare so we dont optimize it.
6333 // FIXME The two following lines get the bitstream position in the cabac
6334 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6335 ptr= h->cabac.bytestream;
6336 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6338 // The pixels are stored in the same order as levels in h->mb array.
6339 for(y=0; y<16; y++){
6340 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6341 for(x=0; x<16; x++){
6342 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6343 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6347 const int index= 256 + 4*(y&3) + 32*(y>>2);
6349 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6350 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6354 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6356 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6357 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6361 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6363 // All blocks are present
6364 h->cbp_table[mb_xy] = 0x1ef;
6365 h->chroma_pred_mode_table[mb_xy] = 0;
6366 // In deblocking, the quantizer is 0
6367 s->current_picture.qscale_table[mb_xy]= 0;
6368 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6369 // All coeffs are present
6370 memset(h->non_zero_count[mb_xy], 16, 16);
6371 s->current_picture.mb_type[mb_xy]= mb_type;
6376 h->ref_count[0] <<= 1;
6377 h->ref_count[1] <<= 1;
6380 fill_caches(h, mb_type, 0);
6382 if( IS_INTRA( mb_type ) ) {
6384 if( IS_INTRA4x4( mb_type ) ) {
6385 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6386 mb_type |= MB_TYPE_8x8DCT;
6387 for( i = 0; i < 16; i+=4 ) {
6388 int pred = pred_intra_mode( h, i );
6389 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6390 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6393 for( i = 0; i < 16; i++ ) {
6394 int pred = pred_intra_mode( h, i );
6395 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6397 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6400 write_back_intra_pred_mode(h);
6401 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6403 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6404 if( h->intra16x16_pred_mode < 0 ) return -1;
6406 h->chroma_pred_mode_table[mb_xy] =
6407 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6409 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6410 if( h->chroma_pred_mode < 0 ) return -1;
6411 } else if( partition_count == 4 ) {
6412 int i, j, sub_partition_count[4], list, ref[2][4];
6414 if( h->slice_type == B_TYPE ) {
6415 for( i = 0; i < 4; i++ ) {
6416 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6417 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6418 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6420 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6421 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6422 pred_direct_motion(h, &mb_type);
6423 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6424 for( i = 0; i < 4; i++ )
6425 if( IS_DIRECT(h->sub_mb_type[i]) )
6426 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6430 for( i = 0; i < 4; i++ ) {
6431 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6432 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6433 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6437 for( list = 0; list < 2; list++ ) {
6438 if( h->ref_count[list] > 0 ) {
6439 for( i = 0; i < 4; i++ ) {
6440 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6441 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6442 if( h->ref_count[list] > 1 )
6443 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6449 h->ref_cache[list][ scan8[4*i]+1 ]=
6450 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6456 dct8x8_allowed = get_dct8x8_allowed(h);
6458 for(list=0; list<2; list++){
6460 if(IS_DIRECT(h->sub_mb_type[i])){
6461 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6464 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6466 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6467 const int sub_mb_type= h->sub_mb_type[i];
6468 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6469 for(j=0; j<sub_partition_count[i]; j++){
6472 const int index= 4*i + block_width*j;
6473 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6474 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6475 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6477 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6478 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6479 tprintf("final mv:%d %d\n", mx, my);
6481 if(IS_SUB_8X8(sub_mb_type)){
6482 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6483 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6484 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6485 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6487 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6488 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6489 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6490 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6491 }else if(IS_SUB_8X4(sub_mb_type)){
6492 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6493 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6495 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6496 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6497 }else if(IS_SUB_4X8(sub_mb_type)){
6498 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6499 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6501 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6502 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6504 assert(IS_SUB_4X4(sub_mb_type));
6505 mv_cache[ 0 ][0]= mx;
6506 mv_cache[ 0 ][1]= my;
6508 mvd_cache[ 0 ][0]= mx - mpx;
6509 mvd_cache[ 0 ][1]= my - mpy;
6513 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6514 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6515 p[0] = p[1] = p[8] = p[9] = 0;
6516 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6520 } else if( IS_DIRECT(mb_type) ) {
6521 pred_direct_motion(h, &mb_type);
6522 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6523 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6524 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6526 int list, mx, my, i, mpx, mpy;
6527 if(IS_16X16(mb_type)){
6528 for(list=0; list<2; list++){
6529 if(IS_DIR(mb_type, 0, list)){
6530 if(h->ref_count[list] > 0 ){
6531 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6532 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6535 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6537 for(list=0; list<2; list++){
6538 if(IS_DIR(mb_type, 0, list)){
6539 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6541 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6542 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6543 tprintf("final mv:%d %d\n", mx, my);
6545 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6546 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6548 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6551 else if(IS_16X8(mb_type)){
6552 for(list=0; list<2; list++){
6553 if(h->ref_count[list]>0){
6555 if(IS_DIR(mb_type, i, list)){
6556 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6557 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6559 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6563 for(list=0; list<2; list++){
6565 if(IS_DIR(mb_type, i, list)){
6566 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6567 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6568 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6569 tprintf("final mv:%d %d\n", mx, my);
6571 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6572 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6574 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6575 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6580 assert(IS_8X16(mb_type));
6581 for(list=0; list<2; list++){
6582 if(h->ref_count[list]>0){
6584 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6585 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6586 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6588 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6592 for(list=0; list<2; list++){
6594 if(IS_DIR(mb_type, i, list)){
6595 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6596 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6597 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6599 tprintf("final mv:%d %d\n", mx, my);
6600 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6601 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6603 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6604 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6611 if( IS_INTER( mb_type ) ) {
6612 h->chroma_pred_mode_table[mb_xy] = 0;
6613 write_back_motion( h, mb_type );
6616 if( !IS_INTRA16x16( mb_type ) ) {
6617 cbp = decode_cabac_mb_cbp_luma( h );
6618 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6621 h->cbp_table[mb_xy] = h->cbp = cbp;
6623 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6624 if( decode_cabac_mb_transform_size( h ) )
6625 mb_type |= MB_TYPE_8x8DCT;
6627 s->current_picture.mb_type[mb_xy]= mb_type;
6629 if( cbp || IS_INTRA16x16( mb_type ) ) {
6630 const uint8_t *scan, *scan8x8, *dc_scan;
6633 if(IS_INTERLACED(mb_type)){
6634 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6635 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6636 dc_scan= luma_dc_field_scan;
6638 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6639 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6640 dc_scan= luma_dc_zigzag_scan;
6643 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6644 if( dqp == INT_MIN ){
6645 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6649 if(((unsigned)s->qscale) > 51){
6650 if(s->qscale<0) s->qscale+= 52;
6651 else s->qscale-= 52;
6653 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6655 if( IS_INTRA16x16( mb_type ) ) {
6657 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6658 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6661 for( i = 0; i < 16; i++ ) {
6662 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6663 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6667 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6671 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6672 if( cbp & (1<<i8x8) ) {
6673 if( IS_8x8DCT(mb_type) ) {
6674 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6675 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6678 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6679 const int index = 4*i8x8 + i4x4;
6680 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6682 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6684 //STOP_TIMER("decode_residual")
6687 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6688 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6695 for( c = 0; c < 2; c++ ) {
6696 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6697 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6704 for( c = 0; c < 2; c++ ) {
6705 for( i = 0; i < 4; i++ ) {
6706 const int index = 16 + 4 * c + i;
6707 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6708 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6713 uint8_t * const nnz= &h->non_zero_count_cache[0];
6714 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6715 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6718 uint8_t * const nnz= &h->non_zero_count_cache[0];
6719 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6720 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6721 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6722 h->last_qscale_diff = 0;
6725 s->current_picture.qscale_table[mb_xy]= s->qscale;
6726 write_back_non_zero_count(h);
6729 h->ref_count[0] >>= 1;
6730 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge: filtering acts across columns, i.e. on the
 * pix[-1] | pix[0] boundary, for an edge 16 pixels tall.
 * pix points at the first row of the edge; stride is the luma line size.
 * bS[4] holds one boundary strength per 4-pixel run; qp is the averaged QP of
 * the two macroblocks sharing the edge. qp plus the slice alpha/beta offsets
 * is clipped into [0,51] to index the alpha/beta/tc0 strength tables.
 * NOTE(review): several interior lines (the bS<4 branch header, loop headers,
 * closing braces) are missing from this extract; comments describe only what
 * is visible here. */
6737 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6739 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6740 const int alpha = alpha_table[index_a];
6741 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* Normal-filter path (its bS<4 guard is elided above): build one tc per
 * 4-pixel run and hand off to the DSP routine. bS==0 yields tc=-1, which
 * presumably tells the DSP routine to leave that run untouched -- TODO
 * confirm against the h264dsp loop-filter contract. */
6746 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6747 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6749 /* 16px edge length, because bS=4 is triggered by being at
6750 * the edge of an intra MB, so all 4 bS are the same */
/* Strong (bS==4) filter, done per pixel row in C. */
6751 for( d = 0; d < 16; d++ ) {
6752 const int p0 = pix[-1];
6753 const int p1 = pix[-2];
6754 const int p2 = pix[-3];
6756 const int q0 = pix[0];
6757 const int q1 = pix[1];
6758 const int q2 = pix[2];
/* Filter only where the edge looks like a quantization artifact rather
 * than a real image edge (alpha/beta thresholds). */
6760 if( FFABS( p0 - q0 ) < alpha &&
6761 FFABS( p1 - p0 ) < beta &&
6762 FFABS( q1 - q0 ) < beta ) {
/* Small p0-q0 step: each side may additionally use the 3-pixel-wide
 * strong filter when its second neighbour is also close (|p2-p0|<beta,
 * |q2-q0|<beta); otherwise only p0/q0 get the weaker 2-tap update. */
6764 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6765 if( FFABS( p2 - p0 ) < beta)
6767 const int p3 = pix[-4];
6769 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6770 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6771 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6774 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6776 if( FFABS( q2 - q0 ) < beta)
6778 const int q3 = pix[3];
6780 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6781 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6782 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6785 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Large p0-q0 step: only the boundary pixels are softened. */
6789 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6790 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6792 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge (pix[-1] | pix[0] boundary).
 * Same table lookups as the luma variant; note the chroma convention of
 * tc0+1 (and 0 instead of -1 for bS==0) passed to the DSP routine.
 * NOTE(review): the bS<4 guard and the tc[] loop header are elided in this
 * extract; the final line is the intra (bS==4) strong-filter call. */
6798 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6800 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6801 const int alpha = alpha_table[index_a];
6802 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6807 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6808 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6810 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * Unlike the non-MBAFF variant this works row by row, because the two
 * macroblocks of the pair contribute 8 boundary strengths (bS[8]) and two
 * quantizers (qp[2]); which bS/qp applies to a given row depends on whether
 * the pair is field- or frame-coded (MB_FIELD).
 * NOTE(review): many interior lines (variable declarations, some branch
 * headers, closing braces) are elided in this extract. */
6814 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6816 for( i = 0; i < 16; i++, pix += stride) {
6822 int bS_index = (i >> 1);
6825 bS_index |= (i & 1);
/* bS==0: this row needs no filtering. */
6828 if( bS[bS_index] == 0 ) {
/* Pick which of the two QPs applies: by half (i>>3) for field pairs,
 * by row parity (i&1) for frame pairs. */
6832 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6833 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6834 alpha = alpha_table[index_a];
6835 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Normal filter (bS 1..3): clipped-delta update of p0/q0, plus optional
 * p1/q1 corrections when the second neighbours are close enough. */
6837 if( bS[bS_index] < 4 ) {
6838 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6839 const int p0 = pix[-1];
6840 const int p1 = pix[-2];
6841 const int p2 = pix[-3];
6842 const int q0 = pix[0];
6843 const int q1 = pix[1];
6844 const int q2 = pix[2];
6846 if( FFABS( p0 - q0 ) < alpha &&
6847 FFABS( p1 - p0 ) < beta &&
6848 FFABS( q1 - q0 ) < beta ) {
6852 if( FFABS( p2 - p0 ) < beta ) {
6853 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6856 if( FFABS( q2 - q0 ) < beta ) {
6857 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6861 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6862 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6863 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6864 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS==4): same per-row math as filter_mb_edgev. */
6867 const int p0 = pix[-1];
6868 const int p1 = pix[-2];
6869 const int p2 = pix[-3];
6871 const int q0 = pix[0];
6872 const int q1 = pix[1];
6873 const int q2 = pix[2];
6875 if( FFABS( p0 - q0 ) < alpha &&
6876 FFABS( p1 - p0 ) < beta &&
6877 FFABS( q1 - q0 ) < beta ) {
6879 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6880 if( FFABS( p2 - p0 ) < beta)
6882 const int p3 = pix[-4];
6884 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6885 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6886 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6889 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6891 if( FFABS( q2 - q0 ) < beta)
6893 const int q3 = pix[3];
6895 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6896 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6897 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6900 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6904 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6905 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6907 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: deblock the first vertical
 * chroma edge of an MBAFF pair, row by row over 8 rows, choosing bS and qp
 * per row. Only p0/q0 are ever modified for chroma.
 * NOTE(review): interior lines (bS_index computation, declarations, braces)
 * are elided in this extract. */
6912 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6914 for( i = 0; i < 8; i++, pix += stride) {
6922 if( bS[bS_index] == 0 ) {
/* Field pairs split by half (i>>2), frame pairs by row parity (i&1). */
6926 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6927 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6928 alpha = alpha_table[index_a];
6929 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* Normal filter: chroma uses tc0+1 and the clipped-delta update. */
6931 if( bS[bS_index] < 4 ) {
6932 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6933 const int p0 = pix[-1];
6934 const int p1 = pix[-2];
6935 const int q0 = pix[0];
6936 const int q1 = pix[1];
6938 if( FFABS( p0 - q0 ) < alpha &&
6939 FFABS( p1 - p0 ) < beta &&
6940 FFABS( q1 - q0 ) < beta ) {
6941 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6943 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6944 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6945 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS==4): 2-tap smoothing of the boundary pixels only. */
6948 const int p0 = pix[-1];
6949 const int p1 = pix[-2];
6950 const int q0 = pix[0];
6951 const int q1 = pix[1];
6953 if( FFABS( p0 - q0 ) < alpha &&
6954 FFABS( p1 - p0 ) < beta &&
6955 FFABS( q1 - q0 ) < beta ) {
6957 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6958 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6959 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge: filtering acts across rows, i.e. on the
 * pix[-stride] | pix[0] boundary, for an edge 16 pixels wide. Mirrors
 * filter_mb_edgev with the pixel step changed from 1 to pix_next (= stride).
 * NOTE(review): the bS<4 branch header and several closing braces are elided
 * in this extract. */
6965 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6967 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6968 const int alpha = alpha_table[index_a];
6969 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6970 const int pix_next = stride;
/* Normal-filter path: per-run tc (-1 = skip run, presumably -- TODO confirm)
 * then the vertical-direction DSP luma filter. */
6975 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6976 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6978 /* 16px edge length, see filter_mb_edgev */
6979 for( d = 0; d < 16; d++ ) {
6980 const int p0 = pix[-1*pix_next];
6981 const int p1 = pix[-2*pix_next];
6982 const int p2 = pix[-3*pix_next];
6983 const int q0 = pix[0];
6984 const int q1 = pix[1*pix_next];
6985 const int q2 = pix[2*pix_next];
6987 if( FFABS( p0 - q0 ) < alpha &&
6988 FFABS( p1 - p0 ) < beta &&
6989 FFABS( q1 - q0 ) < beta ) {
6991 const int p3 = pix[-4*pix_next];
6992 const int q3 = pix[ 3*pix_next];
/* Strong-filter decision tree, identical to the vertical case. */
6994 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6995 if( FFABS( p2 - p0 ) < beta) {
6997 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6998 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6999 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
7002 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
7004 if( FFABS( q2 - q0 ) < beta) {
7006 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
7007 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
7008 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
7011 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
7015 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
7016 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
7018 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge (pix[-stride] | pix[0] boundary).
 * Chroma tc convention (tc0+1, 0 for bS==0) as in filter_mb_edgecv; the
 * bS<4 guard and the tc[] loop header are elided in this extract. The last
 * visible line is the intra (bS==4) strong-filter call. */
7025 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
7027 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
7028 const int alpha = alpha_table[index_a];
7029 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
7034 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7035 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
7037 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock: precomputes boundary strengths
 * in bulk (via dsp.h264_loop_filter_strength where available) instead of
 * the per-edge logic in filter_mb(). Falls back to filter_mb() at the
 * picture border or when no optimized strength routine exists; asserts
 * that MBAFF is not in use.
 * NOTE(review): the function tail (including the FILTER macro invocations
 * and the 8x8DCT branch body) is elided in this extract. */
7041 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7042 MpegEncContext * const s = &h->s;
7044 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
7046 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7047 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7050 assert(!FRAME_MBAFF);
/* Gather this MB's QP and its left/top neighbours'; edge QPs are the
 * rounded averages of the two adjacent macroblocks' quantizers. */
7052 mb_xy = mb_x + mb_y*s->mb_stride;
7053 mb_type = s->current_picture.mb_type[mb_xy];
7054 qp = s->current_picture.qscale_table[mb_xy];
7055 qp0 = s->current_picture.qscale_table[mb_xy-1];
7056 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7057 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7058 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7059 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7060 qp0 = (qp + qp0 + 1) >> 1;
7061 qp1 = (qp + qp1 + 1) >> 1;
7062 qpc0 = (qpc + qpc0 + 1) >> 1;
7063 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Early out: below this QP threshold the filter provably changes nothing
 * (same conservative estimate as the comment in filter_mb). */
7064 qp_thresh = 15 - h->slice_alpha_c0_offset;
7065 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7066 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: boundary strengths are fixed (4 on the MB border, 3 inside),
 * so filter directly with constant bS arrays. */
7069 if( IS_INTRA(mb_type) ) {
7070 int16_t bS4[4] = {4,4,4,4};
7071 int16_t bS3[4] = {3,3,3,3};
7072 if( IS_8x8DCT(mb_type) ) {
7073 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7074 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7075 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7076 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7078 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7079 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7080 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7081 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7082 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7083 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7084 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7085 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
7087 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7088 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7089 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7090 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7091 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7092 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7093 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7094 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute all bS at once. bSv aliases bS so one 64-bit store
 * sets four int16 strengths (and a whole row can be tested at once). */
7097 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7098 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7100 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7102 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the strength routine which edges cannot have
 * mv-based bS differences given the partition shapes. */
7104 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7105 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7106 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7107 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7109 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7110 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7111 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7112 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbours force bS=4 on the shared border edge. */
7114 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7115 bSv[0][0] = 0x0004000400040004ULL;
7116 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7117 bSv[1][0] = 0x0004000400040004ULL;
7119 #define FILTER(hv,dir,edge)\
7120 if(bSv[dir][edge]) {\
7121 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7123 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7124 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7130 } else if( IS_8x8DCT(mb_type) ) {
/* Full (reference) deblocking of one macroblock: derives the boundary
 * strength bS for every 4-sample edge segment from intra flags, coded
 * residuals (nnz), reference indices and motion-vector differences, then
 * calls the edge filters for luma and both chroma planes. Handles the MBAFF
 * special cases that filter_mb_fast() asserts away.
 * NOTE(review): numerous interior lines (declarations, branch headers,
 * closing braces) are elided in this extract; comments describe only the
 * visible logic. */
7149 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7150 MpegEncContext * const s = &h->s;
7151 const int mb_xy= mb_x + mb_y*s->mb_stride;
7152 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a smaller vertical mv threshold (2 instead of 4)
 * when deciding mv-based bS. */
7153 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7154 int first_vertical_edge_done = 0;
7156 /* FIXME: A given frame may occupy more than one position in
7157 * the reference list. So ref2frm should be populated with
7158 * frame numbers, not indices. */
7159 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7160 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7162 //for sufficiently low qp, filtering wouldn't do anything
7163 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7165 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7166 int qp = s->current_picture.qscale_table[mb_xy];
7168 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7169 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left edge is filtered here (8 bS values, 2 QPs)
 * when the current and left MB pairs differ in field/frame coding. */
7175 // left mb is in picture
7176 && h->slice_table[mb_xy-1] != 255
7177 // and current and left pair do not have the same interlaced type
7178 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7179 // and left mb is in the same slice if deblocking_filter == 2
7180 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7181 /* First vertical edge is different in MBAFF frames
7182 * There are 8 different bS to compute and 2 different Qp
7184 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7185 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7189 int mb_qp, mbn0_qp, mbn1_qp;
7191 first_vertical_edge_done = 1;
7193 if( IS_INTRA(mb_type) )
7194 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7196 for( i = 0; i < 8; i++ ) {
7197 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7199 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7201 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7202 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7203 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Edge QPs: rounded average of the current MB's QP and each of the two
 * left-pair macroblocks', for luma and chroma separately. */
7210 mb_qp = s->current_picture.qscale_table[mb_xy];
7211 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7212 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7213 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7214 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7215 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7216 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7217 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7218 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7221 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7222 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7223 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7224 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7225 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7227 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7228 for( dir = 0; dir < 2; dir++ )
/* mbm is the neighbour across the MB border in this direction; start==1
 * skips edge 0 when that neighbour lies outside the slice/picture. */
7231 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7232 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7233 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7235 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7236 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7237 // how often to recheck mv-based bS when iterating between edges
7238 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7239 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7240 // how often to recheck mv-based bS when iterating along each edge
7241 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7243 if (first_vertical_edge_done) {
7245 first_vertical_edge_done = 0;
7248 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7251 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7252 && !IS_INTERLACED(mb_type)
7253 && IS_INTERLACED(mbm_type)
7255 // This is a special case in the norm where the filtering must
7256 // be done twice (one each of the field) even if we are in a
7257 // frame macroblock.
7259 static const int nnz_idx[4] = {4,5,6,3};
7260 unsigned int tmp_linesize = 2 * linesize;
7261 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7262 int mbn_xy = mb_xy - 2 * s->mb_stride;
7267 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7268 if( IS_INTRA(mb_type) ||
7269 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7270 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7272 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7273 for( i = 0; i < 4; i++ ) {
7274 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7275 mbn_nnz[nnz_idx[i]] != 0 )
7281 // Do not use s->qscale as luma quantizer because it has not the same
7282 // value in IPCM macroblocks.
7283 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7284 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7285 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7286 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7287 chroma_qp = ( h->chroma_qp +
7288 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7289 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7290 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Main per-edge loop: edge 0 crosses into the neighbour MB, edges 1..3
 * are internal to this MB. */
7297 for( edge = start; edge < edges; edge++ ) {
7298 /* mbn_xy: neighbor macroblock */
7299 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7300 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* With 8x8 transform, odd internal edges carry no block boundary. */
7304 if( (edge&1) && IS_8x8DCT(mb_type) )
7307 if( IS_INTRA(mb_type) ||
7308 IS_INTRA(mbn_type) ) {
7311 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7312 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7321 bS[0] = bS[1] = bS[2] = bS[3] = value;
7326 if( edge & mask_edge ) {
7327 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7330 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7331 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut when both sides belong to one partition:
 * bS depends only on ref/mv differences, computed once per edge. */
7334 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7335 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7336 int bn_idx= b_idx - (dir ? 8:1);
7338 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7339 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7340 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7341 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7343 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: compute bS per 4-sample segment; nnz on either side
 * gives bS=2, otherwise ref/mv differences give bS=1. */
7349 for( i = 0; i < 4; i++ ) {
7350 int x = dir == 0 ? edge : i;
7351 int y = dir == 0 ? i : edge;
7352 int b_idx= 8 + 4 + x + 8*y;
7353 int bn_idx= b_idx - (dir ? 8:1);
7355 if( h->non_zero_count_cache[b_idx] != 0 ||
7356 h->non_zero_count_cache[bn_idx] != 0 ) {
7362 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7363 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7364 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7365 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7373 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7378 // Do not use s->qscale as luma quantizer because it has not the same
7379 // value in IPCM macroblocks.
7380 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7381 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7382 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7383 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Apply the edge filters. Chroma only has an edge at every other luma
 * edge (4:2:0 subsampling), hence the (edge&1)==0 test. */
7385 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7386 if( (edge&1) == 0 ) {
7387 int chroma_qp = ( h->chroma_qp +
7388 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7389 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7390 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7393 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7394 if( (edge&1) == 0 ) {
7395 int chroma_qp = ( h->chroma_qp +
7396 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7397 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7398 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes the macroblocks of one slice and reports the decoded region to
 * the error concealment layer via ff_er_add_slice().
 * Three paths are visible: CABAC, CAVLC, and an older decode_mb() based
 * loop.  The trailing "return -1" is marked not reached; the real error /
 * end-of-slice returns are on lines not shown in this excerpt.
 * NOTE(review): this excerpt is a sampled extraction of the original file;
 * intermediate lines (loop headers, braces, returns) are missing.
 */
7405 static int decode_slice(H264Context *h){
7406 MpegEncContext * const s = &h->s;
7407 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* ---- CABAC entropy coding path ---- */
7411 if( h->pps.cabac ) {
/* CABAC slice data starts byte aligned */
7415 align_get_bits( &s->gb );
7418 ff_init_cabac_states( &h->cabac);
7419 ff_init_cabac_decoder( &h->cabac,
7420 s->gb.buffer + get_bits_count(&s->gb)/8,
7421 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7422 /* calculate pre-state */
7423 for( i= 0; i < 460; i++ ) {
7425 if( h->slice_type == I_TYPE )
7426 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7428 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: pre < 64 means MPS=0, else MPS=1 */
7431 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7433 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7438 int ret = decode_mb_cabac(h);
7440 //STOP_TIMER("decode_mb_cabac")
7442 if(ret>=0) hl_decode_mb(h);
/* in MBAFF mode a macroblock pair is decoded together */
7444 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7447 if(ret>=0) ret = decode_mb_cabac(h);
7449 if(ret>=0) hl_decode_mb(h);
7452 eos = get_cabac_terminate( &h->cabac );
/* tolerate a 2 byte overread before flagging the bytestream as damaged */
7454 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7455 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7456 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7460 if( ++s->mb_x >= s->mb_width ) {
7462 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7469 if( eos || s->mb_y >= s->mb_height ) {
7470 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7471 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* ---- CAVLC entropy coding path ---- */
7478 int ret = decode_mb_cavlc(h);
7480 if(ret>=0) hl_decode_mb(h);
7482 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7484 ret = decode_mb_cavlc(h);
7486 if(ret>=0) hl_decode_mb(h);
7491 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7492 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7497 if(++s->mb_x >= s->mb_width){
7499 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7504 if(s->mb_y >= s->mb_height){
7505 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end-of-bitstream => clean slice end, anything else => error */
7507 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7508 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7512 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7519 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7520 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7521 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7522 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7526 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* ---- legacy decode_mb() loop ---- */
7535 for(;s->mb_y < s->mb_height; s->mb_y++){
7536 for(;s->mb_x < s->mb_width; s->mb_x++){
7537 int ret= decode_mb(h);
7542 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7543 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7548 if(++s->mb_x >= s->mb_width){
7550 if(++s->mb_y >= s->mb_height){
7551 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7552 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7556 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* FIXED(review): repaired garbled '?' characters ("s->?gb", "s->gb?.")
 * to match the parallel checks above and below */
7563 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7564 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7565 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7569 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7576 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7579 return -1; //not reached
/**
 * Parses an SEI "unregistered user data" payload.
 * Its only real use here is detecting the x264 encoder string and
 * recording the build number in h->x264_build so that encoder-specific
 * bug workarounds can be applied later.
 * NOTE(review): sampled excerpt -- declarations of i/e/build and the
 * final return are on lines not shown here.
 */
7582 static int decode_unregistered_user_data(H264Context *h, int size){
7583 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
7584 uint8_t user_data[16+256];
7590 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7591 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID prefix */
7595 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7596 if(e==1 && build>=0)
7597 h->x264_build= build;
7599 if(s->avctx->debug & FF_DEBUG_BUGS)
7600 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip whatever remains of the payload (loop header not visible here) */
7603 skip_bits(&s->gb, 8);
/**
 * Parses an SEI NAL unit.  payloadType and payloadSize are coded as a
 * run of 0xFF bytes (each adding 255) plus one final byte, per the
 * H.264 sei_message() syntax.
 */
7608 static int decode_sei(H264Context *h){
7609 MpegEncContext * const s = &h->s;
/* need at least 16 more bits for another type/size pair */
7611 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payloadType */
7616 type+= show_bits(&s->gb, 8);
7617 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payloadSize the same way */
7621 size+= show_bits(&s->gb, 8);
7622 }while(get_bits(&s->gb, 8) == 255);
7626 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled payload types are skipped wholesale */
7630 skip_bits(&s->gb, 8*size);
7633 //FIXME check bits here
7634 align_get_bits(&s->gb);
/**
 * Parses (and discards) HRD parameters.  All syntax elements are read
 * only to keep the bitstream position correct; nothing is stored in
 * the SPS here.
 */
7640 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7641 MpegEncContext * const s = &h->s;
7643 cpb_count = get_ue_golomb(&s->gb) + 1;
7644 get_bits(&s->gb, 4); /* bit_rate_scale */
7645 get_bits(&s->gb, 4); /* cpb_size_scale */
7646 for(i=0; i<cpb_count; i++){
7647 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7648 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7649 get_bits1(&s->gb); /* cbr_flag */
7651 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7652 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7653 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7654 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI (Video Usability Information) appended to an SPS.
 * Stores sample aspect ratio, timing info, the bitstream restriction
 * flag and num_reorder_frames in *sps; most other fields are read and
 * discarded to keep the bit position correct.
 */
7657 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7658 MpegEncContext * const s = &h->s;
7659 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7660 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
/* --- sample aspect ratio --- */
7662 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7664 if( aspect_ratio_info_present_flag ) {
7665 aspect_ratio_idc= get_bits(&s->gb, 8);
7666 if( aspect_ratio_idc == EXTENDED_SAR ) {
/* EXTENDED_SAR carries an explicit num/den pair */
7667 sps->sar.num= get_bits(&s->gb, 16);
7668 sps->sar.den= get_bits(&s->gb, 16);
7669 }else if(aspect_ratio_idc < 14){
/* predefined table lookup for idc 0..13 */
7670 sps->sar= pixel_aspect[aspect_ratio_idc];
7672 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7679 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7681 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7682 get_bits1(&s->gb); /* overscan_appropriate_flag */
7685 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7686 get_bits(&s->gb, 3); /* video_format */
7687 get_bits1(&s->gb); /* video_full_range_flag */
7688 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7689 get_bits(&s->gb, 8); /* colour_primaries */
7690 get_bits(&s->gb, 8); /* transfer_characteristics */
7691 get_bits(&s->gb, 8); /* matrix_coefficients */
7695 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7696 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7697 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* --- timing info (frame rate) --- */
7700 sps->timing_info_present_flag = get_bits1(&s->gb);
7701 if(sps->timing_info_present_flag){
7702 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7703 sps->time_scale = get_bits_long(&s->gb, 32);
7704 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* --- HRD parameters (parsed, then discarded) --- */
7707 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7708 if(nal_hrd_parameters_present_flag)
7709 decode_hrd_parameters(h, sps);
7710 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7711 if(vcl_hrd_parameters_present_flag)
7712 decode_hrd_parameters(h, sps);
7713 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7714 get_bits1(&s->gb); /* low_delay_hrd_flag */
7715 get_bits1(&s->gb); /* pic_struct_present_flag */
/* --- bitstream restriction: only num_reorder_frames is kept --- */
7717 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7718 if(sps->bitstream_restriction_flag){
7719 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7720 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7721 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7722 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7723 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7724 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7725 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Parses one scaling list (size 16 for 4x4, 64 for 8x8) into factors[].
 * If the list is absent in the bitstream, fallback_list is copied.
 * A first delta producing next==0 selects the JVT default list
 * (jvt_list).  Deltas are applied in zigzag order; once next becomes 0
 * the previous value is repeated for the remaining positions.
 */
7731 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7732 const uint8_t *jvt_list, const uint8_t *fallback_list){
7733 MpegEncContext * const s = &h->s;
7734 int i, last = 8, next = 8;
/* pick the scan order matching the transform size */
7735 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7736 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7737 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7739 for(i=0;i<size;i++){
/* delta_scale is signed Golomb, wrapped to 0..255 */
7741 next = (last + get_se_golomb(&s->gb)) & 0xff;
7742 if(!i && !next){ /* matrix not written, we use the preset one */
7743 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 keeps repeating 'last' for the rest of the list */
7746 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the full set of scaling matrices for an SPS or PPS.
 * is_sps selects SPS (1) or PPS (0) semantics.  When a PPS omits its
 * matrices and the SPS carried some, the SPS matrices are used as the
 * fallback; otherwise the flat/JVT defaults apply.  Intra/Inter chroma
 * lists fall back to the previously parsed list of the same class.
 */
7750 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7751 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7752 MpegEncContext * const s = &h->s;
7753 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* per-list fallbacks: SPS matrices if present, else hardcoded defaults */
7754 const uint8_t *fallback[4] = {
7755 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7756 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7757 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7758 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7760 if(get_bits1(&s->gb)){
/* remember that the SPS carried matrices (for later PPS fallback) */
7761 sps->scaling_matrix_present |= is_sps;
7762 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7763 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7764 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7765 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7766 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7767 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists only exist for SPS or when the PPS enables 8x8 transform */
7768 if(is_sps || pps->transform_8x8_mode){
7769 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7770 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7772 } else if(fallback_sps) {
/* PPS without matrices inherits the SPS matrices wholesale */
7773 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7774 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a Sequence Parameter Set NAL and stores it in
 * h->sps_buffer[sps_id].
 * NOTE(review): no bounds check on sps_id against MAX_SPS_COUNT is
 * visible in this excerpt before indexing sps_buffer -- confirm it is
 * enforced on one of the missing lines.
 */
7778 static inline int decode_seq_parameter_set(H264Context *h){
7779 MpegEncContext * const s = &h->s;
7780 int profile_idc, level_idc;
7784 profile_idc= get_bits(&s->gb, 8);
7785 get_bits1(&s->gb); //constraint_set0_flag
7786 get_bits1(&s->gb); //constraint_set1_flag
7787 get_bits1(&s->gb); //constraint_set2_flag
7788 get_bits1(&s->gb); //constraint_set3_flag
7789 get_bits(&s->gb, 4); // reserved
7790 level_idc= get_bits(&s->gb, 8);
7791 sps_id= get_ue_golomb(&s->gb);
7793 sps= &h->sps_buffer[ sps_id ];
7794 sps->profile_idc= profile_idc;
7795 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices */
7797 if(sps->profile_idc >= 100){ //high profile
7798 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7799 get_bits1(&s->gb); //residual_color_transform_flag
7800 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7801 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7802 sps->transform_bypass = get_bits1(&s->gb);
7803 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7805 sps->scaling_matrix_present = 0;
7807 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
/* POC type selects how picture order count is derived */
7808 sps->poc_type= get_ue_golomb(&s->gb);
7810 if(sps->poc_type == 0){ //FIXME #define
7811 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7812 } else if(sps->poc_type == 1){//FIXME #define
7813 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7814 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7815 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7816 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7818 for(i=0; i<sps->poc_cycle_length; i++)
7819 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7821 if(sps->poc_type > 2){
7822 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7826 sps->ref_frame_count= get_ue_golomb(&s->gb);
7827 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7828 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7830 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* picture size in macroblocks */
7831 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7832 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_{width,height} against overflow and insane dimensions */
7833 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7834 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7837 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7838 if(!sps->frame_mbs_only_flag)
7839 sps->mb_aff= get_bits1(&s->gb);
7843 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7845 #ifndef ALLOW_INTERLACE
7847 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7849 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7850 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
/* frame cropping rectangle */
7852 sps->crop= get_bits1(&s->gb);
7854 sps->crop_left = get_ue_golomb(&s->gb);
7855 sps->crop_right = get_ue_golomb(&s->gb);
7856 sps->crop_top = get_ue_golomb(&s->gb);
7857 sps->crop_bottom= get_ue_golomb(&s->gb);
7858 if(sps->crop_left || sps->crop_top){
7859 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7865 sps->crop_bottom= 0;
7868 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7869 if( sps->vui_parameters_present_flag )
7870 decode_vui_parameters(h, sps);
7872 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7873 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7874 sps_id, sps->profile_idc, sps->level_idc,
7876 sps->ref_frame_count,
7877 sps->mb_width, sps->mb_height,
7878 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7879 sps->direct_8x8_inference_flag ? "8B8" : "",
7880 sps->crop_left, sps->crop_right,
7881 sps->crop_top, sps->crop_bottom,
7882 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a Picture Parameter Set NAL and stores it in
 * h->pps_buffer[pps_id].  bit_length is the payload size in bits and is
 * used to detect the optional High-profile trailing fields.
 * NOTE(review): no bounds check on pps_id against MAX_PPS_COUNT is
 * visible in this excerpt -- confirm it is enforced on a missing line.
 */
7888 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7889 MpegEncContext * const s = &h->s;
7890 int pps_id= get_ue_golomb(&s->gb);
7891 PPS *pps= &h->pps_buffer[pps_id];
7893 pps->sps_id= get_ue_golomb(&s->gb);
7894 pps->cabac= get_bits1(&s->gb);
7895 pps->pic_order_present= get_bits1(&s->gb);
7896 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (slice groups) is parsed for position but not implemented */
7897 if(pps->slice_group_count > 1 ){
7898 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7899 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7900 switch(pps->mb_slice_group_map_type){
/* The table rows below are excerpted H.264 spec syntax, presumably
 * inside a comment or disabled block in the full source -- confirm. */
7903 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7904 | run_length[ i ] |1 |ue(v) |
7909 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7911 | top_left_mb[ i ] |1 |ue(v) |
7912 | bottom_right_mb[ i ] |1 |ue(v) |
7920 | slice_group_change_direction_flag |1 |u(1) |
7921 | slice_group_change_rate_minus1 |1 |ue(v) |
7926 | slice_group_id_cnt_minus1 |1 |ue(v) |
7927 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7929 | slice_group_id[ i ] |1 |u(v) |
7934 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7935 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7936 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7937 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7941 pps->weighted_pred= get_bits1(&s->gb);
7942 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7943 pps->init_qp= get_se_golomb(&s->gb) + 26;
7944 pps->init_qs= get_se_golomb(&s->gb) + 26;
7945 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7946 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7947 pps->constrained_intra_pred= get_bits1(&s->gb);
7948 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* defaults for the optional High-profile extension fields */
7950 pps->transform_8x8_mode= 0;
7951 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* flat (all 16) scaling matrices by default */
7952 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7953 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* extra bits remaining => High-profile PPS extension is present */
7955 if(get_bits_count(&s->gb) < bit_length){
7956 pps->transform_8x8_mode= get_bits1(&s->gb);
7957 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7958 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7961 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7962 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7963 pps_id, pps->sps_id,
7964 pps->cabac ? "CABAC" : "CAVLC",
7965 pps->slice_group_count,
7966 pps->ref_count[0], pps->ref_count[1],
7967 pps->weighted_pred ? "weighted" : "",
7968 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7969 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7970 pps->constrained_intra_pred ? "CONSTR" : "",
7971 pps->redundant_pic_cnt_present ? "REDU" : "",
7972 pps->transform_8x8_mode ? "8x8DCT" : ""
7980 * finds the end of the current frame in the bitstream.
7981 * @return the position of the first byte of the next frame, or -1
7983 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7986 ParseContext *pc = &(h->s.parse_context);
7987 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7988 // mb_addr= pc->mb_addr - 1;
7990 for(i=0; i<=buf_size; i++){
/* state&0xFFFFFF1F matches 00 00 01 + nal type 1/2/5 (coded slices) */
7991 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7992 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7993 if(pc->frame_start_found){
7994 // If there isn't one more byte in the buffer
7995 // the test on first_mb_in_slice cannot be done yet
7996 // do it at next call.
7997 if (i >= buf_size) break;
/* ue(v) first_mb_in_slice==0 encodes as a leading 1 bit */
7998 if (buf[i] & 0x80) {
7999 // first_mb_in_slice is 0, probably the first nal of a new
8001 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
8003 pc->frame_start_found= 0;
8007 pc->frame_start_found = 1;
/* nal type 7/8/9 (SPS/PPS/AUD) also terminates the current frame */
8009 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
8010 if(pc->frame_start_found){
8012 pc->frame_start_found= 0;
/* shift the next byte into the rolling 32-bit startcode window */
8017 state= (state<<8) | buf[i];
8021 return END_NOT_FOUND;
8024 #ifdef CONFIG_H264_PARSER
/**
 * Parser callback: accumulates input chunks into complete frames using
 * find_frame_end() + ff_combine_frame(), then hands the assembled frame
 * back through poutbuf/poutbuf_size.
 */
8025 static int h264_parse(AVCodecParserContext *s,
8026 AVCodecContext *avctx,
8027 uint8_t **poutbuf, int *poutbuf_size,
8028 const uint8_t *buf, int buf_size)
8030 H264Context *h = s->priv_data;
8031 ParseContext *pc = &h->s.parse_context;
8034 next= find_frame_end(h, buf, buf_size);
/* frame not complete yet: stash the data and wait for more input */
8036 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8042 *poutbuf = (uint8_t *)buf;
8043 *poutbuf_size = buf_size;
/**
 * Finds the split point between global headers and frame data: scans
 * for the first SPS (NAL type 7) and returns the offset of the first
 * non-parameter-set NAL after it, backing up over trailing zero bytes.
 */
8047 static int h264_split(AVCodecContext *avctx,
8048 const uint8_t *buf, int buf_size)
8051 uint32_t state = -1;
8054 for(i=0; i<=buf_size; i++){
/* startcode + nal type 7 = SPS found */
8055 if((state&0xFFFFFF1F) == 0x107)
8057 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* first NAL that is not SPS/PPS/AUD ends the header section */
8059 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* include any zero padding preceding the startcode in the header */
8061 while(i>4 && buf[i-5]==0) i--;
8066 state= (state<<8) | buf[i];
8070 #endif /* CONFIG_H264_PARSER */
/**
 * Splits the buffer into NAL units -- length-prefixed (AVC/mp4 mode,
 * h->is_avc) or Annex-B startcode-delimited -- unescapes each with
 * decode_nal() and dispatches on nal_unit_type (slice, IDR, data
 * partitions, SEI, SPS, PPS, ...).  Returns the number of bytes
 * consumed.
 * NOTE(review): sampled excerpt -- the NAL loop header, several braces
 * and case labels are on lines not shown here.
 */
8072 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8073 MpegEncContext * const s = &h->s;
8074 AVCodecContext * const avctx= s->avctx;
8078 for(i=0; i<50; i++){
8079 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8083 s->current_picture_ptr= NULL;
8092 if(buf_index >= buf_size) break;
/* AVC mode: big-endian length prefix of nal_length_size bytes */
8094 for(i = 0; i < h->nal_length_size; i++)
8095 nalsize = (nalsize << 8) | buf[buf_index++];
8101 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8106 // start code prefix search
8107 for(; buf_index + 3 < buf_size; buf_index++){
8108 // this should allways succeed in the first iteration
8109 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8113 if(buf_index+3 >= buf_size) break;
/* unescape the NAL (strip emulation prevention bytes) */
8118 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
/* drop trailing zero bytes before locating rbsp_stop_one_bit */
8119 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8121 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8123 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8124 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8127 if (h->is_avc && (nalsize != consumed))
8128 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8130 buf_index += consumed;
/* skip non-reference NALs when hurrying / AVDISCARD_NONREF */
8132 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8133 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8136 switch(h->nal_unit_type){
8138 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8140 init_get_bits(&s->gb, ptr, bit_length);
8142 h->inter_gb_ptr= &s->gb;
8143 s->data_partitioning = 0;
8145 if(decode_slice_header(h) < 0){
8146 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8149 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* only decode the slice if it survives all skip/discard policies */
8150 if(h->redundant_pic_count==0 && s->hurry_up < 5
8151 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8152 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8153 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8154 && avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + partitioning setup */
8158 init_get_bits(&s->gb, ptr, bit_length);
8160 h->inter_gb_ptr= NULL;
8161 s->data_partitioning = 1;
8163 if(decode_slice_header(h) < 0){
8164 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* data partition B: intra residual */
8168 init_get_bits(&h->intra_gb, ptr, bit_length);
8169 h->intra_gb_ptr= &h->intra_gb;
/* data partition C: inter residual */
8172 init_get_bits(&h->inter_gb, ptr, bit_length);
8173 h->inter_gb_ptr= &h->inter_gb;
8175 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8177 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8178 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8179 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8180 && avctx->skip_frame < AVDISCARD_ALL)
8184 init_get_bits(&s->gb, ptr, bit_length);
8188 init_get_bits(&s->gb, ptr, bit_length);
8189 decode_seq_parameter_set(h);
8191 if(s->flags& CODEC_FLAG_LOW_DELAY)
8194 if(avctx->has_b_frames < 2)
8195 avctx->has_b_frames= !s->low_delay;
8198 init_get_bits(&s->gb, ptr, bit_length);
8200 decode_picture_parameter_set(h, bit_length);
8204 case NAL_END_SEQUENCE:
8205 case NAL_END_STREAM:
8206 case NAL_FILLER_DATA:
8208 case NAL_AUXILIARY_SLICE:
8211 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8215 if(!s->current_picture_ptr) return buf_index; //no frame
8217 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8218 s->current_picture_ptr->pict_type= s->pict_type;
/* remember POC/frame_num state for the next picture's POC derivation */
8220 h->prev_frame_num_offset= h->frame_num_offset;
8221 h->prev_frame_num= h->frame_num;
8222 if(s->current_picture_ptr->reference){
8223 h->prev_poc_msb= h->poc_msb;
8224 h->prev_poc_lsb= h->poc_lsb;
8226 if(s->current_picture_ptr->reference)
8227 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8237 * returns the number of bytes consumed for building the current frame
8239 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* in truncated mode the parser kept some bytes; do not count them twice */
8240 if(s->flags&CODEC_FLAG_TRUNCATED){
8241 pos -= s->parse_context.last_index;
8242 if(pos<0) pos=0; // FIXME remove (unneeded?)
8246 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8247 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: consumes one frame's worth of NAL units and,
 * when a picture completes, outputs it in display order (B-frame
 * reordering via h->delayed_pic / h->delayed_output_pic).  On the
 * first call with AVC-style input it also parses the avcC extradata.
 * NOTE(review): sampled excerpt -- several returns, braces and the
 * '#else' of the decode-order block are on lines not shown here.
 */
8253 static int decode_frame(AVCodecContext *avctx,
8254 void *data, int *data_size,
8255 uint8_t *buf, int buf_size)
8257 H264Context *h = avctx->priv_data;
8258 MpegEncContext *s = &h->s;
8259 AVFrame *pict = data;
8262 s->flags= avctx->flags;
8263 s->flags2= avctx->flags2;
8265 /* no supplementary picture */
8266 if (buf_size == 0) {
/* truncated input: let the parse context assemble a whole frame first */
8270 if(s->flags&CODEC_FLAG_TRUNCATED){
8271 int next= find_frame_end(h, buf, buf_size);
8273 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8275 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* --- one-time avcC extradata parsing (AVC-in-MP4 style input) --- */
8278 if(h->is_avc && !h->got_avcC) {
8279 int i, cnt, nalsize;
8280 unsigned char *p = avctx->extradata;
8281 if(avctx->extradata_size < 7) {
8282 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8286 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8289 /* sps and pps in the avcC always have length coded with 2 bytes,
8290 so put a fake nal_length_size = 2 while parsing them */
8291 h->nal_length_size = 2;
8292 // Decode sps from avcC
8293 cnt = *(p+5) & 0x1f; // Number of sps
8295 for (i = 0; i < cnt; i++) {
8296 nalsize = BE_16(p) + 2;
8297 if(decode_nal_units(h, p, nalsize) < 0) {
8298 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8303 // Decode pps from avcC
8304 cnt = *(p++); // Number of pps
8305 for (i = 0; i < cnt; i++) {
8306 nalsize = BE_16(p) + 2;
8307 if(decode_nal_units(h, p, nalsize) != nalsize) {
8308 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8313 // Now store right nal length size, that will be use to parse all other nals
8314 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8315 // Do not reparse avcC
/* Annex-B extradata (if any) is decoded once, before the first frame */
8319 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8320 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8324 buf_index=decode_nal_units(h, buf, buf_size);
8328 //FIXME do something with unavailable reference frames
8330 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8331 if(!s->current_picture_ptr){
8332 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8337 Picture *out = s->current_picture_ptr;
8338 #if 0 //decode order
8339 *data_size = sizeof(AVFrame);
8341 /* Sort B-frames into display order */
8342 Picture *cur = s->current_picture_ptr;
8343 Picture *prev = h->delayed_output_pic;
8344 int i, pics, cross_idr, out_of_order, out_idx;
/* grow the reorder delay to what the stream declares it needs */
8346 if(h->sps.bitstream_restriction_flag
8347 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8348 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8353 while(h->delayed_pic[pics]) pics++;
8354 h->delayed_pic[pics++] = cur;
8355 if(cur->reference == 0)
8359 for(i=0; h->delayed_pic[i]; i++)
8360 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the delayed picture with the smallest POC before any keyframe */
8363 out = h->delayed_pic[0];
8365 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8366 if(h->delayed_pic[i]->poc < out->poc){
8367 out = h->delayed_pic[i];
8371 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8372 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8374 else if(prev && pics <= s->avctx->has_b_frames)
8376 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8378 ((!cross_idr && prev && out->poc > prev->poc + 2)
8379 || cur->pict_type == B_TYPE)))
/* heuristically increase the assumed B-frame delay */
8382 s->avctx->has_b_frames++;
8385 else if(out_of_order)
8388 if(out_of_order || pics > s->avctx->has_b_frames){
8389 for(i=out_idx; h->delayed_pic[i]; i++)
8390 h->delayed_pic[i] = h->delayed_pic[i+1];
8396 *data_size = sizeof(AVFrame);
/* previous output picture is no longer needed for display */
8397 if(prev && prev != out && prev->reference == 1)
8398 prev->reference = 0;
8399 h->delayed_output_pic = out;
8403 *pict= *(AVFrame*)out;
8405 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8408 assert(pict->data[0] || !*data_size);
8409 ff_print_debug_info(s, pict);
8410 //printf("out %d\n", (int)pict->data[0]);
8413 /* Return the Picture timestamp as the frame number */
8414 /* we substract 1 because it is added on utils.c */
8415 avctx->frame_number = s->picture_number - 1;
8417 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with the availability of the current macroblock's
 * neighbours, where "available" means the neighbour exists and belongs
 * to the same slice: [0]=top-left, [1]=top, [2]=top-right, [3]=left;
 * [4] and [5] are fixed placeholders.
 */
8420 static inline void fill_mb_avail(H264Context *h){
8421 MpegEncContext * const s = &h->s;
8422 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8425 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8426 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8427 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8433 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8434 h->mb_avail[4]= 1; //FIXME move out
8435 h->mb_avail[5]= 0; //FIXME move out
/* Built-in self test (normally compiled out): exercises the exp-Golomb
 * coder round-trip, the 4x4 (I)DCT, the quantizer and the NAL
 * escape/unescape layer with random data.
 * NOTE(review): sampled excerpt -- the enclosing function declaration
 * (presumably main()) and many loop braces are not visible here. */
8441 #define SIZE (COUNT*40)
8447 // int int_temp[10000];
8449 AVCodecContext avctx;
8451 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb write/read round-trip --- */
8453 init_put_bits(&pb, temp, SIZE);
8454 printf("testing unsigned exp golomb\n");
8455 for(i=0; i<COUNT; i++){
8457 set_ue_golomb(&pb, i);
8458 STOP_TIMER("set_ue_golomb");
8460 flush_put_bits(&pb);
8462 init_get_bits(&gb, temp, 8*SIZE);
8463 for(i=0; i<COUNT; i++){
8466 s= show_bits(&gb, 24);
8469 j= get_ue_golomb(&gb);
8471 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8474 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb round-trip --- */
8478 init_put_bits(&pb, temp, SIZE);
8479 printf("testing signed exp golomb\n");
8480 for(i=0; i<COUNT; i++){
8482 set_se_golomb(&pb, i - COUNT/2);
8483 STOP_TIMER("set_se_golomb");
8485 flush_put_bits(&pb);
8487 init_get_bits(&gb, temp, 8*SIZE);
8488 for(i=0; i<COUNT; i++){
8491 s= show_bits(&gb, 24);
8494 j= get_se_golomb(&gb);
8495 if(j != i - COUNT/2){
8496 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8499 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT reconstruction error measurement --- */
8502 printf("testing 4x4 (I)DCT\n");
8505 uint8_t src[16], ref[16];
8506 uint64_t error= 0, max_error=0;
8508 for(i=0; i<COUNT; i++){
8510 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8511 for(j=0; j<16; j++){
8512 ref[j]= random()%255;
8513 src[j]= random()%255;
8516 h264_diff_dct_c(block, src, ref, 4);
8519 for(j=0; j<16; j++){
8520 // printf("%d ", block[j]);
8521 block[j]= block[j]*4;
8522 if(j&1) block[j]= (block[j]*4 + 2)/5;
8523 if(j&4) block[j]= (block[j]*4 + 2)/5;
8527 s->dsp.h264_idct_add(ref, block, 4);
8528 /* for(j=0; j<16; j++){
8529 printf("%d ", ref[j]);
8533 for(j=0; j<16; j++){
8534 int diff= FFABS(src[j] - ref[j]);
8537 max_error= FFMAX(max_error, diff);
8540 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
8542 printf("testing quantizer\n");
8543 for(qp=0; qp<52; qp++){
8545 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: encode, decode, compare length and content --- */
8549 printf("Testing NAL layer\n");
8551 uint8_t bitstream[COUNT];
8552 uint8_t nal[COUNT*2];
8554 memset(&h, 0, sizeof(H264Context));
8556 for(i=0; i<COUNT; i++){
8564 for(j=0; j<COUNT; j++){
8565 bitstream[j]= (random() % 255) + 1;
/* sprinkle zero bytes to exercise emulation prevention */
8568 for(j=0; j<zeros; j++){
8569 int pos= random() % COUNT;
8570 while(bitstream[pos] == 0){
8579 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8581 printf("encoding failed\n");
8585 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8589 if(out_length != COUNT){
8590 printf("incorrect length %d %d\n", out_length, COUNT);
8594 if(consumed != nal_length){
8595 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8599 if(memcmp(bitstream, out, COUNT)){
8600 printf("missmatch\n");
8605 printf("Testing RBSP\n");
/**
 * Codec close callback: frees the RBSP work buffer and the per-context
 * tables.  The H264Context itself is owned and freed by lavc.
 */
8613 static int decode_end(AVCodecContext *avctx)
8615 H264Context *h = avctx->priv_data;
8616 MpegEncContext *s = &h->s;
8618 av_freep(&h->rbsp_buffer);
8619 free_tables(h); //FIXME cleanup init stuff perhaps
8622 // memset(h, 0, sizeof(H264Context));
/* Public decoder descriptor registered with libavcodec.  The name,
 * type, id and callback fields are on lines not shown in this excerpt. */
8628 AVCodec h264_decoder = {
8632 sizeof(H264Context),
8637 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8641 #ifdef CONFIG_H264_PARSER
8642 AVCodecParser h264_parser = {
8644 sizeof(H264Context),