2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
/* NOTE(review): this chunk is a partial extraction — every line carries a
 * stray original-line-number prefix and many interior lines are missing.
 * Code below is left byte-identical; only comments are added. */
/* Rename these MpegEncContext-era names so any accidental use of them in
 * this file fails loudly (the replacement identifiers spell out why). */
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
/* Block indices of the luma/chroma DC coefficient blocks. */
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
/* Table bit depths for the static CAVLC VLC tables declared further down. */
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
/* Capacity of the SPS/PPS buffers and of the MMCO operation buffer. */
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
/* NOTE(review): the matching #else/#endif and the non-interlaced
 * definitions of the three macros above are missing from this extraction;
 * the IS_INTERLACED stub below evidently belongs to that #else branch. */
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
/* NOTE(review): the doxygen comment delimiters and the
 * `typedef struct SPS{` opener / closing `}SPS;` lines are missing from
 * this extraction; only the member list survives. Most members mirror
 * H.264 SPS syntax elements of the same (or noted) name. */
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick; ///< VUI timing (its paired time_scale member is not visible in this chunk)
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16]; ///< 4x4 scaling lists
113 uint8_t scaling_matrix8[2][64]; ///< 8x8 scaling lists
117 * Picture parameter set
/* NOTE(review): the `typedef struct PPS{` opener and closing `}PPS;`
 * lines are missing from this extraction; only the member list survives.
 * Members mirror H.264 PPS syntax elements of the same (or noted) name. */
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
135 uint8_t scaling_matrix4[6][16]; ///< 4x4 scaling lists (PPS-level override)
136 uint8_t scaling_matrix8[2][64]; ///< 8x8 scaling lists (PPS-level override)
140 * Memory management control operation opcode.
/* NOTE(review): the MMCOOpcode enumerator list and the MMCO struct body
 * are missing from this extraction; only these opener/comment fragments
 * survive. Left untouched. */
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
164 typedef struct H264Context{
/* NOTE(review): partial extraction — many members between the visible
 * lines are missing, as is the closing `}H264Context;`. Code left
 * byte-identical; comments only. */
/* NAL unit type codes (the lower-numbered NAL_* defines are not visible
 * in this chunk). */
172 #define NAL_IDR_SLICE 5
177 #define NAL_END_SEQUENCE 10
178 #define NAL_END_STREAM 11
179 #define NAL_FILLER_DATA 12
180 #define NAL_SPS_EXT 13
181 #define NAL_AUXILIARY_SLICE 19
/* Scratch buffer for NAL units after emulation-prevention-byte removal. */
182 uint8_t *rbsp_buffer;
183 unsigned int rbsp_buffer_size;
186 * Used to parse AVC variant of h264
188 int is_avc; ///< this flag is != 0 if codec is avc1
189 int got_avcC; ///< flag used to parse avcC data only once
190 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
/* Intra prediction state for the current macroblock. */
198 int chroma_pred_mode;
199 int intra16x16_pred_mode;
204 int8_t intra4x4_pred_mode_cache[5*8];
205 int8_t (*intra4x4_pred_mode)[8];
/* Intra prediction function tables (filled in elsewhere; extra +3 slots
 * presumably hold DC-variant predictors — not visible here). */
206 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
207 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
208 void (*pred8x8 [4+3])(uint8_t *src, int stride);
209 void (*pred16x16[4+3])(uint8_t *src, int stride);
/* Per-4x4-block availability bitmasks used by the intra predictors
 * (see the 0x...FF constants assigned in fill_caches below). */
210 unsigned int topleft_samples_available;
211 unsigned int top_samples_available;
212 unsigned int topright_samples_available;
213 unsigned int left_samples_available;
214 uint8_t (*top_borders[2])[16+2*8];
215 uint8_t left_border[2*(17+2*9)];
218 * non zero coeff count cache.
219 * is 64 if not available.
221 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
222 uint8_t (*non_zero_count)[16];
225 * Motion vector cache.
227 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
228 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
/* Sentinel values stored in ref_cache[] (compared against >= 0 below). */
229 #define LIST_NOT_USED -1 //FIXME rename?
230 #define PART_NOT_AVAILABLE -2
233 * is 1 if the specific list MV&references are set to 0,0,-2.
235 int mv_cache_clean[2];
238 * number of neighbors (top and/or left) that used 8x8 dct
240 int neighbor_transform_size;
243 * block_offset[ 0..23] for frame macroblocks
244 * block_offset[24..47] for field macroblocks
246 int block_offset[2*(16+8)];
248 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
250 int b_stride; //FIXME use s->b4_stride
253 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
262 int unknown_svq3_flag;
263 int next_slice_index;
/* Parameter sets: all parsed SPS/PPS plus copies of the active ones. */
265 SPS sps_buffer[MAX_SPS_COUNT];
266 SPS sps; ///< current sps
268 PPS pps_buffer[MAX_PPS_COUNT];
272 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
/* Dequantization tables: one set per (matrix, QP) pair; the _coeff
 * pointers select into the buffers. */
274 uint32_t dequant4_buffer[6][52][16];
275 uint32_t dequant8_buffer[2][52][64];
276 uint32_t (*dequant4_coeff[6])[16];
277 uint32_t (*dequant8_coeff[2])[64];
278 int dequant_coeff_pps; ///< reinit tables when pps changes
281 uint8_t *slice_table_base;
282 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
284 int slice_type_fixed;
286 //interlacing specific flags
288 int mb_field_decoding_flag;
289 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
/* Picture order count (POC) state; several sibling members are missing
 * from this extraction. */
296 int delta_poc_bottom;
299 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
300 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
301 int frame_num_offset; ///< for POC type 2
302 int prev_frame_num_offset; ///< for POC type 2
303 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
306 * frame_num for frames or 2*frame_num for field pics.
311 * max_frame_num or 2*max_frame_num for field pics.
315 //Weighted pred stuff
317 int use_weight_chroma;
318 int luma_log2_weight_denom;
319 int chroma_log2_weight_denom;
320 int luma_weight[2][48];
321 int luma_offset[2][48];
322 int chroma_weight[2][48][2];
323 int chroma_offset[2][48][2];
324 int implicit_weight[48][48];
/* Deblocking filter parameters for the current slice. */
327 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
328 int slice_alpha_c0_offset;
329 int slice_beta_offset;
331 int redundant_pic_count;
/* B-frame direct mode state. */
333 int direct_spatial_mv_pred;
334 int dist_scale_factor[16];
335 int dist_scale_factor_field[32];
336 int map_col_to_list0[2][16];
337 int map_col_to_list0_field[2][32];
340 * num_ref_idx_l0/1_active_minus1 + 1
342 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
/* Reference picture lists and delayed-output bookkeeping. */
343 Picture *short_ref[32];
344 Picture *long_ref[32];
345 Picture default_ref_list[2][32];
346 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
347 Picture *delayed_pic[16]; //FIXME size?
348 Picture *delayed_output_pic;
351 * memory management control operations buffer.
353 MMCO mmco[MAX_MMCO_COUNT];
356 int long_ref_count; ///< number of actual long term references
357 int short_ref_count; ///< number of actual short term references
/* Bitstream readers for data-partitioned slices (intra/inter partitions). */
360 GetBitContext intra_gb;
361 GetBitContext inter_gb;
362 GetBitContext *intra_gb_ptr;
363 GetBitContext *inter_gb_ptr;
365 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
//CABAC decoding state
371 uint8_t cabac_state[460];
374 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
379 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
380 uint8_t *chroma_pred_mode_table;
381 int last_qscale_diff;
382 int16_t (*mvd_table[2])[2];
383 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
384 uint8_t *direct_table;
385 uint8_t direct_cache[5*8];
/* Per-context copies of the scan tables, plus _q0 aliases (presumably
 * the un-permuted versions — TODO confirm; their setup is not visible). */
387 uint8_t zigzag_scan[16];
388 uint8_t zigzag_scan8x8[64];
389 uint8_t zigzag_scan8x8_cavlc[64];
390 uint8_t field_scan[16];
391 uint8_t field_scan8x8[64];
392 uint8_t field_scan8x8_cavlc[64];
393 const uint8_t *zigzag_scan_q0;
394 const uint8_t *zigzag_scan8x8_q0;
395 const uint8_t *zigzag_scan8x8_cavlc_q0;
396 const uint8_t *field_scan_q0;
397 const uint8_t *field_scan8x8_q0;
398 const uint8_t *field_scan8x8_cavlc_q0;
/* File-static CAVLC VLC tables (table depths are set by the *_VLC_BITS
 * defines near the top of the file; initialization is not visible here). */
403 static VLC coeff_token_vlc[4];
404 static VLC chroma_dc_coeff_token_vlc;
406 static VLC total_zeros_vlc[15];
407 static VLC chroma_dc_total_zeros_vlc[3];
409 static VLC run_vlc[6];
/* Forward declarations: SVQ3 dequant/idct variants and the in-loop
 * deblocking filter (normal and fast paths). */
412 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
413 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
414 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
415 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
417 static always_inline uint32_t pack16to32(int a, int b){
418 #ifdef WORDS_BIGENDIAN
419 return (b&0xFFFF) + (a<<16);
421 return (a&0xFFFF) + (b<<16);
427 * @param h height of the rectangle, should be a constant
428 * @param w width of the rectangle, should be a constant
429 * @param size the size of val (1 or 4), should be a constant
/* NOTE(review): partial extraction — the width-dispatch headers
 * (`if(w==2){ ... }else if(w==4){ ... }else if(w==8){ ... }else if(w==16){`),
 * the early `return`s for h==1/h==2, and the closing braces are missing
 * below; code is left byte-identical. Each surviving group writes one
 * 4-row column of the rectangle at a fixed element width. */
431 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
432 uint8_t *p= (uint8_t*)vp;
433 assert(size==1 || size==4);
/* Destination must be aligned and the stride a multiple of the row width
 * so the wide stores below stay aligned. */
439 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
440 assert((stride&(w-1))==0);
/* 2-byte-wide rows: replicate a 1-byte val into both bytes when size==1. */
442 const uint16_t v= size==4 ? val : val*0x0101;
443 *(uint16_t*)(p + 0*stride)= v;
445 *(uint16_t*)(p + 1*stride)= v;
447 *(uint16_t*)(p + 2*stride)=
448 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte-wide rows: replicate a 1-byte val into all four bytes. */
450 const uint32_t v= size==4 ? val : val*0x01010101;
451 *(uint32_t*)(p + 0*stride)= v;
453 *(uint32_t*)(p + 1*stride)= v;
455 *(uint32_t*)(p + 2*stride)=
456 *(uint32_t*)(p + 3*stride)= v;
458 //gcc can't optimize 64bit math on x86_32
459 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
/* 8-byte rows via one 64-bit store per row (64-bit hosts only). */
460 const uint64_t v= val*0x0100000001ULL;
461 *(uint64_t*)(p + 0*stride)= v;
463 *(uint64_t*)(p + 1*stride)= v;
465 *(uint64_t*)(p + 2*stride)=
466 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte rows via two 64-bit stores per row. */
468 const uint64_t v= val*0x0100000001ULL;
469 *(uint64_t*)(p + 0+0*stride)=
470 *(uint64_t*)(p + 8+0*stride)=
471 *(uint64_t*)(p + 0+1*stride)=
472 *(uint64_t*)(p + 8+1*stride)= v;
474 *(uint64_t*)(p + 0+2*stride)=
475 *(uint64_t*)(p + 8+2*stride)=
476 *(uint64_t*)(p + 0+3*stride)=
477 *(uint64_t*)(p + 8+3*stride)= v;
/* 32-bit fallback for the same widths (two/four 32-bit stores per row). */
479 *(uint32_t*)(p + 0+0*stride)=
480 *(uint32_t*)(p + 4+0*stride)= val;
482 *(uint32_t*)(p + 0+1*stride)=
483 *(uint32_t*)(p + 4+1*stride)= val;
485 *(uint32_t*)(p + 0+2*stride)=
486 *(uint32_t*)(p + 4+2*stride)=
487 *(uint32_t*)(p + 0+3*stride)=
488 *(uint32_t*)(p + 4+3*stride)= val;
490 *(uint32_t*)(p + 0+0*stride)=
491 *(uint32_t*)(p + 4+0*stride)=
492 *(uint32_t*)(p + 8+0*stride)=
493 *(uint32_t*)(p +12+0*stride)=
494 *(uint32_t*)(p + 0+1*stride)=
495 *(uint32_t*)(p + 4+1*stride)=
496 *(uint32_t*)(p + 8+1*stride)=
497 *(uint32_t*)(p +12+1*stride)= val;
499 *(uint32_t*)(p + 0+2*stride)=
500 *(uint32_t*)(p + 4+2*stride)=
501 *(uint32_t*)(p + 8+2*stride)=
502 *(uint32_t*)(p +12+2*stride)=
503 *(uint32_t*)(p + 0+3*stride)=
504 *(uint32_t*)(p + 4+3*stride)=
505 *(uint32_t*)(p + 8+3*stride)=
506 *(uint32_t*)(p +12+3*stride)= val;
/* Fills the per-macroblock prediction caches (intra modes, nnz, mv/ref,
 * mvd, direct flags) from the neighboring macroblocks, for decoding or —
 * when for_deblock is set — for the loop filter.
 * NOTE(review): this function is heavily gutted by the extraction (loop
 * headers, braces and many statements are missing throughout); code is
 * left byte-identical and only comments are added. */
513 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
514 MpegEncContext * const s = &h->s;
515 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
516 int topleft_xy, top_xy, topright_xy, left_xy[2];
517 int topleft_type, top_type, topright_type, left_type[2];
521 //FIXME deblocking could skip the intra and nnz parts.
522 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
525 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* Default (progressive) neighbor addresses in raster mb units. */
527 top_xy = mb_xy - s->mb_stride;
528 topleft_xy = top_xy - 1;
529 topright_xy= top_xy + 1;
530 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: re-derive the neighbor addresses from the macroblock pair and
 * the frame/field coding of each neighbor pair. NOTE(review): the
 * conditional headers that consume the `? :` fragments below are missing
 * from this extraction; code left byte-identical. */
540 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
541 const int top_pair_xy = pair_xy - s->mb_stride;
542 const int topleft_pair_xy = top_pair_xy - 1;
543 const int topright_pair_xy = top_pair_xy + 1;
544 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
545 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
546 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
547 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
548 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
549 const int bottom = (s->mb_y & 1);
550 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
552 ? !curr_mb_frame_flag // bottom macroblock
553 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
555 top_xy -= s->mb_stride;
558 ? !curr_mb_frame_flag // bottom macroblock
559 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
561 topleft_xy -= s->mb_stride;
564 ? !curr_mb_frame_flag // bottom macroblock
565 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
567 topright_xy -= s->mb_stride;
/* Left neighbors: when the left pair is coded opposite to the current
 * macroblock, both left_xy entries address the left pair. */
569 if (left_mb_frame_flag != curr_mb_frame_flag) {
570 left_xy[1] = left_xy[0] = pair_xy - 1;
571 if (curr_mb_frame_flag) {
592 left_xy[1] += s->mb_stride;
/* Publish the resolved neighbor addresses for later use. */
605 h->top_mb_xy = top_xy;
606 h->left_mb_xy[0] = left_xy[0];
607 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbor counts if its slice_table entry is valid
 * (< 255), i.e. across slice boundaries too. */
611 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
612 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
613 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblocking of an inter MB: reload this MB's own nnz bits (packed
 * at non_zero_count[mb_xy][14] by write_back_non_zero_count) and its
 * mv/ref data into the caches. */
615 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
617 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
619 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
620 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
621 if(USES_LIST(mb_type,list)){
622 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
623 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
624 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
625 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
631 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
632 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
634 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
635 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
/* List unused by this MB: clear the caches to 0 mv / LIST_NOT_USED. */
637 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
638 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decoding path: a neighbor counts only if it belongs to this slice. */
643 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
644 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
645 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
646 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
647 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra MB: start from "everything available" bitmasks, then clear the
 * bits for each unusable neighbor (absent, or inter while constrained
 * intra prediction is on). */
650 if(IS_INTRA(mb_type)){
651 h->topleft_samples_available=
652 h->top_samples_available=
653 h->left_samples_available= 0xFFFF;
654 h->topright_samples_available= 0xEEEA;
656 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
657 h->topleft_samples_available= 0xB3FF;
658 h->top_samples_available= 0x33FF;
659 h->topright_samples_available= 0x26EA;
662 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
663 h->topleft_samples_available&= 0xDF5F;
664 h->left_samples_available&= 0x5F5F;
668 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
669 h->topleft_samples_available&= 0x7FFF;
671 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
672 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: import the neighbors' stored 4x4 prediction modes into the
 * cache row/column, or a fallback `pred` value (set on lines missing
 * from this extraction) when a neighbor is unusable. */
674 if(IS_INTRA4x4(mb_type)){
675 if(IS_INTRA4x4(top_type)){
676 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
677 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
678 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
679 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
682 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
687 h->intra4x4_pred_mode_cache[4+8*0]=
688 h->intra4x4_pred_mode_cache[5+8*0]=
689 h->intra4x4_pred_mode_cache[6+8*0]=
690 h->intra4x4_pred_mode_cache[7+8*0]= pred;
/* Left column, per left half (i selects top/bottom half; left_block[]
 * maps to the neighbor's block indices). */
693 if(IS_INTRA4x4(left_type[i])){
694 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
695 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
698 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
703 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
704 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
719 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
/* Non-zero-count cache: copy the top neighbor's stored nnz values into
 * the top cache row (luma 4..7/3, then chroma entries)... */
721 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
722 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
723 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
724 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
726 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
727 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
729 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
730 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
/* ...or mark the whole row unavailable: 0 for CABAC inter (treated as
 * "no coefficients"), 64 otherwise (the "not available" sentinel). */
733 h->non_zero_count_cache[4+8*0]=
734 h->non_zero_count_cache[5+8*0]=
735 h->non_zero_count_cache[6+8*0]=
736 h->non_zero_count_cache[7+8*0]=
738 h->non_zero_count_cache[1+8*0]=
739 h->non_zero_count_cache[2+8*0]=
741 h->non_zero_count_cache[1+8*3]=
742 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Same for the two left neighbors (top/bottom half each). */
746 for (i=0; i<2; i++) {
748 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
749 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
750 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
751 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
753 h->non_zero_count_cache[3+8*1 + 2*8*i]=
754 h->non_zero_count_cache[3+8*2 + 2*8*i]=
755 h->non_zero_count_cache[0+8*1 + 8*i]=
756 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CABAC: fetch neighbor cbp values (the surrounding conditionals are
 * missing from this extraction). */
763 h->top_cbp = h->cbp_table[top_xy];
764 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
772 } else if(IS_INTRA(mb_type)) {
778 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
781 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches for inter (or direct) MBs:
 * import each usable neighbor's mv/ref rows, otherwise store 0 mvs with
 * the LIST_NOT_USED / PART_NOT_AVAILABLE sentinels. */
786 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
788 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
789 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
790 /*if(!h->mv_cache_clean[list]){
791 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
792 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
793 h->mv_cache_clean[list]= 1;
797 h->mv_cache_clean[list]= 0;
/* Top row: bottom b-row of the top neighbor. */
799 if(USES_LIST(top_type, list)){
800 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
801 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
805 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
806 h->ref_cache[list][scan8[0] + 0 - 1*8]=
807 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
808 h->ref_cache[list][scan8[0] + 2 - 1*8]=
809 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
814 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
815 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
818 //FIXME unify cleanup or sth
/* Left column, upper half, from left neighbor 0. */
819 if(USES_LIST(left_type[0], list)){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
823 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
824 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
825 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
828 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
829 h->ref_cache[list][scan8[0] - 1 + 0*8]=
830 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Left column, lower half, from left neighbor 1. */
833 if(USES_LIST(left_type[1], list)){
834 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
835 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
837 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
838 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
839 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
842 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
843 h->ref_cache[list][scan8[0] - 1 + 2*8]=
844 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
845 assert((!left_type[0]) == (!left_type[1]));
/* Corner neighbors are only needed for deblocking / temporal direct. */
848 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
851 if(USES_LIST(topleft_type, list)){
852 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
853 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
854 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
855 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
857 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
858 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
861 if(USES_LIST(topright_type, list)){
862 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
863 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
864 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
865 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
867 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
868 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Pre-mark the right/bottom guard entries of the cache as unavailable. */
874 h->ref_cache[list][scan8[5 ]+1] =
875 h->ref_cache[list][scan8[7 ]+1] =
876 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
877 h->ref_cache[list][scan8[4 ]] =
878 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
879 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
880 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
881 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
882 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
883 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
886 /* XXX beurk, Load mvd */
/* CABAC only: mirror the neighbors' motion vector differences into
 * mvd_cache, same layout as the mv_cache fills above. */
887 if(USES_LIST(top_type, list)){
888 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
890 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
891 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
892 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
895 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
896 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
897 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
899 if(USES_LIST(left_type[0], list)){
900 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
901 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
902 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
904 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
905 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
907 if(USES_LIST(left_type[1], list)){
908 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
909 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
910 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
912 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
913 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
/* Zero the same guard entries as for mv_cache above. */
915 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
916 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
917 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
918 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
919 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: build the direct-mode flag cache from the neighbors'
 * direct_table entries (per 8x8 block for 8x8-partitioned neighbors). */
921 if(h->slice_type == B_TYPE){
922 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
924 if(IS_DIRECT(top_type)){
925 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
926 }else if(IS_8X8(top_type)){
927 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
928 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
929 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
931 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
934 if(IS_DIRECT(left_type[0]))
935 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
936 else if(IS_8X8(left_type[0]))
937 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
939 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
941 if(IS_DIRECT(left_type[1]))
942 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
943 else if(IS_8X8(left_type[1]))
944 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
946 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field neighbor fixups: apply MAP_F2F to every cached
 * neighbor entry. NOTE(review): the MAP_MVS wrapper macro definition and
 * the #undef/#define scaffolding between the two MAP_F2F variants are
 * missing from this extraction. */
952 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
953 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
955 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
956 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
957 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
958 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
959 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
960 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
961 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* Frame neighbor seen from a field MB: double the ref (field refs) and
 * halve the vertical mv. */
963 #define MAP_F2F(idx, mb_type)\
964 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
965 h->ref_cache[list][idx] <<= 1;\
966 h->mv_cache[list][idx][1] /= 2;\
967 h->mvd_cache[list][idx][1] /= 2;\
/* Field neighbor seen from a frame MB: the inverse mapping. */
972 #define MAP_F2F(idx, mb_type)\
973 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
974 h->ref_cache[list][idx] >>= 1;\
975 h->mv_cache[list][idx][1] <<= 1;\
976 h->mvd_cache[list][idx][1] <<= 1;\
/* Count how many of the top/left neighbors used the 8x8 transform. */
986 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Stores the current MB's intra4x4 prediction modes from the cache back
 * into the per-picture intra4x4_pred_mode array (bottom row + right
 * column, the parts future neighbors will read).
 * NOTE(review): the closing brace is missing from this extraction. */
989 static inline void write_back_intra_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
993 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
994 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
995 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
996 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
997 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
998 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
999 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1003 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* NOTE(review): the loop headers, the `if(status<0) return -1;` error
 * paths and the final return are missing from this extraction. The top[]
 * and left[] tables remap each mode: -1 = mode is fine without that
 * neighbor, >=0 = substitute mode, and (implicitly, on the missing
 * lines) invalid modes error out. */
1005 static inline int check_intra4x4_pred_mode(H264Context *h){
1006 MpegEncContext * const s = &h->s;
1007 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1008 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1011 if(!(h->top_samples_available&0x8000)){
1013 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1015 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1018 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1023 if(!(h->left_samples_available&0x8000)){
1025 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1027 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1030 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1036 } //FIXME cleanup like next
1039 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a whole-MB (16x16 or
 * chroma 8x8) prediction mode passed in as `mode`.
 * NOTE(review): the substitution/return statements between the visible
 * checks are missing from this extraction. */
1041 static inline int check_intra_pred_mode(H264Context *h, int mode){
1042 MpegEncContext * const s = &h->s;
1043 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1044 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1046 if(mode < 0 || mode > 6) {
1047 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1051 if(!(h->top_samples_available&0x8000)){
1054 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1059 if(!(h->left_samples_available&0x8000)){
1062 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1071 * gets the predicted intra4x4 prediction mode.
/**
 * Gets the predicted (most probable) intra4x4 mode for block n:
 * the minimum of the left and top neighbours' cached modes.
 * A negative cache entry marks a missing neighbour, in which case
 * DC_PRED is returned (the non-DC return path is not visible here).
 * @param n the 4x4 block index
 */
1073 static inline int pred_intra_mode(H264Context *h, int n){
1074 const int index8= scan8[n];
1075 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1076 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1077 const int min= FFMIN(left, top);
1079 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* Missing neighbour -> fall back to the DC mode. */
1081 if(min<0) return DC_PRED;
/**
 * Copies the per-4x4-block non-zero coefficient counts from the
 * decode-order cache back into this macroblock's non_zero_count[]
 * entry (right column / bottom row of the luma plane plus the chroma
 * blocks), so that later macroblocks can use them as neighbour context.
 */
1085 static inline void write_back_non_zero_count(H264Context *h){
1086 MpegEncContext * const s = &h->s;
1087 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Luma: bottom row and right column of the 4x4 grid. */
1089 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1090 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1091 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1092 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1093 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1094 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1095 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma blocks of both planes. */
1097 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1098 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1099 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1101 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1102 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1103 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1106 // store all luma nnzs, for deblocking
/* One "has coefficients" bit per 4x4 luma block, packed into 16 bits. */
1109 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1110 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1115 * gets the predicted number of non zero coefficients.
1116 * @param n block index
/**
 * Gets the predicted number of non-zero coefficients for block n from
 * the left and top neighbour counts in the cache.
 * NOTE(review): the line combining left/top into i is not visible in
 * this chunk; the visible code rounds i to half when i < 64 (i.e. when
 * both neighbours are available -- confirm against the full source).
 * @param n block index
 */
1118 static inline int pred_non_zero_count(H264Context *h, int n){
1119 const int index8= scan8[n];
1120 const int left= h->non_zero_count_cache[index8 - 1];
1121 const int top = h->non_zero_count_cache[index8 - 8];
/* Average of the two neighbours, rounded up. */
1124 if(i<64) i= (i+1)>>1;
1126 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal (top-right) motion-vector predictor C for block i.
 * On return *C points at the MV to use and the function's return value is
 * its reference index; when the top-right block is unavailable the
 * top-left neighbour (i-8-1) is used instead. Under MBAFF, frame/field
 * mismatches with the neighbour are handled by SET_DIAG_MV, which fetches
 * the MV directly from the colocated picture data and rescales its
 * vertical component (x2 or /2). Do not add comments inside the macro:
 * its lines are glued together by trailing backslashes.
 */
1131 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1132 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1134 /* there is no consistent mapping of mvs to neighboring locations that will
1135 * make mbaff happy, so we can't move all this logic to fill_caches */
1137 MpegEncContext *s = &h->s;
1138 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot used to return a rescaled MV through *C. */
1140 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1141 *C = h->mv_cache[list][scan8[0]-2];
1144 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1145 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1146 if(IS_INTERLACED(mb_types[topright_xy])){
1147 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1148 const int x4 = X4, y4 = Y4;\
1149 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1150 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1151 return LIST_NOT_USED;\
1152 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1153 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1154 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1155 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
/* Frame MB, interlaced top-right neighbour: halve the field MV's y. */
1157 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1160 if(topright_ref == PART_NOT_AVAILABLE
1161 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1162 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1164 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1165 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1168 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1169 && i >= scan8[0]+8){
1170 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1171 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Common (non-MBAFF-mismatch) case: use the cached top-right entry. */
1177 if(topright_ref != PART_NOT_AVAILABLE){
1178 *C= h->mv_cache[list][ i - 8 + part_width ];
1179 return topright_ref;
1181 tprintf("topright MV not available\n");
/* Fallback: top-left neighbour. */
1183 *C= h->mv_cache[list][ i - 8 - 1 ];
1184 return h->ref_cache[list][ i - 8 - 1 ];
1189 * gets the predicted MV.
1190 * @param n the block index
1191 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1192 * @param mx the x component of the predicted motion vector
1193 * @param my the y component of the predicted motion vector
/**
 * Gets the predicted MV for block n as the component-wise median of the
 * left (A), top (B) and diagonal (C) neighbour MVs; when exactly one
 * neighbour shares the target reference index, that neighbour's MV is
 * used directly (per the H.264 median prediction rules).
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param list reference picture list (0 or 1)
 * @param ref the reference index this partition uses
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
1195 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1196 const int index8= scan8[n];
1197 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1198 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1199 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1200 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1202 int diagonal_ref, match_count;
1204 assert(part_width==1 || part_width==2 || part_width==4);
1214 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbours use the same reference as this partition. */
1215 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1216 tprintf("pred_motion match_count=%d\n", match_count);
1217 if(match_count > 1){ //most common
1218 *mx= mid_pred(A[0], B[0], C[0]);
1219 *my= mid_pred(A[1], B[1], C[1]);
1220 }else if(match_count==1){
1224 }else if(top_ref==ref){
/* No unique match: median, unless only the left neighbour exists. */
1232 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1236 *mx= mid_pred(A[0], B[0], C[0]);
1237 *my= mid_pred(A[1], B[1], C[1]);
1241 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1245 * gets the directionally predicted 16x8 MV.
1246 * @param n the block index
1247 * @param mx the x component of the predicted motion vector
1248 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for a 16x8 partition: the top
 * partition prefers the top neighbour (B), the bottom partition prefers
 * the left neighbour (A); when the preferred neighbour does not share
 * the reference index, it falls back to the generic median prediction.
 * @param n the block index
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
1250 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top 16x8 half: try the top neighbour first. */
1252 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1253 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1255 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom 16x8 half: try the left neighbour first. */
1263 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1264 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1266 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1268 if(left_ref == ref){
/* Fallback: standard median MV prediction. */
1276 pred_motion(h, n, 4, list, ref, mx, my);
1280 * gets the directionally predicted 8x16 MV.
1281 * @param n the block index
1282 * @param mx the x component of the predicted motion vector
1283 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for an 8x16 partition: the left
 * partition prefers the left neighbour (A), the right partition prefers
 * the diagonal/top-right neighbour (C); when the preferred neighbour
 * does not share the reference index, it falls back to median prediction.
 * @param n the block index
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
1285 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left 8x16 half: try the left neighbour first. */
1287 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1288 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1290 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1292 if(left_ref == ref){
/* Right 8x16 half: try the diagonal neighbour of block 4. */
1301 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1303 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1305 if(diagonal_ref == ref){
/* Fallback: standard median MV prediction. */
1313 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Gets the MV for a P-skip macroblock: zero when either the top or left
 * neighbour is unavailable, or when either uses reference 0 with a zero
 * MV (the H.264 P_Skip special case); otherwise the normal 16x16 median
 * prediction with list 0, ref 0 is used.
 * @param mx the x component of the predicted motion vector (output)
 * @param my the y component of the predicted motion vector (output)
 */
1316 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1317 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1318 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1320 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* Spec special case: skip MV is forced to zero in these situations. */
1322 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1323 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1324 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* Otherwise: regular 16x16 median prediction against ref 0. */
1330 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct-mode distance scale factors for every
 * list-0 reference: tb/td POC distances are clipped to [-128,127],
 * tx = (16384 + |td|/2) / td, and the factor is clip((tb*tx+32)>>6,
 * -1024, 1023), matching the H.264 temporal direct scaling equations.
 * The field variants (visible at the end) duplicate each frame factor
 * for the two field parities.
 */
1335 static inline void direct_dist_scale_factor(H264Context * const h){
1336 const int poc = h->s.current_picture_ptr->poc;
1337 const int poc1 = h->ref_list[1][0].poc;
1339 for(i=0; i<h->ref_count[0]; i++){
1340 int poc0 = h->ref_list[0][i].poc;
1341 int td = clip(poc1 - poc0, -128, 127);
1342 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* Equal POCs: no scaling (identity factor of 256). */
1343 h->dist_scale_factor[i] = 256;
1345 int tb = clip(poc - poc0, -128, 127);
1346 int tx = (16384 + (FFABS(td) >> 1)) / td;
1347 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* Field factors: one copy per field parity. */
1351 for(i=0; i<h->ref_count[0]; i++){
1352 h->dist_scale_factor_field[2*i] =
1353 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's reference counts/POCs (so the colocated
 * picture of a future B-frame can read them), then builds the
 * map_col_to_list0 tables that map each reference index of the colocated
 * (list-1 ref 0) picture to the matching index in our list 0 -- needed
 * by temporal direct mode. Spatial-direct B-frames and non-B frames
 * skip the mapping. Field variants duplicate each mapping per parity.
 */
1357 static inline void direct_ref_list_init(H264Context * const h){
1358 MpegEncContext * const s = &h->s;
1359 Picture * const ref1 = &h->ref_list[1][0];
1360 Picture * const cur = s->current_picture_ptr;
1362 if(cur->pict_type == I_TYPE)
1363 cur->ref_count[0] = 0;
1364 if(cur->pict_type != B_TYPE)
1365 cur->ref_count[1] = 0;
/* Publish our ref counts and POCs for later colocated lookups. */
1366 for(list=0; list<2; list++){
1367 cur->ref_count[list] = h->ref_count[list];
1368 for(j=0; j<h->ref_count[list]; j++)
1369 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1371 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
/* Match each colocated reference POC to an index in our list 0. */
1373 for(list=0; list<2; list++){
1374 for(i=0; i<ref1->ref_count[list]; i++){
1375 const int poc = ref1->ref_poc[list][i];
1376 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1377 for(j=0; j<h->ref_count[list]; j++)
1378 if(h->ref_list[list][j].poc == poc){
1379 h->map_col_to_list0[list][i] = j;
/* Field variants: one entry per field parity. */
1385 for(list=0; list<2; list++){
1386 for(i=0; i<ref1->ref_count[list]; i++){
1387 j = h->map_col_to_list0[list][i];
1388 h->map_col_to_list0_field[list][2*i] = 2*j;
1389 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Computes the MVs and reference indices of a B-direct macroblock (or of
 * its direct-coded 8x8 sub-blocks when is_b8x8), writing them into the
 * mv/ref caches and updating *mb_type / sub_mb_type to the derived
 * partitioning. Two modes: spatial direct (refs from neighbours, MVs via
 * median prediction, zeroed when the colocated block is a stationary
 * ref-0 block) and temporal direct (colocated list-1 MVs scaled by the
 * precomputed dist_scale_factor, with MBAFF frame<->field rescaling).
 * NOTE(review): many interior lines (loop headers, else branches,
 * closing braces) are not visible in this chunk of the file.
 */
1395 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1396 MpegEncContext * const s = &h->s;
1397 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1398 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1399 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1400 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* Colocated picture's MVs and reference indices for both lists. */
1401 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1402 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1403 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1404 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1405 const int is_b8x8 = IS_8X8(*mb_type);
1409 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Derive our partitioning from the colocated MB's type. */
1410 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1411 /* FIXME save sub mb types from previous frames (or derive from MVs)
1412 * so we know exactly what block size to use */
1413 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1414 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1415 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1419 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1420 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1423 *mb_type |= MB_TYPE_DIRECT2;
1425 *mb_type |= MB_TYPE_INTERLACED;
1427 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- Spatial direct prediction ---- */
1429 if(h->direct_spatial_mv_pred){
1434 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1436 /* ref = min(neighbors) */
1437 for(list=0; list<2; list++){
1438 int refa = h->ref_cache[list][scan8[0] - 1];
1439 int refb = h->ref_cache[list][scan8[0] - 8];
1440 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1442 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1444 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1446 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* No usable reference in either list: direct-zero prediction. */
1452 if(ref[0] < 0 && ref[1] < 0){
1453 ref[0] = ref[1] = 0;
1454 mv[0][0] = mv[0][1] =
1455 mv[1][0] = mv[1][1] = 0;
1457 for(list=0; list<2; list++){
1459 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1461 mv[list][0] = mv[list][1] = 0;
/* Drop the list(s) with no valid reference from the MB type. */
1466 *mb_type &= ~MB_TYPE_P0L1;
1467 sub_mb_type &= ~MB_TYPE_P0L1;
1468 }else if(ref[0] < 0){
1469 *mb_type &= ~MB_TYPE_P0L0;
1470 sub_mb_type &= ~MB_TYPE_P0L0;
/* 16x16 spatial direct: fill the whole MB at once. */
1473 if(IS_16X16(*mb_type)){
1474 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1475 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1476 if(!IS_INTRA(mb_type_col)
1477 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1478 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1479 && (h->x264_build>33 || !h->x264_build)))){
1481 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1483 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1489 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1490 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* Per-8x8 spatial direct, zeroing MVs over stationary colocated blocks. */
1493 for(i8=0; i8<4; i8++){
1494 const int x8 = i8&1;
1495 const int y8 = i8>>1;
1497 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1499 h->sub_mb_type[i8] = sub_mb_type;
1501 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1502 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1503 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1504 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1507 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1508 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1509 && (h->x264_build>33 || !h->x264_build)))){
1510 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1511 if(IS_SUB_8X8(sub_mb_type)){
1512 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1513 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1515 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1517 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1520 for(i4=0; i4<4; i4++){
1521 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1522 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1524 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1526 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1532 }else{ /* direct temporal mv pred */
1533 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1534 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MB: switch to the per-field maps and scale factors. */
1537 if(IS_INTERLACED(*mb_type)){
1538 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1539 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1540 dist_scale_factor = h->dist_scale_factor_field;
/* Frame/field mismatch with the colocated MB: rescale coordinates. */
1542 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1543 /* FIXME assumes direct_8x8_inference == 1 */
1544 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1545 int mb_types_col[2];
1548 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1549 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1550 | (*mb_type & MB_TYPE_INTERLACED);
1551 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1553 if(IS_INTERLACED(*mb_type)){
1554 /* frame to field scaling */
1555 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1556 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1558 l1ref0 -= 2*h->b8_stride;
1559 l1ref1 -= 2*h->b8_stride;
1560 l1mv0 -= 4*h->b_stride;
1561 l1mv1 -= 4*h->b_stride;
1565 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1566 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1568 *mb_type |= MB_TYPE_16x8;
1570 *mb_type |= MB_TYPE_8x8;
1572 /* field to frame scaling */
1573 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1574 * but in MBAFF, top and bottom POC are equal */
1575 int dy = (s->mb_y&1) ? 1 : 2;
1577 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1578 l1ref0 += dy*h->b8_stride;
1579 l1ref1 += dy*h->b8_stride;
1580 l1mv0 += 2*dy*h->b_stride;
1581 l1mv1 += 2*dy*h->b_stride;
1584 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1586 *mb_type |= MB_TYPE_16x16;
1588 *mb_type |= MB_TYPE_8x8;
/* Per-8x8 temporal direct with field y-rescaling. */
1591 for(i8=0; i8<4; i8++){
1592 const int x8 = i8&1;
1593 const int y8 = i8>>1;
1595 const int16_t (*l1mv)[2]= l1mv0;
1597 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1599 h->sub_mb_type[i8] = sub_mb_type;
1601 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1602 if(IS_INTRA(mb_types_col[y8])){
1603 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1604 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1609 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1611 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1613 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1616 scale = dist_scale_factor[ref0];
1617 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1620 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1621 int my_col = (mv_col[1]<<y_shift)/2;
1622 int mx = (scale * mv_col[0] + 128) >> 8;
1623 int my = (scale * my_col + 128) >> 8;
1624 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1625 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1632 /* one-to-one mv scaling */
/* 16x16 temporal direct: scale the single colocated MV. */
1634 if(IS_16X16(*mb_type)){
1635 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1636 if(IS_INTRA(mb_type_col)){
1637 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1638 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1639 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1641 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1642 : map_col_to_list0[1][l1ref1[0]];
1643 const int scale = dist_scale_factor[ref0];
1644 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1646 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1647 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1648 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1649 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1650 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
/* Per-8x8 (and per-4x4) temporal direct scaling. */
1653 for(i8=0; i8<4; i8++){
1654 const int x8 = i8&1;
1655 const int y8 = i8>>1;
1657 const int16_t (*l1mv)[2]= l1mv0;
1659 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1661 h->sub_mb_type[i8] = sub_mb_type;
1662 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1663 if(IS_INTRA(mb_type_col)){
1664 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1665 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1670 ref0 = l1ref0[x8 + y8*h->b8_stride];
1672 ref0 = map_col_to_list0[0][ref0];
1674 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1677 scale = dist_scale_factor[ref0];
1679 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1680 if(IS_SUB_8X8(sub_mb_type)){
1681 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1682 int mx = (scale * mv_col[0] + 128) >> 8;
1683 int my = (scale * mv_col[1] + 128) >> 8;
1684 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1685 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1687 for(i4=0; i4<4; i4++){
1688 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1689 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1690 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1691 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1692 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1693 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies this macroblock's motion vectors, reference indices, (with
 * CABAC) MV deltas, and (for B slices) the direct-mode flags from the
 * decode caches back into the picture-wide arrays, for use by later
 * macroblocks and the deblocking filter.
 */
1700 static inline void write_back_motion(H264Context *h, int mb_type){
1701 MpegEncContext * const s = &h->s;
1702 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1703 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* Unused list 0: mark the references so neighbours see LIST_NOT_USED. */
1706 if(!USES_LIST(mb_type, 0))
1707 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1709 for(list=0; list<2; list++){
1711 if(!USES_LIST(mb_type, list))
/* Two 64-bit stores copy one row of four cached MVs at a time. */
1715 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1716 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1718 if( h->pps.cabac ) {
1719 if(IS_SKIP(mb_type))
1720 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1723 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1724 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 block. */
1729 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1730 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1731 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1732 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1733 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B + CABAC: remember which 8x8 sub-blocks were direct-coded. */
1737 if(h->slice_type == B_TYPE && h->pps.cabac){
1738 if(IS_8X8(mb_type)){
1739 uint8_t *direct_table = &h->direct_table[b8_xy];
1740 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1741 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1742 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1748 * Decodes a network abstraction layer unit.
1749 * @param consumed is the number of bytes used as input
1750 * @param length is the length of the array
1751 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1752 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a network abstraction layer unit: parses the NAL header byte
 * (nal_ref_idc, nal_unit_type) and removes the 0x000003 emulation
 * prevention bytes from the RBSP. When the payload contains no escapes
 * the input buffer is returned directly; otherwise the unescaped data
 * is copied into h->rbsp_buffer.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
1754 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1758 // src[0]&0x80; //forbidden bit
1759 h->nal_ref_idc= src[0]>>5;
1760 h->nal_unit_type= src[0]&0x1F;
1764 for(i=0; i<length; i++)
1765 printf("%2X ", src[i]);
/* First pass: find the first escape sequence (or a start code). */
1767 for(i=0; i+1<length; i+=2){
1768 if(src[i]) continue;
1769 if(i>0 && src[i-1]==0) i--;
1770 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1772 /* startcode, so we must be past the end */
/* No escapes: return the payload in place, header byte skipped. */
1779 if(i>=length-1){ //no escaped 0
1780 *dst_length= length;
1781 *consumed= length+1; //+1 for the header
1785 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1786 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1813 * @param src the data which should be escaped
1814 * @param dst the target buffer, dst+1 == src is allowed as a special case
1815 * @param length the length of the src data
1816 * @param dst_length the length of the dst array
1817 * @returns length of escaped data in bytes or -1 if an error occurred
/**
 * Escapes RBSP data into a NAL unit: writes the NAL header byte, then
 * inserts 0x03 emulation prevention bytes wherever the payload would
 * otherwise contain a 0x000000..0x000003 sequence. When no escapes are
 * needed the payload is memcpy'd directly after the header.
 * @param src the data which should be escaped
 * @param dst the target buffer, dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @returns length of escaped data in bytes or -1 if an error occurred
 */
1819 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1820 int i, escape_count, si, di;
1824 assert(dst_length>0);
/* NAL header: nal_ref_idc (3 bits) + nal_unit_type (5 bits). */
1826 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1828 if(length==0) return 1;
/* First pass: count how many escape bytes will be needed. */
1831 for(i=0; i<length; i+=2){
1832 if(src[i]) continue;
1833 if(i>0 && src[i-1]==0)
1835 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
/* Fast path: no escapes needed, copy payload verbatim. */
1841 if(escape_count==0){
1843 memcpy(dst+1, src, length);
1847 if(length + escape_count + 1> dst_length)
1850 //this should be damn rare (hopefully)
1852 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1853 temp= h->rbsp_buffer;
1854 //printf("encoding esc\n");
/* Second pass: copy bytes, inserting 0x03 after each 0x0000 pair. */
1859 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1860 temp[di++]= 0; si++;
1861 temp[di++]= 0; si++;
1863 temp[di++]= src[si++];
1866 temp[di++]= src[si++];
1868 memcpy(dst+1, temp, length+escape_count);
1870 assert(di == length+escape_count);
1876 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of MPEG-4
/**
 * Writes the RBSP trailing bits: a stop bit followed by zero bits up to
 * the next byte boundary (the stop-bit write is not visible in this
 * chunk; only the zero padding is).
 */
1878 static void encode_rbsp_trailing(PutBitContext *pb){
1881 length= (-put_bits_count(pb))&7;
1882 if(length) put_bits(pb, length, 0);
1887 * identifies the exact end of the bitstream
1888 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream by locating the RBSP stop
 * bit in the last byte.
 * @return the length of the trailing, or 0 if damaged
 */
1890 static int decode_rbsp_trailing(uint8_t *src){
1894 tprintf("rbsp trailing %X\n", v);
1904 * IDCT transforms the 16 dc values and dequantizes them.
1905 * @param qp quantization parameter
/**
 * Applies the 4x4 Hadamard inverse transform to the 16 luma DC values
 * (stored at the DC positions of the 16 4x4 blocks inside *block) and
 * dequantizes them with qmul, writing the results back in place.
 * Column pass first (into temp[]), then row pass with dequantization.
 * @param qp quantization parameter
 */
1907 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1910 int temp[16]; //FIXME check if this is a good idea
/* Offsets of the DC coefficients of the 16 4x4 blocks within *block. */
1911 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1912 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1914 //memset(block, 64, 2*256);
/* Vertical butterfly pass. */
1917 const int offset= y_offset[i];
1918 const int z0= block[offset+stride*0] + block[offset+stride*4];
1919 const int z1= block[offset+stride*0] - block[offset+stride*4];
1920 const int z2= block[offset+stride*1] - block[offset+stride*5];
1921 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Horizontal butterfly pass with dequantization and rounding. */
1930 const int offset= x_offset[i];
1931 const int z0= temp[4*0+i] + temp[4*2+i];
1932 const int z1= temp[4*0+i] - temp[4*2+i];
1933 const int z2= temp[4*1+i] - temp[4*3+i];
1934 const int z3= temp[4*1+i] + temp[4*3+i];
1936 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1937 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1938 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1939 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1945 * DCT transforms the 16 dc values.
1946 * @param qp quantization parameter ??? FIXME
/**
 * Applies the forward 4x4 Hadamard transform to the 16 luma DC values
 * (encoder side counterpart of h264_luma_dc_dequant_idct_c); the result
 * is halved (>>1) per the spec's forward DC transform.
 * @param qp quantization parameter ??? FIXME
 */
1948 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1949 // const int qmul= dequant_coeff[qp][0];
1951 int temp[16]; //FIXME check if this is a good idea
/* Offsets of the DC coefficients of the 16 4x4 blocks within *block. */
1952 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1953 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Vertical butterfly pass. */
1956 const int offset= y_offset[i];
1957 const int z0= block[offset+stride*0] + block[offset+stride*4];
1958 const int z1= block[offset+stride*0] - block[offset+stride*4];
1959 const int z2= block[offset+stride*1] - block[offset+stride*5];
1960 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Horizontal butterfly pass with the final >>1 normalization. */
1969 const int offset= x_offset[i];
1970 const int z0= temp[4*0+i] + temp[4*2+i];
1971 const int z1= temp[4*0+i] - temp[4*2+i];
1972 const int z2= temp[4*1+i] - temp[4*3+i];
1973 const int z3= temp[4*1+i] + temp[4*3+i];
1975 block[stride*0 +offset]= (z0 + z3)>>1;
1976 block[stride*2 +offset]= (z1 + z2)>>1;
1977 block[stride*8 +offset]= (z1 - z2)>>1;
1978 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform plus dequantization of the four chroma
 * DC values (stored at the DC positions of the four chroma 4x4 blocks).
 * NOTE(review): the declarations of the intermediates and the butterfly
 * lines between the loads and the stores are not visible in this chunk.
 */
1986 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1987 const int stride= 16*2;
1988 const int xStride= 16;
/* Load the 2x2 DC values. */
1991 a= block[stride*0 + xStride*0];
1992 b= block[stride*0 + xStride*1];
1993 c= block[stride*1 + xStride*0];
1994 d= block[stride*1 + xStride*1];
/* Store the transformed, dequantized values. */
2001 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
2002 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
2003 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
2004 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward Hadamard transform of the four chroma DC values (encoder
 * side counterpart of chroma_dc_dequant_idct_c), written back in place.
 * NOTE(review): the intermediate declarations and butterfly lines
 * between the loads and the stores are not visible in this chunk.
 */
2008 static void chroma_dc_dct_c(DCTELEM *block){
2009 const int stride= 16*2;
2010 const int xStride= 16;
/* Load the 2x2 DC values. */
2013 a= block[stride*0 + xStride*0];
2014 b= block[stride*0 + xStride*1];
2015 c= block[stride*1 + xStride*0];
2016 d= block[stride*1 + xStride*1];
/* Store the transformed values. */
2023 block[stride*0 + xStride*0]= (a+c);
2024 block[stride*0 + xStride*1]= (e+b);
2025 block[stride*1 + xStride*0]= (a-c);
2026 block[stride*1 + xStride*1]= (e-b);
2031 * gets the chroma qp.
2033 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2035 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Computes the 4x4 H.264 forward transform of the difference between
 * two 4x4 pixel blocks (src1 - src2), writing the coefficients into
 * *block. Horizontal butterfly pass first, then the vertical pass.
 * NOTE(review): the loop headers and closing braces of the two passes
 * are not visible in this chunk.
 */
2040 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2042 //FIXME try int temp instead of block
/* Horizontal pass over the pixel differences of each row. */
2045 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2046 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2047 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2048 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2049 const int z0= d0 + d3;
2050 const int z3= d0 - d3;
2051 const int z1= d1 + d2;
2052 const int z2= d1 - d2;
2054 block[0 + 4*i]= z0 + z1;
2055 block[1 + 4*i]= 2*z3 + z2;
2056 block[2 + 4*i]= z0 - z1;
2057 block[3 + 4*i]= z3 - 2*z2;
/* Vertical pass over the columns of the intermediate coefficients. */
2061 const int z0= block[0*4 + i] + block[3*4 + i];
2062 const int z3= block[0*4 + i] - block[3*4 + i];
2063 const int z1= block[1*4 + i] + block[2*4 + i];
2064 const int z2= block[1*4 + i] - block[2*4 + i];
2066 block[0*4 + i]= z0 + z1;
2067 block[1*4 + i]= 2*z3 + z2;
2068 block[2*4 + i]= z0 - z1;
2069 block[3*4 + i]= z3 - 2*z2;
2074 //FIXME need to check that this doesn't overflow signed 32 bit for low qp; I am not sure, it's very close
2075 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 block of coefficients in place, with dead-zone bias
 * (1/3 for intra, 1/6 for inter). When seperate_dc is set, the DC
 * coefficient is quantized with a different shift (QUANT_SHIFT-2 for
 * luma DC, QUANT_SHIFT+1 otherwise) before the AC loop.
 * NOTE(review): the AC loop header, the else branches and the
 * last_non_zero bookkeeping are not visible in this chunk.
 * @return the index of the last non-zero coefficient
 */
2076 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2078 const int * const quant_table= quant_coeff[qscale];
/* Dead-zone bias: larger for intra blocks. */
2079 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2 let one unsigned compare detect |level| below the dead zone. */
2080 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2081 const unsigned int threshold2= (threshold1<<1);
/* Luma DC path: coarser shift, dedicated quant table row. */
2087 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2088 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2089 const unsigned int dc_threshold2= (dc_threshold1<<1);
2091 int level= block[0]*quant_coeff[qscale+18][0];
2092 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2094 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2097 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2100 // last_non_zero = i;
/* Chroma DC path: finer shift. */
2105 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2106 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2107 const unsigned int dc_threshold2= (dc_threshold1<<1);
2109 int level= block[0]*quant_table[0];
2110 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2112 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2115 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2118 // last_non_zero = i;
/* AC coefficients in scan order. */
2131 const int j= scantable[i];
2132 int level= block[j]*quant_table[j];
2134 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2135 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2136 if(((unsigned)(level+threshold1))>threshold2){
2138 level= (bias + level)>>QUANT_SHIFT;
2141 level= (bias - level)>>QUANT_SHIFT;
2150 return last_non_zero;
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    /* Vertical 4x4 intra prediction: replicate the four samples of the
     * row directly above the block into all four rows. */
    const uint32_t top4 = *(const uint32_t*)(src - stride);
    int y;
    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = top4;
}
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* Horizontal 4x4 intra prediction: fill each row with the sample
     * immediately to its left, byte-replicated via the 0x01010101 trick. */
    int y;
    for(y = 0; y < 4; y++)
        *(uint32_t*)(src + y*stride) = src[-1 + y*stride] * 0x01010101U;
}
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC 4x4 intra prediction: rounded average of the four samples
     * above and the four samples to the left, replicated over the block. */
    int sum = 4;  /* rounding term for the >>3 */
    int i;
    for(i = 0; i < 4; i++)
        sum += src[i - stride] + src[-1 + i*stride];
    {
        const uint32_t fill = (uint32_t)(sum >> 3) * 0x01010101U;
        for(i = 0; i < 4; i++)
            *(uint32_t*)(src + i*stride) = fill;
    }
}
// 4x4 DC prediction from the 4 left neighbours only — used when the top
// edge is unavailable.
2178 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
2179 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
2181 ((uint32_t*)(src+0*stride))[0]=
2182 ((uint32_t*)(src+1*stride))[0]=
2183 ((uint32_t*)(src+2*stride))[0]=
2184 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
// 4x4 DC prediction from the 4 top neighbours only — used when the left
// edge is unavailable.
2187 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
2188 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
2190 ((uint32_t*)(src+0*stride))[0]=
2191 ((uint32_t*)(src+1*stride))[0]=
2192 ((uint32_t*)(src+2*stride))[0]=
2193 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
// 4x4 DC prediction when no neighbours are available: fill with mid-grey 128.
2196 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
2197 ((uint32_t*)(src+0*stride))[0]=
2198 ((uint32_t*)(src+1*stride))[0]=
2199 ((uint32_t*)(src+2*stride))[0]=
2200 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
// Neighbour-loading helpers for the diagonal 4x4 prediction modes below.
// Each macro declares const ints in the enclosing scope:
//   t4..t7 = the four top-right pixels (from the topright pointer),
//   l0..l3 = the four left-column pixels,
//   t0..t3 = the four top-row pixels.
// (No comments inside the macro bodies — they are '\'-continued.)
2204 #define LOAD_TOP_RIGHT_EDGE\
2205 const int t4= topright[0];\
2206 const int t5= topright[1];\
2207 const int t6= topright[2];\
2208 const int t7= topright[3];\
2210 #define LOAD_LEFT_EDGE\
2211 const int l0= src[-1+0*stride];\
2212 const int l1= src[-1+1*stride];\
2213 const int l2= src[-1+2*stride];\
2214 const int l3= src[-1+3*stride];\
2216 #define LOAD_TOP_EDGE\
2217 const int t0= src[ 0-1*stride];\
2218 const int t1= src[ 1-1*stride];\
2219 const int t2= src[ 2-1*stride];\
2220 const int t3= src[ 3-1*stride];\
// 4x4 diagonal down-right prediction: each anti-diagonal of the block gets a
// (1,2,1)/4 filtered value of the left/top-left/top neighbours.
2222 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2223 const int lt= src[-1-1*stride];
2227 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2229 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2232 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2236 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2239 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2241 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2242 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
// 4x4 diagonal down-left prediction: diagonals are (1,2,1)/4 filtered values
// of the top and top-right neighbours (t0..t7); the far corner uses
// (t6 + 3*t7 + 2) >> 2 per the H.264 spec.
2245 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2250 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2252 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2255 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2259 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2262 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2264 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2265 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
// 4x4 vertical-right prediction: mixes half-pel averages ((a+b+1)>>1) of the
// top row with (1,2,1)/4 filtered values of the left/top-left neighbours.
2268 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2269 const int lt= src[-1-1*stride];
2272 const __attribute__((unused)) int unu= l3; // l3 is loaded by LOAD_LEFT_EDGE but unused in this mode
2275 src[1+2*stride]=(lt + t0 + 1)>>1;
2277 src[2+2*stride]=(t0 + t1 + 1)>>1;
2279 src[3+2*stride]=(t1 + t2 + 1)>>1;
2280 src[3+0*stride]=(t2 + t3 + 1)>>1;
2282 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2284 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2286 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2287 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2288 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2289 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
// 4x4 vertical-left prediction: even rows use half-pel averages of adjacent
// top/top-right pixels, odd rows use (1,2,1)/4 filtered values.
2292 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2295 const __attribute__((unused)) int unu= t7; // t7 is loaded but unused in this mode
2297 src[0+0*stride]=(t0 + t1 + 1)>>1;
2299 src[0+2*stride]=(t1 + t2 + 1)>>1;
2301 src[1+2*stride]=(t2 + t3 + 1)>>1;
2303 src[2+2*stride]=(t3 + t4+ 1)>>1;
2304 src[3+2*stride]=(t4 + t5+ 1)>>1;
2305 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2307 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2309 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2311 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2312 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
// 4x4 horizontal-up prediction from the left-column pixels l0..l3.
// Note: (l2 + 2*l3 + l3 + 2)>>2 is the spec's (l2 + 3*l3 + 2)>>2 — not a bug.
2315 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2318 src[0+0*stride]=(l0 + l1 + 1)>>1;
2319 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2321 src[0+1*stride]=(l1 + l2 + 1)>>1;
2323 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2325 src[0+2*stride]=(l2 + l3 + 1)>>1;
2327 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
// 4x4 horizontal-down prediction: mixes half-pel averages of adjacent
// left-column pixels with (1,2,1)/4 filtered left/top-left/top values.
2336 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2337 const int lt= src[-1-1*stride];
2340 const __attribute__((unused)) int unu= t3; // t3 is loaded but unused in this mode
2343 src[2+1*stride]=(lt + l0 + 1)>>1;
2345 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2346 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2347 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2349 src[2+2*stride]=(l0 + l1 + 1)>>1;
2351 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2353 src[2+3*stride]=(l1 + l2+ 1)>>1;
2355 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2356 src[0+3*stride]=(l2 + l3 + 1)>>1;
2357 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
// 16x16 vertical prediction: copy the 16-pixel row above into all 16 rows,
// four uint32_t words per row.
2360 static void pred16x16_vertical_c(uint8_t *src, int stride){
2362 const uint32_t a= ((uint32_t*)(src-stride))[0];
2363 const uint32_t b= ((uint32_t*)(src-stride))[1];
2364 const uint32_t c= ((uint32_t*)(src-stride))[2];
2365 const uint32_t d= ((uint32_t*)(src-stride))[3];
2367 for(i=0; i<16; i++){
2368 ((uint32_t*)(src+i*stride))[0]= a;
2369 ((uint32_t*)(src+i*stride))[1]= b;
2370 ((uint32_t*)(src+i*stride))[2]= c;
2371 ((uint32_t*)(src+i*stride))[3]= d;
// 16x16 horizontal prediction: each row filled with its left neighbour,
// splatted across 16 bytes.
2375 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2378 for(i=0; i<16; i++){
2379 ((uint32_t*)(src+i*stride))[0]=
2380 ((uint32_t*)(src+i*stride))[1]=
2381 ((uint32_t*)(src+i*stride))[2]=
2382 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
// 16x16 DC prediction: average of the 16 left and 16 top neighbours,
// rounded ((sum + 16) >> 5) and splatted over the block.
2386 static void pred16x16_dc_c(uint8_t *src, int stride){
2390 dc+= src[-1+i*stride];
2397 dc= 0x01010101*((dc + 16)>>5);
2399 for(i=0; i<16; i++){
2400 ((uint32_t*)(src+i*stride))[0]=
2401 ((uint32_t*)(src+i*stride))[1]=
2402 ((uint32_t*)(src+i*stride))[2]=
2403 ((uint32_t*)(src+i*stride))[3]= dc;
// 16x16 DC prediction from the 16 left neighbours only ((sum + 8) >> 4).
2407 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2411 dc+= src[-1+i*stride];
2414 dc= 0x01010101*((dc + 8)>>4);
2416 for(i=0; i<16; i++){
2417 ((uint32_t*)(src+i*stride))[0]=
2418 ((uint32_t*)(src+i*stride))[1]=
2419 ((uint32_t*)(src+i*stride))[2]=
2420 ((uint32_t*)(src+i*stride))[3]= dc;
// 16x16 DC prediction from the 16 top neighbours only ((sum + 8) >> 4).
2424 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2430 dc= 0x01010101*((dc + 8)>>4);
2432 for(i=0; i<16; i++){
2433 ((uint32_t*)(src+i*stride))[0]=
2434 ((uint32_t*)(src+i*stride))[1]=
2435 ((uint32_t*)(src+i*stride))[2]=
2436 ((uint32_t*)(src+i*stride))[3]= dc;
// 16x16 DC prediction with no neighbours available: fill with 128.
2440 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2443 for(i=0; i<16; i++){
2444 ((uint32_t*)(src+i*stride))[0]=
2445 ((uint32_t*)(src+i*stride))[1]=
2446 ((uint32_t*)(src+i*stride))[2]=
2447 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
// 16x16 plane (gradient) prediction, shared between H.264 and SVQ3.
// Fits a linear ramp a + x*H + y*V to the top/left border pixels:
// H and V are weighted sums of border differences (weights 1..8), then
// scaled per the spec.  The svq3 flag selects SVQ3's variant, which swaps
// H and V and uses (5*(x/4))/16 scaling instead of (5*x + 32) >> 6.
// Results are clipped to [0,255] through the ff_cropTbl lookup.
2451 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2454 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2455 const uint8_t * const src0 = src+7-stride;
2456 const uint8_t *src1 = src+8*stride-1;
2457 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2458 int H = src0[1] - src0[-1];
2459 int V = src1[0] - src2[ 0];
2460 for(k=2; k<=8; ++k) {
2461 src1 += stride; src2 -= stride;
2462 H += k*(src0[k] - src0[-k]);
2463 V += k*(src1[0] - src2[ 0]);
2466 H = ( 5*(H/4) ) / 16;
2467 V = ( 5*(V/4) ) / 16;
2469 /* required for 100% accuracy */
2470 i = H; H = V; V = i;
2472 H = ( 5*H+32 ) >> 6;
2473 V = ( 5*V+32 ) >> 6;
2476 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2477 for(j=16; j>0; --j) {
2480 for(i=-16; i<0; i+=4) {
2481 src[16+i] = cm[ (b ) >> 5 ];
2482 src[17+i] = cm[ (b+ H) >> 5 ];
2483 src[18+i] = cm[ (b+2*H) >> 5 ];
2484 src[19+i] = cm[ (b+3*H) >> 5 ];
// H.264 16x16 plane prediction: the shared helper with svq3 compatibility off.
2491 static void pred16x16_plane_c(uint8_t *src, int stride){
2492 pred16x16_plane_compat_c(src, stride, 0);
// 8x8 chroma vertical prediction: copy the 8-pixel row above into each row.
2495 static void pred8x8_vertical_c(uint8_t *src, int stride){
2497 const uint32_t a= ((uint32_t*)(src-stride))[0];
2498 const uint32_t b= ((uint32_t*)(src-stride))[1];
2501 ((uint32_t*)(src+i*stride))[0]= a;
2502 ((uint32_t*)(src+i*stride))[1]= b;
// 8x8 chroma horizontal prediction: each row filled with its left neighbour.
2506 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2510 ((uint32_t*)(src+i*stride))[0]=
2511 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
// 8x8 chroma DC prediction with no neighbours: fill with 128.
2515 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2519 ((uint32_t*)(src+i*stride))[0]=
2520 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
// 8x8 chroma DC from the left edge only: separate DC for the top 4 rows
// (dc0, from left pixels 0..3) and bottom 4 rows (dc2, from pixels 4..7),
// as required by the 8x8 chroma DC mode's 4x4 sub-block structure.
2524 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2530 dc0+= src[-1+i*stride];
2531 dc2+= src[-1+(i+4)*stride];
2533 dc0= 0x01010101*((dc0 + 2)>>2);
2534 dc2= 0x01010101*((dc2 + 2)>>2);
2537 ((uint32_t*)(src+i*stride))[0]=
2538 ((uint32_t*)(src+i*stride))[1]= dc0;
2541 ((uint32_t*)(src+i*stride))[0]=
2542 ((uint32_t*)(src+i*stride))[1]= dc2;
// 8x8 chroma DC from the top edge only: separate DC for the left 4 columns
// (dc0, from top pixels 0..3) and right 4 columns (dc1, from pixels 4..7).
2546 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2552 dc0+= src[i-stride];
2553 dc1+= src[4+i-stride];
2555 dc0= 0x01010101*((dc0 + 2)>>2);
2556 dc1= 0x01010101*((dc1 + 2)>>2);
2559 ((uint32_t*)(src+i*stride))[0]= dc0;
2560 ((uint32_t*)(src+i*stride))[1]= dc1;
2563 ((uint32_t*)(src+i*stride))[0]= dc0;
2564 ((uint32_t*)(src+i*stride))[1]= dc1;
// 8x8 chroma DC prediction with both edges: one DC per 4x4 quadrant.
// Top-left uses left+top (dc0), top-right uses top only (dc1), bottom-left
// uses left only (dc2), bottom-right averages the other two edge sums (dc3),
// matching the H.264 chroma DC mode.
2569 static void pred8x8_dc_c(uint8_t *src, int stride){
2571 int dc0, dc1, dc2, dc3;
2575 dc0+= src[-1+i*stride] + src[i-stride];
2576 dc1+= src[4+i-stride];
2577 dc2+= src[-1+(i+4)*stride];
2579 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2580 dc0= 0x01010101*((dc0 + 4)>>3);
2581 dc1= 0x01010101*((dc1 + 2)>>2);
2582 dc2= 0x01010101*((dc2 + 2)>>2);
2585 ((uint32_t*)(src+i*stride))[0]= dc0;
2586 ((uint32_t*)(src+i*stride))[1]= dc1;
2589 ((uint32_t*)(src+i*stride))[0]= dc2;
2590 ((uint32_t*)(src+i*stride))[1]= dc3;
// 8x8 chroma plane prediction: same linear-ramp scheme as the 16x16 plane
// mode but with weights 1..4 and (17*x + 16) >> 5 scaling, per the spec.
// Output pixels are clipped through the ff_cropTbl lookup.
2594 static void pred8x8_plane_c(uint8_t *src, int stride){
2597 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2598 const uint8_t * const src0 = src+3-stride;
2599 const uint8_t *src1 = src+4*stride-1;
2600 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2601 int H = src0[1] - src0[-1];
2602 int V = src1[0] - src2[ 0];
2603 for(k=2; k<=4; ++k) {
2604 src1 += stride; src2 -= stride;
2605 H += k*(src0[k] - src0[-k]);
2606 V += k*(src1[0] - src2[ 0]);
2608 H = ( 17*H+16 ) >> 5;
2609 V = ( 17*V+16 ) >> 5;
2611 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2612 for(j=8; j>0; --j) {
2615 src[0] = cm[ (b ) >> 5 ];
2616 src[1] = cm[ (b+ H) >> 5 ];
2617 src[2] = cm[ (b+2*H) >> 5 ];
2618 src[3] = cm[ (b+3*H) >> 5 ];
2619 src[4] = cm[ (b+4*H) >> 5 ];
2620 src[5] = cm[ (b+5*H) >> 5 ];
2621 src[6] = cm[ (b+6*H) >> 5 ];
2622 src[7] = cm[ (b+7*H) >> 5 ];
// Helpers for the 8x8 luma (pred8x8l_*) prediction functions below.
// SRC(x,y) addresses a pixel relative to the block's top-left corner.
2627 #define SRC(x,y) src[(x)+(y)*stride]
2629 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
// Declares l0..l7: the left-column neighbours, (1,2,1)/4 low-pass filtered
// as required for 8x8 luma prediction; l0 substitutes the left pixel for the
// top-left when has_topleft is 0, and l7 mirrors the bottom sample.
2630 #define PREDICT_8x8_LOAD_LEFT \
2631 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2632 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2633 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2634 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2637 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// Declares t0..t7: the filtered top-row neighbours, with edge substitution
// controlled by has_topleft / has_topright.
2638 #define PREDICT_8x8_LOAD_TOP \
2639 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2640 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2641 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2642 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2643 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2646 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// Declares t8..t15: filtered top-right neighbours; when the top-right block
// is unavailable they are all replaced by the last top pixel.
2647 #define PREDICT_8x8_LOAD_TOPRIGHT \
2648 int t8, t9, t10, t11, t12, t13, t14, t15; \
2649 if(has_topright) { \
2650 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2651 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2652 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
// lt: the filtered top-left corner neighbour.
2654 #define PREDICT_8x8_LOAD_TOPLEFT \
2655 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
// Fills the whole 8x8 block with the splatted 32-bit DC value v.
2657 #define PREDICT_8x8_DC(v) \
2659 for( y = 0; y < 8; y++ ) { \
2660 ((uint32_t*)src)[0] = \
2661 ((uint32_t*)src)[1] = v; \
// 8x8 luma DC prediction with no neighbours: fill with 128.
2665 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2667 PREDICT_8x8_DC(0x80808080);
// 8x8 luma DC prediction from the filtered left neighbours only.
2669 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2671 PREDICT_8x8_LOAD_LEFT;
2672 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction from the filtered top neighbours only.
2675 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677 PREDICT_8x8_LOAD_TOP;
2678 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction: average of all 16 filtered left and top neighbours.
2681 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2683 PREDICT_8x8_LOAD_LEFT;
2684 PREDICT_8x8_LOAD_TOP;
2685 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2686 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
// 8x8 luma horizontal prediction: each row filled with its filtered left
// neighbour l0..l7.
2689 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2691 PREDICT_8x8_LOAD_LEFT;
// ROW(y) splats l<y> across row y as two 32-bit stores.
2692 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2693 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2694 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
// 8x8 luma vertical prediction: the filtered top row t0..t7 is written to
// row 0, then copied to rows 1..7 as a single 64-bit load/store per row.
2697 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2700 PREDICT_8x8_LOAD_TOP;
2709 for( y = 1; y < 8; y++ )
2710 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
// 8x8 luma diagonal down-left prediction: each anti-diagonal gets a
// (1,2,1)/4 filtered value of consecutive top/top-right neighbours t0..t15.
2712 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2714 PREDICT_8x8_LOAD_TOP;
2715 PREDICT_8x8_LOAD_TOPRIGHT;
2716 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2717 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2718 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2719 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2720 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2721 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2722 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2723 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2724 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2725 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2726 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2727 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2728 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2729 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2730 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
// 8x8 luma diagonal down-right prediction: diagonals run from bottom-left
// (pure left-edge filtering) through the corner (l0,lt,t0) to top-right
// (pure top-edge filtering), each a (1,2,1)/4 value.
2732 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2734 PREDICT_8x8_LOAD_TOP;
2735 PREDICT_8x8_LOAD_LEFT;
2736 PREDICT_8x8_LOAD_TOPLEFT;
2737 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2738 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2739 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2740 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2741 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2742 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2743 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2744 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2745 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2746 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2747 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2748 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2749 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2750 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2751 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
// 8x8 luma vertical-right prediction: even rows use half-pel averages of
// the top neighbours, odd rows use (1,2,1)/4 values; the left column is
// filled from the filtered left edge.
2754 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2756 PREDICT_8x8_LOAD_TOP;
2757 PREDICT_8x8_LOAD_LEFT;
2758 PREDICT_8x8_LOAD_TOPLEFT;
2759 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2760 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2761 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2762 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2763 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2764 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2765 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2766 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2767 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2768 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2769 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2770 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2771 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2772 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2773 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2774 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2775 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2776 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2777 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2778 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2779 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2780 SRC(7,0)= (t6 + t7 + 1) >> 1;
// 8x8 luma horizontal-down prediction: alternating half-pel averages and
// (1,2,1)/4 values along the left edge, transitioning to top-edge values
// in the upper-right corner.
2782 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2784 PREDICT_8x8_LOAD_TOP;
2785 PREDICT_8x8_LOAD_LEFT;
2786 PREDICT_8x8_LOAD_TOPLEFT;
2787 SRC(0,7)= (l6 + l7 + 1) >> 1;
2788 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2789 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2790 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2791 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2792 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2793 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2794 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2795 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2796 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2797 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2798 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2799 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2800 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2801 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2802 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2803 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2804 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2805 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2806 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2807 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2808 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
// 8x8 luma vertical-left prediction: even rows use half-pel averages, odd
// rows use (1,2,1)/4 values of the top/top-right neighbours t0..t12.
2810 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2812 PREDICT_8x8_LOAD_TOP;
2813 PREDICT_8x8_LOAD_TOPRIGHT;
2814 SRC(0,0)= (t0 + t1 + 1) >> 1;
2815 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2816 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2817 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2818 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2819 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2820 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2821 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2822 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2823 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2824 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2825 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2826 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2827 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2828 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2829 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2830 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2831 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2832 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2833 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2834 SRC(7,6)= (t10 + t11 + 1) >> 1;
2835 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
// 8x8 luma horizontal-up prediction from the filtered left edge l0..l7;
// the bottom-right region beyond the last interpolated value is flat l7.
2837 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2839 PREDICT_8x8_LOAD_LEFT;
2840 SRC(0,0)= (l0 + l1 + 1) >> 1;
2841 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2842 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2843 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2844 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2845 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2846 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2847 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2848 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2849 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2850 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2851 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2852 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2853 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2854 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2855 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2856 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2857 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2859 #undef PREDICT_8x8_LOAD_LEFT
2860 #undef PREDICT_8x8_LOAD_TOP
2861 #undef PREDICT_8x8_LOAD_TOPLEFT
2862 #undef PREDICT_8x8_LOAD_TOPRIGHT
2863 #undef PREDICT_8x8_DC
// Motion compensation for one partition in one direction (list 0 or 1).
// Fetches the luma block with quarter-pel interpolation (qpix_op) and the
// two chroma blocks with eighth-pel interpolation (chroma_op) from the
// reference picture 'pic'.  When the motion vector reaches outside the
// picture (within the allowed emu-edge margin), pixels are first copied
// into s->edge_emu_buffer with edge replication via ff_emulated_edge_mc.
// mv_cache holds vectors in quarter-pel units; offsets arrive in the same
// units scaled by 8.
2869 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2870 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2871 int src_x_offset, int src_y_offset,
2872 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2873 MpegEncContext * const s = &h->s;
2874 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2875 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2876 const int luma_xy= (mx&3) + ((my&3)<<2);
2877 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2878 uint8_t * src_cb, * src_cr;
2879 int extra_width= h->emu_edge_width;
2880 int extra_height= h->emu_edge_height;
2882 const int full_mx= mx>>2;
2883 const int full_my= my>>2;
2884 const int pic_width = 16*s->mb_width;
2885 const int pic_height = 16*s->mb_height >> MB_MBAFF;
// Sub-pel interpolation reads 3 extra pixels around the block, so the
// usable emu-edge margin shrinks when the vector has a fractional part.
2890 if(mx&7) extra_width -= 3;
2891 if(my&7) extra_height -= 3;
2893 if( full_mx < 0-extra_width
2894 || full_my < 0-extra_height
2895 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2896 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2897 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2898 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
// Non-square partitions are interpolated as two square halves, the second
// shifted by 'delta' bytes.
2902 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2904 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2907 if(s->flags&CODEC_FLAG_GRAY) return;
2910 // chroma offset when predicting from a field of opposite parity
2911 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2912 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2914 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2915 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2918 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919 src_cb= s->edge_emu_buffer;
2921 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2924 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2925 src_cr= s->edge_emu_buffer;
2927 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2930 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2931 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2932 int x_offset, int y_offset,
2933 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2934 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2935 int list0, int list1){
2936 MpegEncContext * const s = &h->s;
2937 qpel_mc_func *qpix_op= qpix_put;
2938 h264_chroma_mc_func chroma_op= chroma_put;
2940 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2941 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2942 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2943 x_offset += 8*s->mb_x;
2944 y_offset += 8*(s->mb_y >> MB_MBAFF);
2947 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2948 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2949 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2950 qpix_op, chroma_op);
2953 chroma_op= chroma_avg;
2957 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2958 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2959 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2960 qpix_op, chroma_op);
// Weighted-prediction MC for one partition.  Bi-directional case: both
// references are fetched (the second into the obmc_scratchpad temp buffers)
// and blended with either implicit weights (use_weight == 2, weight pair
// summing to 64) or explicit per-reference weights/offsets.  Uni-directional
// case: one reference is fetched and scaled with its explicit weight.
2964 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2965 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2966 int x_offset, int y_offset,
2967 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2968 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2969 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2970 int list0, int list1){
2971 MpegEncContext * const s = &h->s;
// Offsets arrive in chroma-pixel units: luma uses 2x offsets.
2973 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2974 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2975 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2976 x_offset += 8*s->mb_x;
2977 y_offset += 8*(s->mb_y >> MB_MBAFF);
2980 /* don't optimize for luma-only case, since B-frames usually
2981 * use implicit weights => chroma too. */
2982 uint8_t *tmp_cb = s->obmc_scratchpad;
2983 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2984 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2985 int refn0 = h->ref_cache[0][ scan8[n] ];
2986 int refn1 = h->ref_cache[1][ scan8[n] ];
2988 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2989 dest_y, dest_cb, dest_cr,
2990 x_offset, y_offset, qpix_put, chroma_put);
2991 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2992 tmp_y, tmp_cb, tmp_cr,
2993 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: log2 denom is fixed at 5 (weights sum to 64), offset 0.
2995 if(h->use_weight == 2){
2996 int weight0 = h->implicit_weight[refn0][refn1];
2997 int weight1 = 64 - weight0;
2998 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2999 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
3000 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
3002 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
3003 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
3004 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
3005 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3006 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3007 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3008 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3009 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3010 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-directional weighted prediction.
3013 int list = list1 ? 1 : 0;
3014 int refn = h->ref_cache[list][ scan8[n] ];
3015 Picture *ref= &h->ref_list[list][refn];
3016 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3017 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3018 qpix_put, chroma_put);
3020 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3021 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3022 if(h->use_weight_chroma){
3023 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3024 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3025 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3026 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatch MC for one partition: use the weighted path for explicit
// weighting (use_weight == 1) or for implicit bi-prediction whose weight
// pair is not the trivial 32/32 average; otherwise the cheaper standard
// put/avg path suffices.
3031 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3032 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3033 int x_offset, int y_offset,
3034 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3035 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3036 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3037 int list0, int list1){
3038 if((h->use_weight==2 && list0 && list1
3039 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3040 || h->use_weight==1)
3041 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3042 x_offset, y_offset, qpix_put, chroma_put,
3043 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3045 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3046 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
// Prefetch reference pixels the current macroblock's MC is about to read,
// using the mv of sub-block 0 as an estimate for the whole MB.
3049 static inline void prefetch_motion(H264Context *h, int list){
3050 /* fetch pixels for estimated mv 4 macroblocks ahead
3051 * optimized for 64byte cache lines */
3052 MpegEncContext * const s = &h->s;
3053 const int refn = h->ref_cache[list][scan8[0]];
3055 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3056 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3057 uint8_t **src= h->ref_list[list][refn].data;
3058 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3059 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr planes are assumed consecutive: the src[2]-src[1] stride lets
// one prefetch call cover both chroma planes.
3060 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3061 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
// Top-level inter prediction for one macroblock: walks the partition tree
// (16x16, 16x8, 8x16, or 8x8 with 8x8/8x4/4x8/4x4 sub-partitions) and calls
// mc_part for each partition with the qpel/chroma function set and
// weight-function slot matching that partition size.
3065 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3066 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3067 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3068 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3069 MpegEncContext * const s = &h->s;
3070 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3071 const int mb_type= s->current_picture.mb_type[mb_xy];
3073 assert(IS_INTER(mb_type));
3075 prefetch_motion(h, 0);
3077 if(IS_16X16(mb_type)){
3078 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3079 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3080 &weight_op[0], &weight_avg[0],
3081 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3082 }else if(IS_16X8(mb_type)){
3083 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3084 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3085 &weight_op[1], &weight_avg[1],
3086 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3087 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3088 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3089 &weight_op[1], &weight_avg[1],
3090 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3091 }else if(IS_8X16(mb_type)){
3092 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3093 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3094 &weight_op[2], &weight_avg[2],
3095 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3096 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3097 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3098 &weight_op[2], &weight_avg[2],
3099 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8 partitioning: each of the four 8x8 blocks carries its own sub_mb_type.
3103 assert(IS_8X8(mb_type));
3106 const int sub_mb_type= h->sub_mb_type[i];
3108 int x_offset= (i&1)<<2;
3109 int y_offset= (i&2)<<1;
3111 if(IS_SUB_8X8(sub_mb_type)){
3112 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3113 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3114 &weight_op[3], &weight_avg[3],
3115 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3116 }else if(IS_SUB_8X4(sub_mb_type)){
3117 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3118 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3119 &weight_op[4], &weight_avg[4],
3120 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3121 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3122 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3123 &weight_op[4], &weight_avg[4],
3124 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3125 }else if(IS_SUB_4X8(sub_mb_type)){
3126 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3127 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3128 &weight_op[5], &weight_avg[5],
3129 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3130 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3131 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3132 &weight_op[5], &weight_avg[5],
3133 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3136 assert(IS_SUB_4X4(sub_mb_type));
3138 int sub_x_offset= x_offset + 2*(j&1);
3139 int sub_y_offset= y_offset + (j&2);
3140 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3141 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3142 &weight_op[6], &weight_avg[6],
3143 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3149 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC VLC lookup tables (coeff_token, total_zeros,
 * run_before) used for residual decoding.
 * The static 'done' flag ensures the tables are built only once per process.
 * NOTE(review): several loop headers/braces are elided in this excerpt.
 */
3152 static void decode_init_vlc(){
3153 static int done = 0;
// chroma DC coeff_token: 4*5 code table, 8-bit primary lookup
3159 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3160 &chroma_dc_coeff_token_len [0], 1, 1,
3161 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// luma coeff_token tables: 4*17 codes each
3164 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3165 &coeff_token_len [i][0], 1, 1,
3166 &coeff_token_bits[i][0], 1, 1, 1);
// chroma DC total_zeros: 4 codes per table
3170 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3171 &chroma_dc_total_zeros_len [i][0], 1, 1,
3172 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
// luma total_zeros: one 16-entry table per total_coeff value
3174 for(i=0; i<15; i++){
3175 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3176 &total_zeros_len [i][0], 1, 1,
3177 &total_zeros_bits[i][0], 1, 1, 1);
// run_before tables; run7_vlc (16 entries, from run_len[6]) covers zeros_left > 6
3181 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3182 &run_len [i][0], 1, 1,
3183 &run_bits[i][0], 1, 1, 1);
3185 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3186 &run_len [6][0], 1, 1,
3187 &run_bits[6][0], 1, 1, 1);
3192 * Sets the intra prediction function pointers.
/**
 * Fills the intra prediction function-pointer tables with the plain C
 * implementations, indexed by prediction mode:
 * pred4x4 (4x4 luma), pred8x8l (8x8 luma), pred8x8 (chroma), pred16x16 (luma).
 */
3194 static void init_pred_ptrs(H264Context *h){
3195 // MpegEncContext * const s = &h->s;
// 4x4 luma intra prediction modes
3197 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3198 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3199 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3200 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3201 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3202 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3203 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3204 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3205 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3206 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3207 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3208 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
// 8x8 luma intra prediction modes (High profile 8x8 transform)
3210 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3211 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3212 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3213 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3214 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3215 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3216 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3217 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3218 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3219 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3220 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3221 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
// 8x8 chroma intra prediction modes
3223 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3224 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3225 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3226 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3227 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3228 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3229 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
// 16x16 luma intra prediction modes (shares the 8x8 mode indices)
3231 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3232 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3233 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3234 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3235 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3236 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3237 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Frees all per-context tables allocated by alloc_tables() and the
 * obmc scratchpad. av_freep() NULLs the pointers, so a later
 * alloc_tables() can safely run again.
 */
3240 static void free_tables(H264Context *h){
3241 av_freep(&h->intra4x4_pred_mode);
3242 av_freep(&h->chroma_pred_mode_table);
3243 av_freep(&h->cbp_table);
3244 av_freep(&h->mvd_table[0]);
3245 av_freep(&h->mvd_table[1]);
3246 av_freep(&h->direct_table);
3247 av_freep(&h->non_zero_count);
3248 av_freep(&h->slice_table_base);
3249 av_freep(&h->top_borders[1]);
3250 av_freep(&h->top_borders[0]);
// slice_table is a pointer into slice_table_base, so it is not freed, only cleared
3251 h->slice_table= NULL;
3253 av_freep(&h->mb2b_xy);
3254 av_freep(&h->mb2b8_xy);
3256 av_freep(&h->s.obmc_scratchpad);
/**
 * Precomputes the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical, table 1
 * aliases buffer 0 to save memory.
 * NOTE(review): the inner x-loop header is elided in this excerpt.
 */
3259 static void init_dequant8_coeff_table(H264Context *h){
// transposed layout is used when a non-C (SIMD) idct is active
3261 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3262 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3263 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3265 for(i=0; i<2; i++ ){
// share buffer 0 when both scaling matrices are the same
3266 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3267 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3271 for(q=0; q<52; q++){
3272 int shift = div6[q];
// coefficient = base level scale * scaling matrix entry, shifted by qp/6
3275 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3276 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3277 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precomputes the 4x4 dequantization tables for all 52 QP values from the
 * PPS scaling matrices; tables with identical scaling matrices share one
 * buffer. NOTE(review): some inner loop headers are elided in this excerpt.
 */
3282 static void init_dequant4_coeff_table(H264Context *h){
// transposed layout is used when a non-C (SIMD) idct is active
3284 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3285 for(i=0; i<6; i++ ){
3286 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer if its scaling matrix matches this one
3288 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3289 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3296 for(q=0; q<52; q++){
3297 int shift = div6[q] + 2;
// coefficient = base level scale * scaling matrix entry, shifted by qp/6 + 2
3300 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3301 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3302 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initializes all dequant tables; 8x8 tables only when the PPS enables the
 * 8x8 transform. With lossless transform bypass, QP 0 entries are forced to
 * the neutral scale 1<<6 so dequantization becomes an identity.
 * NOTE(review): the loop headers over i/x are elided in this excerpt.
 */
3307 static void init_dequant_tables(H264Context *h){
3309 init_dequant4_coeff_table(h);
3310 if(h->pps.transform_8x8_mode)
3311 init_dequant8_coeff_table(h);
3312 if(h->sps.transform_bypass){
3315 h->dequant4_coeff[i][0][x] = 1<<6;
3316 if(h->pps.transform_8x8_mode)
3319 h->dequant8_coeff[i][0][x] = 1<<6;
3326  * Allocates the per-sequence decoder tables; mb_width/mb_height (and
3326b * mb_stride) must already be set before calling.
/**
 * Allocates the per-sequence decoder tables (prediction mode cache,
 * non-zero-count, slice table, top borders, cbp, CABAC-only tables) and
 * builds the mb -> b/b8 index maps.
 * Requires s->mb_width/mb_height/mb_stride to be set (hence "needs
 * width/height"). Returns 0 on success; CHECKED_ALLOCZ jumps to an
 * error path (not visible in this excerpt) on allocation failure.
 */
3328 static int alloc_tables(H264Context *h){
3329 MpegEncContext * const s = &h->s;
// one extra mb row as guard space for neighbor accesses
3330 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3333 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3335 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3336 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3338 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3339 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// these tables are only read by the CABAC decoder
3341 if( h->pps.cabac ) {
3342 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3343 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3344 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3345 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table points past the guard row/column
3348 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3349 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// precomputed macroblock index -> motion vector (b) / 8x8 (b8) index maps
3351 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3352 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3353 for(y=0; y<s->mb_height; y++){
3354 for(x=0; x<s->mb_width; x++){
3355 const int mb_xy= x + y*s->mb_stride;
3356 const int b_xy = 4*x + 4*y*h->b_stride;
3357 const int b8_xy= 2*x + 2*y*h->b8_stride;
3359 h->mb2b_xy [mb_xy]= b_xy;
3360 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start(), once linesize is known
3364 s->obmc_scratchpad = NULL;
3366 if(!h->dequant4_coeff[0])
3367 init_dequant_tables(h);
/**
 * Initialization shared by decoder (and, presumably, encoder) setup:
 * copies dimensions/codec id from the AVCodecContext and seeds the
 * scaling matrices with the flat default (all 16).
 */
3375 static void common_init(H264Context *h){
3376 MpegEncContext * const s = &h->s;
3378 s->width = s->avctx->width;
3379 s->height = s->avctx->height;
3380 s->codec_id= s->avctx->codec->id;
// -1 marks the dequant tables as not yet built for any PPS
3384 h->dequant_coeff_pps= -1;
3385 s->unrestricted_mv=1;
3386 s->decode=1; //FIXME
// flat default scaling lists (value 16 == unity scaling)
3388 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3389 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects AVC ("avcC", extradata starting
 * with byte 1) vs. Annex-B bitstream framing.
 * NOTE(review): the tail of this function is not visible in this excerpt.
 */
3392 static int decode_init(AVCodecContext *avctx){
3393 H264Context *h= avctx->priv_data;
3394 MpegEncContext * const s = &h->s;
3396 MPV_decode_defaults(s);
3401 s->out_format = FMT_H264;
3402 s->workaround_bugs= avctx->workaround_bugs;
3405 // s->decode_mb= ff_h263_decode_mb;
3407 avctx->pix_fmt= PIX_FMT_YUV420P;
// first extradata byte == 1 indicates avcC (MP4-style) extradata
3411 if(avctx->extradata_size > 0 && avctx->extradata &&
3412 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts MPV/error-resilience frame state, precomputes the
 * block_offset table (frame offsets in [0..23], field/MBAFF offsets in
 * [24..47]) and lazily allocates the bipred scratchpad.
 */
3422 static int frame_start(H264Context *h){
3423 MpegEncContext * const s = &h->s;
3426 if(MPV_frame_start(s, s->avctx) < 0)
3428 ff_er_frame_start(s);
3430 assert(s->linesize && s->uvlinesize);
// luma offsets: [i] for frame decoding, [24+i] with doubled row stride for fields
3432 for(i=0; i<16; i++){
3433 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3434 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma offsets, same split between frame and field layouts
3437 h->block_offset[16+i]=
3438 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3439 h->block_offset[24+16+i]=
3440 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3443 /* can't be in alloc_tables because linesize isn't known there.
3444 * FIXME: redo bipred weight to not require extra buffer? */
3445 if(!s->obmc_scratchpad)
3446 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3448 /* some macroblocks will be accessed before they're available */
3450 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3452 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column (into left_border) and bottom row (into
 * top_borders[0]) of the current macroblock so the deblocking filter of
 * the neighboring macroblocks can still read the unfiltered samples.
 * Progressive (non-MBAFF) variant; see backup_pair_border for MBAFF.
 */
3456 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3457 MpegEncContext * const s = &h->s;
3461 src_cb -= uvlinesize;
3462 src_cr -= uvlinesize;
3464 // There are two lines saved, the line above the top macroblock of a pair,
3465 // and the line above the bottom macroblock
// corner sample comes from the previously saved top border
3466 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3467 for(i=1; i<17; i++){
3468 h->left_border[i]= src_y[15+i* linesize];
// save the bottom luma row (16 bytes as two 64-bit stores)
3471 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3472 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders are skipped entirely in gray-only decoding
3474 if(!(s->flags&CODEC_FLAG_GRAY)){
3475 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3476 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3478 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3479 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3481 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3482 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg!=0) or restores the saved left/top border samples around the
 * current macroblock, so intra prediction sees the unfiltered neighbor
 * samples while deblocking is enabled. Called with xchg=1 before intra
 * prediction and xchg=0 afterwards. Progressive (non-MBAFF) variant.
 */
3486 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3487 MpegEncContext * const s = &h->s;
// no left/top neighbors at the picture edges
3490 int deblock_left = (s->mb_x > 0);
3491 int deblock_top = (s->mb_y > 0);
// step back to include the top-left corner sample
3493 src_y -= linesize + 1;
3494 src_cb -= uvlinesize + 1;
3495 src_cr -= uvlinesize + 1;
3497 #define XCHG(a,b,t,xchg)\
// left luma column (17 samples incl. corner); row 0 skipped when no top neighbor
3504 for(i = !deblock_top; i<17; i++){
3505 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3510 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3511 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbor is needed for prediction of the last 4x4 column
3512 if(s->mb_x+1 < s->mb_width){
3513 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3517 if(!(s->flags&CODEC_FLAG_GRAY)){
3519 for(i = !deblock_top; i<9; i++){
3520 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3521 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3525 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3526 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border: saves the borders for a whole
 * macroblock pair, keeping two lines per plane (top_borders[0] and [1])
 * and a doubled left_border column.
 */
3531 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3532 MpegEncContext * const s = &h->s;
3535 src_y -= 2 * linesize;
3536 src_cb -= 2 * uvlinesize;
3537 src_cr -= 2 * uvlinesize;
3539 // There are two lines saved, the line above the top macroblock of a pair,
3540 // and the line above the bottom macroblock
// corner samples come from the previously saved top borders
3541 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3542 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma rows (one mb pair) of the right column
3543 for(i=2; i<34; i++){
3544 h->left_border[i]= src_y[15+i* linesize];
// save the two bottom luma rows of the pair
3547 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3548 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3549 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3550 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3552 if(!(s->flags&CODEC_FLAG_GRAY)){
3553 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3554 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3555 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3556 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3557 for(i=2; i<18; i++){
3558 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3559 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3561 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3562 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3563 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3564 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border: swaps (xchg!=0) or restores the saved
 * borders around a macroblock pair, two lines per plane.
 */
3568 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3569 MpegEncContext * const s = &h->s;
3572 int deblock_left = (s->mb_x > 0);
// >1 because a full pair (two mb rows) must exist above
3573 int deblock_top = (s->mb_y > 1);
3575 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// step back two rows and one column to include the corners
3577 src_y -= 2 * linesize + 1;
3578 src_cb -= 2 * uvlinesize + 1;
3579 src_cr -= 2 * uvlinesize + 1;
3581 #define XCHG(a,b,t,xchg)\
// left luma column of the pair; first two rows skipped when no top pair
3588 for(i = (!deblock_top)<<1; i<34; i++){
3589 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3594 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3595 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3596 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3597 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3598 if(s->mb_x+1 < s->mb_width){
3599 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3600 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3604 if(!(s->flags&CODEC_FLAG_GRAY)){
3606 for(i = (!deblock_top) << 1; i<18; i++){
3607 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3608 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3612 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3613 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3614 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3615 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: computes destination pointers,
 * selects the idct variants, performs intra prediction or motion
 * compensation, adds the residual, and runs/prepares the deblocking filter.
 * Handles H.264 proper as well as the SVQ3 variant (codec_id checks).
 * NOTE(review): a number of interior lines (loop/if headers, braces) are
 * elided in this excerpt; comments describe only what is visible.
 */
3620 static void hl_decode_mb(H264Context *h){
3621 MpegEncContext * const s = &h->s;
3622 const int mb_x= s->mb_x;
3623 const int mb_y= s->mb_y;
3624 const int mb_xy= mb_x + mb_y*s->mb_stride;
3625 const int mb_type= s->current_picture.mb_type[mb_xy];
3626 uint8_t *dest_y, *dest_cb, *dest_cr;
3627 int linesize, uvlinesize /*dct_offset*/;
3629 int *block_offset = &h->block_offset[0];
// in MBAFF, 'bottom' selects the bottom macroblock of a pair
3630 const unsigned int bottom = mb_y & 1;
3631 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3632 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3633 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination of this macroblock in the current picture planes
3638 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3639 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3640 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3642 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3643 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field decoding: doubled strides and the field block_offset table
3646 linesize = h->mb_linesize = s->linesize * 2;
3647 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3648 block_offset = &h->block_offset[24];
3649 if(mb_y&1){ //FIXME move out of this func?
3650 dest_y -= s->linesize*15;
3651 dest_cb-= s->uvlinesize*7;
3652 dest_cr-= s->uvlinesize*7;
// remap reference indices into the field reference list layout
3656 for(list=0; list<2; list++){
3657 if(!USES_LIST(mb_type, list))
3659 if(IS_16X16(mb_type)){
3660 int8_t *ref = &h->ref_cache[list][scan8[0]];
3661 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3663 for(i=0; i<16; i+=4){
3664 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3665 int ref = h->ref_cache[list][scan8[i]];
3667 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// frame decoding: normal strides
3673 linesize = h->mb_linesize = s->linesize;
3674 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3675 // dct_offset = s->linesize * 16;
// pick the idct/add routines matching the transform size (or bypass)
3678 if(transform_bypass){
3680 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3681 }else if(IS_8x8DCT(mb_type)){
3682 idct_dc_add = s->dsp.h264_idct8_dc_add;
3683 idct_add = s->dsp.h264_idct8_add;
3685 idct_dc_add = s->dsp.h264_idct_dc_add;
3686 idct_add = s->dsp.h264_idct_add;
// MBAFF + deblocking: expose unfiltered pair borders before intra prediction
3689 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3690 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3691 int mbt_y = mb_y&~1;
3692 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3693 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3694 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3695 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// I_PCM: raw samples were parsed into h->mb; copy them straight to output
3698 if (IS_INTRA_PCM(mb_type)) {
3701 // The pixels are stored in h->mb array in the same order as levels,
3702 // copy them in output in the correct order.
3703 for(i=0; i<16; i++) {
3704 for (y=0; y<4; y++) {
3705 for (x=0; x<4; x++) {
3706 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3710 for(i=16; i<16+4; i++) {
3711 for (y=0; y<4; y++) {
3712 for (x=0; x<4; x++) {
3713 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3717 for(i=20; i<20+4; i++) {
3718 for (y=0; y<4; y++) {
3719 for (x=0; x<4; x++) {
3720 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// intra macroblock: prediction (with border swap when deblocking)
3725 if(IS_INTRA(mb_type)){
3726 if(h->deblocking_filter && !FRAME_MBAFF)
3727 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3729 if(!(s->flags&CODEC_FLAG_GRAY)){
3730 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3731 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3734 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: predict and add residual per 8x8 luma block
3736 if(IS_8x8DCT(mb_type)){
3737 for(i=0; i<16; i+=4){
3738 uint8_t * const ptr= dest_y + block_offset[i];
3739 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3740 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3741 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3742 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// dc-only fast path when exactly one nonzero coeff and it is the DC
3744 if(nnz == 1 && h->mb[i*16])
3745 idct_dc_add(ptr, h->mb + i*16, linesize);
3747 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 transform: predict and add residual per 4x4 luma block
3751 for(i=0; i<16; i++){
3752 uint8_t * const ptr= dest_y + block_offset[i];
3754 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// synthesize the top-right samples when the real ones are unavailable
3757 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3758 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3759 assert(mb_y || linesize <= block_offset[i]);
3760 if(!topright_avail){
3761 tr= ptr[3 - linesize]*0x01010101;
3762 topright= (uint8_t*) &tr;
3764 topright= ptr + 4 - linesize;
3768 h->pred4x4[ dir ](ptr, topright, linesize);
3769 nnz = h->non_zero_count_cache[ scan8[i] ];
3771 if(s->codec_id == CODEC_ID_H264){
3772 if(nnz == 1 && h->mb[i*16])
3773 idct_dc_add(ptr, h->mb + i*16, linesize);
3775 idct_add(ptr, h->mb + i*16, linesize);
3777 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: one whole-mb prediction plus DC transform of the luma DCs
3782 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3783 if(s->codec_id == CODEC_ID_H264){
3784 if(!transform_bypass)
3785 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3787 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3789 if(h->deblocking_filter && !FRAME_MBAFF)
3790 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// inter macroblock: motion compensation (H.264 path only)
3791 }else if(s->codec_id == CODEC_ID_H264){
3792 hl_motion(h, dest_y, dest_cb, dest_cr,
3793 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3794 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3795 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add the luma residual (intra16x16 and inter blocks)
3799 if(!IS_INTRA4x4(mb_type)){
3800 if(s->codec_id == CODEC_ID_H264){
3801 if(IS_INTRA16x16(mb_type)){
3802 for(i=0; i<16; i++){
3803 if(h->non_zero_count_cache[ scan8[i] ])
3804 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3805 else if(h->mb[i*16])
3806 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// step 4 blocks at a time for the 8x8 transform
3809 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3810 for(i=0; i<16; i+=di){
3811 int nnz = h->non_zero_count_cache[ scan8[i] ];
3813 if(nnz==1 && h->mb[i*16])
3814 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3816 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3821 for(i=0; i<16; i++){
3822 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3823 uint8_t * const ptr= dest_y + block_offset[i];
3824 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// add the chroma residual; chroma DC is transformed separately
3830 if(!(s->flags&CODEC_FLAG_GRAY)){
3831 uint8_t *dest[2] = {dest_cb, dest_cr};
3832 if(transform_bypass){
3833 idct_add = idct_dc_add = s->dsp.add_pixels4;
3835 idct_add = s->dsp.h264_idct_add;
3836 idct_dc_add = s->dsp.h264_idct_dc_add;
3837 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3838 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3840 if(s->codec_id == CODEC_ID_H264){
3841 for(i=16; i<16+8; i++){
3842 if(h->non_zero_count_cache[ scan8[i] ])
3843 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3844 else if(h->mb[i*16])
3845 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3848 for(i=16; i<16+8; i++){
3849 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3850 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3851 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters a whole pair once the bottom mb is decoded;
// the progressive path backs up borders and filters this mb directly
3857 if(h->deblocking_filter) {
3859 //FIXME try deblocking one mb at a time?
3860 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3861 const int mb_y = s->mb_y - 1;
3862 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3863 const int mb_xy= mb_x + mb_y*s->mb_stride;
3864 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3865 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// wait until both macroblocks of the pair are decoded
3866 if (!bottom) return;
3867 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3868 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3869 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3871 if(IS_INTRA(mb_type_top | mb_type_bottom))
3872 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3874 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3878 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3879 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3880 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3881 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3884 tprintf("call mbaff filter_mb\n");
3885 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3886 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3887 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3889 tprintf("call filter_mb\n");
3890 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3891 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3892 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3898 * fills the default_ref_list.
/**
 * Builds the default reference picture lists (L0, and L1 for B slices)
 * from the short-term and long-term reference arrays, per the H.264
 * default ordering: B slices sort short-term refs by POC around the
 * current picture, P slices keep decode order; long-term refs follow.
 * Returns 0 (the visible paths only produce success).
 * NOTE(review): some lines (loop headers, returns) are elided here.
 */
3900 static int fill_default_ref_list(H264Context *h){
3901 MpegEncContext * const s = &h->s;
3903 int smallest_poc_greater_than_current = -1;
3904 Picture sorted_short_ref[32];
3906 if(h->slice_type==B_TYPE){
3910 /* sort frame according to poc in B slice */
// selection sort: pick the smallest poc above the previous limit each pass
3911 for(out_i=0; out_i<h->short_ref_count; out_i++){
3913 int best_poc=INT_MAX;
3915 for(i=0; i<h->short_ref_count; i++){
3916 const int poc= h->short_ref[i]->poc;
3917 if(poc > limit && poc < best_poc){
3923 assert(best_i != INT_MIN);
3926 sorted_short_ref[out_i]= *h->short_ref[best_i];
3927 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the "future" (poc >= current) refs begin
3928 if (-1 == smallest_poc_greater_than_current) {
3929 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3930 smallest_poc_greater_than_current = out_i;
3936 if(s->picture_structure == PICT_FRAME){
3937 if(h->slice_type==B_TYPE){
3939 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3941 // find the largest poc
// L0 walks past refs forward, L1 walks future refs backward
3942 for(list=0; list<2; list++){
3945 int step= list ? -1 : 1;
3947 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3948 while(j<0 || j>= h->short_ref_count){
3949 if(j != -99 && step == (list ? -1 : 1))
3952 j= smallest_poc_greater_than_current + (step>>1);
// only frame references (both fields) are usable here
3954 if(sorted_short_ref[j].reference != 3) continue;
3955 h->default_ref_list[list][index ]= sorted_short_ref[j];
3956 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// long-term refs are appended after the short-term ones
3959 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3960 if(h->long_ref[i] == NULL) continue;
3961 if(h->long_ref[i]->reference != 3) continue;
3963 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3964 h->default_ref_list[ list ][index++].pic_id= i;;
3967 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3968 // swap the two first elements of L1 when
3969 // L0 and L1 are identical
3970 Picture temp= h->default_ref_list[1][0];
3971 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3972 h->default_ref_list[1][1] = temp;
// zero the unused tail of the list
3975 if(index < h->ref_count[ list ])
3976 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slices: short-term refs in decode order, then long-term refs
3980 for(i=0; i<h->short_ref_count; i++){
3981 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3982 h->default_ref_list[0][index ]= *h->short_ref[i];
3983 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3985 for(i = 0; i < 16; i++){
3986 if(h->long_ref[i] == NULL) continue;
3987 if(h->long_ref[i]->reference != 3) continue;
3988 h->default_ref_list[0][index ]= *h->long_ref[i];
3989 h->default_ref_list[0][index++].pic_id= i;;
3991 if(index < h->ref_count[0])
3992 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3995 if(h->slice_type==B_TYPE){
3997 //FIXME second field balh
4001 for (i=0; i<h->ref_count[0]; i++) {
4002 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
4004 if(h->slice_type==B_TYPE){
4005 for (i=0; i<h->ref_count[1]; i++) {
4006 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4013 static void print_short_term(H264Context *h);
4014 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts from the default lists, then moves the signalled short-term
 * (idc 0/1, by pic_num delta) or long-term (idc 2, by index) pictures to
 * the front positions. Missing entries in the final lists are replaced by
 * the current picture. Returns 0 on success, -1 on a syntax error.
 */
4016 static int decode_ref_pic_list_reordering(H264Context *h){
4017 MpegEncContext * const s = &h->s;
4020 print_short_term(h);
4022 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4024 for(list=0; list<2; list++){
4025 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag for this list
4027 if(get_bits1(&s->gb)){
4028 int pred= h->curr_pic_num;
4030 for(index=0; ; index++){
4031 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4034 Picture *ref = NULL;
// idc 3 terminates the reordering commands
4036 if(reordering_of_pic_nums_idc==3)
4039 if(index >= h->ref_count[list]){
4040 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4044 if(reordering_of_pic_nums_idc<3){
4045 if(reordering_of_pic_nums_idc<2){
// short-term: pic_num predicted +- abs_diff, modulo max_pic_num
4046 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4048 if(abs_diff_pic_num >= h->max_pic_num){
4049 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4053 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4054 else pred+= abs_diff_pic_num;
4055 pred &= h->max_pic_num - 1;
4057 for(i= h->short_ref_count-1; i>=0; i--){
4058 ref = h->short_ref[i];
4059 assert(ref->reference == 3);
4060 assert(!ref->long_ref);
4061 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4065 ref->pic_id= ref->frame_num;
// long-term: direct index into long_ref
4067 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4068 ref = h->long_ref[pic_id];
4069 ref->pic_id= pic_id;
4070 assert(ref->reference == 3);
4071 assert(ref->long_ref);
4076 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4077 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift the list down and insert the reordered picture at 'index'
4079 for(i=index; i+1<h->ref_count[list]; i++){
4080 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4083 for(; i > index; i--){
4084 h->ref_list[list][i]= h->ref_list[list][i-1];
4086 h->ref_list[list][index]= *ref;
4089 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// only B slices have a second list
4095 if(h->slice_type!=B_TYPE) break;
// fill holes (missing frames) with the current picture
4097 for(list=0; list<2; list++){
4098 for(index= 0; index < h->ref_count[list]; index++){
4099 if(!h->ref_list[list][index].data[0])
4100 h->ref_list[list][index]= s->current_picture;
4102 if(h->slice_type!=B_TYPE) break;
4105 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4106 direct_dist_scale_factor(h);
4107 direct_ref_list_init(h);
/**
 * Derives the per-field reference entries for MBAFF decoding: for each
 * frame reference i, entries [16+2*i] (top field) and [16+2*i+1] (bottom
 * field) are created by doubling the linesize and offsetting the data
 * pointers, and the weighted-prediction tables are replicated accordingly.
 */
4111 static void fill_mbaff_ref_list(H264Context *h){
4113 for(list=0; list<2; list++){
4114 for(i=0; i<h->ref_count[list]; i++){
4115 Picture *frame = &h->ref_list[list][i];
4116 Picture *field = &h->ref_list[list][16+2*i];
// top field: doubled stride; bottom field additionally starts one line down
4119 field[0].linesize[j] <<= 1;
4120 field[1] = field[0];
4122 field[1].data[j] += frame->linesize[j];
// both fields inherit the frame's explicit weights/offsets
4124 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4125 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4127 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4128 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// replicate the implicit bipred weights for the field entries
4132 for(j=0; j<h->ref_count[1]; j++){
4133 for(i=0; i<h->ref_count[0]; i++)
4134 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4135 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4136 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header: the luma/chroma
 * log2 weight denominators and, per reference, optional explicit
 * weights/offsets (defaulting to 1<<denom and 0). Sets h->use_weight /
 * h->use_weight_chroma when any non-default value is present.
 * NOTE(review): the return statement is elided in this excerpt.
 */
4140 static int pred_weight_table(H264Context *h){
4141 MpegEncContext * const s = &h->s;
4143 int luma_def, chroma_def;
4146 h->use_weight_chroma= 0;
4147 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4148 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weight is the identity: 1 << log2 denominator
4149 luma_def = 1<<h->luma_log2_weight_denom;
4150 chroma_def = 1<<h->chroma_log2_weight_denom;
4152 for(list=0; list<2; list++){
4153 for(i=0; i<h->ref_count[list]; i++){
4154 int luma_weight_flag, chroma_weight_flag;
4156 luma_weight_flag= get_bits1(&s->gb);
4157 if(luma_weight_flag){
4158 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4159 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// only flag weighting as used when it differs from the identity
4160 if( h->luma_weight[list][i] != luma_def
4161 || h->luma_offset[list][i] != 0)
4164 h->luma_weight[list][i]= luma_def;
4165 h->luma_offset[list][i]= 0;
4168 chroma_weight_flag= get_bits1(&s->gb);
4169 if(chroma_weight_flag){
4172 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4173 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4174 if( h->chroma_weight[list][i][j] != chroma_def
4175 || h->chroma_offset[list][i][j] != 0)
4176 h->use_weight_chroma= 1;
4181 h->chroma_weight[list][i][j]= chroma_def;
4182 h->chroma_offset[list][i][j]= 0;
// P slices only have list 0
4186 if(h->slice_type != B_TYPE) break;
4188 h->use_weight= h->use_weight || h->use_weight_chroma;
4192 static void implicit_weight_table(H264Context *h){
// Compute implicit bi-prediction weights (weighted_bipred_idc == 2,
// H.264 8.4.2.3.2) from the POC distances between the current picture and
// each (ref0, ref1) pair. Falls back to equal 32/32 weights for the common
// symmetric single-reference case and for out-of-range scale factors.
// NOTE(review): some lines (e.g. the td==0 guard) are missing from this
// excerpt.
4193 MpegEncContext * const s = &h->s;
4195 int cur_poc = s->current_picture_ptr->poc;
// fast path: one reference each side, temporally symmetric around cur_poc
4197 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4198 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4200 h->use_weight_chroma= 0;
// use_weight==2 marks "implicit" mode for the motion compensation code
4205 h->use_weight_chroma= 2;
4206 h->luma_log2_weight_denom= 5;
4207 h->chroma_log2_weight_denom= 5;
4209 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4210 int poc0 = h->ref_list[0][ref0].poc;
4211 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4212 int poc1 = h->ref_list[1][ref1].poc;
// td/tb: clipped POC distances; tx: rounded reciprocal of td in Q14
4213 int td = clip(poc1 - poc0, -128, 127);
4215 int tb = clip(cur_poc - poc0, -128, 127);
4216 int tx = (16384 + (FFABS(td) >> 1)) / td;
4217 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// spec restricts implicit weights; out of range -> equal weighting
4218 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4219 h->implicit_weight[ref0][ref1] = 32;
4221 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
// remaining fallback path: equal weighting
4223 h->implicit_weight[ref0][ref1] = 32;
4228 static inline void unreference_pic(H264Context *h, Picture *pic){
// Drop the decoder's reference claim on pic. The visible lines check the
// delayed-output picture and scan the delayed_pic list for it.
// NOTE(review): most of this function's body is missing from this excerpt.
4231 if(pic == h->delayed_output_pic)
4234 for(i = 0; h->delayed_pic[i]; i++)
4235 if(pic == h->delayed_pic[i]){
4243 * instantaneous decoder refresh.
4245 static void idr(H264Context *h){
// IDR: unreference and clear every long-term and short-term reference
// picture, resetting both counts to zero.
4248 for(i=0; i<16; i++){
4249 if (h->long_ref[i] != NULL) {
4250 unreference_pic(h, h->long_ref[i]);
4251 h->long_ref[i]= NULL;
4254 h->long_ref_count=0;
4256 for(i=0; i<h->short_ref_count; i++){
4257 unreference_pic(h, h->short_ref[i]);
4258 h->short_ref[i]= NULL;
4260 h->short_ref_count=0;
4263 /* forget old pics after a seek */
4264 static void flush_dpb(AVCodecContext *avctx){
// Clear delayed-picture state and mark the current picture unreferenced so
// stale frames are not emitted after a seek.
// NOTE(review): trailing lines of this function (e.g. any idr()/reset call)
// are missing from this excerpt.
4265 H264Context *h= avctx->priv_data;
4267 for(i=0; i<16; i++) {
4268 if(h->delayed_pic[i])
4269 h->delayed_pic[i]->reference= 0;
4270 h->delayed_pic[i]= NULL;
4272 if(h->delayed_output_pic)
4273 h->delayed_output_pic->reference= 0;
4274 h->delayed_output_pic= NULL;
4276 if(h->s.current_picture_ptr)
4277 h->s.current_picture_ptr->reference= 0;
4282 * @return the removed picture or NULL if an error occurs
4284 static Picture * remove_short(H264Context *h, int frame_num){
// Remove and return the short-term reference with the given frame_num,
// compacting the short_ref array behind it.
// NOTE(review): the return statements are missing from this excerpt.
4285 MpegEncContext * const s = &h->s;
4288 if(s->avctx->debug&FF_DEBUG_MMCO)
4289 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4291 for(i=0; i<h->short_ref_count; i++){
4292 Picture *pic= h->short_ref[i];
4293 if(s->avctx->debug&FF_DEBUG_MMCO)
4294 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4295 if(pic->frame_num == frame_num){
4296 h->short_ref[i]= NULL;
// shift the remaining entries down to keep the list dense
4297 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4298 h->short_ref_count--;
4307 * @return the removed picture or NULL if an error occurs
4309 static Picture * remove_long(H264Context *h, int i){
// Detach and return the long-term reference at slot i (NULL if the slot was
// empty); long_ref_count is decremented only when a picture was present.
4312 pic= h->long_ref[i];
4313 h->long_ref[i]= NULL;
4314 if(pic) h->long_ref_count--;
4320 * print short term list
4322 static void print_short_term(H264Context *h) {
// Debug helper: dump the short-term reference list when MMCO debugging
// (FF_DEBUG_MMCO) is enabled.
4324 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4325 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4326 for(i=0; i<h->short_ref_count; i++){
4327 Picture *pic= h->short_ref[i];
4328 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4334 * print long term list
4336 static void print_long_term(H264Context *h) {
// Debug helper: dump the 16 long-term reference slots when MMCO debugging
// is enabled. NOTE(review): a line between the pic assignment and the log
// call (presumably a NULL-slot guard) is missing from this excerpt.
4338 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4339 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4340 for(i = 0; i < 16; i++){
4341 Picture *pic= h->long_ref[i];
4343 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4350 * Executes the reference picture marking (memory management control operations).
4352 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
// Apply the slice's MMCO commands (H.264 8.2.5.4): move pictures between the
// short-term and long-term lists, unreference them, cap the long-term index,
// or reset everything; finally insert the current picture as a reference.
// NOTE(review): several case labels and break statements are missing from
// this excerpt — the switch structure below is incomplete.
4353 MpegEncContext * const s = &h->s;
4355 int current_is_long=0;
4358 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4359 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4361 for(i=0; i<mmco_count; i++){
4362 if(s->avctx->debug&FF_DEBUG_MMCO)
4363 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4365 switch(mmco[i].opcode){
4366 case MMCO_SHORT2UNUSED:
// mark a short-term picture unused for reference
4367 pic= remove_short(h, mmco[i].short_frame_num);
4369 unreference_pic(h, pic);
4370 else if(s->avctx->debug&FF_DEBUG_MMCO)
4371 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4373 case MMCO_SHORT2LONG:
// evict whatever occupies the target long-term slot, then move the
// short-term picture into it
4374 pic= remove_long(h, mmco[i].long_index);
4375 if(pic) unreference_pic(h, pic);
4377 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4378 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4379 h->long_ref_count++;
4381 case MMCO_LONG2UNUSED:
4382 pic= remove_long(h, mmco[i].long_index);
4384 unreference_pic(h, pic);
4385 else if(s->avctx->debug&FF_DEBUG_MMCO)
4386 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// store the current picture in the requested long-term slot
// (presumably the MMCO_LONG case — its label is missing from this excerpt)
4389 pic= remove_long(h, mmco[i].long_index);
4390 if(pic) unreference_pic(h, pic);
4392 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4393 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4394 h->long_ref_count++;
4398 case MMCO_SET_MAX_LONG:
4399 assert(mmco[i].long_index <= 16);
4400 // just remove the long term which index is greater than new max
4401 for(j = mmco[i].long_index; j<16; j++){
4402 pic = remove_long(h, j);
4403 if (pic) unreference_pic(h, pic);
// drop every short-term and long-term reference
// (presumably the MMCO_RESET case — its label is missing from this excerpt)
4407 while(h->short_ref_count){
4408 pic= remove_short(h, h->short_ref[0]->frame_num);
4409 unreference_pic(h, pic);
4411 for(j = 0; j < 16; j++) {
4412 pic= remove_long(h, j);
4413 if(pic) unreference_pic(h, pic);
// after all MMCOs: unless the current picture became a long-term ref,
// push it onto the head of the short-term list
4420 if(!current_is_long){
4421 pic= remove_short(h, s->current_picture_ptr->frame_num);
4423 unreference_pic(h, pic);
4424 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4427 if(h->short_ref_count)
4428 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4430 h->short_ref[0]= s->current_picture_ptr;
4431 h->short_ref[0]->long_ref=0;
4432 h->short_ref_count++;
4435 print_short_term(h);
4440 static int decode_ref_pic_marking(H264Context *h){
// Parse dec_ref_pic_marking() from the slice header (H.264 7.3.3.3):
// IDR flags, or the adaptive MMCO command list; otherwise (sliding-window
// mode — those lines are missing from this excerpt) synthesize a
// SHORT2UNUSED op when the DPB is full.
4441 MpegEncContext * const s = &h->s;
4444 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// get_bits1()-1 maps the flag {0,1} onto {-1,0}
4445 s->broken_link= get_bits1(&s->gb) -1;
4446 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4447 if(h->mmco[0].long_index == -1)
4450 h->mmco[0].opcode= MMCO_LONG;
4454 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4455 for(i= 0; i<MAX_MMCO_COUNT; i++) {
// NOTE(review): stray double semicolon below (harmless)
4456 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4458 h->mmco[i].opcode= opcode;
4459 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped
4460 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4461 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4462 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4466 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4467 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4468 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4469 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4474 if(opcode > MMCO_LONG){
4475 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4478 if(opcode == MMCO_END)
// sliding-window path (the else branch's opening lines are missing here)
4483 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// DPB full: mark the oldest short-term reference unused
4485 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4486 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4487 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
4497 static int init_poc(H264Context *h){
// Compute the picture order count (POC) of the current picture per
// H.264 8.2.1, covering all three poc_type modes, and store the per-field
// and frame POCs into the current Picture.
// NOTE(review): several lines (branch headers, field_poc[0] assignments)
// are missing from this excerpt.
4498 MpegEncContext * const s = &h->s;
4499 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// extend frame_num with an offset across wrap-arounds (8.2.1.1)
4502 if(h->nal_unit_type == NAL_IDR_SLICE){
4503 h->frame_num_offset= 0;
4505 if(h->frame_num < h->prev_frame_num)
4506 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4508 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: lsb coded in the bitstream, msb inferred from previous values
4511 if(h->sps.poc_type==0){
4512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4514 if(h->nal_unit_type == NAL_IDR_SLICE){
// detect lsb wrap in either direction and adjust msb accordingly
4519 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4520 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4521 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4522 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4524 h->poc_msb = h->prev_poc_msb;
4525 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4527 field_poc[1] = h->poc_msb + h->poc_lsb;
4528 if(s->picture_structure == PICT_FRAME)
4529 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC derived from frame_num via the SPS offset table (8.2.1.2)
4530 }else if(h->sps.poc_type==1){
4531 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4534 if(h->sps.poc_cycle_length != 0)
4535 abs_frame_num = h->frame_num_offset + h->frame_num;
4539 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4542 expected_delta_per_poc_cycle = 0;
4543 for(i=0; i < h->sps.poc_cycle_length; i++)
4544 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4546 if(abs_frame_num > 0){
4547 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4548 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4550 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4551 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4552 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4556 if(h->nal_ref_idc == 0)
4557 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4559 field_poc[0] = expectedpoc + h->delta_poc[0];
4560 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4562 if(s->picture_structure == PICT_FRAME)
4563 field_poc[1] += h->delta_poc[1];
// remaining path (presumably poc_type 2 — its branch header is missing):
// POC follows decoding order, non-ref pictures get the odd values
4566 if(h->nal_unit_type == NAL_IDR_SLICE){
4569 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4570 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store per-field POCs; the frame POC is the smaller of the two fields
4576 if(s->picture_structure != PICT_BOTTOM_FIELD)
4577 s->current_picture_ptr->field_poc[0]= field_poc[0];
4578 if(s->picture_structure != PICT_TOP_FIELD)
4579 s->current_picture_ptr->field_poc[1]= field_poc[1];
4580 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4581 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4587 * decodes a slice header.
4588 * this will also call MPV_common_init() and frame_start() as needed
4590 static int decode_slice_header(H264Context *h){
// Parses slice_header() (H.264 7.3.3): slice type, active PPS/SPS, picture
// geometry, frame_num, POC fields, reference counts and list reordering,
// weight tables, reference marking, QP and deblocking parameters.
// NOTE(review): many lines (error returns, closing braces, else branches)
// are missing from this excerpt — comments cover visible lines only.
4591 MpegEncContext * const s = &h->s;
4592 int first_mb_in_slice, pps_id;
4593 int num_ref_idx_active_override_flag;
// maps coded slice_type 0..4 onto the internal picture types
4594 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4596 int default_ref_list_done = 0;
// nal_ref_idc == 0 means this picture is never used as a reference
4598 s->current_picture.reference= h->nal_ref_idc != 0;
4599 s->dropable= h->nal_ref_idc == 0;
4601 first_mb_in_slice= get_ue_golomb(&s->gb);
4603 slice_type= get_ue_golomb(&s->gb);
4605 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 signals the type is fixed for the whole picture
4610 h->slice_type_fixed=1;
4612 h->slice_type_fixed=0;
4614 slice_type= slice_type_map[ slice_type ];
// I slices, and repeats of the previous slice type, keep the default list
4615 if (slice_type == I_TYPE
4616 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4617 default_ref_list_done = 1;
4619 h->slice_type= slice_type;
4621 s->pict_type= h->slice_type; // to make a few old funcs happy, it's wrong though
4623 pps_id= get_ue_golomb(&s->gb);
4625 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4628 h->pps= h->pps_buffer[pps_id];
// slice_group_count == 0 marks an unset PPS slot
4629 if(h->pps.slice_group_count == 0){
4630 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4634 h->sps= h->sps_buffer[ h->pps.sps_id ];
// log2_max_frame_num == 0 marks an unset SPS slot
4635 if(h->sps.log2_max_frame_num == 0){
4636 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// rebuild dequant tables only when the active PPS actually changed
4640 if(h->dequant_coeff_pps != pps_id){
4641 h->dequant_coeff_pps = pps_id;
4642 init_dequant_tables(h);
4645 s->mb_width= h->sps.mb_width;
4646 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4648 h->b_stride= s->mb_width*4;
4649 h->b8_stride= s->mb_width*2;
// picture dimensions minus the SPS cropping window
4651 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4652 if(h->sps.frame_mbs_only_flag)
4653 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4655 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4657 if (s->context_initialized
4658 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4662 if (!s->context_initialized) {
4663 if (MPV_common_init(s) < 0)
// bit-exact C IDCT uses the spec scan order directly; otherwise the
// scantables are permuted to match the optimized IDCT's coefficient layout
4666 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4667 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4668 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4671 for(i=0; i<16; i++){
4672 #define T(x) (x>>2) | ((x<<2) & 0xF)
4673 h->zigzag_scan[i] = T(zigzag_scan[i]);
4674 h-> field_scan[i] = T( field_scan[i]);
// same scheme for the 8x8 transform's scantables
4678 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4679 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4680 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4681 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4682 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4685 for(i=0; i<64; i++){
4686 #define T(x) (x>>3) | ((x&7)<<3)
4687 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4688 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4689 h->field_scan8x8[i] = T(field_scan8x8[i]);
4690 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// lossless (transform bypass) blocks must use the unpermuted spec scans
4694 if(h->sps.transform_bypass){ //FIXME same ugly
4695 h->zigzag_scan_q0 = zigzag_scan;
4696 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4697 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4698 h->field_scan_q0 = field_scan;
4699 h->field_scan8x8_q0 = field_scan8x8;
4700 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4702 h->zigzag_scan_q0 = h->zigzag_scan;
4703 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4704 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4705 h->field_scan_q0 = h->field_scan;
4706 h->field_scan8x8_q0 = h->field_scan8x8;
4707 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4712 s->avctx->width = s->width;
4713 s->avctx->height = s->height;
4714 s->avctx->sample_aspect_ratio= h->sps.sar;
4715 if(!s->avctx->sample_aspect_ratio.den)
4716 s->avctx->sample_aspect_ratio.den = 1;
4718 if(h->sps.timing_info_present_flag){
4719 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// work around old x264 builds (< 44) that wrote half the timescale
4720 if(h->x264_build > 0 && h->x264_build < 44)
4721 s->avctx->time_base.den *= 2;
4722 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4723 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// first slice of a picture: start a new frame
4727 if(h->slice_num == 0){
4728 if(frame_start(h) < 0)
4732 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4733 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4736 h->mb_aff_frame = 0;
4737 if(h->sps.frame_mbs_only_flag){
4738 s->picture_structure= PICT_FRAME;
4740 if(get_bits1(&s->gb)) { //field_pic_flag
4741 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4742 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4744 s->picture_structure= PICT_FRAME;
4745 h->mb_aff_frame = h->sps.mb_aff;
// position of this slice's first MB; mb_y is doubled for MBAFF pairs
4749 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4750 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4751 if(s->mb_y >= s->mb_height){
// pic num range doubles when coding fields
4755 if(s->picture_structure==PICT_FRAME){
4756 h->curr_pic_num= h->frame_num;
4757 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4759 h->curr_pic_num= 2*h->frame_num;
4760 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4763 if(h->nal_unit_type == NAL_IDR_SLICE){
4764 get_ue_golomb(&s->gb); /* idr_pic_id */
4767 if(h->sps.poc_type==0){
4768 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4770 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4771 h->delta_poc_bottom= get_se_golomb(&s->gb);
4775 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4776 h->delta_poc[0]= get_se_golomb(&s->gb);
4778 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4779 h->delta_poc[1]= get_se_golomb(&s->gb);
4784 if(h->pps.redundant_pic_cnt_present){
4785 h->redundant_pic_count= get_ue_golomb(&s->gb);
4788 //set defaults, might be overridden a few lines later
4789 h->ref_count[0]= h->pps.ref_count[0];
4790 h->ref_count[1]= h->pps.ref_count[1];
4792 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4793 if(h->slice_type == B_TYPE){
4794 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4795 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4796 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4798 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4800 if(num_ref_idx_active_override_flag){
4801 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4802 if(h->slice_type==B_TYPE)
4803 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4805 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4806 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4812 if(!default_ref_list_done){
4813 fill_default_ref_list(h);
4816 if(decode_ref_pic_list_reordering(h) < 0)
// explicit weighting for P/SP (weighted_pred) or B (bipred idc 1);
// implicit weighting for B slices when weighted_bipred_idc == 2
4819 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4820 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4821 pred_weight_table(h);
4822 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4823 implicit_weight_table(h);
4827 if(s->current_picture.reference)
4828 decode_ref_pic_marking(h);
4831 fill_mbaff_ref_list(h);
4833 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4834 h->cabac_init_idc = get_ue_golomb(&s->gb);
4836 h->last_qscale_diff = 0;
4837 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4838 if(s->qscale<0 || s->qscale>51){
4839 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4842 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4843 //FIXME qscale / qp ... stuff
4844 if(h->slice_type == SP_TYPE){
4845 get_bits1(&s->gb); /* sp_for_switch_flag */
4847 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4848 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking defaults, then per-slice overrides
4851 h->deblocking_filter = 1;
4852 h->slice_alpha_c0_offset = 0;
4853 h->slice_beta_offset = 0;
4854 if( h->pps.deblocking_filter_parameters_present ) {
4855 h->deblocking_filter= get_ue_golomb(&s->gb);
// bitstream encodes 0=filter on, 1=off; swap so nonzero means "filter"
4856 if(h->deblocking_filter < 2)
4857 h->deblocking_filter^= 1; // 1<->0
4859 if( h->deblocking_filter ) {
4860 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4861 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// honor the user's skip_loop_filter discard level
4864 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4865 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4866 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4867 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4868 h->deblocking_filter= 0;
4871 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
// NOTE(review): the literal '?' below cannot compile; this FMO path was
// disabled/unimplemented in upstream FFmpeg — confirm the enclosing
// preprocessor guard, which is missing from this excerpt
4872 slice_group_change_cycle= get_bits(&s->gb, ?);
// edge emulation needed unless the caller guarantees padded buffers
4877 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4878 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4880 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4881 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4883 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4885 av_get_pict_type_char(h->slice_type),
4886 pps_id, h->frame_num,
4887 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4888 h->ref_count[0], h->ref_count[1],
4890 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4892 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// non-reference frames may use the cheaper 2-tap qpel when FAST is set
4896 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4897 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4898 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4900 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4901 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
4910 static inline int get_level_prefix(GetBitContext *h){
// Read a CAVLC level_prefix: the number of leading zero bits before the
// first 1 bit, using the low-level bitstream-reader macro API.
// NOTE(review): the return statement is missing from this excerpt.
4914 OPEN_READER(re, gb);
4915 UPDATE_CACHE(re, gb);
4916 buf=GET_CACHE(re, gb);
// bits consumed up to and including the terminating 1 bit
4918 log= 32 - av_log2(buf);
4920 print_bin(buf>>(32-log), log);
4921 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4924 LAST_SKIP_BITS(re, gb, log);
4925 CLOSE_READER(re, gb);
4930 static inline int get_dct8x8_allowed(H264Context *h){
// Decide whether the 8x8 transform may be used for the current MB: it is
// disallowed when any sub-MB partition is smaller than 8x8, or is DIRECT
// without direct_8x8_inference. NOTE(review): the loop header and return
// statements are missing from this excerpt.
4933 if(!IS_SUB_8X8(h->sub_mb_type[i])
4934 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4941 * decodes a residual block.
4942 * @param n block index
4943 * @param scantable scantable
4944 * @param max_coeff number of coefficients in the block
4945 * @return <0 if an error occurred
4947 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
// CAVLC residual decoding (H.264 9.2): coeff_token -> trailing ones ->
// level prefix/suffix -> total_zeros -> run_before, writing coefficients
// into block[] in scantable order, with dequantization via qmul on one path.
// NOTE(review): many lines (branch headers, returns) are missing from this
// excerpt.
4948 MpegEncContext * const s = &h->s;
// selects the coeff_token VLC table from the predicted non-zero count
4949 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4951 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4953 //FIXME put trailing_ones into the context
4955 if(n == CHROMA_DC_BLOCK_INDEX){
4956 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4957 total_coeff= coeff_token>>2;
4959 if(n == LUMA_DC_BLOCK_INDEX){
4960 total_coeff= pred_non_zero_count(h, 0);
4961 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4962 total_coeff= coeff_token>>2;
4964 total_coeff= pred_non_zero_count(h, n);
4965 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4966 total_coeff= coeff_token>>2;
4967 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4971 //FIXME set last_non_zero?
// low two bits of coeff_token = number of trailing +/-1 coefficients
4976 trailing_ones= coeff_token&3;
4977 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4978 assert(total_coeff<=16);
// trailing ones carry only a sign bit (0 -> +1, 1 -> -1)
4980 for(i=0; i<trailing_ones; i++){
4981 level[i]= 1 - 2*get_bits1(gb);
4985 int level_code, mask;
4986 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4987 int prefix= get_level_prefix(gb);
4989 //first coefficient has suffix_length equal to 0 or 1
4990 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4992 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4994 level_code= (prefix<<suffix_length); //part
4995 }else if(prefix==14){
4997 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4999 level_code= prefix + get_bits(gb, 4); //part
5000 }else if(prefix==15){
5001 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
5002 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
5004 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// with fewer than 3 trailing ones, levels start at magnitude 2
5008 if(trailing_ones < 3) level_code += 2;
// unfold the zigzag sign coding: even codes positive, odd negative
5013 mask= -(level_code&1);
5014 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5017 //remaining coefficients have suffix_length > 0
5018 for(;i<total_coeff;i++) {
5019 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5020 prefix = get_level_prefix(gb);
5022 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5023 }else if(prefix==15){
5024 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5026 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5029 mask= -(level_code&1);
5030 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// grow suffix_length adaptively as level magnitudes increase
5031 if(level_code > suffix_limit[suffix_length])
// a full block can contain no zeros between coefficients
5036 if(total_coeff == max_coeff)
5039 if(n == CHROMA_DC_BLOCK_INDEX)
5040 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5042 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// place levels from the highest scan position downwards
5045 coeff_num = zeros_left + total_coeff - 1;
5046 j = scantable[coeff_num];
// first path stores raw levels (presumably the qmul==NULL branch — its
// condition is missing from this excerpt)
5048 block[j] = level[0];
5049 for(i=1;i<total_coeff;i++) {
5052 else if(zeros_left < 7){
5053 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5055 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5057 zeros_left -= run_before;
5058 coeff_num -= 1 + run_before;
5059 j= scantable[ coeff_num ];
// second path applies dequantization: (level * qmul[j] + 32) >> 6
5064 block[j] = (level[0] * qmul[j] + 32)>>6;
5065 for(i=1;i<total_coeff;i++) {
5068 else if(zeros_left < 7){
5069 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5071 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5073 zeros_left -= run_before;
5074 coeff_num -= 1 + run_before;
5075 j= scantable[ coeff_num ];
5077 block[j]= (level[i] * qmul[j] + 32)>>6;
// more runs consumed than zeros available signals a corrupt bitstream
5082 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
5089 static void predict_field_decoding_flag(H264Context *h){
// MBAFF: when the field-decoding flag is not coded for a skipped MB pair,
// predict it from the left neighbour's mb_type (if in the same slice),
// else the top neighbour's. NOTE(review): the final ": 0" arm of the
// conditional appears to be missing from this excerpt.
5090 MpegEncContext * const s = &h->s;
5091 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5092 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5093 ? s->current_picture.mb_type[mb_xy-1]
5094 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5095 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5097 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5101 * decodes a P_SKIP or B_SKIP macroblock
5103 static void decode_mb_skip(H264Context *h){
// Reconstruct a skipped macroblock: clear residual state, derive motion
// (direct prediction for B_SKIP, pskip prediction for P_SKIP), then write
// back motion vectors and per-MB metadata.
// NOTE(review): some lines (mb_type initialization, branch closers) are
// missing from this excerpt.
5104 MpegEncContext * const s = &h->s;
5105 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// skipped MBs carry no residual coefficients anywhere
5108 memset(h->non_zero_count[mb_xy], 0, 16);
5109 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5112 mb_type|= MB_TYPE_INTERLACED;
5114 if( h->slice_type == B_TYPE )
5116 // just for fill_caches. pred_direct_motion will set the real mb_type
5117 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5119 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5120 pred_direct_motion(h, &mb_type);
5121 mb_type|= MB_TYPE_SKIP;
// P_SKIP: 16x16 list-0 prediction with ref 0 and the pskip-predicted MV
5126 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5128 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5129 pred_pskip_motion(h, &mx, &my);
5130 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5131 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5134 write_back_motion(h, mb_type);
5135 s->current_picture.mb_type[mb_xy]= mb_type;
5136 s->current_picture.qscale_table[mb_xy]= s->qscale;
5137 h->slice_table[ mb_xy ]= h->slice_num;
5138 h->prev_mb_skipped= 1;
5142 * decodes a macroblock
5143 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5145 static int decode_mb_cavlc(H264Context *h){
5146 MpegEncContext * const s = &h->s;
5147 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5148 int mb_type, partition_count, cbp;
5149 int dct8x8_allowed= h->pps.transform_8x8_mode;
5151 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5153 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5154 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5156 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5157 if(s->mb_skip_run==-1)
5158 s->mb_skip_run= get_ue_golomb(&s->gb);
5160 if (s->mb_skip_run--) {
5161 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5162 if(s->mb_skip_run==0)
5163 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5165 predict_field_decoding_flag(h);
5172 if( (s->mb_y&1) == 0 )
5173 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5175 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5177 h->prev_mb_skipped= 0;
5179 mb_type= get_ue_golomb(&s->gb);
5180 if(h->slice_type == B_TYPE){
5182 partition_count= b_mb_type_info[mb_type].partition_count;
5183 mb_type= b_mb_type_info[mb_type].type;
5186 goto decode_intra_mb;
5188 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5190 partition_count= p_mb_type_info[mb_type].partition_count;
5191 mb_type= p_mb_type_info[mb_type].type;
5194 goto decode_intra_mb;
5197 assert(h->slice_type == I_TYPE);
5200 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5204 cbp= i_mb_type_info[mb_type].cbp;
5205 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5206 mb_type= i_mb_type_info[mb_type].type;
5210 mb_type |= MB_TYPE_INTERLACED;
5212 h->slice_table[ mb_xy ]= h->slice_num;
5214 if(IS_INTRA_PCM(mb_type)){
5217 // we assume these blocks are very rare so we dont optimize it
5218 align_get_bits(&s->gb);
5220 // The pixels are stored in the same order as levels in h->mb array.
5221 for(y=0; y<16; y++){
5222 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5223 for(x=0; x<16; x++){
5224 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5225 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5229 const int index= 256 + 4*(y&3) + 32*(y>>2);
5231 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5232 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5236 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5238 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5239 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5243 // In deblocking, the quantizer is 0
5244 s->current_picture.qscale_table[mb_xy]= 0;
5245 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5246 // All coeffs are present
5247 memset(h->non_zero_count[mb_xy], 16, 16);
5249 s->current_picture.mb_type[mb_xy]= mb_type;
5254 h->ref_count[0] <<= 1;
5255 h->ref_count[1] <<= 1;
5258 fill_caches(h, mb_type, 0);
5261 if(IS_INTRA(mb_type)){
5262 // init_top_left_availability(h);
5263 if(IS_INTRA4x4(mb_type)){
5266 if(dct8x8_allowed && get_bits1(&s->gb)){
5267 mb_type |= MB_TYPE_8x8DCT;
5271 // fill_intra4x4_pred_table(h);
5272 for(i=0; i<16; i+=di){
5273 int mode= pred_intra_mode(h, i);
5275 if(!get_bits1(&s->gb)){
5276 const int rem_mode= get_bits(&s->gb, 3);
5277 mode = rem_mode + (rem_mode >= mode);
5281 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5283 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5285 write_back_intra_pred_mode(h);
5286 if( check_intra4x4_pred_mode(h) < 0)
5289 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5290 if(h->intra16x16_pred_mode < 0)
5293 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5295 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5296 if(h->chroma_pred_mode < 0)
5298 }else if(partition_count==4){
5299 int i, j, sub_partition_count[4], list, ref[2][4];
5301 if(h->slice_type == B_TYPE){
5303 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5304 if(h->sub_mb_type[i] >=13){
5305 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5308 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5309 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5311 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5312 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5313 pred_direct_motion(h, &mb_type);
5314 h->ref_cache[0][scan8[4]] =
5315 h->ref_cache[1][scan8[4]] =
5316 h->ref_cache[0][scan8[12]] =
5317 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5320 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5322 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5323 if(h->sub_mb_type[i] >=4){
5324 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5327 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5328 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5332 for(list=0; list<2; list++){
5333 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5334 if(ref_count == 0) continue;
5336 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5337 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5338 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5347 dct8x8_allowed = get_dct8x8_allowed(h);
5349 for(list=0; list<2; list++){
5350 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5351 if(ref_count == 0) continue;
5354 if(IS_DIRECT(h->sub_mb_type[i])) {
5355 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5358 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5359 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5361 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5362 const int sub_mb_type= h->sub_mb_type[i];
5363 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5364 for(j=0; j<sub_partition_count[i]; j++){
5366 const int index= 4*i + block_width*j;
5367 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5368 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5369 mx += get_se_golomb(&s->gb);
5370 my += get_se_golomb(&s->gb);
5371 tprintf("final mv:%d %d\n", mx, my);
5373 if(IS_SUB_8X8(sub_mb_type)){
5374 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5375 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5376 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5377 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5378 }else if(IS_SUB_8X4(sub_mb_type)){
5379 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5380 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5381 }else if(IS_SUB_4X8(sub_mb_type)){
5382 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5383 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5385 assert(IS_SUB_4X4(sub_mb_type));
5386 mv_cache[ 0 ][0]= mx;
5387 mv_cache[ 0 ][1]= my;
5391 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5397 }else if(IS_DIRECT(mb_type)){
5398 pred_direct_motion(h, &mb_type);
5399 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5401 int list, mx, my, i;
5402 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5403 if(IS_16X16(mb_type)){
5404 for(list=0; list<2; list++){
5405 if(h->ref_count[list]>0){
5406 if(IS_DIR(mb_type, 0, list)){
5407 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5408 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5410 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5413 for(list=0; list<2; list++){
5414 if(IS_DIR(mb_type, 0, list)){
5415 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5416 mx += get_se_golomb(&s->gb);
5417 my += get_se_golomb(&s->gb);
5418 tprintf("final mv:%d %d\n", mx, my);
5420 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5422 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5425 else if(IS_16X8(mb_type)){
5426 for(list=0; list<2; list++){
5427 if(h->ref_count[list]>0){
5429 if(IS_DIR(mb_type, i, list)){
5430 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5431 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5433 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5437 for(list=0; list<2; list++){
5439 if(IS_DIR(mb_type, i, list)){
5440 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5441 mx += get_se_golomb(&s->gb);
5442 my += get_se_golomb(&s->gb);
5443 tprintf("final mv:%d %d\n", mx, my);
5445 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5447 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5451 assert(IS_8X16(mb_type));
5452 for(list=0; list<2; list++){
5453 if(h->ref_count[list]>0){
5455 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5456 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5457 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5459 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5463 for(list=0; list<2; list++){
5465 if(IS_DIR(mb_type, i, list)){
5466 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5467 mx += get_se_golomb(&s->gb);
5468 my += get_se_golomb(&s->gb);
5469 tprintf("final mv:%d %d\n", mx, my);
5471 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5473 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5479 if(IS_INTER(mb_type))
5480 write_back_motion(h, mb_type);
5482 if(!IS_INTRA16x16(mb_type)){
5483 cbp= get_ue_golomb(&s->gb);
5485 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5489 if(IS_INTRA4x4(mb_type))
5490 cbp= golomb_to_intra4x4_cbp[cbp];
5492 cbp= golomb_to_inter_cbp[cbp];
5496 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5497 if(get_bits1(&s->gb))
5498 mb_type |= MB_TYPE_8x8DCT;
5500 s->current_picture.mb_type[mb_xy]= mb_type;
5502 if(cbp || IS_INTRA16x16(mb_type)){
5503 int i8x8, i4x4, chroma_idx;
5504 int chroma_qp, dquant;
5505 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5506 const uint8_t *scan, *scan8x8, *dc_scan;
5508 // fill_non_zero_count_cache(h);
5510 if(IS_INTERLACED(mb_type)){
5511 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5512 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5513 dc_scan= luma_dc_field_scan;
5515 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5516 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5517 dc_scan= luma_dc_zigzag_scan;
5520 dquant= get_se_golomb(&s->gb);
5522 if( dquant > 25 || dquant < -26 ){
5523 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5527 s->qscale += dquant;
5528 if(((unsigned)s->qscale) > 51){
5529 if(s->qscale<0) s->qscale+= 52;
5530 else s->qscale-= 52;
5533 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5534 if(IS_INTRA16x16(mb_type)){
5535 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5536 return -1; //FIXME continue if partitioned and other return -1 too
5539 assert((cbp&15) == 0 || (cbp&15) == 15);
5542 for(i8x8=0; i8x8<4; i8x8++){
5543 for(i4x4=0; i4x4<4; i4x4++){
5544 const int index= i4x4 + 4*i8x8;
5545 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5551 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5554 for(i8x8=0; i8x8<4; i8x8++){
5555 if(cbp & (1<<i8x8)){
5556 if(IS_8x8DCT(mb_type)){
5557 DCTELEM *buf = &h->mb[64*i8x8];
5559 for(i4x4=0; i4x4<4; i4x4++){
5560 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5561 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5564 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5565 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5567 for(i4x4=0; i4x4<4; i4x4++){
5568 const int index= i4x4 + 4*i8x8;
5570 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5576 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5577 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5583 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5584 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5590 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5591 for(i4x4=0; i4x4<4; i4x4++){
5592 const int index= 16 + 4*chroma_idx + i4x4;
5593 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5599 uint8_t * const nnz= &h->non_zero_count_cache[0];
5600 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5601 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5604 uint8_t * const nnz= &h->non_zero_count_cache[0];
5605 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5606 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5607 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5609 s->current_picture.qscale_table[mb_xy]= s->qscale;
5610 write_back_non_zero_count(h);
5613 h->ref_count[0] >>= 1;
5614 h->ref_count[1] >>= 1;
/**
 * Decodes the CABAC mb_field_decoding_flag for the current MB pair (MBAFF).
 * The context (states 70..72) is derived from whether the left and top
 * neighbouring MB pairs, when they belong to the same slice, are field coded.
 * @return the decoded flag (1 = field decoding for this MB pair)
 */
5620 static int decode_cabac_field_decoding_flag(H264Context *h) {
5621 MpegEncContext * const s = &h->s;
// Work at MB-pair granularity: round mb_y down to the top MB of the pair.
5622 const int mb_x = s->mb_x;
5623 const int mb_y = s->mb_y & ~1;
// Left neighbour pair; top neighbour pair is two rows up (pairs span 2 rows).
5624 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5625 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5627 unsigned int ctx = 0;
// ctx accumulates per interlaced same-slice neighbour
// (increment statements elided in this listing).
5629 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5636 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type with CABAC.
 * @param ctx_base index of the first mb_type context state for this call
 *                 (3 for I slices; 17/32 when reached from P/B slice types)
 * @param intra_slice nonzero in intra slices: selects the neighbour-derived
 *                    context for the first bin and shifts later state indices
 * @return 0 for I_4x4, 25 for I_PCM, or 1..24 packing the I16x16
 *         pred mode / cbp_chroma / cbp_luma choice
 */
5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5640 uint8_t *state= &h->cabac_state[ctx_base];
5644 MpegEncContext * const s = &h->s;
5645 const int mba_xy = h->left_mb_xy[0];
5646 const int mbb_xy = h->top_mb_xy;
// Intra slice: ctx counts available same-slice neighbours that are not I4x4
// (increments elided in this listing).
5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5652 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5653 return 0; /* I4x4 */
// Non-intra slice: first bin always uses state[0].
5656 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5657 return 0; /* I4x4 */
// The terminate bin distinguishes I_PCM from the I16x16 modes.
5660 if( get_cabac_terminate( &h->cabac ) )
5661 return 25; /* PCM */
5663 mb_type = 1; /* I16x16 */
5664 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5665 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5666 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// Two final bins select the 16x16 intra prediction mode (0..3).
5667 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5668 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes the macroblock type with CABAC, dispatching on the slice type.
 * For I slices this defers entirely to decode_cabac_intra_mb_type();
 * for P/B slices the inter types are decoded first and intra types are
 * reached through an offset call. SI/SP handling is not implemented.
 * @return slice-type-relative mb_type index
 */
5672 static int decode_cabac_mb_type( H264Context *h ) {
5673 MpegEncContext * const s = &h->s;
5675 if( h->slice_type == I_TYPE ) {
5676 return decode_cabac_intra_mb_type(h, 3, 1);
5677 } else if( h->slice_type == P_TYPE ) {
// First bin (state 14): 0 = inter type follows, 1 = intra type.
5678 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5680 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5681 /* P_L0_D16x16, P_8x8 */
5682 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5684 /* P_L0_D8x16, P_L0_D16x8 */
5685 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra MB in a P slice: intra types start after the 5 P types.
5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5690 } else if( h->slice_type == B_TYPE ) {
5691 const int mba_xy = h->left_mb_xy[0];
5692 const int mbb_xy = h->top_mb_xy;
// ctx counts available same-slice neighbours that are not B_Direct
// (increments elided in this listing).
5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5701 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5702 return 0; /* B_Direct_16x16 */
5704 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5705 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Remaining types: a 4-bit fixed-length code, MSB first.
5708 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5709 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5710 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5711 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5714 else if( bits == 13 ) {
// Intra MB in a B slice: intra types start after the 23 B types.
5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5716 } else if( bits == 14 )
5717 return 11; /* B_L1_L0_8x16 */
5718 else if( bits == 15 )
5719 return 22; /* B_8x8 */
// One more bin extends the code for the remaining bidirectional types.
5721 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5724 /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag for the macroblock at (mb_x, mb_y).
 * ctx counts the available left/top same-slice neighbours that are not
 * skipped; P/SP slices use states 11..13 (B slices use a different offset,
 * adjustment elided in this listing).
 * @return the decoded skip flag
 */
5729 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5730 MpegEncContext * const s = &h->s;
// MBAFF: neighbours are located at MB-pair granularity and then adjusted
// depending on whether each neighbour pair is frame or field coded.
5734 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5735 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5738 && h->slice_table[mba_xy] == h->slice_num
// Use the bottom MB of the left pair when its field/frame mode matches ours.
5739 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5740 mba_xy += s->mb_stride;
5742 mbb_xy = mb_xy - s->mb_stride;
5744 && h->slice_table[mbb_xy] == h->slice_num
5745 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5746 mbb_xy -= s->mb_stride;
5748 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Progressive: plain raster neighbours.
5750 int mb_xy = mb_x + mb_y*s->mb_stride;
5752 mbb_xy = mb_xy - s->mb_stride;
5755 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5760 if( h->slice_type == B_TYPE )
5762 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one intra4x4 prediction mode with CABAC.
 * @param pred_mode the most probable mode derived from the neighbours
 * @return pred_mode if the "use predicted mode" flag (state 68) is set,
 *         otherwise a 3-bit remaining mode, skipping over pred_mode
 */
5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5768 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// rem_intra4x4_pred_mode: three bins from state 69, LSB first.
5771 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5772 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5773 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Remaining modes are numbered with the predicted mode excluded.
5775 if( mode >= pred_mode )
/**
 * Decodes the intra chroma prediction mode with CABAC.
 * Binarization is truncated unary (max 3 bins); the first bin's context
 * depends on whether the neighbours use a nonzero chroma mode, the
 * remaining bins use state 64+3.
 * @return the decoded chroma prediction mode (0..3; returns elided here)
 */
5781 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5782 const int mba_xy = h->left_mb_xy[0];
5783 const int mbb_xy = h->top_mb_xy;
5787 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5788 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5794 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Mapping between the 4x4 luma block decode order (z-scan within 8x8
// quadrants) and (x, y) block coordinates; block_idx_xy is the inverse
// lookup used for neighbour CBP tests (initializer rows elided here).
5805 static const uint8_t block_idx_x[16] = {
5806 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5808 static const uint8_t block_idx_y[16] = {
5809 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5811 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC, one bin per 8x8
 * block. Each bin's context (states 73..76) is derived from the coded
 * state of the 8x8 block to the left and above, taken either from this
 * MB's partial cbp or from the neighbouring MB's stored cbp.
 * @return the luma cbp bits (accumulation elided in this listing)
 */
5818 static int decode_cabac_mb_cbp_luma( H264Context *h) {
// Seed cbp_b/cbp_a from the top/left neighbour when it is in this slice.
5823 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5825 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5828 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
// Coordinates of this 8x8 block's top-left 4x4 block.
5833 x = block_idx_x[4*i8x8];
5834 y = block_idx_y[4*i8x8];
5838 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5839 cbp_a = h->left_cbp;
5840 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5846 /* No need to test for skip as we put 0 for skip block */
5847 /* No need to test for IPCM as we put 1 for IPCM block */
// Left neighbour 8x8 block: wrap x-1 into the neighbour MB via &0x03.
5849 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5850 if( ((cbp_a >> i8x8a)&0x01) == 0 )
// Top neighbour 8x8 block, same wrap-around trick on y.
5855 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5856 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5860 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma coded_block_pattern with CABAC.
 * Two bins from the 77+ group of states: the first decides 0 vs nonzero,
 * the second (with ctx rebased — rebase statement elided here) decides
 * 1 (DC only) vs 2 (DC+AC). Contexts depend on the neighbours' chroma cbp.
 * @return 0, 1 or 2
 */
5866 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// Neighbour chroma cbp lives in bits 4..5 of the stored cbp.
5870 cbp_a = (h->left_cbp>>4)&0x03;
5871 cbp_b = (h-> top_cbp>>4)&0x03;
5874 if( cbp_a > 0 ) ctx++;
5875 if( cbp_b > 0 ) ctx += 2;
5876 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: context from neighbours having full (DC+AC) chroma cbp.
5880 if( cbp_a == 2 ) ctx++;
5881 if( cbp_b == 2 ) ctx += 2;
5882 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC.
 * The value is coded in unary (states 60..63); the unsigned count is then
 * mapped back to a signed delta. Only the negative branch of that mapping
 * is visible in this listing.
 * @return the signed qp delta
 */
5884 static int decode_cabac_mb_dqp( H264Context *h) {
5885 MpegEncContext * const s = &h->s;
// Previous MB in decode order (wraps to the end of the previous row).
5891 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
// First-bin context depends on whether the previous MB had a nonzero delta.
5895 if( h->last_qscale_diff != 0 )
5898 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5904 if(val > 102) //prevent infinite loop
// Map unary count to signed delta (even counts negative here).
5911 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type with CABAC (states 21..23).
 * Up to three bins select among the four P sub-partition shapes
 * (return statements for each branch are elided in this listing).
 */
5913 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5914 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5916 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5918 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC (states 36..39).
 * A prefix tree selects direct / single-list 8x8 / the remaining
 * bidirectional and smaller partition types.
 * @return sub_mb_type index (0 = B_Direct_8x8)
 */
5922 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5924 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5925 return 0; /* B_Direct_8x8 */
5926 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5927 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5929 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5930 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5931 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Two suffix bins from state 39 refine the remaining types.
5934 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5935 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context (states 399..)
 * is the number of neighbouring MBs already using the 8x8 transform,
 * precomputed in h->neighbor_transform_size.
 */
5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5940 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index (ref_idx_lX) for block n with CABAC.
 * Coded in unary with states 54+; the first-bin context depends on the
 * left/top neighbours' reference indices. In B slices a neighbour coded
 * as direct does not contribute to the context.
 * @return the decoded reference index (accumulation loop partly elided)
 */
5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5944 int refa = h->ref_cache[list][scan8[n] - 1];
5945 int refb = h->ref_cache[list][scan8[n] - 8];
5949 if( h->slice_type == B_TYPE) {
5950 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5952 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5961 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one motion vector difference component with CABAC.
 * @param n block index (scan8 order)
 * @param l component: 0 = horizontal (states 40+), 1 = vertical (states 47+)
 * The first-bin context comes from the summed |mvd| of the left/top
 * neighbours; the magnitude uses a unary prefix (capped at 9) followed by
 * an exp-golomb bypass suffix, then a bypass-coded sign.
 * @return the signed mvd component
 */
5971 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5972 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5973 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5974 int ctxbase = (l == 0) ? 40 : 47;
5979 else if( amvd > 32 )
5984 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary prefix: at most 9 context-coded bins.
5989 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-golomb escape: bypass bins count the suffix length...
5997 while( get_cabac_bypass( &h->cabac ) ) {
// ...then the suffix bits themselves are read in bypass mode.
6002 if( get_cabac_bypass( &h->cabac ) )
6006 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Derives the coded_block_flag context for a residual block.
 * nza/nzb are the "neighbour has coefficients" indicators for the left and
 * top neighbour of the block, taken from the source appropriate for the
 * block category (cat; see the category table in decode_cabac_residual).
 * @return ctx (0..3, computation elided here) + 4 * cat, an offset into
 *         the coded-block-flag states starting at 85
 */
6009 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
// cat 0: luma DC — flag is bit 8 of the neighbour MB's stored cbp.
6014 nza = h->left_cbp&0x100;
6015 nzb = h-> top_cbp&0x100;
// cat 1/2: luma AC / 4x4 — use the per-4x4 non-zero-count cache.
6016 } else if( cat == 1 || cat == 2 ) {
6017 nza = h->non_zero_count_cache[scan8[idx] - 1];
6018 nzb = h->non_zero_count_cache[scan8[idx] - 8];
// cat 3: chroma DC — per-plane flag in bits 6..7 of the stored cbp.
6019 } else if( cat == 3 ) {
6020 nza = (h->left_cbp>>(6+idx))&0x01;
6021 nzb = (h-> top_cbp>>(6+idx))&0x01;
// cat 4: chroma AC — non-zero-count cache, chroma blocks start at 16.
6024 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6025 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6034 return ctx + 4 * cat;
// Context increment for the last_significant_coeff flag at each of the 63
// non-final 8x8 scan positions (spec groups positions into 9 context bins).
// __attribute((used)) keeps the table when it is only referenced from the
// inline-asm/x86 path.
6037 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
6038 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6039 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6040 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6041 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one residual block with CABAC: coded_block_flag, significance
 * map, and coefficient levels (dequantized via qmul unless qmul is NULL /
 * the DC paths). Updates the non_zero_count cache / cbp_table entry for
 * the block so later context derivations see it.
 * @param block destination for the (dequantized) coefficients
 * @param cat block category, see the table below
 * @param n block index within the MB (meaning depends on cat)
 * @param scantable zigzag/field scan mapping scan position -> coeff index
 * @param qmul dequant table for this qp, or NULL for DC-only paths
 * @param max_coeff number of coefficients in the block (4/15/16/64)
 * @return 0 on success (error returns elided in this listing)
 */
6044 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6045 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
// Context-state base offsets, indexed [MB_FIELD][cat] per the spec tables.
6046 static const int significant_coeff_flag_offset[2][6] = {
6047 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6048 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6050 static const int last_coeff_flag_offset[2][6] = {
6051 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6052 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6054 static const int coeff_abs_level_m1_offset[6] = {
6055 227+0, 227+10, 227+20, 227+30, 227+39, 426
// Per-scan-position significance context increments for 8x8 blocks
// ([0] = frame scan, [1] = field scan).
6057 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6058 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6059 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6060 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6061 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6062 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6063 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6064 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6065 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6071 int coeff_count = 0;
6074 int abslevelgt1 = 0;
6076 uint8_t *significant_coeff_ctx_base;
6077 uint8_t *last_coeff_ctx_base;
6078 uint8_t *abs_level_m1_ctx_base;
// Copy the CABAC state into a stack-local context for the hot loop and
// write it back on every exit path (see the restores below).
6081 #define CABAC_ON_STACK
6083 #ifdef CABAC_ON_STACK
6086 cc.range = h->cabac.range;
6087 cc.low = h->cabac.low;
6088 cc.bytestream= h->cabac.bytestream;
6090 #define CC &h->cabac
6094 /* cat: 0-> DC 16x16 n = 0
6095 * 1-> AC 16x16 n = luma4x4idx
6096 * 2-> Luma4x4 n = luma4x4idx
6097 * 3-> DC Chroma n = iCbCr
6098 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6099 * 5-> Luma8x8 n = 4 * luma8x8idx
6102 /* read coded block flag */
6104 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// No coefficients: record a zero count for neighbour contexts and bail out.
6105 if( cat == 1 || cat == 2 )
6106 h->non_zero_count_cache[scan8[n]] = 0;
6108 h->non_zero_count_cache[scan8[16+n]] = 0;
6109 #ifdef CABAC_ON_STACK
6110 h->cabac.range = cc.range ;
6111 h->cabac.low = cc.low ;
6112 h->cabac.bytestream= cc.bytestream;
6118 significant_coeff_ctx_base = h->cabac_state
6119 + significant_coeff_flag_offset[MB_FIELD][cat];
6120 last_coeff_ctx_base = h->cabac_state
6121 + last_coeff_flag_offset[MB_FIELD][cat];
6122 abs_level_m1_ctx_base = h->cabac_state
6123 + coeff_abs_level_m1_offset[cat];
// Significance map: one significant_coeff flag per scan position; a set
// last_coeff flag terminates the map. The final position is implicitly
// significant when the loop runs to the end.
6126 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6127 for(last= 0; last < coefs; last++) { \
6128 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6129 if( get_cabac( CC, sig_ctx )) { \
6130 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6131 index[coeff_count++] = last; \
6132 if( get_cabac( CC, last_ctx ) ) { \
6138 if( last == max_coeff -1 ) {\
6139 index[coeff_count++] = last;\
6141 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6142 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
6143 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6145 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6147 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6149 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6152 assert(coeff_count > 0);
// Publish the coefficient count where later context derivations expect it.
6155 h->cbp_table[mb_xy] |= 0x100;
6156 else if( cat == 1 || cat == 2 )
6157 h->non_zero_count_cache[scan8[n]] = coeff_count;
6159 h->cbp_table[mb_xy] |= 0x40 << n;
6161 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6164 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
// Levels are decoded in reverse scan order. Context selection follows the
// spec: once any level > 1 has been seen, coeff_abs_level_m1 ctx is 0.
6167 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6168 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6169 int j= scantable[index[coeff_count]];
6171 if( get_cabac( CC, ctx ) == 0 ) {
// |level| == 1: only the bypass-coded sign remains.
6173 block[j] = get_cabac_bypass_sign( CC, -1);
6175 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
// |level| > 1: unary magnitude bins capped at 15, then exp-golomb escape
// in bypass mode for larger values.
6181 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6182 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
6186 if( coeff_abs >= 15 ) {
6188 while( get_cabac_bypass( CC ) ) {
6194 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6200 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6201 else block[j] = coeff_abs;
6203 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6204 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6210 #ifdef CABAC_ON_STACK
6211 h->cabac.range = cc.range ;
6212 h->cabac.low = cc.low ;
6213 h->cabac.bytestream= cc.bytestream;
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * For progressive frames these are the plain raster neighbours; under
 * MBAFF they are adjusted according to the frame/field coding of the
 * current MB and its neighbouring MB pairs.
 */
6218 static void inline compute_mb_neighbors(H264Context *h)
6220 MpegEncContext * const s = &h->s;
6221 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
// Default (non-MBAFF) raster neighbours.
6222 h->top_mb_xy = mb_xy - s->mb_stride;
6223 h->left_mb_xy[0] = mb_xy - 1;
6225 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6226 const int top_pair_xy = pair_xy - s->mb_stride;
6227 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6228 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6229 const int curr_mb_frame_flag = !MB_FIELD;
6230 const int bottom = (s->mb_y & 1);
// Decide whether the top neighbour must step one more row up, depending on
// whether we are the bottom MB of the pair and on frame/field mismatches.
6232 ? !curr_mb_frame_flag // bottom macroblock
6233 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6235 h->top_mb_xy -= s->mb_stride;
// On a frame/field mismatch the left neighbour is the top MB of the left pair.
6237 if (left_mb_frame_flag != curr_mb_frame_flag) {
6238 h->left_mb_xy[0] = pair_xy - 1;
6245 * decodes a macroblock
6246 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6248 static int decode_mb_cabac(H264Context *h) {
6249 MpegEncContext * const s = &h->s;
6250 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6251 int mb_type, partition_count, cbp = 0;
6252 int dct8x8_allowed= h->pps.transform_8x8_mode;
6254 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6256 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6257 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6259 /* a skipped mb needs the aff flag from the following mb */
6260 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6261 predict_field_decoding_flag(h);
6262 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6263 skip = h->next_mb_skipped;
6265 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6266 /* read skip flags */
6268 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6269 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6270 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6271 if(h->next_mb_skipped)
6272 predict_field_decoding_flag(h);
6274 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6279 h->cbp_table[mb_xy] = 0;
6280 h->chroma_pred_mode_table[mb_xy] = 0;
6281 h->last_qscale_diff = 0;
6288 if( (s->mb_y&1) == 0 )
6290 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6292 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6294 h->prev_mb_skipped = 0;
6296 compute_mb_neighbors(h);
6297 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6298 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6302 if( h->slice_type == B_TYPE ) {
6304 partition_count= b_mb_type_info[mb_type].partition_count;
6305 mb_type= b_mb_type_info[mb_type].type;
6308 goto decode_intra_mb;
6310 } else if( h->slice_type == P_TYPE ) {
6312 partition_count= p_mb_type_info[mb_type].partition_count;
6313 mb_type= p_mb_type_info[mb_type].type;
6316 goto decode_intra_mb;
6319 assert(h->slice_type == I_TYPE);
6321 partition_count = 0;
6322 cbp= i_mb_type_info[mb_type].cbp;
6323 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6324 mb_type= i_mb_type_info[mb_type].type;
6327 mb_type |= MB_TYPE_INTERLACED;
6329 h->slice_table[ mb_xy ]= h->slice_num;
6331 if(IS_INTRA_PCM(mb_type)) {
6335 // We assume these blocks are very rare so we dont optimize it.
6336 // FIXME The two following lines get the bitstream position in the cabac
6337 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6338 ptr= h->cabac.bytestream;
6339 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6341 // The pixels are stored in the same order as levels in h->mb array.
6342 for(y=0; y<16; y++){
6343 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6344 for(x=0; x<16; x++){
6345 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6346 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6350 const int index= 256 + 4*(y&3) + 32*(y>>2);
6352 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6353 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6357 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6359 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6360 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6364 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6366 // All blocks are present
6367 h->cbp_table[mb_xy] = 0x1ef;
6368 h->chroma_pred_mode_table[mb_xy] = 0;
6369 // In deblocking, the quantizer is 0
6370 s->current_picture.qscale_table[mb_xy]= 0;
6371 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6372 // All coeffs are present
6373 memset(h->non_zero_count[mb_xy], 16, 16);
6374 s->current_picture.mb_type[mb_xy]= mb_type;
6379 h->ref_count[0] <<= 1;
6380 h->ref_count[1] <<= 1;
6383 fill_caches(h, mb_type, 0);
6385 if( IS_INTRA( mb_type ) ) {
6387 if( IS_INTRA4x4( mb_type ) ) {
6388 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6389 mb_type |= MB_TYPE_8x8DCT;
6390 for( i = 0; i < 16; i+=4 ) {
6391 int pred = pred_intra_mode( h, i );
6392 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6393 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6396 for( i = 0; i < 16; i++ ) {
6397 int pred = pred_intra_mode( h, i );
6398 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6400 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6403 write_back_intra_pred_mode(h);
6404 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6406 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6407 if( h->intra16x16_pred_mode < 0 ) return -1;
6409 h->chroma_pred_mode_table[mb_xy] =
6410 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6412 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6413 if( h->chroma_pred_mode < 0 ) return -1;
6414 } else if( partition_count == 4 ) {
6415 int i, j, sub_partition_count[4], list, ref[2][4];
6417 if( h->slice_type == B_TYPE ) {
6418 for( i = 0; i < 4; i++ ) {
6419 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6420 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6421 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6423 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6424 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6425 pred_direct_motion(h, &mb_type);
6426 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6427 for( i = 0; i < 4; i++ )
6428 if( IS_DIRECT(h->sub_mb_type[i]) )
6429 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6433 for( i = 0; i < 4; i++ ) {
6434 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6435 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6436 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6440 for( list = 0; list < 2; list++ ) {
6441 if( h->ref_count[list] > 0 ) {
6442 for( i = 0; i < 4; i++ ) {
6443 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6444 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6445 if( h->ref_count[list] > 1 )
6446 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6452 h->ref_cache[list][ scan8[4*i]+1 ]=
6453 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6459 dct8x8_allowed = get_dct8x8_allowed(h);
6461 for(list=0; list<2; list++){
6463 if(IS_DIRECT(h->sub_mb_type[i])){
6464 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6467 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6469 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6470 const int sub_mb_type= h->sub_mb_type[i];
6471 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6472 for(j=0; j<sub_partition_count[i]; j++){
6475 const int index= 4*i + block_width*j;
6476 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6477 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6478 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6480 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6481 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6482 tprintf("final mv:%d %d\n", mx, my);
6484 if(IS_SUB_8X8(sub_mb_type)){
6485 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6486 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6487 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6488 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6490 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6491 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6492 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6493 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6494 }else if(IS_SUB_8X4(sub_mb_type)){
6495 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6496 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6498 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6499 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6500 }else if(IS_SUB_4X8(sub_mb_type)){
6501 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6502 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6504 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6505 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6507 assert(IS_SUB_4X4(sub_mb_type));
6508 mv_cache[ 0 ][0]= mx;
6509 mv_cache[ 0 ][1]= my;
6511 mvd_cache[ 0 ][0]= mx - mpx;
6512 mvd_cache[ 0 ][1]= my - mpy;
6516 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6517 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6518 p[0] = p[1] = p[8] = p[9] = 0;
6519 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6523 } else if( IS_DIRECT(mb_type) ) {
6524 pred_direct_motion(h, &mb_type);
6525 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6526 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6527 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6529 int list, mx, my, i, mpx, mpy;
6530 if(IS_16X16(mb_type)){
6531 for(list=0; list<2; list++){
6532 if(IS_DIR(mb_type, 0, list)){
6533 if(h->ref_count[list] > 0 ){
6534 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6535 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6538 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6540 for(list=0; list<2; list++){
6541 if(IS_DIR(mb_type, 0, list)){
6542 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6544 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6545 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6546 tprintf("final mv:%d %d\n", mx, my);
6548 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6549 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6551 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6554 else if(IS_16X8(mb_type)){
6555 for(list=0; list<2; list++){
6556 if(h->ref_count[list]>0){
6558 if(IS_DIR(mb_type, i, list)){
6559 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6560 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6562 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6566 for(list=0; list<2; list++){
6568 if(IS_DIR(mb_type, i, list)){
6569 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6570 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6571 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6572 tprintf("final mv:%d %d\n", mx, my);
6574 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6575 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6577 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6578 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6583 assert(IS_8X16(mb_type));
6584 for(list=0; list<2; list++){
6585 if(h->ref_count[list]>0){
6587 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6588 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6589 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6591 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6595 for(list=0; list<2; list++){
6597 if(IS_DIR(mb_type, i, list)){
6598 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6599 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6600 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6602 tprintf("final mv:%d %d\n", mx, my);
6603 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6604 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6606 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6607 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6614 if( IS_INTER( mb_type ) ) {
6615 h->chroma_pred_mode_table[mb_xy] = 0;
6616 write_back_motion( h, mb_type );
6619 if( !IS_INTRA16x16( mb_type ) ) {
6620 cbp = decode_cabac_mb_cbp_luma( h );
6621 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6624 h->cbp_table[mb_xy] = h->cbp = cbp;
6626 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6627 if( decode_cabac_mb_transform_size( h ) )
6628 mb_type |= MB_TYPE_8x8DCT;
6630 s->current_picture.mb_type[mb_xy]= mb_type;
6632 if( cbp || IS_INTRA16x16( mb_type ) ) {
6633 const uint8_t *scan, *scan8x8, *dc_scan;
6636 if(IS_INTERLACED(mb_type)){
6637 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6638 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6639 dc_scan= luma_dc_field_scan;
6641 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6642 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6643 dc_scan= luma_dc_zigzag_scan;
6646 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6647 if( dqp == INT_MIN ){
6648 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6652 if(((unsigned)s->qscale) > 51){
6653 if(s->qscale<0) s->qscale+= 52;
6654 else s->qscale-= 52;
6656 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6658 if( IS_INTRA16x16( mb_type ) ) {
6660 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6661 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6664 for( i = 0; i < 16; i++ ) {
6665 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6666 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6670 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6674 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6675 if( cbp & (1<<i8x8) ) {
6676 if( IS_8x8DCT(mb_type) ) {
6677 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6678 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6681 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6682 const int index = 4*i8x8 + i4x4;
6683 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6685 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6687 //STOP_TIMER("decode_residual")
6690 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6691 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6698 for( c = 0; c < 2; c++ ) {
6699 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6700 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6707 for( c = 0; c < 2; c++ ) {
6708 for( i = 0; i < 4; i++ ) {
6709 const int index = 16 + 4 * c + i;
6710 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6711 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6716 uint8_t * const nnz= &h->non_zero_count_cache[0];
6717 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6718 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6721 uint8_t * const nnz= &h->non_zero_count_cache[0];
6722 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6723 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6724 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6725 h->last_qscale_diff = 0;
6728 s->current_picture.qscale_table[mb_xy]= s->qscale;
6729 write_back_non_zero_count(h);
6732 h->ref_count[0] >>= 1;
6733 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge (samples left/right of the edge are
 * addressed as pix[-1..-4] / pix[0..3]).
 *
 * @param pix    pointer to the first row of the q0 column of the edge
 * @param stride distance in bytes between vertically adjacent luma rows
 * @param bS     boundary strength for each of the 4 four-pixel segments
 * @param qp     averaged luma QP of the two blocks sharing the edge
 *
 * NOTE(review): several interior lines (declarations, braces, else
 * branches) are not visible in this excerpt; comments below describe
 * only what the visible code establishes.
 */
6740 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* threshold tables are biased by 52 so qp + a possibly negative slice
 * offset still indexes in range */
6742 const int index_a = qp + h->slice_alpha_c0_offset;
6743 const int alpha = (alpha_table+52)[index_a];
6744 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* normal filter path: per-segment clipping value; -1 disables the
 * segment inside the DSP routine */
6749 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6750 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6752 /* 16px edge length, because bS=4 is triggered by being at
6753 * the edge of an intra MB, so all 4 bS are the same */
6754 for( d = 0; d < 16; d++ ) {
6755 const int p0 = pix[-1];
6756 const int p1 = pix[-2];
6757 const int p2 = pix[-3];
6759 const int q0 = pix[0];
6760 const int q1 = pix[1];
6761 const int q2 = pix[2];
/* filter only where the edge looks like a blocking artifact, not a
 * real image edge */
6763 if( FFABS( p0 - q0 ) < alpha &&
6764 FFABS( p1 - p0 ) < beta &&
6765 FFABS( q1 - q0 ) < beta ) {
/* strong (bS=4) filtering: extra closeness test selects the 3-tap vs
 * 5-tap smoothing per side */
6767 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6768 if( FFABS( p2 - p0 ) < beta)
6770 const int p3 = pix[-4];
/* strong smoothing of p0..p2 */
6772 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6773 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6774 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6777 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6779 if( FFABS( q2 - q0 ) < beta)
6781 const int q3 = pix[3];
/* strong smoothing of q0..q2 */
6783 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6784 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6785 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6788 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak strong-filter variant: only p0/q0 are touched */
6792 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6793 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6795 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge; delegates the pixel work to the
 * DSP loop-filter routines (normal vs intra/strong variant).
 *
 * @param bS boundary strengths; the tc values passed to the DSP use
 *           tc0+1 with 0 meaning "skip segment" (chroma convention,
 *           unlike the luma -1 sentinel above)
 * @param qp averaged chroma QP of the two adjacent blocks
 */
6801 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6803 const int index_a = qp + h->slice_alpha_c0_offset;
6804 const int alpha = (alpha_table+52)[index_a];
6805 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6810 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6811 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 path: strong chroma filter, no tc clipping needed */
6813 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the special MBAFF left luma edge, one row at a time.
 * Here the two sides of the edge can belong to macroblocks of
 * different field/frame type, so there are 8 boundary strengths and
 * 2 QPs instead of the usual 4/1.
 *
 * @param bS 8 boundary strengths (selection depends on MB_FIELD)
 * @param qp 2 averaged QPs, one per left neighbour of the MB pair
 */
6817 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6819 for( i = 0; i < 16; i++, pix += stride) {
/* map the row index to one of the 8 bS entries; the exact mapping
 * differs between field and frame decoding of the pair */
6825 int bS_index = (i >> 1);
6828 bS_index |= (i & 1);
6831 if( bS[bS_index] == 0 ) {
/* pick which of the two neighbour QPs applies to this row */
6835 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6836 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6837 alpha = (alpha_table+52)[index_a];
6838 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (bS < 4) filtering with tc clipping */
6840 if( bS[bS_index] < 4 ) {
6841 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6842 const int p0 = pix[-1];
6843 const int p1 = pix[-2];
6844 const int p2 = pix[-3];
6845 const int q0 = pix[0];
6846 const int q1 = pix[1];
6847 const int q2 = pix[2];
6849 if( FFABS( p0 - q0 ) < alpha &&
6850 FFABS( p1 - p0 ) < beta &&
6851 FFABS( q1 - q0 ) < beta ) {
/* optionally also adjust p1 / q1 when the inner samples are flat */
6855 if( FFABS( p2 - p0 ) < beta ) {
6856 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6859 if( FFABS( q2 - q0 ) < beta ) {
6860 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6864 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6865 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6866 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6867 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong filtering, same scheme as filter_mb_edgev */
6870 const int p0 = pix[-1];
6871 const int p1 = pix[-2];
6872 const int p2 = pix[-3];
6874 const int q0 = pix[0];
6875 const int q1 = pix[1];
6876 const int q2 = pix[2];
6878 if( FFABS( p0 - q0 ) < alpha &&
6879 FFABS( p1 - p0 ) < beta &&
6880 FFABS( q1 - q0 ) < beta ) {
6882 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6883 if( FFABS( p2 - p0 ) < beta)
6885 const int p3 = pix[-4];
6887 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6888 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6889 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6892 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6894 if( FFABS( q2 - q0 ) < beta)
6896 const int q3 = pix[3];
6898 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6899 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6900 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6903 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6907 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6908 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6910 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock the special MBAFF left chroma edge, one row at a time
 * (8 rows for 4:2:0 chroma). Chroma only ever modifies p0/q0.
 *
 * @param bS 8 boundary strengths shared with the luma MBAFF edge
 * @param qp 2 averaged chroma QPs, one per left neighbour of the pair
 */
6915 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6917 for( i = 0; i < 8; i++, pix += stride) {
6925 if( bS[bS_index] == 0 ) {
/* row -> neighbour QP selection differs for field vs frame pairs */
6929 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6930 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6931 alpha = (alpha_table+52)[index_a];
6932 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6934 if( bS[bS_index] < 4 ) {
/* chroma uses tc0 + 1 as the clipping bound */
6935 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6936 const int p0 = pix[-1];
6937 const int p1 = pix[-2];
6938 const int q0 = pix[0];
6939 const int q1 = pix[1];
6941 if( FFABS( p0 - q0 ) < alpha &&
6942 FFABS( p1 - p0 ) < beta &&
6943 FFABS( q1 - q0 ) < beta ) {
6944 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6946 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6947 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6948 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong chroma filter on p0/q0 only */
6951 const int p0 = pix[-1];
6952 const int p1 = pix[-2];
6953 const int q0 = pix[0];
6954 const int q1 = pix[1];
6956 if( FFABS( p0 - q0 ) < alpha &&
6957 FFABS( p1 - p0 ) < beta &&
6958 FFABS( q1 - q0 ) < beta ) {
6960 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6961 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6962 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge; same filter maths as
 * filter_mb_edgev but samples are addressed with a row stride
 * (pix_next) instead of +/-1.
 *
 * @param bS boundary strength for each 4-pixel segment of the edge
 * @param qp averaged luma QP of the two blocks sharing the edge
 */
6968 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6970 const int index_a = qp + h->slice_alpha_c0_offset;
6971 const int alpha = (alpha_table+52)[index_a];
6972 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6973 const int pix_next = stride;
/* normal path: tc per segment, -1 disables a segment in the DSP call */
6978 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6979 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6981 /* 16px edge length, see filter_mb_edgev */
6982 for( d = 0; d < 16; d++ ) {
6983 const int p0 = pix[-1*pix_next];
6984 const int p1 = pix[-2*pix_next];
6985 const int p2 = pix[-3*pix_next];
6986 const int q0 = pix[0];
6987 const int q1 = pix[1*pix_next];
6988 const int q2 = pix[2*pix_next];
6990 if( FFABS( p0 - q0 ) < alpha &&
6991 FFABS( p1 - p0 ) < beta &&
6992 FFABS( q1 - q0 ) < beta ) {
6994 const int p3 = pix[-4*pix_next];
6995 const int q3 = pix[ 3*pix_next];
/* strong (bS=4) filtering, 5-tap smoothing when the edge is flat */
6997 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6998 if( FFABS( p2 - p0 ) < beta) {
7000 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
7001 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
7002 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
7005 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
7007 if( FFABS( q2 - q0 ) < beta) {
7009 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
7010 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
7011 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
7014 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
7018 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
7019 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
7021 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge via the DSP vertical chroma
 * loop-filter routines (normal vs intra/strong variant).
 *
 * @param bS boundary strengths; tc uses tc0+1 with 0 = "skip segment"
 * @param qp averaged chroma QP of the two adjacent blocks
 */
7028 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
7030 const int index_a = qp + h->slice_alpha_c0_offset;
7031 const int alpha = (alpha_table+52)[index_a];
7032 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
7037 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
7038 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 path: strong chroma filter, no tc clipping needed */
7040 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast deblocking path for one macroblock: computes boundary strengths
 * in bulk (via dsp.h264_loop_filter_strength) instead of per edge.
 * Falls back to the full filter_mb() on picture borders or when the
 * DSP helper is unavailable. Not usable with MBAFF (asserted below).
 */
7044 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7045 MpegEncContext * const s = &h->s;
7047 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* border MBs need the neighbour checks of the slow path */
7049 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7050 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7053 assert(!FRAME_MBAFF);
7055 mb_xy = mb_x + mb_y*s->mb_stride;
7056 mb_type = s->current_picture.mb_type[mb_xy];
/* qp of this MB, the left neighbour (qp0) and the top neighbour (qp1) */
7057 qp = s->current_picture.qscale_table[mb_xy];
7058 qp0 = s->current_picture.qscale_table[mb_xy-1];
7059 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7060 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7061 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7062 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* edge QPs are the rounded average of the two adjacent MBs */
7063 qp0 = (qp + qp0 + 1) >> 1;
7064 qp1 = (qp + qp1 + 1) >> 1;
7065 qpc0 = (qpc + qpc0 + 1) >> 1;
7066 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold alpha/beta are all zero, so filtering is a no-op */
7067 qp_thresh = 15 - h->slice_alpha_c0_offset;
7068 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7069 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: constant boundary strengths (4 on MB edges, 3 inside) */
7072 if( IS_INTRA(mb_type) ) {
7073 int16_t bS4[4] = {4,4,4,4};
7074 int16_t bS3[4] = {3,3,3,3};
/* 8x8 transform: only every second internal edge is filtered */
7075 if( IS_8x8DCT(mb_type) ) {
7076 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7077 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7078 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7079 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7081 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7082 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7083 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7084 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7085 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7086 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7087 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7088 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: MB edge plus the single internal edge */
7090 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7091 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7092 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7093 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7094 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7095 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7096 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7097 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS for all edges at once, 64 bits per edge so the
 * whole edge can be tested/assigned as one uint64_t */
7100 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7101 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7103 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7105 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge* tell the DSP helper which edges cannot get a non-zero
 * mv-based bS because of the partition shape */
7107 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7108 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7109 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7110 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7112 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7113 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7114 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7115 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the shared MB edge */
7117 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7118 bSv[0][0] = 0x0004000400040004ULL;
7119 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7120 bSv[1][0] = 0x0004000400040004ULL;
/* apply one edge in direction dir (0=vertical, 1=horizontal); edge 0
 * uses the averaged cross-MB QP, inner edges use this MB's QP */
7122 #define FILTER(hv,dir,edge)\
7123 if(bSv[dir][edge]) {\
7124 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7126 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7127 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7133 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Full (slow-path) deblocking of one macroblock: derives the boundary
 * strength bS for every vertical and horizontal edge from intra flags,
 * coded-coefficient (nnz) state, reference indices and motion vector
 * differences, then calls the per-edge filter helpers. Handles MBAFF
 * and mixed field/frame neighbour special cases.
 */
7152 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7153 MpegEncContext * const s = &h->s;
7154 const int mb_xy= mb_x + mb_y*s->mb_stride;
7155 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical-mv threshold (2 instead of 4) */
7156 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7157 int first_vertical_edge_done = 0;
7159 /* FIXME: A given frame may occupy more than one position in
7160 * the reference list. So ref2frm should be populated with
7161 * frame numbers, not indices. */
7162 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7163 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7165 //for sufficiently low qp, filtering wouldn't do anything
7166 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7168 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7169 int qp = s->current_picture.qscale_table[mb_xy];
7171 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7172 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7178 // left mb is in picture
7179 && h->slice_table[mb_xy-1] != 255
7180 // and current and left pair do not have the same interlaced type
7181 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7182 // and left mb is in the same slice if deblocking_filter == 2
7183 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7184 /* First vertical edge is different in MBAFF frames
7185 * There are 8 different bS to compute and 2 different Qp
7187 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7188 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7192 int mb_qp, mbn0_qp, mbn1_qp;
7194 first_vertical_edge_done = 1;
7196 if( IS_INTRA(mb_type) )
7197 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7199 for( i = 0; i < 8; i++ ) {
/* select which of the two left MBs faces this bS entry */
7200 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7202 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7204 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7205 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7206 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP pairs: one per left neighbour of the MB pair */
7213 mb_qp = s->current_picture.qscale_table[mb_xy];
7214 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7215 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7216 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7217 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7218 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7219 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7220 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7221 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7224 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7225 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7226 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7227 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7228 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7230 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7231 for( dir = 0; dir < 2; dir++ )
/* mbm: MB on the other side of edge 0 (left for dir==0, top for dir==1) */
7234 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7235 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 means "outside picture": skip the MB-boundary edge */
7236 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7238 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7239 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7240 // how often to recheck mv-based bS when iterating between edges
7241 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7242 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7243 // how often to recheck mv-based bS when iterating along each edge
7244 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7246 if (first_vertical_edge_done) {
7248 first_vertical_edge_done = 0;
7251 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7254 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7255 && !IS_INTERLACED(mb_type)
7256 && IS_INTERLACED(mbm_type)
7258 // This is a special case in the norm where the filtering must
7259 // be done twice (one each of the field) even if we are in a
7260 // frame macroblock.
7262 static const int nnz_idx[4] = {4,5,6,3};
7263 unsigned int tmp_linesize = 2 * linesize;
7264 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7265 int mbn_xy = mb_xy - 2 * s->mb_stride;
7270 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7271 if( IS_INTRA(mb_type) ||
7272 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7273 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7275 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7276 for( i = 0; i < 4; i++ ) {
7277 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7278 mbn_nnz[nnz_idx[i]] != 0 )
7284 // Do not use s->qscale as luma quantizer because it has not the same
7285 // value in IPCM macroblocks.
7286 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7287 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7288 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7289 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7290 chroma_qp = ( h->chroma_qp +
7291 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7292 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7293 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* main loop: edge 0 is the MB boundary (neighbour = mbm), edges 1..3
 * are internal (neighbour = this MB) */
7300 for( edge = start; edge < edges; edge++ ) {
7301 /* mbn_xy: neighbor macroblock */
7302 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7303 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with the 8x8 transform, odd internal edges are not filtered */
7307 if( (edge&1) && IS_8x8DCT(mb_type) )
7310 if( IS_INTRA(mb_type) ||
7311 IS_INTRA(mbn_type) ) {
7314 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7315 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7324 bS[0] = bS[1] = bS[2] = bS[3] = value;
7329 if( edge & mask_edge ) {
7330 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7333 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7334 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole-edge shortcut: partition shape guarantees all 4 bS are equal,
 * so test refs/mvs once for the edge */
7337 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7338 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7339 int bn_idx= b_idx - (dir ? 8:1);
7341 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7342 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7343 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7344 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7346 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS separately for each 4-pixel segment */
7352 for( i = 0; i < 4; i++ ) {
7353 int x = dir == 0 ? edge : i;
7354 int y = dir == 0 ? i : edge;
7355 int b_idx= 8 + 4 + x + 8*y;
7356 int bn_idx= b_idx - (dir ? 8:1);
/* coded coefficients on either side -> bS = 2 */
7358 if( h->non_zero_count_cache[b_idx] != 0 ||
7359 h->non_zero_count_cache[bn_idx] != 0 ) {
/* otherwise bS = 1 iff refs differ or mv difference is large enough */
7365 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7366 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7367 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7368 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7376 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7381 // Do not use s->qscale as luma quantizer because it has not the same
7382 // value in IPCM macroblocks.
7383 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7384 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7385 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7386 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7388 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma has half the resolution: only even luma edges map to it */
7389 if( (edge&1) == 0 ) {
7390 int chroma_qp = ( h->chroma_qp +
7391 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7392 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7393 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7396 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7397 if( (edge&1) == 0 ) {
7398 int chroma_qp = ( h->chroma_qp +
7399 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7400 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7401 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes all macroblocks of the current slice, dispatching to the CABAC
 * or CAVLC entropy decoder depending on pps.cabac.
 * NOTE(review): this listing has gaps — interior source lines are missing,
 * so the control flow shown here is incomplete. Do not rely on it for the
 * exact loop/brace structure.
 * @return presumably 0 on normal slice end, negative on error — confirm
 *         against the missing lines.
 */
7408 static int decode_slice(H264Context *h){
7409 MpegEncContext * const s = &h->s;
7410 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* --- CABAC entropy coding path --- */
7414 if( h->pps.cabac ) {
/* byte-align before initializing the CABAC decoder on the remaining bits */
7418 align_get_bits( &s->gb );
7421 ff_init_cabac_states( &h->cabac);
7422 ff_init_cabac_decoder( &h->cabac,
7423 s->gb.buffer + get_bits_count(&s->gb)/8,
7424 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7425 /* calculate pre-state */
/* initialize all 460 CABAC context states from the qscale-dependent
 * init tables (I-slice table vs. P/B table selected by cabac_init_idc) */
7426 for( i= 0; i < 460; i++ ) {
7428 if( h->slice_type == I_TYPE )
7429 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7431 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* state is stored as 2*index + MPS bit */
7434 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7436 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7441 int ret = decode_mb_cabac(h);
7443 //STOP_TIMER("decode_mb_cabac")
7445 if(ret>=0) hl_decode_mb(h);
/* in MBAFF mode a second (bottom) macroblock of the pair is decoded */
7447 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7450 if(ret>=0) ret = decode_mb_cabac(h);
7452 if(ret>=0) hl_decode_mb(h);
7455 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond end+2 indicates corrupted input */
7457 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7458 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7459 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7463 if( ++s->mb_x >= s->mb_width ) {
7465 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7472 if( eos || s->mb_y >= s->mb_height ) {
7473 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7474 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* --- CAVLC entropy coding path --- */
7481 int ret = decode_mb_cavlc(h);
7483 if(ret>=0) hl_decode_mb(h);
7485 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7487 ret = decode_mb_cavlc(h);
7489 if(ret>=0) hl_decode_mb(h);
7494 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7495 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7500 if(++s->mb_x >= s->mb_width){
7502 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7507 if(s->mb_y >= s->mb_height){
7508 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit consumption distinguishes a clean slice end from an error */
7510 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7511 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7515 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7522 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7523 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7524 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7525 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7529 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the block below uses decode_mb() and get_bits_count(s->gb)
 * (no &), unlike the paths above — presumably legacy/disabled code; confirm
 * against the missing preprocessor lines. */
7538 for(;s->mb_y < s->mb_height; s->mb_y++){
7539 for(;s->mb_x < s->mb_width; s->mb_x++){
7540 int ret= decode_mb(h);
7545 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7546 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7551 if(++s->mb_x >= s->mb_width){
7553 if(++s->mb_y >= s->mb_height){
7554 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7555 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7559 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7566 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
/* Tail of decode_slice: final end-of-bitstream bookkeeping for the error
 * resilience layer, then the unreachable fallback return. */
7567 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7568 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7572 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7579 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7582 return -1; //not reached
/**
 * Parses the SEI "unregistered user data" payload.
 * Reads up to sizeof(user_data)-1 bytes, then scans for an x264 version
 * banner starting at offset 16 (after the 16-byte UUID) and records the
 * build number in h->x264_build for bug workarounds.
 * NOTE(review): listing has gaps; variable declarations and the final
 * return are among the missing lines.
 */
7585 static int decode_unregistered_user_data(H264Context *h, int size){
7586 MpegEncContext * const s = &h->s;
7587 uint8_t user_data[16+256];
7593 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7594 user_data[i]= get_bits(&s->gb, 8);
7598 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7599 if(e==1 && build>=0)
7600 h->x264_build= build;
7602 if(s->avctx->debug & FF_DEBUG_BUGS)
7603 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes beyond the buffered prefix */
7606 skip_bits(&s->gb, 8);
/**
 * Decodes an SEI NAL unit: iterates over SEI messages, reading the
 * 255-terminated variable-length type and size codes, dispatching known
 * payload types and skipping the rest.
 * NOTE(review): listing has gaps; the switch over message types is
 * partially missing.
 */
7611 static int decode_sei(H264Context *h){
7612 MpegEncContext * const s = &h->s;
7614 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* SEI type: sum of 0xFF bytes plus the final non-0xFF byte */
7619 type+= show_bits(&s->gb, 8);
7620 }while(get_bits(&s->gb, 8) == 255);
/* SEI payload size, coded the same way */
7624 size+= show_bits(&s->gb, 8);
7625 }while(get_bits(&s->gb, 8) == 255);
7629 if(decode_unregistered_user_data(h, size) < 0)
/* unknown/unhandled payload: skip it wholesale */
7633 skip_bits(&s->gb, 8*size);
7636 //FIXME check bits here
7637 align_get_bits(&s->gb);
/**
 * Parses (and discards) the HRD parameter syntax from the VUI
 * (H.264 Annex E). All fields are read only to keep the bitstream
 * position correct; nothing is stored.
 */
7643 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7644 MpegEncContext * const s = &h->s;
7646 cpb_count = get_ue_golomb(&s->gb) + 1;
7647 get_bits(&s->gb, 4); /* bit_rate_scale */
7648 get_bits(&s->gb, 4); /* cpb_size_scale */
7649 for(i=0; i<cpb_count; i++){
7650 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7651 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7652 get_bits1(&s->gb); /* cbr_flag */
7654 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7655 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7656 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7657 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses the VUI parameters of an SPS (H.264 Annex E).
 * Stores sample aspect ratio, timing info, and bitstream restriction
 * fields (notably num_reorder_frames) into *sps; other fields are read
 * and discarded to keep bitstream alignment.
 * NOTE(review): listing has gaps; the return statements are missing.
 */
7660 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7661 MpegEncContext * const s = &h->s;
7662 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7663 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7665 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7667 if( aspect_ratio_info_present_flag ) {
7668 aspect_ratio_idc= get_bits(&s->gb, 8);
7669 if( aspect_ratio_idc == EXTENDED_SAR ) {
/* explicit 16-bit numerator/denominator */
7670 sps->sar.num= get_bits(&s->gb, 16);
7671 sps->sar.den= get_bits(&s->gb, 16);
7672 }else if(aspect_ratio_idc < 14){
/* table lookup for the predefined aspect ratio indices */
7673 sps->sar= pixel_aspect[aspect_ratio_idc];
7675 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7682 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7684 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7685 get_bits1(&s->gb); /* overscan_appropriate_flag */
7688 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7689 get_bits(&s->gb, 3); /* video_format */
7690 get_bits1(&s->gb); /* video_full_range_flag */
7691 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7692 get_bits(&s->gb, 8); /* colour_primaries */
7693 get_bits(&s->gb, 8); /* transfer_characteristics */
7694 get_bits(&s->gb, 8); /* matrix_coefficients */
7698 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7699 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7700 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7703 sps->timing_info_present_flag = get_bits1(&s->gb);
7704 if(sps->timing_info_present_flag){
7705 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7706 sps->time_scale = get_bits_long(&s->gb, 32);
7707 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice (NAL and VCL variants) */
7710 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7711 if(nal_hrd_parameters_present_flag)
7712 decode_hrd_parameters(h, sps);
7713 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7714 if(vcl_hrd_parameters_present_flag)
7715 decode_hrd_parameters(h, sps);
7716 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7717 get_bits1(&s->gb); /* low_delay_hrd_flag */
7718 get_bits1(&s->gb); /* pic_struct_present_flag */
7720 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7721 if(sps->bitstream_restriction_flag){
7722 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7723 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7724 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7725 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7726 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* used by decode_frame() to size the reorder buffer */
7727 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7728 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Reads one quantization scaling list (16 or 64 entries) in zigzag order.
 * If the in-bitstream flag is 0, the fallback (predicted) list is copied;
 * if the first delta yields 0, the JVT default list is used; a run of
 * equal values is produced when next==0 by repeating the last value.
 */
7734 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7735 const uint8_t *jvt_list, const uint8_t *fallback_list){
7736 MpegEncContext * const s = &h->s;
7737 int i, last = 8, next = 8;
7738 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7739 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7740 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7742 for(i=0;i<size;i++){
/* delta-coded: each entry is last + signed golomb delta, mod 256 */
7744 next = (last + get_se_golomb(&s->gb)) & 0xff;
7745 if(!i && !next){ /* matrix not written, we use the preset one */
7746 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7749 last = factors[scan[i]] = next ? next : last;
/**
 * Reads the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per the H.264 spec: in a PPS, lists fall back to the SPS
 * matrices when the SPS transmitted any; otherwise to the flat defaults.
 * Within a set, chroma lists fall back to the previous list in order.
 * The 8x8 lists are only present for SPS or when the PPS enables
 * transform_8x8_mode.
 */
7753 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7754 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7755 MpegEncContext * const s = &h->s;
7756 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7757 const uint8_t *fallback[4] = {
7758 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7759 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7760 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7761 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7763 if(get_bits1(&s->gb)){
/* |= so a PPS parse does not clear the SPS-present flag */
7764 sps->scaling_matrix_present |= is_sps;
7765 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7766 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7767 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7768 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7769 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7770 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7771 if(is_sps || pps->transform_8x8_mode){
7772 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7773 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7775 } else if(fallback_sps) {
/* no matrices in the bitstream: inherit the SPS set wholesale */
7776 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7777 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a sequence parameter set NAL unit into h->sps_buffer[sps_id].
 * NOTE(review): listing has gaps — several declarations, error returns,
 * bounds checks and closing braces are among the missing lines; in
 * particular no visible range check on sps_id before indexing
 * sps_buffer — confirm against the missing lines.
 */
7781 static inline int decode_seq_parameter_set(H264Context *h){
7782 MpegEncContext * const s = &h->s;
7783 int profile_idc, level_idc;
7787 profile_idc= get_bits(&s->gb, 8);
7788 get_bits1(&s->gb); //constraint_set0_flag
7789 get_bits1(&s->gb); //constraint_set1_flag
7790 get_bits1(&s->gb); //constraint_set2_flag
7791 get_bits1(&s->gb); //constraint_set3_flag
7792 get_bits(&s->gb, 4); // reserved
7793 level_idc= get_bits(&s->gb, 8);
7794 sps_id= get_ue_golomb(&s->gb);
7796 sps= &h->sps_buffer[ sps_id ];
7797 sps->profile_idc= profile_idc;
7798 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrix syntax */
7800 if(sps->profile_idc >= 100){ //high profile
7801 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7802 get_bits1(&s->gb); //residual_color_transform_flag
7803 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7804 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7805 sps->transform_bypass = get_bits1(&s->gb);
7806 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7808 sps->scaling_matrix_present = 0;
7810 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7811 sps->poc_type= get_ue_golomb(&s->gb);
/* picture order count syntax depends on poc_type (0, 1 or 2) */
7813 if(sps->poc_type == 0){ //FIXME #define
7814 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7815 } else if(sps->poc_type == 1){//FIXME #define
7816 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7817 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7818 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7819 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7821 for(i=0; i<sps->poc_cycle_length; i++)
7822 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7824 if(sps->poc_type > 2){
7825 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7829 sps->ref_frame_count= get_ue_golomb(&s->gb);
7830 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7831 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7833 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7834 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7835 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_width/height against integer overflow before dimension check */
7836 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7837 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7840 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7841 if(!sps->frame_mbs_only_flag)
7842 sps->mb_aff= get_bits1(&s->gb);
7846 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7848 #ifndef ALLOW_INTERLACE
7850 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7852 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7853 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7855 sps->crop= get_bits1(&s->gb);
7857 sps->crop_left = get_ue_golomb(&s->gb);
7858 sps->crop_right = get_ue_golomb(&s->gb);
7859 sps->crop_top = get_ue_golomb(&s->gb);
7860 sps->crop_bottom= get_ue_golomb(&s->gb);
7861 if(sps->crop_left || sps->crop_top){
7862 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7868 sps->crop_bottom= 0;
7871 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7872 if( sps->vui_parameters_present_flag )
7873 decode_vui_parameters(h, sps);
7875 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7876 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7877 sps_id, sps->profile_idc, sps->level_idc,
7879 sps->ref_frame_count,
7880 sps->mb_width, sps->mb_height,
7881 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7882 sps->direct_8x8_inference_flag ? "8B8" : "",
7883 sps->crop_left, sps->crop_right,
7884 sps->crop_top, sps->crop_bottom,
7885 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a picture parameter set NAL unit into h->pps_buffer[pps_id].
 * The FMO (slice group) syntax is recognized but not supported; the
 * tabular text below is spec pseudo-syntax kept as documentation.
 * NOTE(review): listing has gaps — no visible range check on pps_id
 * before indexing pps_buffer; confirm against the missing lines.
 */
7891 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7892 MpegEncContext * const s = &h->s;
7893 int pps_id= get_ue_golomb(&s->gb);
7894 PPS *pps= &h->pps_buffer[pps_id];
7896 pps->sps_id= get_ue_golomb(&s->gb);
7897 pps->cabac= get_bits1(&s->gb);
7898 pps->pic_order_present= get_bits1(&s->gb);
7899 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7900 if(pps->slice_group_count > 1 ){
7901 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7902 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7903 switch(pps->mb_slice_group_map_type){
7906 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7907 | run_length[ i ] |1 |ue(v) |
7912 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7914 | top_left_mb[ i ] |1 |ue(v) |
7915 | bottom_right_mb[ i ] |1 |ue(v) |
7923 | slice_group_change_direction_flag |1 |u(1) |
7924 | slice_group_change_rate_minus1 |1 |ue(v) |
7929 | slice_group_id_cnt_minus1 |1 |ue(v) |
7930 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7932 | slice_group_id[ i ] |1 |u(v) |
7937 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7938 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7939 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7940 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7944 pps->weighted_pred= get_bits1(&s->gb);
7945 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7946 pps->init_qp= get_se_golomb(&s->gb) + 26;
7947 pps->init_qs= get_se_golomb(&s->gb) + 26;
7948 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7949 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7950 pps->constrained_intra_pred= get_bits1(&s->gb);
7951 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7953 pps->transform_8x8_mode= 0;
7954 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default: flat (all-16) scaling matrices until overridden below */
7955 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7956 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing RBSP extensions (transform_8x8, matrices, 2nd chroma qp) */
7958 if(get_bits_count(&s->gb) < bit_length){
7959 pps->transform_8x8_mode= get_bits1(&s->gb);
7960 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7961 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7964 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7965 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7966 pps_id, pps->sps_id,
7967 pps->cabac ? "CABAC" : "CAVLC",
7968 pps->slice_group_count,
7969 pps->ref_count[0], pps->ref_count[1],
7970 pps->weighted_pred ? "weighted" : "",
7971 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7972 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7973 pps->constrained_intra_pred ? "CONSTR" : "",
7974 pps->redundant_pic_cnt_present ? "REDU" : "",
7975 pps->transform_8x8_mode ? "8x8DCT" : ""
7983 * finds the end of the current frame in the bitstream.
7984 * @return the position of the first byte of the next frame, or -1
/* Scans for NAL start codes; a slice NAL (types 1/2/5) whose
 * first_mb_in_slice ue(v) starts with a 1-bit (buf[i]&0x80, i.e.
 * first_mb_in_slice==0) marks the start of a new frame. */
7986 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7989 ParseContext *pc = &(h->s.parse_context);
7990 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7991 // mb_addr= pc->mb_addr - 1;
7993 for(i=0; i<=buf_size; i++){
/* state holds the last 4 bytes; mask 0xFFFFFF1F matches start code + nal type */
7994 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7995 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7996 if(pc->frame_start_found){
7997 // If there isn't one more byte in the buffer
7998 // the test on first_mb_in_slice cannot be done yet
7999 // do it at next call.
8000 if (i >= buf_size) break;
8001 if (buf[i] & 0x80) {
8002 // first_mb_in_slice is 0, probably the first nal of a new
8004 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
8006 pc->frame_start_found= 0;
8010 pc->frame_start_found = 1;
/* SPS/PPS/access-unit-delimiter NALs (7/8/9) also terminate a frame */
8012 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
8013 if(pc->frame_start_found){
8015 pc->frame_start_found= 0;
8020 state= (state<<8) | buf[i];
8024 return END_NOT_FOUND;
#ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: splits the input stream into whole frames
 * using find_frame_end() and ff_combine_frame().
 * NOTE(review): listing has gaps; the "next" declaration and the
 * early-return path after a failed combine are missing here.
 */
8028 static int h264_parse(AVCodecParserContext *s,
8029 AVCodecContext *avctx,
8030 uint8_t **poutbuf, int *poutbuf_size,
8031 const uint8_t *buf, int buf_size)
8033 H264Context *h = s->priv_data;
8034 ParseContext *pc = &h->s.parse_context;
8037 next= find_frame_end(h, buf, buf_size);
8039 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
/* a complete frame was assembled: hand it back to the caller */
8045 *poutbuf = (uint8_t *)buf;
8046 *poutbuf_size = buf_size;
/**
 * AVCodecParser split callback: returns the offset of the extradata/header
 * portion (everything up to the first non-parameter-set NAL after an SPS).
 * NOTE(review): listing has gaps; the found-flag logic and final return
 * are among the missing lines.
 */
8050 static int h264_split(AVCodecContext *avctx,
8051 const uint8_t *buf, int buf_size)
8054 uint32_t state = -1;
8057 for(i=0; i<=buf_size; i++){
/* 0x107 == start code + SPS NAL type */
8058 if((state&0xFFFFFF1F) == 0x107)
8060 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
8062 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over zero bytes preceding the start code */
8064 while(i>4 && buf[i-5]==0) i--;
8069 state= (state<<8) | buf[i];
#endif /* CONFIG_H264_PARSER */
/**
 * Iterates over all NAL units in buf, handling both Annex-B (start-code)
 * and AVC (length-prefixed, h->is_avc) framing, and dispatches each NAL
 * type (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * NOTE(review): listing has gaps — the loop header, several case labels
 * and closing braces are missing; the switch structure below is
 * reconstructed reading, not verbatim flow.
 * @return number of bytes consumed, or negative on error (per visible
 *         return statements; confirm against the missing lines).
 */
8075 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8076 MpegEncContext * const s = &h->s;
8077 AVCodecContext * const avctx= s->avctx;
8081 for(i=0; i<50; i++){
8082 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8086 s->current_picture_ptr= NULL;
8095 if(buf_index >= buf_size) break;
/* AVC framing: big-endian length prefix of h->nal_length_size bytes */
8097 for(i = 0; i < h->nal_length_size; i++)
8098 nalsize = (nalsize << 8) | buf[buf_index++];
8104 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8109 // start code prefix search
8110 for(; buf_index + 3 < buf_size; buf_index++){
8111 // this should allways succeed in the first iteration
8112 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8116 if(buf_index+3 >= buf_size) break;
/* unescape the RBSP (removes emulation prevention bytes) */
8121 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8122 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8124 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8126 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8127 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8130 if (h->is_avc && (nalsize != consumed))
8131 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8133 buf_index += consumed;
/* skip non-reference NALs when the caller asked to hurry/skip */
8135 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8136 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8139 switch(h->nal_unit_type){
8141 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8143 init_get_bits(&s->gb, ptr, bit_length);
8145 h->inter_gb_ptr= &s->gb;
8146 s->data_partitioning = 0;
8148 if(decode_slice_header(h) < 0){
8149 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8152 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* decode the slice only if no skip policy filters it out */
8153 if(h->redundant_pic_count==0 && s->hurry_up < 5
8154 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8155 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8156 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8157 && avctx->skip_frame < AVDISCARD_ALL)
/* data partitioning: DPA carries the slice header */
8161 init_get_bits(&s->gb, ptr, bit_length);
8163 h->inter_gb_ptr= NULL;
8164 s->data_partitioning = 1;
8166 if(decode_slice_header(h) < 0){
8167 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* DPB: intra residual partition */
8171 init_get_bits(&h->intra_gb, ptr, bit_length);
8172 h->intra_gb_ptr= &h->intra_gb;
/* DPC: inter residual partition */
8175 init_get_bits(&h->inter_gb, ptr, bit_length);
8176 h->inter_gb_ptr= &h->inter_gb;
8178 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8180 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8181 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8182 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8183 && avctx->skip_frame < AVDISCARD_ALL)
8187 init_get_bits(&s->gb, ptr, bit_length);
8191 init_get_bits(&s->gb, ptr, bit_length);
8192 decode_seq_parameter_set(h);
8194 if(s->flags& CODEC_FLAG_LOW_DELAY)
8197 if(avctx->has_b_frames < 2)
8198 avctx->has_b_frames= !s->low_delay;
8201 init_get_bits(&s->gb, ptr, bit_length);
8203 decode_picture_parameter_set(h, bit_length);
8207 case NAL_END_SEQUENCE:
8208 case NAL_END_STREAM:
8209 case NAL_FILLER_DATA:
8211 case NAL_AUXILIARY_SLICE:
8214 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8218 if(!s->current_picture_ptr) return buf_index; //no frame
/* end-of-picture bookkeeping: POC/frame_num history and ref marking */
8220 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8221 s->current_picture_ptr->pict_type= s->pict_type;
8223 h->prev_frame_num_offset= h->frame_num_offset;
8224 h->prev_frame_num= h->frame_num;
8225 if(s->current_picture_ptr->reference){
8226 h->prev_poc_msb= h->poc_msb;
8227 h->prev_poc_lsb= h->poc_lsb;
8229 if(s->current_picture_ptr->reference)
8230 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8240 * returns the number of bytes consumed for building the current frame
/**
 * Returns the number of input bytes consumed for the current frame,
 * adjusting for data buffered by the parse context in truncated mode
 * and clamping to sane bounds.
 */
8242 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8243 if(s->flags&CODEC_FLAG_TRUNCATED){
8244 pos -= s->parse_context.last_index;
8245 if(pos<0) pos=0; // FIXME remove (unneeded?)
8249 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8250 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: handles truncated-stream reassembly,
 * one-time avcC extradata parsing for AVC-framed input, NAL decoding, and
 * B-frame reordering of the delayed-picture queue into display order.
 * NOTE(review): listing has gaps — declarations, returns and several
 * braces are missing; treat the flow below as a sketch.
 */
8256 static int decode_frame(AVCodecContext *avctx,
8257 void *data, int *data_size,
8258 uint8_t *buf, int buf_size)
8260 H264Context *h = avctx->priv_data;
8261 MpegEncContext *s = &h->s;
8262 AVFrame *pict = data;
8265 s->flags= avctx->flags;
8266 s->flags2= avctx->flags2;
8268 /* no supplementary picture */
8269 if (buf_size == 0) {
/* truncated mode: buffer input until a whole frame is available */
8273 if(s->flags&CODEC_FLAG_TRUNCATED){
8274 int next= find_frame_end(h, buf, buf_size);
8276 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8278 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* one-time parse of the avcC extradata box (SPS/PPS for AVC framing) */
8281 if(h->is_avc && !h->got_avcC) {
8282 int i, cnt, nalsize;
8283 unsigned char *p = avctx->extradata;
8284 if(avctx->extradata_size < 7) {
8285 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8289 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8292 /* sps and pps in the avcC always have length coded with 2 bytes,
8293 so put a fake nal_length_size = 2 while parsing them */
8294 h->nal_length_size = 2;
8295 // Decode sps from avcC
8296 cnt = *(p+5) & 0x1f; // Number of sps
8298 for (i = 0; i < cnt; i++) {
8299 nalsize = BE_16(p) + 2;
8300 if(decode_nal_units(h, p, nalsize) < 0) {
8301 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8306 // Decode pps from avcC
8307 cnt = *(p++); // Number of pps
8308 for (i = 0; i < cnt; i++) {
8309 nalsize = BE_16(p) + 2;
8310 if(decode_nal_units(h, p, nalsize) != nalsize) {
8311 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8316 // Now store right nal length size, that will be use to parse all other nals
8317 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8318 // Do not reparse avcC
/* Annex-B extradata (if any) is decoded once before the first picture */
8322 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8323 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8327 buf_index=decode_nal_units(h, buf, buf_size);
8331 //FIXME do something with unavailable reference frames
8333 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8334 if(!s->current_picture_ptr){
8335 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8340 Picture *out = s->current_picture_ptr;
8341 #if 0 //decode order
8342 *data_size = sizeof(AVFrame);
8344 /* Sort B-frames into display order */
8345 Picture *cur = s->current_picture_ptr;
8346 Picture *prev = h->delayed_output_pic;
8347 int i, pics, cross_idr, out_of_order, out_idx;
8349 if(h->sps.bitstream_restriction_flag
8350 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8351 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8356 while(h->delayed_pic[pics]) pics++;
8357 h->delayed_pic[pics++] = cur;
8358 if(cur->reference == 0)
8362 for(i=0; h->delayed_pic[i]; i++)
8363 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the delayed picture with the smallest POC before the next keyframe */
8366 out = h->delayed_pic[0];
8368 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8369 if(h->delayed_pic[i]->poc < out->poc){
8370 out = h->delayed_pic[i];
8374 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8375 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8377 else if(prev && pics <= s->avctx->has_b_frames)
/* heuristically grow the reorder delay when out-of-order output is detected */
8379 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8381 ((!cross_idr && prev && out->poc > prev->poc + 2)
8382 || cur->pict_type == B_TYPE)))
8385 s->avctx->has_b_frames++;
8388 else if(out_of_order)
8391 if(out_of_order || pics > s->avctx->has_b_frames){
8392 for(i=out_idx; h->delayed_pic[i]; i++)
8393 h->delayed_pic[i] = h->delayed_pic[i+1];
8399 *data_size = sizeof(AVFrame);
8400 if(prev && prev != out && prev->reference == 1)
8401 prev->reference = 0;
8402 h->delayed_output_pic = out;
8406 *pict= *(AVFrame*)out;
8408 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8411 assert(pict->data[0] || !*data_size);
8412 ff_print_debug_info(s, pict);
8413 //printf("out %d\n", (int)pict->data[0]);
8416 /* Return the Picture timestamp as the frame number */
8417 /* we substract 1 because it is added on utils.c */
8418 avctx->frame_number = s->picture_number - 1;
8420 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left.
 * A neighbor is available only if it belongs to the same slice.
 * NOTE(review): the guard checking s->mb_y (first row) is among the
 * missing lines of this listing.
 */
8423 static inline void fill_mb_avail(H264Context *h){
8424 MpegEncContext * const s = &h->s;
8425 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8428 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8429 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8430 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8436 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8437 h->mb_avail[4]= 1; //FIXME move out
8438 h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness fragment — presumably the body of the #ifdef TEST
 * main() that exercises Golomb coding, the 4x4 (I)DCT, the quantizer and
 * the NAL escape/unescape round trip.
 * NOTE(review): the function header and many interior lines are missing
 * from this listing. */
8444 #define SIZE (COUNT*40)
8450 // int int_temp[10000];
8452 AVCodecContext avctx;
8454 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb write/read round trip --- */
8456 init_put_bits(&pb, temp, SIZE);
8457 printf("testing unsigned exp golomb\n");
8458 for(i=0; i<COUNT; i++){
8460 set_ue_golomb(&pb, i);
8461 STOP_TIMER("set_ue_golomb");
8463 flush_put_bits(&pb);
8465 init_get_bits(&gb, temp, 8*SIZE);
8466 for(i=0; i<COUNT; i++){
8469 s= show_bits(&gb, 24);
8472 j= get_ue_golomb(&gb);
8474 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8477 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb write/read round trip --- */
8481 init_put_bits(&pb, temp, SIZE);
8482 printf("testing signed exp golomb\n");
8483 for(i=0; i<COUNT; i++){
8485 set_se_golomb(&pb, i - COUNT/2);
8486 STOP_TIMER("set_se_golomb");
8488 flush_put_bits(&pb);
8490 init_get_bits(&gb, temp, 8*SIZE);
8491 for(i=0; i<COUNT; i++){
8494 s= show_bits(&gb, 24);
8497 j= get_se_golomb(&gb);
8498 if(j != i - COUNT/2){
8499 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8502 STOP_TIMER("get_se_golomb");
/* --- 4x4 forward DCT + IDCT reconstruction error measurement --- */
8505 printf("testing 4x4 (I)DCT\n");
8508 uint8_t src[16], ref[16];
8509 uint64_t error= 0, max_error=0;
8511 for(i=0; i<COUNT; i++){
8513 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8514 for(j=0; j<16; j++){
8515 ref[j]= random()%255;
8516 src[j]= random()%255;
8519 h264_diff_dct_c(block, src, ref, 4);
8522 for(j=0; j<16; j++){
8523 // printf("%d ", block[j]);
8524 block[j]= block[j]*4;
8525 if(j&1) block[j]= (block[j]*4 + 2)/5;
8526 if(j&4) block[j]= (block[j]*4 + 2)/5;
8530 s->dsp.h264_idct_add(ref, block, 4);
8531 /* for(j=0; j<16; j++){
8532 printf("%d ", ref[j]);
8536 for(j=0; j<16; j++){
8537 int diff= FFABS(src[j] - ref[j]);
8540 max_error= FFMAX(max_error, diff);
8543 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8545 printf("testing quantizer\n");
8546 for(qp=0; qp<52; qp++){
8548 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escaping: encode_nal/decode_nal round trip on random data --- */
8552 printf("Testing NAL layer\n");
8554 uint8_t bitstream[COUNT];
8555 uint8_t nal[COUNT*2];
8557 memset(&h, 0, sizeof(H264Context));
8559 for(i=0; i<COUNT; i++){
8567 for(j=0; j<COUNT; j++){
8568 bitstream[j]= (random() % 255) + 1;
8571 for(j=0; j<zeros; j++){
8572 int pos= random() % COUNT;
8573 while(bitstream[pos] == 0){
8582 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8584 printf("encoding failed\n");
8588 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8592 if(out_length != COUNT){
8593 printf("incorrect length %d %d\n", out_length, COUNT);
8597 if(consumed != nal_length){
8598 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8602 if(memcmp(bitstream, out, COUNT)){
8603 printf("missmatch\n");
8608 printf("Testing RBSP\n");
/**
 * AVCodec close callback: frees the RBSP buffer and the per-context
 * tables. (The `s` local is not used in the lines visible here —
 * presumably used by missing lines, e.g. MPV_common_end(s).)
 */
8616 static int decode_end(AVCodecContext *avctx)
8618 H264Context *h = avctx->priv_data;
8619 MpegEncContext *s = &h->s;
8621 av_freep(&h->rbsp_buffer);
8622 free_tables(h); //FIXME cleanup init stuff perhaps
8625 // memset(h, 0, sizeof(H264Context));
/* Registration table for the H.264 decoder. NOTE(review): most
 * initializer fields (name, type, id, callbacks) are missing from this
 * listing. */
8631 AVCodec h264_decoder = {
8635 sizeof(H264Context),
8640 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8644 #ifdef CONFIG_H264_PARSER
8645 AVCodecParser h264_parser = {
8647 sizeof(H264Context),