2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
/* Poison identifiers inherited from MpegEncContext: any accidental use in the
 * H.264 code is a bug, so map them to names that cannot compile/link. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* pseudo block indices used for the luma/chroma DC coefficient "blocks" */
#define LUMA_DC_BLOCK_INDEX 25
#define CHROMA_DC_BLOCK_INDEX 26

/* number of bits read per step in the CAVLC VLC table lookups */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6

/* capacity limits: parameter-set buffers and MMCO operations per slice header */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256
#define MAX_MMCO_COUNT 66
/* Compiling in interlaced support reduces the speed
 * of progressive decoding by about 2%. */
#define ALLOW_INTERLACE

#ifdef ALLOW_INTERLACE
/* real per-context flags used when MBAFF/interlaced decoding is compiled in */
#define MB_MBAFF h->mb_mbaff
#define MB_FIELD h->mb_field_decoding_flag
#define FRAME_MBAFF h->mb_aff_frame
#else
/* interlaced support compiled out: fold all flags to constants so the
 * compiler can discard the MBAFF code paths entirely */
#define MB_MBAFF 0
#define MB_FIELD 0
#define FRAME_MBAFF 0
#undef  IS_INTERLACED
#define IS_INTERLACED(mb_type) 0
#endif
/**
 * Sequence parameter set
 * (field names mirror the SPS syntax elements of the H.264 bitstream)
 */
    int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
    int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
    int poc_type; ///< pic_order_cnt_type
    int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
    int delta_pic_order_always_zero_flag;
    int offset_for_non_ref_pic;
    int offset_for_top_to_bottom_field;
    int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
    int ref_frame_count; ///< num_ref_frames
    int gaps_in_frame_num_allowed_flag;
    int mb_width; ///< frame_width_in_mbs_minus1 + 1
    int mb_height; ///< frame_height_in_mbs_minus1 + 1
    int frame_mbs_only_flag;
    int mb_aff; ///< mb_adaptive_frame_field_flag
    int direct_8x8_inference_flag;
    /* frame cropping rectangle, in samples */
    int crop; ///< frame_cropping_flag
    int crop_left; ///< frame_cropping_rect_left_offset
    int crop_right; ///< frame_cropping_rect_right_offset
    int crop_top; ///< frame_cropping_rect_top_offset
    int crop_bottom; ///< frame_cropping_rect_bottom_offset
    int vui_parameters_present_flag;
    int timing_info_present_flag;
    uint32_t num_units_in_tick;
    int fixed_frame_rate_flag;
    short offset_for_ref_frame[256]; //FIXME dyn aloc?
    int bitstream_restriction_flag;
    int num_reorder_frames;
    /* user-supplied scaling lists, 4x4 and 8x8 variants */
    int scaling_matrix_present;
    uint8_t scaling_matrix4[6][16];
    uint8_t scaling_matrix8[2][64];
/**
 * Picture parameter set
 * (field names mirror the PPS syntax elements of the H.264 bitstream)
 */
    int cabac; ///< entropy_coding_mode_flag
    int pic_order_present; ///< pic_order_present_flag
    int slice_group_count; ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred; ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp; ///< pic_init_qp_minus26 + 26
    int init_qs; ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    int transform_8x8_mode; ///< transform_8x8_mode_flag
    /* per-PPS scaling lists, 4x4 and 8x8 variants */
    uint8_t scaling_matrix4[6][16];
    uint8_t scaling_matrix8[2][64];
/**
 * Memory management control operation opcode.
 */
typedef enum MMCOOpcode{

/**
 * Memory management control operation.
 */

typedef struct H264Context{
    /* RBSP (raw byte sequence payload) scratch buffer */
    uint8_t *rbsp_buffer;
    unsigned int rbsp_buffer_size;

    /**
     * Used to parse AVC variant of h264
     */
    int is_avc; ///< this flag is != 0 if codec is avc1
    int got_avcC; ///< flag used to parse avcC data only once
    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)

    // intra prediction state for the current macroblock
    int chroma_pred_mode;
    int intra16x16_pred_mode;

    int8_t intra4x4_pred_mode_cache[5*8];
    int8_t (*intra4x4_pred_mode)[8];
    /* per-mode intra prediction function pointers */
    void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
    void (*pred8x8 [4+3])(uint8_t *src, int stride);
    void (*pred16x16[4+3])(uint8_t *src, int stride);
    /* bitmasks of neighboring samples available for intra prediction */
    unsigned int topleft_samples_available;
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;
    uint8_t (*top_borders[2])[16+2*8];
    uint8_t left_border[2*(17+2*9)];

    /**
     * non zero coeff count cache.
     * is 64 if not available.
     */
    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
    uint8_t (*non_zero_count)[16];

    /**
     * Motion vector cache.
     */
    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2

    /**
     * is 1 if the specific list MV&references are set to 0,0,-2.
     */
    int mv_cache_clean[2];

    /**
     * number of neighbors (top and/or left) that used 8x8 dct
     */
    int neighbor_transform_size;

    /**
     * block_offset[ 0..23] for frame macroblocks
     * block_offset[24..47] for field macroblocks
     */
    int block_offset[2*(16+8)];

    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
    int b_stride; //FIXME use s->b4_stride

    int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff

    int unknown_svq3_flag;
    int next_slice_index;

    SPS sps_buffer[MAX_SPS_COUNT];
    SPS sps; ///< current sps

    PPS pps_buffer[MAX_PPS_COUNT];
    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?

    /* dequant coefficient tables; the [52] dimension is one entry per qp */
    uint32_t dequant4_buffer[6][52][16];
    uint32_t dequant8_buffer[2][52][64];
    uint32_t (*dequant4_coeff[6])[16];
    uint32_t (*dequant8_coeff[2])[64];
    int dequant_coeff_pps; ///< reinit tables when pps changes

    uint8_t *slice_table_base;
    uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
    int slice_type_fixed;

    //interlacing specific flags
    int mb_field_decoding_flag;
    int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag

    unsigned int sub_mb_type[4];

    //POC (picture order count) state
    int delta_poc_bottom;
    int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
    int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
    int frame_num_offset; ///< for POC type 2
    int prev_frame_num_offset; ///< for POC type 2
    int prev_frame_num; ///< frame_num of the last pic for POC type 1/2

    /**
     * frame_num for frames or 2*frame_num for field pics.
     */

    /**
     * max_frame_num or 2*max_frame_num for field pics.
     */

    //Weighted pred stuff
    int use_weight_chroma;
    int luma_log2_weight_denom;
    int chroma_log2_weight_denom;
    int luma_weight[2][48];
    int luma_offset[2][48];
    int chroma_weight[2][48][2];
    int chroma_offset[2][48][2];
    int implicit_weight[48][48];

    //deblocking filter parameters from the slice header
    int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
    int slice_alpha_c0_offset;
    int slice_beta_offset;

    int redundant_pic_count;

    //B-frame direct-mode state
    int direct_spatial_mv_pred;
    int dist_scale_factor[16];
    int dist_scale_factor_field[32];
    int map_col_to_list0[2][16];
    int map_col_to_list0_field[2][32];

    /**
     * num_ref_idx_l0/1_active_minus1 + 1
     */
    unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
    unsigned int list_count;
    Picture *short_ref[32];
    Picture *long_ref[32];
    Picture default_ref_list[2][32];
    Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
    Picture *delayed_pic[18]; //FIXME size?
    Picture *delayed_output_pic;

    /**
     * memory management control operations buffer.
     */
    MMCO mmco[MAX_MMCO_COUNT];
    int long_ref_count; ///< number of actual long term references
    int short_ref_count; ///< number of actual short term references

    //bitstream readers; separate intra/inter readers presumably for
    //data partitioning -- TODO(review): confirm against the slice parser
    GetBitContext intra_gb;
    GetBitContext inter_gb;
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;

    DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
    DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb

    //CABAC arithmetic-coder context states
    uint8_t cabac_state[460];

    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */

    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
    uint8_t *chroma_pred_mode_table;
    int last_qscale_diff;
    int16_t (*mvd_table[2])[2];
    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
    uint8_t *direct_table;
    uint8_t direct_cache[5*8];

    //scan tables (zigzag for frame macroblocks, field scan for field mbs)
    uint8_t zigzag_scan[16];
    uint8_t zigzag_scan8x8[64];
    uint8_t zigzag_scan8x8_cavlc[64];
    uint8_t field_scan[16];
    uint8_t field_scan8x8[64];
    uint8_t field_scan8x8_cavlc[64];
    const uint8_t *zigzag_scan_q0;
    const uint8_t *zigzag_scan8x8_q0;
    const uint8_t *zigzag_scan8x8_cavlc_q0;
    const uint8_t *field_scan_q0;
    const uint8_t *field_scan8x8_q0;
    const uint8_t *field_scan8x8_cavlc_q0;
/* CAVLC VLC tables (static: shared by all decoder instances) */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];

/* forward declarations */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
405 static av_always_inline uint32_t pack16to32(int a, int b){
406 #ifdef WORDS_BIGENDIAN
407 return (b&0xFFFF) + (a<<16);
409 return (a&0xFFFF) + (b<<16);
/* qp%6 lookup table for qp in 0..51, used to index dequant coefficient rows */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp/6 lookup table for qp in 0..51, used as the dequant shift amount */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
424 * @param h height of the rectangle, should be a constant
425 * @param w width of the rectangle, should be a constant
426 * @param size the size of val (1 or 4), should be a constant
428 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
429 uint8_t *p= (uint8_t*)vp;
430 assert(size==1 || size==4);
436 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
437 assert((stride&(w-1))==0);
439 const uint16_t v= size==4 ? val : val*0x0101;
440 *(uint16_t*)(p + 0*stride)= v;
442 *(uint16_t*)(p + 1*stride)= v;
444 *(uint16_t*)(p + 2*stride)=
445 *(uint16_t*)(p + 3*stride)= v;
447 const uint32_t v= size==4 ? val : val*0x01010101;
448 *(uint32_t*)(p + 0*stride)= v;
450 *(uint32_t*)(p + 1*stride)= v;
452 *(uint32_t*)(p + 2*stride)=
453 *(uint32_t*)(p + 3*stride)= v;
455 //gcc can't optimize 64bit math on x86_32
456 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
457 const uint64_t v= val*0x0100000001ULL;
458 *(uint64_t*)(p + 0*stride)= v;
460 *(uint64_t*)(p + 1*stride)= v;
462 *(uint64_t*)(p + 2*stride)=
463 *(uint64_t*)(p + 3*stride)= v;
465 const uint64_t v= val*0x0100000001ULL;
466 *(uint64_t*)(p + 0+0*stride)=
467 *(uint64_t*)(p + 8+0*stride)=
468 *(uint64_t*)(p + 0+1*stride)=
469 *(uint64_t*)(p + 8+1*stride)= v;
471 *(uint64_t*)(p + 0+2*stride)=
472 *(uint64_t*)(p + 8+2*stride)=
473 *(uint64_t*)(p + 0+3*stride)=
474 *(uint64_t*)(p + 8+3*stride)= v;
476 *(uint32_t*)(p + 0+0*stride)=
477 *(uint32_t*)(p + 4+0*stride)= val;
479 *(uint32_t*)(p + 0+1*stride)=
480 *(uint32_t*)(p + 4+1*stride)= val;
482 *(uint32_t*)(p + 0+2*stride)=
483 *(uint32_t*)(p + 4+2*stride)=
484 *(uint32_t*)(p + 0+3*stride)=
485 *(uint32_t*)(p + 4+3*stride)= val;
487 *(uint32_t*)(p + 0+0*stride)=
488 *(uint32_t*)(p + 4+0*stride)=
489 *(uint32_t*)(p + 8+0*stride)=
490 *(uint32_t*)(p +12+0*stride)=
491 *(uint32_t*)(p + 0+1*stride)=
492 *(uint32_t*)(p + 4+1*stride)=
493 *(uint32_t*)(p + 8+1*stride)=
494 *(uint32_t*)(p +12+1*stride)= val;
496 *(uint32_t*)(p + 0+2*stride)=
497 *(uint32_t*)(p + 4+2*stride)=
498 *(uint32_t*)(p + 8+2*stride)=
499 *(uint32_t*)(p +12+2*stride)=
500 *(uint32_t*)(p + 0+3*stride)=
501 *(uint32_t*)(p + 4+3*stride)=
502 *(uint32_t*)(p + 8+3*stride)=
503 *(uint32_t*)(p +12+3*stride)= val;
/**
 * Fills the per-macroblock caches (intra pred modes, non-zero counts,
 * motion vectors, reference indices, mvd, direct flags) from the four
 * neighboring macroblocks, so decode/deblock code can work on the small
 * 8-wide cache arrays instead of the full frame tables.
 * NOTE(review): several interior lines of this function appear to be missing
 * from this copy of the file (unbalanced braces); code kept as-is below.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it

    /* default (progressive) neighbor macroblock addresses */
    top_xy = mb_xy - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

    /* MBAFF: neighbor selection depends on the frame/field flags of the
     * current mb pair and of each neighboring mb pair */
    const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy = pair_xy - s->mb_stride;
    const int topleft_pair_xy = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
        top_xy -= s->mb_stride;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
        topleft_xy -= s->mb_stride;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
        topright_xy -= s->mb_stride;
    if (left_mb_frame_flag != curr_mb_frame_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_frame_flag) {
            left_xy[1] += s->mb_stride;

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

        /* deblocking path: a slice_table value of 255 marks "no mb here" */
        top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
            /* load the packed per-4x4 nnz bits stored by write_back_non_zero_count() */
            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
            h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
            for(list=0; list<h->list_count; list++){
                if(USES_LIST(mb_type,list)){
                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);

    /* decode path: neighbors in a different slice are treated as unavailable */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    /* intra sample availability bitmasks for the prediction functions */
    if(IS_INTRA(mb_type)){
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;

            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        /* intra4x4 prediction mode cache from top/left neighbors */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    /* non-zero coefficient count cache from top/left neighbors */
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=
        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=
        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* coded block pattern of the neighbors (for CABAC context modelling) */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    /* motion vector and reference index caches */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                /* direct-mode flag cache (B slices only) */
                if(h->slice_type == B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;

    /* MBAFF: per-neighbor frame<->field conversion of cached mv/ref values.
     * MAP_F2F is redefined below for each direction before MAP_MVS expands. */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

            /* current mb is a field mb: frame neighbors get field-scaled */
#define MAP_F2F(idx, mb_type)\
            if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                h->ref_cache[list][idx] <<= 1;\
                h->mv_cache[list][idx][1] /= 2;\
                h->mvd_cache[list][idx][1] /= 2;\

            /* current mb is a frame mb: field neighbors get frame-scaled */
#define MAP_F2F(idx, mb_type)\
            if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                h->ref_cache[list][idx] >>= 1;\
                h->mv_cache[list][idx][1] <<= 1;\
                h->mvd_cache[list][idx][1] <<= 1;\

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
973 static inline void write_back_intra_pred_mode(H264Context *h){
974 MpegEncContext * const s = &h->s;
975 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
977 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
978 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
979 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
980 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
981 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
982 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
983 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
987 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
989 static inline int check_intra4x4_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
992 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
995 if(!(h->top_samples_available&0x8000)){
997 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
999 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1002 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1007 if(!(h->left_samples_available&0x8000)){
1009 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1011 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1014 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1020 } //FIXME cleanup like next
1023 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1025 static inline int check_intra_pred_mode(H264Context *h, int mode){
1026 MpegEncContext * const s = &h->s;
1027 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1028 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1031 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1035 if(!(h->top_samples_available&0x8000)){
1038 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1043 if(!(h->left_samples_available&0x8000)){
1046 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1055 * gets the predicted intra4x4 prediction mode.
/* pred_intra_mode(): predicted intra4x4 mode for block n, taken as the minimum
 * of the cached left and top neighbour modes; falls back to DC_PRED when a
 * neighbour is unavailable (encoded as a negative cache entry). */
1057 static inline int pred_intra_mode(H264Context *h, int n){
1058 const int index8= scan8[n];
1059 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1060 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1061 const int min= FFMIN(left, top);
1063 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
/* negative cache entry means the neighbour is missing -> DC prediction */
1065 if(min<0) return DC_PRED;
/* write_back_non_zero_count(): copies the per-block non-zero coefficient counts
 * from the decode cache (8-wide cache layout) back into the per-macroblock
 * non_zero_count[] array used by neighbouring macroblocks and the deblocker.
 * NOTE(review): interior lines are missing from this listing. */
1069 static inline void write_back_non_zero_count(H264Context *h){
1070 MpegEncContext * const s = &h->s;
1071 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: right column and bottom row of the 4x4 grid */
1073 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1074 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1075 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1076 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1077 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1078 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1079 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma U edge entries */
1081 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1082 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1083 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* chroma V edge entries */
1085 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1086 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1087 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1090 // store all luma nnzs, for deblocking
/* pack one bit per 4x4 luma block into a 16-bit mask */
1093 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1094 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1099 * gets the predicted number of non zero coefficients.
1100 * @param n block index
/* pred_non_zero_count(): predicted number of non-zero coefficients for block n,
 * derived from the cached left and top neighbour counts (used by CAVLC
 * coeff_token VLC selection). NOTE(review): interior lines are missing here. */
1102 static inline int pred_non_zero_count(H264Context *h, int n){
1103 const int index8= scan8[n];
1104 const int left= h->non_zero_count_cache[index8 - 1];
1105 const int top = h->non_zero_count_cache[index8 - 8];
/* average when both neighbours are available (i >= 64 encodes "missing") */
1108 if(i<64) i= (i+1)>>1;
1110 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* fetch_diagonal_mv(): returns the reference index of the top-right (or, when
 * unavailable, top-left) neighbour of block i and points *C at its motion
 * vector, with special-case MBAFF field/frame scaling handled via SET_DIAG_MV.
 * NOTE(review): interior lines are missing from this listing. */
1115 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1116 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1117 MpegEncContext *s = &h->s;
1119 /* there is no consistent mapping of mvs to neighboring locations that will
1120 * make mbaff happy, so we can't move all this logic to fill_caches */
1122 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scratch slot used as a zero MV / fallback for the macro below */
1124 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1125 *C = h->mv_cache[list][scan8[0]-2];
1128 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1129 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1130 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: fetch a neighbour MV/ref with field<->frame scaling applied
 * (MV_OP scales the vertical MV, REF_OP scales the reference index). */
1131 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1132 const int x4 = X4, y4 = Y4;\
1133 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1134 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1135 return LIST_NOT_USED;\
1136 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1137 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1138 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1139 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1141 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1144 if(topright_ref == PART_NOT_AVAILABLE
1145 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1146 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1148 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1149 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1152 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1153 && i >= scan8[0]+8){
1154 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1155 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* normal (non-MBAFF-special-case) path: use top-right, else fall back to top-left */
1161 if(topright_ref != PART_NOT_AVAILABLE){
1162 *C= h->mv_cache[list][ i - 8 + part_width ];
1163 return topright_ref;
1165 tprintf(s->avctx, "topright MV not available\n");
1167 *C= h->mv_cache[list][ i - 8 - 1 ];
1168 return h->ref_cache[list][ i - 8 - 1 ];
1173 * gets the predicted MV.
1174 * @param n the block index
1175 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1176 * @param mx the x component of the predicted motion vector
1177 * @param my the y component of the predicted motion vector
/* pred_motion(): standard H.264 median MV prediction for block n. The result
 * is the component-wise median of the left (A), top (B) and diagonal (C)
 * neighbour MVs, except when exactly one neighbour uses the same reference,
 * in which case that neighbour's MV is used directly.
 * NOTE(review): interior lines are missing from this listing. */
1179 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1180 const int index8= scan8[n];
1181 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1182 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1183 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1184 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1186 int diagonal_ref, match_count;
1188 assert(part_width==1 || part_width==2 || part_width==4);
1198 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours reference the same picture we are predicting for */
1199 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1200 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1201 if(match_count > 1){ //most common
1202 *mx= mid_pred(A[0], B[0], C[0]);
1203 *my= mid_pred(A[1], B[1], C[1]);
1204 }else if(match_count==1){
1208 }else if(top_ref==ref){
/* no neighbour matches: median of all three, or left MV if it is the only
 * available neighbour */
1216 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1220 *mx= mid_pred(A[0], B[0], C[0]);
1221 *my= mid_pred(A[1], B[1], C[1]);
1225 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1229 * gets the directionally predicted 16x8 MV.
1230 * @param n the block index
1231 * @param mx the x component of the predicted motion vector
1232 * @param my the y component of the predicted motion vector
/* pred_16x8_motion(): directional MV prediction for 16x8 partitions — the top
 * partition prefers the top neighbour, the bottom partition prefers the left
 * neighbour; otherwise falls back to the generic median predictor.
 * NOTE(review): interior lines are missing from this listing. */
1234 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top 16x8 partition: try the top neighbour first */
1236 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1237 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1239 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom 16x8 partition: try the left neighbour first */
1247 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1248 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1250 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1252 if(left_ref == ref){
/* fall back to the median predictor */
1260 pred_motion(h, n, 4, list, ref, mx, my);
1264 * gets the directionally predicted 8x16 MV.
1265 * @param n the block index
1266 * @param mx the x component of the predicted motion vector
1267 * @param my the y component of the predicted motion vector
/* pred_8x16_motion(): directional MV prediction for 8x16 partitions — the left
 * partition prefers the left neighbour, the right partition prefers the
 * diagonal (top-right) neighbour; otherwise falls back to the median predictor.
 * NOTE(review): interior lines are missing from this listing. */
1269 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: try the left neighbour first */
1271 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1272 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1274 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1276 if(left_ref == ref){
/* right 8x16 partition: try the diagonal neighbour first */
1285 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1287 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1289 if(diagonal_ref == ref){
/* fall back to the median predictor */
1297 pred_motion(h, n, 2, list, ref, mx, my);
/* pred_pskip_motion(): MV prediction for P_SKIP macroblocks. Yields the zero
 * vector when a neighbour is unavailable or a neighbour with ref 0 has a zero
 * MV (per the spec's skip condition); otherwise uses the median predictor.
 * NOTE(review): interior lines are missing from this listing. */
1300 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1301 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1302 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1304 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* spec skip condition: unavailable neighbour, or a zero MV with ref 0 -> MV = 0 */
1306 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1307 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1308 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1314 pred_motion(h, 0, 4, 0, 0, mx, my);
/* direct_dist_scale_factor(): precomputes the temporal-direct-mode MV scaling
 * factors (spec 8.4.1.2.3) for every list-0 reference, from the POC distances
 * tb (cur - ref0) and td (ref1 - ref0), clipped to [-128,127].
 * NOTE(review): interior lines are missing from this listing. */
1319 static inline void direct_dist_scale_factor(H264Context * const h){
1320 const int poc = h->s.current_picture_ptr->poc;
1321 const int poc1 = h->ref_list[1][0].poc;
1323 for(i=0; i<h->ref_count[0]; i++){
1324 int poc0 = h->ref_list[0][i].poc;
1325 int td = av_clip(poc1 - poc0, -128, 127);
1326 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POC distance (or long-term ref): identity scale (256 == 1.0 in Q8) */
1327 h->dist_scale_factor[i] = 256;
1329 int tb = av_clip(poc - poc0, -128, 127);
1330 int tx = (16384 + (FFABS(td) >> 1)) / td;
1331 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* duplicate per-frame factors into the field-pair table used by MBAFF */
1335 for(i=0; i<h->ref_count[0]; i++){
1336 h->dist_scale_factor_field[2*i] =
1337 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* direct_ref_list_init(): stores the current picture's reference counts/POCs,
 * and builds map_col_to_list0[] which maps each reference of the colocated
 * (list-1[0]) picture to the matching index in our list 0 — needed by
 * temporal direct mode. NOTE(review): interior lines are missing here. */
1341 static inline void direct_ref_list_init(H264Context * const h){
1342 MpegEncContext * const s = &h->s;
1343 Picture * const ref1 = &h->ref_list[1][0];
1344 Picture * const cur = s->current_picture_ptr;
1346 if(cur->pict_type == I_TYPE)
1347 cur->ref_count[0] = 0;
1348 if(cur->pict_type != B_TYPE)
1349 cur->ref_count[1] = 0;
/* record our reference POCs so future pictures can map against them */
1350 for(list=0; list<2; list++){
1351 cur->ref_count[list] = h->ref_count[list];
1352 for(j=0; j<h->ref_count[list]; j++)
1353 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* only temporal direct mode needs the colocated-ref mapping below */
1355 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1357 for(list=0; list<2; list++){
1358 for(i=0; i<ref1->ref_count[list]; i++){
1359 const int poc = ref1->ref_poc[list][i];
1360 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1361 for(j=0; j<h->ref_count[list]; j++)
1362 if(h->ref_list[list][j].poc == poc){
1363 h->map_col_to_list0[list][i] = j;
/* field-pair variant of the mapping for MBAFF */
1369 for(list=0; list<2; list++){
1370 for(i=0; i<ref1->ref_count[list]; i++){
1371 j = h->map_col_to_list0[list][i];
1372 h->map_col_to_list0_field[list][2*i] = 2*j;
1373 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* pred_direct_motion(): derives motion vectors and reference indices for
 * B-direct macroblocks/partitions, in either spatial mode (MVs predicted from
 * neighbours, zeroed when the colocated block is static) or temporal mode
 * (colocated list-1 MVs scaled by dist_scale_factor). Also fixes up *mb_type
 * and h->sub_mb_type[] to reflect the derived partitioning.
 * NOTE(review): this function is too intricate (and this listing too gappy —
 * embedded numbering jumps) for a safe rewrite; comments only. */
1379 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1380 MpegEncContext * const s = &h->s;
1381 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1382 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1383 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* colocated macroblock data from the first list-1 reference */
1384 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1385 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1386 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1387 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1388 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1389 const int is_b8x8 = IS_8X8(*mb_type);
1390 unsigned int sub_mb_type;
1393 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the direct partition size from the colocated MB type */
1394 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1395 /* FIXME save sub mb types from previous frames (or derive from MVs)
1396 * so we know exactly what block size to use */
1397 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1398 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1399 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1400 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1401 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1403 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1404 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1407 *mb_type |= MB_TYPE_DIRECT2;
1409 *mb_type |= MB_TYPE_INTERLACED;
1411 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct mode ---- */
1413 if(h->direct_spatial_mv_pred){
1418 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1420 /* ref = min(neighbors) */
1421 for(list=0; list<2; list++){
1422 int refa = h->ref_cache[list][scan8[0] - 1];
1423 int refb = h->ref_cache[list][scan8[0] - 8];
1424 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1426 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1428 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1430 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour reference in either list: use ref 0, zero MVs */
1436 if(ref[0] < 0 && ref[1] < 0){
1437 ref[0] = ref[1] = 0;
1438 mv[0][0] = mv[0][1] =
1439 mv[1][0] = mv[1][1] = 0;
1441 for(list=0; list<2; list++){
1443 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1445 mv[list][0] = mv[list][1] = 0;
/* drop the unused list from the (sub-)MB type */
1450 *mb_type &= ~MB_TYPE_P0L1;
1451 sub_mb_type &= ~MB_TYPE_P0L1;
1452 }else if(ref[0] < 0){
1453 *mb_type &= ~MB_TYPE_P0L0;
1454 sub_mb_type &= ~MB_TYPE_P0L0;
1457 if(IS_16X16(*mb_type)){
1460 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1461 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* zero the MV when the colocated block is (nearly) static, per the spec;
 * the x264_build check works around an old x264 bug */
1462 if(!IS_INTRA(mb_type_col)
1463 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1464 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1465 && (h->x264_build>33 || !h->x264_build)))){
1467 a= pack16to32(mv[0][0],mv[0][1]);
1469 b= pack16to32(mv[1][0],mv[1][1]);
1471 a= pack16to32(mv[0][0],mv[0][1]);
1472 b= pack16to32(mv[1][0],mv[1][1]);
1474 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1475 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 (and finer) spatial direct */
1477 for(i8=0; i8<4; i8++){
1478 const int x8 = i8&1;
1479 const int y8 = i8>>1;
1481 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1483 h->sub_mb_type[i8] = sub_mb_type;
1485 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1486 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1488 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated-static test per 8x8 (or per 4x4) block */
1491 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1492 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1493 && (h->x264_build>33 || !h->x264_build)))){
1494 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1495 if(IS_SUB_8X8(sub_mb_type)){
1496 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1497 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1499 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1501 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1504 for(i4=0; i4<4; i4++){
1505 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1506 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1508 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1510 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1516 }else{ /* direct temporal mv pred */
1517 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1518 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF: switch to the field variants of the mapping/scale tables */
1521 if(IS_INTERLACED(*mb_type)){
1522 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1523 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1524 dist_scale_factor = h->dist_scale_factor_field;
1526 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1527 /* FIXME assumes direct_8x8_inference == 1 */
1528 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1529 int mb_types_col[2];
1532 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1533 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1534 | (*mb_type & MB_TYPE_INTERLACED);
1535 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1537 if(IS_INTERLACED(*mb_type)){
1538 /* frame to field scaling */
1539 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1540 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rebase the colocated pointers to the top of the MB pair */
1542 l1ref0 -= 2*h->b8_stride;
1543 l1ref1 -= 2*h->b8_stride;
1544 l1mv0 -= 4*h->b_stride;
1545 l1mv1 -= 4*h->b_stride;
1549 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1550 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1552 *mb_type |= MB_TYPE_16x8;
1554 *mb_type |= MB_TYPE_8x8;
1556 /* field to frame scaling */
1557 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1558 * but in MBAFF, top and bottom POC are equal */
1559 int dy = (s->mb_y&1) ? 1 : 2;
1561 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1562 l1ref0 += dy*h->b8_stride;
1563 l1ref1 += dy*h->b8_stride;
1564 l1mv0 += 2*dy*h->b_stride;
1565 l1mv1 += 2*dy*h->b_stride;
1568 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1570 *mb_type |= MB_TYPE_16x16;
1572 *mb_type |= MB_TYPE_8x8;
/* field<->frame 8x8 temporal direct with vertical scaling (y_shift) */
1575 for(i8=0; i8<4; i8++){
1576 const int x8 = i8&1;
1577 const int y8 = i8>>1;
1579 const int16_t (*l1mv)[2]= l1mv0;
1581 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1583 h->sub_mb_type[i8] = sub_mb_type;
1585 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1586 if(IS_INTRA(mb_types_col[y8])){
1587 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1588 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1589 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1593 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1595 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1597 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1600 scale = dist_scale_factor[ref0];
1601 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1604 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1605 int my_col = (mv_col[1]<<y_shift)/2;
1606 int mx = (scale * mv_col[0] + 128) >> 8;
1607 int my = (scale * my_col + 128) >> 8;
1608 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1609 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1616 /* one-to-one mv scaling */
1618 if(IS_16X16(*mb_type)){
1621 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1622 if(IS_INTRA(mb_type_col)){
1625 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1626 : map_col_to_list0[1][l1ref1[0]];
1627 const int scale = dist_scale_factor[ref0];
1628 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* temporal scaling: MV_L0 = scale*mv_col, MV_L1 = MV_L0 - mv_col */
1630 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1631 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1633 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1634 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1636 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1637 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1638 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1640 for(i8=0; i8<4; i8++){
1641 const int x8 = i8&1;
1642 const int y8 = i8>>1;
1644 const int16_t (*l1mv)[2]= l1mv0;
1646 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1648 h->sub_mb_type[i8] = sub_mb_type;
1649 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1650 if(IS_INTRA(mb_type_col)){
1651 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1652 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1653 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1657 ref0 = l1ref0[x8 + y8*h->b8_stride];
1659 ref0 = map_col_to_list0[0][ref0];
1661 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1664 scale = dist_scale_factor[ref0];
1666 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1667 if(IS_SUB_8X8(sub_mb_type)){
1668 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1669 int mx = (scale * mv_col[0] + 128) >> 8;
1670 int my = (scale * mv_col[1] + 128) >> 8;
1671 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1672 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1674 for(i4=0; i4<4; i4++){
1675 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1676 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1677 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1678 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1679 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1680 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* write_back_motion(): copies the per-MB motion data (MVs, reference indices,
 * CABAC mvd residuals and B-direct flags) from the decode caches back into the
 * frame-wide arrays of the current picture.
 * NOTE(review): interior lines are missing from this listing. */
1687 static inline void write_back_motion(H264Context *h, int mb_type){
1688 MpegEncContext * const s = &h->s;
1689 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1690 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1693 if(!USES_LIST(mb_type, 0))
1694 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1696 for(list=0; list<h->list_count; list++){
1698 if(!USES_LIST(mb_type, list))
/* copy the 4x4 MV grid, two MVs (64 bits) at a time */
1702 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1703 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1705 if( h->pps.cabac ) {
1706 if(IS_SKIP(mb_type))
1707 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4)
;
1710 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1711 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1716 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1717 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1718 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1719 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1720 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC: record per-8x8 direct flags for context modelling */
1724 if(h->slice_type == B_TYPE && h->pps.cabac){
1725 if(IS_8X8(mb_type)){
1726 uint8_t *direct_table = &h->direct_table[b8_xy];
1727 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1728 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1729 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1735 * Decodes a network abstraction layer unit.
1736 * @param consumed is the number of bytes used as input
1737 * @param length is the length of the array
1738 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1739 * @returns decoded bytes, might be src+1 if no escapes
/* decode_nal(): parses a NAL unit header (nal_ref_idc, nal_unit_type) and
 * removes 0x000003 emulation-prevention escapes, returning a pointer to the
 * de-escaped RBSP (src+1 itself when no escapes occur).
 * NOTE(review): interior lines are missing from this listing. */
1741 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1745 // src[0]&0x80; //forbidden bit
1746 h->nal_ref_idc= src[0]>>5;
1747 h->nal_unit_type= src[0]&0x1F;
1751 for(i=0; i<length; i++)
1752 printf("%2X ", src[i]);
/* fast scan: look for a 00 00 xx (xx<=3) pattern; stepping by 2 is safe
 * because an escape always contains two consecutive zero bytes */
1754 for(i=0; i+1<length; i+=2){
1755 if(src[i]) continue;
1756 if(i>0 && src[i-1]==0) i--;
1757 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1759 /* startcode, so we must be past the end */
1766 if(i>=length-1){ //no escaped 0
1767 *dst_length= length;
1768 *consumed= length+1; //+1 for the header
/* slow path: copy into rbsp_buffer while dropping the 0x03 escape bytes */
1772 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1773 dst= h->rbsp_buffer;
1779 //printf("decoding esc\n");
1782 //remove escapes (very rare 1:2^22)
1783 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1784 if(src[si+2]==3){ //escape
1789 }else //next start code
1793 dst[di++]= src[si++];
1797 *consumed= si + 1;//+1 for the header
1798 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1803 * identifies the exact end of the bitstream
1804 * @return the length of the trailing, or 0 if damaged
/* decode_rbsp_trailing(): locates the rbsp_stop_one_bit to find the exact end
 * of the bitstream; returns the trailing length or 0 when damaged.
 * NOTE(review): the body is missing from this listing (numbering jumps). */
1806 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1810 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1820 * idct transforms the 16 dc values and dequantizes them.
1821 * @param qp quantization parameter
/* h264_luma_dc_dequant_idct_c(): 4x4 Hadamard inverse transform of the 16 luma
 * DC coefficients followed by dequantization; results are scattered back into
 * the DC positions of the 16 4x4 blocks (hence the stride/offset tables).
 * NOTE(review): interior lines are missing from this listing. */
1823 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1826 int temp[16]; //FIXME check if this is a good idea
1827 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1828 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1830 //memset(block, 64, 2*256);
/* horizontal butterfly pass into temp[] */
1833 const int offset= y_offset[i];
1834 const int z0= block[offset+stride*0] + block[offset+stride*4];
1835 const int z1= block[offset+stride*0] - block[offset+stride*4];
1836 const int z2= block[offset+stride*1] - block[offset+stride*5];
1837 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass, then dequantize with rounding */
1846 const int offset= x_offset[i];
1847 const int z0= temp[4*0+i] + temp[4*2+i];
1848 const int z1= temp[4*0+i] - temp[4*2+i];
1849 const int z2= temp[4*1+i] - temp[4*3+i];
1850 const int z3= temp[4*1+i] + temp[4*3+i];
1852 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1853 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1854 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1855 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1861 * dct transforms the 16 dc values.
1862 * @param qp quantization parameter ??? FIXME
/* h264_luma_dc_dct_c(): forward 4x4 Hadamard transform of the 16 luma DC
 * values (encoder side); mirrors the dequant/idct routine above but with a
 * final >>1 normalization instead of dequantization.
 * NOTE(review): interior lines are missing from this listing. */
1864 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1865 // const int qmul= dequant_coeff[qp][0];
1867 int temp[16]; //FIXME check if this is a good idea
1868 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1869 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterfly pass into temp[] */
1872 const int offset= y_offset[i];
1873 const int z0= block[offset+stride*0] + block[offset+stride*4];
1874 const int z1= block[offset+stride*0] - block[offset+stride*4];
1875 const int z2= block[offset+stride*1] - block[offset+stride*5];
1876 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass with >>1 normalization */
1885 const int offset= x_offset[i];
1886 const int z0= temp[4*0+i] + temp[4*2+i];
1887 const int z1= temp[4*0+i] - temp[4*2+i];
1888 const int z2= temp[4*1+i] - temp[4*3+i];
1889 const int z3= temp[4*1+i] + temp[4*3+i];
1891 block[stride*0 +offset]= (z0 + z3)>>1;
1892 block[stride*2 +offset]= (z1 + z2)>>1;
1893 block[stride*8 +offset]= (z1 - z2)>>1;
1894 block[stride*10+offset]= (z0 - z3)>>1;
/* chroma_dc_dequant_idct_c(): 2x2 inverse Hadamard transform + dequantization
 * of the 4 chroma DC coefficients (stored at the DC positions of the 4
 * chroma 4x4 blocks). NOTE(review): interior lines are missing here. */
1902 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1903 const int stride= 16*2;
1904 const int xStride= 16;
1907 a= block[stride*0 + xStride*0];
1908 b= block[stride*0 + xStride*1];
1909 c= block[stride*1 + xStride*0];
1910 d= block[stride*1 + xStride*1];
/* write back the 2x2 butterfly results, dequantized (>>7) */
1917 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1918 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1919 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1920 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* chroma_dc_dct_c(): forward 2x2 Hadamard transform of the 4 chroma DC values
 * (encoder side). NOTE(review): interior lines are missing from this listing. */
1924 static void chroma_dc_dct_c(DCTELEM *block){
1925 const int stride= 16*2;
1926 const int xStride= 16;
1929 a= block[stride*0 + xStride*0];
1930 b= block[stride*0 + xStride*1];
1931 c= block[stride*1 + xStride*0];
1932 d= block[stride*1 + xStride*1];
/* write back the 2x2 butterfly results, unscaled */
1939 block[stride*0 + xStride*0]= (a+c);
1940 block[stride*0 + xStride*1]= (e+b);
1941 block[stride*1 + xStride*0]= (a-c);
1942 block[stride*1 + xStride*1]= (e-b);
1947 * gets the chroma qp.
/* get_chroma_qp(): maps the luma qscale (plus the PPS chroma offset, clipped
 * to [0,51]) through the file-scope chroma_qp[] table. */
1949 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1951 return chroma_qp[av_clip(qscale + chroma_qp_index_offset, 0, 51)];
1954 //FIXME need to check that this doesn't overflow signed 32 bit for low qp, I am not sure, it's very close
1955 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* quantize_c(): encoder-side quantization of one 4x4 block with a dead-zone
 * (intra bias 1/3, inter bias 1/6); separate_dc selects a different shift for
 * the DC coefficient (chroma DC uses QUANT_SHIFT+1, luma DC QUANT_SHIFT-2).
 * Returns the index of the last non-zero coefficient.
 * NOTE(review): interior lines are missing from this listing. */
1956 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1958 const int * const quant_table= quant_coeff[qscale];
1959 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2: unsigned range test replacing two signed comparisons */
1960 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1961 const unsigned int threshold2= (threshold1<<1);
/* luma DC path: coarser shift (QUANT_SHIFT-2) */
1967 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1968 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1969 const unsigned int dc_threshold2= (dc_threshold1<<1);
1971 int level= block[0]*quant_coeff[qscale+18][0];
1972 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1974 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1977 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1980 // last_non_zero = i;
/* chroma DC path: finer shift (QUANT_SHIFT+1) */
1985 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1986 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1987 const unsigned int dc_threshold2= (dc_threshold1<<1);
1989 int level= block[0]*quant_table[0];
1990 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1992 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1995 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1998 // last_non_zero = i;
/* AC coefficients in scan order */
2011 const int j= scantable[i];
2012 int level= block[j]*quant_table[j];
2014 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2015 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2016 if(((unsigned)(level+threshold1))>threshold2){
2018 level= (bias + level)>>QUANT_SHIFT;
2021 level= (bias - level)>>QUANT_SHIFT;
2030 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicates the 4 pixels directly above
 * the block into each of its four rows. topright is unused.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t above= ((uint32_t*)(src-stride))[0];
    int row;

    for(row=0; row<4; row++)
        ((uint32_t*)(src+row*stride))[0]= above;
}
/**
 * 4x4 horizontal intra prediction: replicates each left-neighbour pixel
 * across its row. topright is unused.
 * Fix: splat with an unsigned constant — pixel * 0x01010101 as a signed int
 * overflows (undefined behavior) for pixel values >= 128.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 DC intra prediction: every pixel becomes the rounded average of the
 * 4 top and 4 left neighbour pixels. topright is unused.
 * Fix: splat with an unsigned constant — dc * 0x01010101 as a signed int
 * overflows (undefined behavior) for dc >= 128.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 left-DC intra prediction: DC from the 4 left neighbours only (used when
 * the top samples are unavailable). topright is unused.
 * Fix: splat with an unsigned constant — dc * 0x01010101 as a signed int
 * overflows (undefined behavior) for dc >= 128.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 top-DC intra prediction: DC from the 4 top neighbours only (used when
 * the left samples are unavailable). topright is unused.
 * Fix: splat with an unsigned constant — dc * 0x01010101 as a signed int
 * overflows (undefined behavior) for dc >= 128.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 128-DC intra prediction: fills the block with the mid-grey value 128,
 * used when neither top nor left neighbours are available. topright is unused.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int row;

    for(row=0; row<4; row++)
        ((uint32_t*)(src+row*stride))[0]= 128U*0x01010101U;
}
/* Helper macros for the 4x4 predictors below: declare locals t4..t7 (top-right
 * row), l0..l3 (left column) and t0..t3 (top row) from the neighbour pixels.
 * Each line continues the macro via the trailing backslash. */
2084 #define LOAD_TOP_RIGHT_EDGE\
2085 const int t4= topright[0];\
2086 const int t5= topright[1];\
2087 const int t6= topright[2];\
2088 const int t7= topright[3];\
/* (separator — terminates the previous macro, as the original blank line did) */
2090 #define LOAD_LEFT_EDGE\
2091 const int l0= src[-1+0*stride];\
2092 const int l1= src[-1+1*stride];\
2093 const int l2= src[-1+2*stride];\
2094 const int l3= src[-1+3*stride];\
/* (separator — terminates the previous macro, as the original blank line did) */
2096 #define LOAD_TOP_EDGE\
2097 const int t0= src[ 0-1*stride];\
2098 const int t1= src[ 1-1*stride];\
2099 const int t2= src[ 2-1*stride];\
2100 const int t3= src[ 3-1*stride];\
/* pred4x4_down_right_c(): diagonal down-right intra prediction — each
 * anti-diagonal is a 3-tap filtered value of the left/top-left/top neighbours.
 * NOTE(review): interior lines are missing from this listing. */
2102 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2103 const int lt= src[-1-1*stride];
2107 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2109 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2112 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2116 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2119 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2121 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2122 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* pred4x4_down_left_c(): diagonal down-left intra prediction using the top and
 * top-right neighbours (t0..t7) with a 3-tap filter.
 * NOTE(review): interior lines are missing from this listing. */
2125 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2130 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2132 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2135 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2139 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2142 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2144 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2145 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* pred4x4_vertical_right_c(): vertical-right intra prediction — mixes 2-tap
 * averages of the top row with 3-tap filtered left/top-left samples.
 * NOTE(review): interior lines are missing from this listing. */
2148 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2149 const int lt= src[-1-1*stride];
2152 const __attribute__((unused)) int unu= l3;
2155 src[1+2*stride]=(lt + t0 + 1)>>1;
2157 src[2+2*stride]=(t0 + t1 + 1)>>1;
2159 src[3+2*stride]=(t1 + t2 + 1)>>1;
2160 src[3+0*stride]=(t2 + t3 + 1)>>1;
2162 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2164 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2166 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2167 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2168 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2169 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left intra predictor from the top/top-right edge (t0..t6).
 * t7 is loaded but unused by the standard formula, hence the explicit
 * unused-variable suppression. */
2172 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2175 const __attribute__((unused)) int unu= t7;
2177 src[0+0*stride]=(t0 + t1 + 1)>>1;
2179 src[0+2*stride]=(t1 + t2 + 1)>>1;
2181 src[1+2*stride]=(t2 + t3 + 1)>>1;
2183 src[2+2*stride]=(t3 + t4+ 1)>>1;
2184 src[3+2*stride]=(t4 + t5+ 1)>>1;
2185 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2187 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2189 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2191 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2192 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up intra predictor from the left edge (l0..l3).  The
 * "l2 + 2*l3 + l3" term is the upstream spelling of 3*l3 — it clamps the
 * filter at the bottom of the edge, not a typo.  The remaining stores
 * (all equal to l3) are outside this excerpt. */
2195 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2198 src[0+0*stride]=(l0 + l1 + 1)>>1;
2199 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2201 src[0+1*stride]=(l1 + l2 + 1)>>1;
2203 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2205 src[0+2*stride]=(l2 + l3 + 1)>>1;
2207 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down intra predictor from the left/top-left/top edges.
 * t3 is loaded but unused by the standard formula, hence the explicit
 * unused-variable suppression. */
2216 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2217 const int lt= src[-1-1*stride];
2220 const __attribute__((unused)) int unu= t3;
2223 src[2+1*stride]=(lt + l0 + 1)>>1;
2225 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2226 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2227 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2229 src[2+2*stride]=(l0 + l1 + 1)>>1;
2231 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2233 src[2+3*stride]=(l1 + l2+ 1)>>1;
2235 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2236 src[0+3*stride]=(l2 + l3 + 1)>>1;
2237 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra predictor: copies the 16 pixels of the row above
 * (read as four 32-bit words a..d) into every row of the block. */
2240 void ff_pred16x16_vertical_c(uint8_t *src, int stride){
2242 const uint32_t a= ((uint32_t*)(src-stride))[0];
2243 const uint32_t b= ((uint32_t*)(src-stride))[1];
2244 const uint32_t c= ((uint32_t*)(src-stride))[2];
2245 const uint32_t d= ((uint32_t*)(src-stride))[3];
2247 for(i=0; i<16; i++){
2248 ((uint32_t*)(src+i*stride))[0]= a;
2249 ((uint32_t*)(src+i*stride))[1]= b;
2250 ((uint32_t*)(src+i*stride))[2]= c;
2251 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal intra predictor: replicates the left-neighbour pixel
 * of each row across the row (0x01010101 broadcasts a byte to 4 lanes). */
2255 void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
2258 for(i=0; i<16; i++){
2259 ((uint32_t*)(src+i*stride))[0]=
2260 ((uint32_t*)(src+i*stride))[1]=
2261 ((uint32_t*)(src+i*stride))[2]=
2262 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC intra predictor: fills the block with the rounded mean of the
 * 32 left+top neighbour pixels ((dc+16)>>5).  The loop summing the top
 * row is outside this excerpt. */
2266 void ff_pred16x16_dc_c(uint8_t *src, int stride){
2270 dc+= src[-1+i*stride];
2277 dc= 0x01010101*((dc + 16)>>5);
2279 for(i=0; i<16; i++){
2280 ((uint32_t*)(src+i*stride))[0]=
2281 ((uint32_t*)(src+i*stride))[1]=
2282 ((uint32_t*)(src+i*stride))[2]=
2283 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 left-DC intra predictor: DC from the 16 left-edge pixels only
 * ((dc+8)>>4); used when the top neighbours are unavailable. */
2287 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2291 dc+= src[-1+i*stride];
2294 dc= 0x01010101*((dc + 8)>>4);
2296 for(i=0; i<16; i++){
2297 ((uint32_t*)(src+i*stride))[0]=
2298 ((uint32_t*)(src+i*stride))[1]=
2299 ((uint32_t*)(src+i*stride))[2]=
2300 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 top-DC intra predictor: DC from the 16 top-row pixels only
 * ((dc+8)>>4); the summation loop is outside this excerpt. */
2304 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2310 dc= 0x01010101*((dc + 8)>>4);
2312 for(i=0; i<16; i++){
2313 ((uint32_t*)(src+i*stride))[0]=
2314 ((uint32_t*)(src+i*stride))[1]=
2315 ((uint32_t*)(src+i*stride))[2]=
2316 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC predictor with no available neighbours: fill with the mid-grey
 * value 128 in every byte. */
2320 void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
2323 for(i=0; i<16; i++){
2324 ((uint32_t*)(src+i*stride))[0]=
2325 ((uint32_t*)(src+i*stride))[1]=
2326 ((uint32_t*)(src+i*stride))[2]=
2327 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane (gradient) intra predictor, shared between H.264 and SVQ3.
 * H and V are the horizontal/vertical gradients estimated from the edge
 * pixels; `svq3` selects SVQ3's alternative rounding (5*(x/4)/16 with H/V
 * swapped) instead of H.264's (5*x+32)>>6.  Output pixels are clipped via
 * the ff_cropTbl clamp table. */
2331 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2334 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2335 const uint8_t * const src0 = src+7-stride;
2336 const uint8_t *src1 = src+8*stride-1;
2337 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2338 int H = src0[1] - src0[-1];
2339 int V = src1[0] - src2[ 0];
/* weighted gradient accumulation over the edge pixels, weights 2..8 */
2340 for(k=2; k<=8; ++k) {
2341 src1 += stride; src2 -= stride;
2342 H += k*(src0[k] - src0[-k]);
2343 V += k*(src1[0] - src2[ 0]);
2346 H = ( 5*(H/4) ) / 16;
2347 V = ( 5*(V/4) ) / 16;
2349 /* required for 100% accuracy */
2350 i = H; H = V; V = i;
2352 H = ( 5*H+32 ) >> 6;
2353 V = ( 5*V+32 ) >> 6;
2356 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2357 for(j=16; j>0; --j) {
2360 for(i=-16; i<0; i+=4) {
2361 src[16+i] = cm[ (b ) >> 5 ];
2362 src[17+i] = cm[ (b+ H) >> 5 ];
2363 src[18+i] = cm[ (b+2*H) >> 5 ];
2364 src[19+i] = cm[ (b+3*H) >> 5 ];
/* H.264 16x16 plane predictor: the compat helper with svq3 = 0. */
2371 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2372 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical intra predictor: copy the 8 top-row pixels (two
 * 32-bit words) into every row. */
2375 void ff_pred8x8_vertical_c(uint8_t *src, int stride){
2377 const uint32_t a= ((uint32_t*)(src-stride))[0];
2378 const uint32_t b= ((uint32_t*)(src-stride))[1];
2381 ((uint32_t*)(src+i*stride))[0]= a;
2382 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 (chroma) horizontal intra predictor: broadcast each row's left
 * neighbour across the row. */
2386 void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
2390 ((uint32_t*)(src+i*stride))[0]=
2391 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 DC predictor with no available neighbours: fill with mid-grey 128. */
2395 void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
2399 ((uint32_t*)(src+i*stride))[0]=
2400 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 left-DC predictor: separate 4-pixel DCs for the top half (dc0, from
 * left rows 0..3) and bottom half (dc2, from left rows 4..7). */
2404 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2410 dc0+= src[-1+i*stride];
2411 dc2+= src[-1+(i+4)*stride];
2413 dc0= 0x01010101*((dc0 + 2)>>2);
2414 dc2= 0x01010101*((dc2 + 2)>>2);
2417 ((uint32_t*)(src+i*stride))[0]=
2418 ((uint32_t*)(src+i*stride))[1]= dc0;
2421 ((uint32_t*)(src+i*stride))[0]=
2422 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 top-DC predictor: separate 4-pixel DCs for the left half (dc0, top
 * columns 0..3) and right half (dc1, top columns 4..7); each DC fills its
 * 4x8 column band. */
2426 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2432 dc0+= src[i-stride];
2433 dc1+= src[4+i-stride];
2435 dc0= 0x01010101*((dc0 + 2)>>2);
2436 dc1= 0x01010101*((dc1 + 2)>>2);
2439 ((uint32_t*)(src+i*stride))[0]= dc0;
2440 ((uint32_t*)(src+i*stride))[1]= dc1;
2443 ((uint32_t*)(src+i*stride))[0]= dc0;
2444 ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 DC predictor with all neighbours available: per the spec, each 4x4
 * quadrant gets its own DC — top-left from left+top (dc0), top-right from
 * top only (dc1), bottom-left from left only (dc2), bottom-right from the
 * combined dc1+dc2 sums (dc3). */
2449 void ff_pred8x8_dc_c(uint8_t *src, int stride){
2451 int dc0, dc1, dc2, dc3;
2455 dc0+= src[-1+i*stride] + src[i-stride];
2456 dc1+= src[4+i-stride];
2457 dc2+= src[-1+(i+4)*stride];
2459 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2460 dc0= 0x01010101*((dc0 + 4)>>3);
2461 dc1= 0x01010101*((dc1 + 2)>>2);
2462 dc2= 0x01010101*((dc2 + 2)>>2);
2465 ((uint32_t*)(src+i*stride))[0]= dc0;
2466 ((uint32_t*)(src+i*stride))[1]= dc1;
2469 ((uint32_t*)(src+i*stride))[0]= dc2;
2470 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 (chroma) plane intra predictor: same gradient scheme as the 16x16
 * plane predictor but with weights 2..4, scaling (17*x+16)>>5 and an 8x8
 * output raster; pixels clipped through ff_cropTbl. */
2474 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2477 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2478 const uint8_t * const src0 = src+3-stride;
2479 const uint8_t *src1 = src+4*stride-1;
2480 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2481 int H = src0[1] - src0[-1];
2482 int V = src1[0] - src2[ 0];
2483 for(k=2; k<=4; ++k) {
2484 src1 += stride; src2 -= stride;
2485 H += k*(src0[k] - src0[-k]);
2486 V += k*(src1[0] - src2[ 0]);
2488 H = ( 17*H+16 ) >> 5;
2489 V = ( 17*V+16 ) >> 5;
2491 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2492 for(j=8; j>0; --j) {
2495 src[0] = cm[ (b ) >> 5 ];
2496 src[1] = cm[ (b+ H) >> 5 ];
2497 src[2] = cm[ (b+2*H) >> 5 ];
2498 src[3] = cm[ (b+3*H) >> 5 ];
2499 src[4] = cm[ (b+4*H) >> 5 ];
2500 src[5] = cm[ (b+5*H) >> 5 ];
2501 src[6] = cm[ (b+6*H) >> 5 ];
2502 src[7] = cm[ (b+7*H) >> 5 ];
/* 8x8 luma (high-profile) predictor helpers.  SRC(x,y) addresses a pixel
 * of the current block; the LOAD_* macros pre-filter the neighbouring
 * edges with the spec's (1,2,1)/4 smoothing into locals l0..l7, t0..t15
 * and lt, honouring the has_topleft/has_topright availability flags.
 * NOTE(review): the '#define PL(y)' and '#define PT(x)' header lines are
 * not visible in this excerpt (only their bodies at lines 2509/2517); do
 * not insert text between the backslash-continued lines. */
2507 #define SRC(x,y) src[(x)+(y)*stride]
2509 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2510 #define PREDICT_8x8_LOAD_LEFT \
2511 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2512 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2513 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2514 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2517 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2518 #define PREDICT_8x8_LOAD_TOP \
2519 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2520 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2521 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2522 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2523 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2526 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2527 #define PREDICT_8x8_LOAD_TOPRIGHT \
2528 int t8, t9, t10, t11, t12, t13, t14, t15; \
2529 if(has_topright) { \
2530 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2531 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2532 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2534 #define PREDICT_8x8_LOAD_TOPLEFT \
2535 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* PREDICT_8x8_DC(v): fill all 8 rows of the block with the 32-bit value v
 * (two word stores per row).  pred8x8l_128_dc_c uses it with mid-grey
 * 0x80808080 when no neighbours are available. */
2537 #define PREDICT_8x8_DC(v) \
2539 for( y = 0; y < 8; y++ ) { \
2540 ((uint32_t*)src)[0] = \
2541 ((uint32_t*)src)[1] = v; \
2545 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2547 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma left-DC predictor: DC from the filtered left edge l0..l7
 * ((sum+4)>>3); the PREDICT_8x8_DC(dc) fill is outside this excerpt. */
2549 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2551 PREDICT_8x8_LOAD_LEFT;
2552 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma top-DC predictor: DC from the filtered top edge t0..t7. */
2555 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2557 PREDICT_8x8_LOAD_TOP;
2558 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC predictor: DC from both filtered edges, 16 samples,
 * rounded ((sum+8)>>4). */
2561 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2563 PREDICT_8x8_LOAD_LEFT;
2564 PREDICT_8x8_LOAD_TOP;
2565 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2566 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal predictor: each row y filled with the filtered left
 * pixel l<y>, broadcast to all 8 bytes via the local ROW macro. */
2569 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2571 PREDICT_8x8_LOAD_LEFT;
2572 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2573 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2574 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical predictor: row 0 is built from the filtered top edge
 * (stores not visible in this excerpt), then copied to rows 1..7 as one
 * 64-bit word per row. */
2577 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2580 PREDICT_8x8_LOAD_TOP;
2589 for( y = 1; y < 8; y++ )
2590 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left predictor: each anti-diagonal SRC(x,y) with
 * constant x+y gets (t[n] + 2*t[n+1] + t[n+2] + 2)>>2 from the filtered
 * top/top-right edge t0..t15; the final corner clamps with 3*t15. */
2592 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2594 PREDICT_8x8_LOAD_TOP;
2595 PREDICT_8x8_LOAD_TOPRIGHT;
2596 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2597 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2598 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2599 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2600 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2601 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2602 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2603 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2604 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2605 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2606 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2607 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2608 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2609 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2610 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right predictor: diagonals below the main one
 * come from the filtered left edge, the main diagonal from l0/lt/t0, and
 * diagonals above from the filtered top edge. */
2612 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2614 PREDICT_8x8_LOAD_TOP;
2615 PREDICT_8x8_LOAD_LEFT;
2616 PREDICT_8x8_LOAD_TOPLEFT;
2617 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2618 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2619 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2620 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2621 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2622 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2623 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2624 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2625 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2626 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2627 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2628 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2629 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2630 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2631 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right predictor: alternates 2-tap averages (even
 * rows) with 3-tap filters (odd rows) of the top edge, with the leftmost
 * columns filled from the filtered left edge. */
2634 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2636 PREDICT_8x8_LOAD_TOP;
2637 PREDICT_8x8_LOAD_LEFT;
2638 PREDICT_8x8_LOAD_TOPLEFT;
2639 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2640 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2641 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2642 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2643 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2644 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2645 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2646 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2647 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2648 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2649 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2650 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2651 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2652 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2653 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2654 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2655 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2656 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2657 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2658 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2659 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2660 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down predictor: alternates 2-tap averages and 3-tap
 * filters of the filtered left edge, with the top rows of the rightmost
 * columns filled from the top edge. */
2662 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2664 PREDICT_8x8_LOAD_TOP;
2665 PREDICT_8x8_LOAD_LEFT;
2666 PREDICT_8x8_LOAD_TOPLEFT;
2667 SRC(0,7)= (l6 + l7 + 1) >> 1;
2668 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2669 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2670 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2671 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2672 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2673 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2674 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2675 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2676 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2677 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2678 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2679 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2680 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2681 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2682 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2683 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2684 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2685 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2686 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2687 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2688 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left predictor: even rows use 2-tap averages and odd
 * rows 3-tap filters of the filtered top/top-right edge t0..t12. */
2690 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2692 PREDICT_8x8_LOAD_TOP;
2693 PREDICT_8x8_LOAD_TOPRIGHT;
2694 SRC(0,0)= (t0 + t1 + 1) >> 1;
2695 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2696 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2697 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2698 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2699 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2700 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2701 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2702 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2703 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2704 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2705 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2706 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2707 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2708 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2709 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2710 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2711 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2712 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2713 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2714 SRC(7,6)= (t10 + t11 + 1) >> 1;
2715 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up predictor: built entirely from the filtered left
 * edge l0..l7; the lower-right triangle past the edge is flat-filled with
 * l7.  Followed by cleanup of the PREDICT_8x8_* helper macros. */
2717 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2719 PREDICT_8x8_LOAD_LEFT;
2720 SRC(0,0)= (l0 + l1 + 1) >> 1;
2721 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2722 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2723 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2724 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2725 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2726 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2727 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2728 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2729 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2730 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2731 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2732 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2733 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2734 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2735 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2736 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2737 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* helper macros are local to the 8x8 predictors — undef to avoid leaks */
2739 #undef PREDICT_8x8_LOAD_LEFT
2740 #undef PREDICT_8x8_LOAD_TOP
2741 #undef PREDICT_8x8_LOAD_TOPLEFT
2742 #undef PREDICT_8x8_LOAD_TOPRIGHT
2743 #undef PREDICT_8x8_DC
/* One-direction (one reference list) motion compensation for a partition:
 * quarter-pel luma via qpix_op and eighth-pel chroma via chroma_op.  When
 * the MV reaches outside the reference picture, pixels are re-fetched
 * through ff_emulated_edge_mc into edge_emu_buffer first.
 * NOTE(review): several lines (emu flag setup, MBAFF branches, closing
 * braces) are missing from this excerpt. */
2749 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2750 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2751 int src_x_offset, int src_y_offset,
2752 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2753 MpegEncContext * const s = &h->s;
/* mv_cache holds quarter-pel MVs; low 2 bits select the qpel filter */
2754 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2755 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2756 const int luma_xy= (mx&3) + ((my&3)<<2);
2757 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2758 uint8_t * src_cb, * src_cr;
2759 int extra_width= h->emu_edge_width;
2760 int extra_height= h->emu_edge_height;
2762 const int full_mx= mx>>2;
2763 const int full_my= my>>2;
2764 const int pic_width = 16*s->mb_width;
2765 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2767 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
/* sub-pel filtering needs 3 extra pixels on each side of the block */
2770 if(mx&7) extra_width -= 3;
2771 if(my&7) extra_height -= 3;
2773 if( full_mx < 0-extra_width
2774 || full_my < 0-extra_height
2775 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2776 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2777 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2778 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2782 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2784 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2787 if(s->flags&CODEC_FLAG_GRAY) return;
2790 // chroma offset when predicting from a field of opposite parity
2791 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2792 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2794 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2795 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2798 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2799 src_cb= s->edge_emu_buffer;
2801 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2804 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2805 src_cr= s->edge_emu_buffer;
2807 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: list-0 prediction
 * with the "put" functions, then (for bi-prediction) list-1 averaged on
 * top using the "avg" functions. */
2810 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2811 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2812 int x_offset, int y_offset,
2813 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2814 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2815 int list0, int list1){
2816 MpegEncContext * const s = &h->s;
2817 qpel_mc_func *qpix_op= qpix_put;
2818 h264_chroma_mc_func chroma_op= chroma_put;
/* x/y offsets are in chroma-pel units; luma uses twice the offset */
2820 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2821 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2822 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2823 x_offset += 8*s->mb_x;
2824 y_offset += 8*(s->mb_y >> MB_MBAFF);
2827 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2828 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2829 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2830 qpix_op, chroma_op);
/* second direction averages into the first prediction */
2833 chroma_op= chroma_avg;
2837 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2838 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2839 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2840 qpix_op, chroma_op);
/* Weighted-prediction motion compensation for one partition.  For
 * bi-prediction both references are motion-compensated (the second into
 * the obmc_scratchpad temp buffers) and then blended: implicit weights
 * (use_weight == 2) use the implicit_weight table with weight1 = 64 -
 * weight0; explicit weights use the per-reference luma/chroma weight and
 * offset tables.  Uni-directional prediction applies a single weight/
 * offset in place. */
2844 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2845 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2846 int x_offset, int y_offset,
2847 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2848 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2849 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2850 int list0, int list1){
2851 MpegEncContext * const s = &h->s;
2853 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2854 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2855 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2856 x_offset += 8*s->mb_x;
2857 y_offset += 8*(s->mb_y >> MB_MBAFF);
2860 /* don't optimize for luma-only case, since B-frames usually
2861 * use implicit weights => chroma too. */
2862 uint8_t *tmp_cb = s->obmc_scratchpad;
2863 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2864 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2865 int refn0 = h->ref_cache[0][ scan8[n] ];
2866 int refn1 = h->ref_cache[1][ scan8[n] ];
2868 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2869 dest_y, dest_cb, dest_cr,
2870 x_offset, y_offset, qpix_put, chroma_put);
2871 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2872 tmp_y, tmp_cb, tmp_cr,
2873 x_offset, y_offset, qpix_put, chroma_put);
2875 if(h->use_weight == 2){
2876 int weight0 = h->implicit_weight[refn0][refn1];
2877 int weight1 = 64 - weight0;
2878 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2879 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2880 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit bi-directional weights: per-list weight plus summed offsets */
2882 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2883 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2884 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2885 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2886 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2887 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2888 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2889 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2890 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict from the single used list, then weight */
2893 int list = list1 ? 1 : 0;
2894 int refn = h->ref_cache[list][ scan8[n] ];
2895 Picture *ref= &h->ref_list[list][refn];
2896 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2897 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2898 qpix_put, chroma_put);
2900 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2901 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2902 if(h->use_weight_chroma){
2903 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2904 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2905 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2906 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to weighted or standard MC.  Weighted MC is used
 * for explicit weighting (use_weight == 1) or for implicit weighting when
 * the implicit weight differs from the neutral 32 (a plain average). */
2911 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2912 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2913 int x_offset, int y_offset,
2914 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2915 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2916 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2917 int list0, int list1){
2918 if((h->use_weight==2 && list0 && list1
2919 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2920 || h->use_weight==1)
2921 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2922 x_offset, y_offset, qpix_put, chroma_put,
2923 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2925 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2926 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Prefetch reference pixels for the MB 4 macroblocks ahead of the current
 * one, based on the MV of sub-block 0, to hide memory latency. */
2929 static inline void prefetch_motion(H264Context *h, int list){
2930 /* fetch pixels for estimated mv 4 macroblocks ahead
2931 * optimized for 64byte cache lines */
2932 MpegEncContext * const s = &h->s;
2933 const int refn = h->ref_cache[list][scan8[0]];
2935 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2936 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2937 uint8_t **src= h->ref_list[list][refn].data;
2938 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2939 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb/Cr planes are assumed contiguous: src[2]-src[1] is the plane pitch */
2940 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2941 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter-macroblock motion compensation dispatcher: walks the partition
 * tree (16x16 / 16x8 / 8x16, then per-8x8 sub-partitions 8x8 / 8x4 /
 * 4x8 / 4x4) and calls mc_part with the qpel/chroma function and weight
 * table entries matching each partition size.  prefetch_motion is issued
 * for list 0 before and list 1 after the work. */
2945 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2946 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2947 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2948 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2949 MpegEncContext * const s = &h->s;
2950 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2951 const int mb_type= s->current_picture.mb_type[mb_xy];
2953 assert(IS_INTER(mb_type));
2955 prefetch_motion(h, 0);
2957 if(IS_16X16(mb_type)){
2958 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2959 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2960 &weight_op[0], &weight_avg[0],
2961 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2962 }else if(IS_16X8(mb_type)){
2963 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2964 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2965 &weight_op[1], &weight_avg[1],
2966 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2967 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2968 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2969 &weight_op[1], &weight_avg[1],
2970 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2971 }else if(IS_8X16(mb_type)){
2972 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2973 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2974 &weight_op[2], &weight_avg[2],
2975 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2976 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2977 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2978 &weight_op[2], &weight_avg[2],
2979 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitioning: per-sub-MB loop (loop header not in this excerpt) */
2983 assert(IS_8X8(mb_type));
2986 const int sub_mb_type= h->sub_mb_type[i];
2988 int x_offset= (i&1)<<2;
2989 int y_offset= (i&2)<<1;
2991 if(IS_SUB_8X8(sub_mb_type)){
2992 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2993 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2994 &weight_op[3], &weight_avg[3],
2995 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2996 }else if(IS_SUB_8X4(sub_mb_type)){
2997 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2998 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2999 &weight_op[4], &weight_avg[4],
3000 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3001 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3002 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3003 &weight_op[4], &weight_avg[4],
3004 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3005 }else if(IS_SUB_4X8(sub_mb_type)){
3006 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3007 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3008 &weight_op[5], &weight_avg[5],
3009 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3010 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3011 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3012 &weight_op[5], &weight_avg[5],
3013 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3016 assert(IS_SUB_4X4(sub_mb_type));
3018 int sub_x_offset= x_offset + 2*(j&1);
3019 int sub_y_offset= y_offset + (j&2);
3020 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3021 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3022 &weight_op[6], &weight_avg[6],
3023 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3029 prefetch_motion(h, 1);
/* One-time initialization of the CAVLC VLC tables (coeff_token, total
 * zeros, run-before) from the static length/bits arrays; guarded by the
 * `done` flag so repeated decoder inits are cheap.  run7_vlc reuses row 6
 * of the run tables for runs >= 7. */
3032 static void decode_init_vlc(void){
3033 static int done = 0;
3039 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3040 &chroma_dc_coeff_token_len [0], 1, 1,
3041 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3044 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3045 &coeff_token_len [i][0], 1, 1,
3046 &coeff_token_bits[i][0], 1, 1, 1);
3050 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3051 &chroma_dc_total_zeros_len [i][0], 1, 1,
3052 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3054 for(i=0; i<15; i++){
3055 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3056 &total_zeros_len [i][0], 1, 1,
3057 &total_zeros_bits[i][0], 1, 1, 1);
3061 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3062 &run_len [i][0], 1, 1,
3063 &run_bits[i][0], 1, 1, 1);
3065 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3066 &run_len [6][0], 1, 1,
3067 &run_bits[6][0], 1, 1, 1);
3072 * Sets the intra prediction function pointers.
/* Wires the C reference implementations of every intra prediction mode
 * into the 4x4, 8x8-luma, 8x8-chroma and 16x16 dispatch tables. */
3074 static void init_pred_ptrs(H264Context *h){
3075 // MpegEncContext * const s = &h->s;
3077 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3078 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3079 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3080 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3081 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3082 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3083 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3084 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3085 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3086 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3087 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3088 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3090 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3091 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3092 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3093 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3094 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3095 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3096 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3097 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3098 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3099 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3100 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3101 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3103 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
3104 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
3105 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
3106 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
3107 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3108 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3109 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
3111 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
3112 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
3113 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
3114 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
3115 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3116 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3117 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/* Release all per-context lookup tables allocated by alloc_tables().
 * av_freep() frees and NULLs each pointer, so this is safe to call on a
 * partially-allocated context (e.g. from the alloc_tables failure path). */
3120 static void free_tables(H264Context *h){
3121     av_freep(&h->intra4x4_pred_mode);
3122     av_freep(&h->chroma_pred_mode_table);
3123     av_freep(&h->cbp_table);
3124     av_freep(&h->mvd_table[0]);
3125     av_freep(&h->mvd_table[1]);
3126     av_freep(&h->direct_table);
3127     av_freep(&h->non_zero_count);
3128     av_freep(&h->slice_table_base);
3129     av_freep(&h->top_borders[1]);
3130     av_freep(&h->top_borders[0]);
/* slice_table is an offset alias into slice_table_base (see alloc_tables),
 * so it must only be cleared, never freed */
3131     h->slice_table= NULL;
3133     av_freep(&h->mb2b_xy);
3134     av_freep(&h->mb2b8_xy);
3136     av_freep(&h->s.obmc_scratchpad);
/* Precompute the 8x8 dequantization table for every QP (0..51), scaled by
 * the PPS 8x8 scaling matrices.  If both scaling matrices are identical the
 * two lists share one buffer to save memory.  The table is stored transposed
 * when a non-reference IDCT (with transposed input layout) is in use. */
3139 static void init_dequant8_coeff_table(H264Context *h){
3141     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3142     h->dequant8_coeff[0] = h->dequant8_buffer[0];
3143     h->dequant8_coeff[1] = h->dequant8_buffer[1];
3145     for(i=0; i<2; i++ ){
/* identical matrices: alias list 1 onto list 0's buffer.
 * NOTE(review): an early exit after this aliasing is implied but not
 * visible in this chunk — confirm against the full source. */
3146         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3147             h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* qp = 6*shift + idx; dequant factor doubles every 6 QP steps */
3151         for(q=0; q<52; q++){
3152             int shift = ff_div6[q];
3153             int idx = ff_rem6[q];
3155                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3156                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3157                     h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization tables for every QP (0..51) and all six
 * scaling-matrix lists (intra/inter x Y/Cb/Cr).  Lists with identical
 * scaling matrices share a buffer.  Stored transposed for non-reference
 * IDCT implementations, as in init_dequant8_coeff_table(). */
3162 static void init_dequant4_coeff_table(H264Context *h){
3164     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3165     for(i=0; i<6; i++ ){
3166         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier list's buffer when the matrices match
 * (j presumably iterates over previous lists — loop header not visible here) */
3168             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3169                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
/* dequant factor doubles every 6 QP steps; +2 accounts for the 4x4 norm */
3176         for(q=0; q<52; q++){
3177             int shift = ff_div6[q] + 2;
3178             int idx = ff_rem6[q];
3180                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3181                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3182                     h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequantization tables for the current PPS: always the 4x4
 * tables, the 8x8 tables only when the 8x8 transform is enabled.  For
 * lossless (transform bypass) streams, QP 0 entries are forced to the
 * neutral value 1<<6 so dequantization becomes an identity. */
3187 static void init_dequant_tables(H264Context *h){
3189     init_dequant4_coeff_table(h);
3190     if(h->pps.transform_8x8_mode)
3191         init_dequant8_coeff_table(h);
3192     if(h->sps.transform_bypass){
3195                 h->dequant4_coeff[i][0][x] = 1<<6;
3196         if(h->pps.transform_8x8_mode)
3199                     h->dequant8_coeff[i][0][x] = 1<<6;
3206 * needs width/height
/* Allocate all per-picture-size lookup tables; needs width/height.
 * big_mb_num includes one extra macroblock row for out-of-frame accesses.
 * NOTE(review): CHECKED_ALLOCZ presumably jumps to a cleanup label that
 * calls free_tables() on failure — the label is not visible in this chunk. */
3208 static int alloc_tables(H264Context *h){
3209     MpegEncContext * const s = &h->s;
3210     const int big_mb_num= s->mb_stride * (s->mb_height+1);
3213     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
3215     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3216     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3217     CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3218     CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3219     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables (mvd, chroma pred mode, direct mode) */
3221     if( h->pps.cabac ) {
3222         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3223         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3224         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3225         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* 0xFF marks "no slice"; slice_table points past the guard row/column so
 * neighbour lookups above/left of the frame read the guard area */
3228     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
3229     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock index -> 4x4 (b) and 8x8 (b8) motion-grid index maps */
3231     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3232     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3233     for(y=0; y<s->mb_height; y++){
3234         for(x=0; x<s->mb_width; x++){
3235             const int mb_xy= x + y*s->mb_stride;
3236             const int b_xy = 4*x + 4*y*h->b_stride;
3237             const int b8_xy= 2*x + 2*y*h->b8_stride;
3239             h->mb2b_xy [mb_xy]= b_xy;
3240             h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() because linesize is unknown here */
3244     s->obmc_scratchpad = NULL;
3246     if(!h->dequant4_coeff[0])
3247         init_dequant_tables(h);
/* Initialization shared by decoder (and encoder) init paths: copy the
 * dimensions/codec id from the AVCodecContext and reset PPS scaling
 * matrices to flat (16 = no scaling, the spec default). */
3255 static void common_init(H264Context *h){
3256     MpegEncContext * const s = &h->s;
3258     s->width = s->avctx->width;
3259     s->height = s->avctx->height;
3260     s->codec_id= s->avctx->codec->id;
/* -1 marks the cached dequant tables as not matching any PPS yet */
3264     h->dequant_coeff_pps= -1;
3265     s->unrestricted_mv=1;
3266     s->decode=1; //FIXME
3268     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3269     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init callback for the H.264 decoder.  Sets MPV defaults, the
 * output pixel format, and detects AVC configuration-record ("avcC")
 * extradata by its leading version byte 1 (handling continues past the
 * end of this chunk). */
3272 static int decode_init(AVCodecContext *avctx){
3273     H264Context *h= avctx->priv_data;
3274     MpegEncContext * const s = &h->s;
3276     MPV_decode_defaults(s);
3281     s->out_format = FMT_H264;
3282     s->workaround_bugs= avctx->workaround_bugs;
3285 //    s->decode_mb= ff_h263_decode_mb;
3287     avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata starting with byte 1 is an avcC record (MP4-style),
 * as opposed to Annex-B start-code prefixed parameter sets */
3291     if(avctx->extradata_size > 0 && avctx->extradata &&
3292        *(char *)avctx->extradata == 1){
/* Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset table (pixel offsets of each 4x4 block inside the current
 * macroblock; entries 24+ are the field-coded variants with doubled row
 * stride), and lazily allocate the bipred scratch buffer. */
3302 static int frame_start(H264Context *h){
3303     MpegEncContext * const s = &h->s;
3306     if(MPV_frame_start(s, s->avctx) < 0)
3308     ff_er_frame_start(s);
3310     assert(s->linesize && s->uvlinesize);
/* luma: scan8 maps block index -> cache position; &7 is the column, >>3 the row */
3312     for(i=0; i<16; i++){
3313         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3314         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma: entries 16..23 (Cb/Cr), plus field variants at 24+16.. */
3317         h->block_offset[16+i]=
3318         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3319         h->block_offset[24+16+i]=
3320         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3323     /* can't be in alloc_tables because linesize isn't known there.
3324      * FIXME: redo bipred weight to not require extra buffer? */
3325     if(!s->obmc_scratchpad)
3326         s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3328     /* some macroblocks will be accessed before they're available */
3330     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3332 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Save the bottom row of the just-decoded macroblock into top_borders[0]
 * and its right column into left_border, so the deblocking filter of the
 * neighbouring macroblocks can still read the unfiltered pixels.
 * src pointers enter pointing at the MB's top-left; they are rewound one
 * chroma line first (luma rewind presumably on a line not visible here). */
3336 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3337     MpegEncContext * const s = &h->s;
3341     src_cb -= uvlinesize;
3342     src_cr -= uvlinesize;
3344     // There are two lines saved, the line above the the top macroblock of a pair,
3345     // and the line above the bottom macroblock
/* corner pixel comes from the previously saved top border */
3346     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3347     for(i=1; i<17; i++){
3348         h->left_border[i]= src_y[15+i*  linesize];
/* save the MB's last luma row (16 bytes as two uint64 stores) */
3351     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3352     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3354     if(!(s->flags&CODEC_FLAG_GRAY)){
/* chroma borders live after the 16 luma bytes: Cb at +16, Cr at +24 */
3355         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3356         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3358             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3359             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3361         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3362         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved unfiltered border pixels with
 * the picture, around intra prediction of a macroblock: intra prediction
 * must see unfiltered neighbours, while the picture holds filtered ones.
 * Entries swapped with constant 1 are always exchanged; those gated on
 * 'xchg' are only exchanged in one direction. */
3366 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3367     MpegEncContext * const s = &h->s;
3370     int deblock_left = (s->mb_x > 0);
3371     int deblock_top =  (s->mb_y > 0);
/* step back to the row above / column left of the macroblock */
3373     src_y  -=   linesize + 1;
3374     src_cb -= uvlinesize + 1;
3375     src_cr -= uvlinesize + 1;
3377 #define XCHG(a,b,t,xchg)\
/* left column: skip row 0 when there is no top neighbour */
3384         for(i = !deblock_top; i<17; i++){
3385             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
/* top row, plus the top-left 8 bytes of the right neighbour for
 * top-right prediction availability */
3390         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3391         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3392         if(s->mb_x+1 < s->mb_width){
3393             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3397     if(!(s->flags&CODEC_FLAG_GRAY)){
3399             for(i = !deblock_top; i<9; i++){
3400                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3401                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3405             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3406             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock *pair* (32 luma rows / 16 chroma rows).  top_borders[0] gets
 * the pair's second-to-last row, top_borders[1] the last row, so both the
 * top and bottom MB of the pair below can find their "line above". */
3411 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3412     MpegEncContext * const s = &h->s;
3415     src_y  -= 2 *   linesize;
3416     src_cb -= 2 * uvlinesize;
3417     src_cr -= 2 * uvlinesize;
3419     // There are two lines saved, the line above the the top macroblock of a pair,
3420     // and the line above the bottom macroblock
/* two corner pixels (one per saved line), then the 32-row right column */
3421     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3422     h->left_border[1]= h->top_borders[1][s->mb_x][15];
3423     for(i=2; i<34; i++){
3424         h->left_border[i]= src_y[15+i*  linesize];
3427     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3428     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3429     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3430     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3432     if(!(s->flags&CODEC_FLAG_GRAY)){
/* chroma: Cb at offset 16, Cr at offset 24 inside each top_borders row */
3433         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3434         h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3435         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
3436         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3437         for(i=2; i<18; i++){
3438             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3439             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3441         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3442         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3443         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3444         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): swap/restore the saved unfiltered
 * borders of a macroblock pair.  deblock_top requires mb_y > 1 because a
 * pair occupies two MB rows. */
3448 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3449     MpegEncContext * const s = &h->s;
3452     int deblock_left = (s->mb_x > 0);
3453     int deblock_top =  (s->mb_y > 1);
3455     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* step back two rows (the pair's two saved lines) and one column */
3457     src_y  -= 2 *   linesize + 1;
3458     src_cb -= 2 * uvlinesize + 1;
3459     src_cr -= 2 * uvlinesize + 1;
3461 #define XCHG(a,b,t,xchg)\
/* left column of the pair: skip the 2 top rows when no top neighbour */
3468         for(i = (!deblock_top)<<1; i<34; i++){
3469             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
/* both saved top lines, plus the right neighbour's corners for
 * top-right prediction availability */
3474         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3475         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3476         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3477         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3478         if(s->mb_x+1 < s->mb_width){
3479             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3480             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3484     if(!(s->flags&CODEC_FLAG_GRAY)){
3486             for(i = (!deblock_top) << 1; i<18; i++){
3487                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3488                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3492             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3493             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3494             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3495             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Core macroblock reconstruction: intra prediction or motion compensation,
 * residual IDCT+add, and deblocking.  'simple' (compile-time constant via
 * av_always_inline) removes support for MBAFF, PCM, SVQ3 and gray decoding,
 * producing the fast path used by hl_decode_mb_simple(). */
3500 static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
3501     MpegEncContext * const s = &h->s;
3502     const int mb_x= s->mb_x;
3503     const int mb_y= s->mb_y;
3504     const int mb_xy= mb_x + mb_y*s->mb_stride;
3505     const int mb_type= s->current_picture.mb_type[mb_xy];
3506     uint8_t  *dest_y, *dest_cb, *dest_cr;
3507     int linesize, uvlinesize /*dct_offset*/;
3509     int *block_offset = &h->block_offset[0];
3510     const unsigned int bottom = mb_y & 1;
3511     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
3512     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3513     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers of this macroblock in the current picture */
3515     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3516     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3517     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3519     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3520     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock (MBAFF): double the stride, use the field block_offset
 * table, and for the bottom MB of a pair shift dest up to the second line */
3522     if (!simple && MB_FIELD) {
3523         linesize   = h->mb_linesize = s->linesize * 2;
3524         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3525         block_offset = &h->block_offset[24];
3526         if(mb_y&1){ //FIXME move out of this func?
3527             dest_y -= s->linesize*15;
3528             dest_cb-= s->uvlinesize*7;
3529             dest_cr-= s->uvlinesize*7;
/* remap ref_cache indices to field reference numbering (16+2*ref^parity) */
3533         for(list=0; list<h->list_count; list++){
3534             if(!USES_LIST(mb_type, list))
3536             if(IS_16X16(mb_type)){
3537                 int8_t *ref = &h->ref_cache[list][scan8[0]];
3538                 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3540                 for(i=0; i<16; i+=4){
3541                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3542                     int ref = h->ref_cache[list][scan8[i]];
3544                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3550         linesize   = h->mb_linesize = s->linesize;
3551         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3552 //        dct_offset = s->linesize * 16;
/* choose residual-add functions: bypass = plain pixel add (lossless),
 * otherwise 8x8 or 4x4 IDCT depending on the transform size */
3555     if(transform_bypass){
3557         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3558     }else if(IS_8x8DCT(mb_type)){
3559         idct_dc_add = s->dsp.h264_idct8_dc_add;
3560         idct_add = s->dsp.h264_idct8_add;
3562         idct_dc_add = s->dsp.h264_idct_dc_add;
3563         idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: swap in the unfiltered borders of the pair above before
 * prediction (only once per pair — skipped for the bottom MB when the top
 * MB was also intra, since it already did the swap) */
3566     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3567        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3568         int mbt_y = mb_y&~1;
3569         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3570         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3571         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3572         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* PCM macroblock: raw samples, copied straight from h->mb to the picture */
3575     if (!simple && IS_INTRA_PCM(mb_type)) {
3578         // The pixels are stored in h->mb array in the same order as levels,
3579         // copy them in output in the correct order.
3580         for(i=0; i<16; i++) {
3581             for (y=0; y<4; y++) {
3582                 for (x=0; x<4; x++) {
3583                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3587         for(i=16; i<16+4; i++) {
3588             for (y=0; y<4; y++) {
3589                 for (x=0; x<4; x++) {
3590                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3594         for(i=20; i<20+4; i++) {
3595             for (y=0; y<4; y++) {
3596                 for (x=0; x<4; x++) {
3597                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra macroblock: swap in unfiltered borders, run the predictors */
3602         if(IS_INTRA(mb_type)){
3603             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3604                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3606             if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3607                 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3608                 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
/* intra 4x4 / 8x8: predict each luma block, then add its residual
 * (dc-only fast path when only the DC coefficient is non-zero) */
3611             if(IS_INTRA4x4(mb_type)){
3612                 if(simple || !s->encoding){
3613                     if(IS_8x8DCT(mb_type)){
3614                         for(i=0; i<16; i+=4){
3615                             uint8_t * const ptr= dest_y + block_offset[i];
3616                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3617                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
3618                             h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3619                                                      (h->topright_samples_available<<i)&0x4000, linesize);
3621                                 if(nnz == 1 && h->mb[i*16])
3622                                     idct_dc_add(ptr, h->mb + i*16, linesize);
3624                                     idct_add(ptr, h->mb + i*16, linesize);
3628                     for(i=0; i<16; i++){
3629                         uint8_t * const ptr= dest_y + block_offset[i];
3631                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* down-left/vertical-left need the top-right samples; when unavailable,
 * replicate the rightmost top pixel (spec-mandated padding) */
3634                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3635                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3636                             assert(mb_y || linesize <= block_offset[i]);
3637                             if(!topright_avail){
3638                                 tr= ptr[3 - linesize]*0x01010101;
3639                                 topright= (uint8_t*) &tr;
3641                                 topright= ptr + 4 - linesize;
3645                         h->pred4x4[ dir ](ptr, topright, linesize);
3646                         nnz = h->non_zero_count_cache[ scan8[i] ];
3649                                 if(nnz == 1 && h->mb[i*16])
3650                                     idct_dc_add(ptr, h->mb + i*16, linesize);
3652                                     idct_add(ptr, h->mb + i*16, linesize);
/* non-H.264 path: SVQ3 shares this decoder but has its own IDCT */
3654                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: whole-MB prediction, then luma DC hadamard dequant/IDCT */
3659                 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3661                     if(!transform_bypass)
3662                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3664                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3666             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3667                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* inter macroblock: motion compensation with optional weighted prediction */
3669             hl_motion(h, dest_y, dest_cb, dest_cr,
3670                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3671                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3672                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-intra4x4 MBs (intra4x4 already added it above) */
3676         if(!IS_INTRA4x4(mb_type)){
3678             if(IS_INTRA16x16(mb_type)){
3679                 for(i=0; i<16; i++){
3680                     if(h->non_zero_count_cache[ scan8[i] ])
3681                         idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3682                     else if(h->mb[i*16])
3683                         idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3686                 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3687                 for(i=0; i<16; i+=di){
3688                     int nnz = h->non_zero_count_cache[ scan8[i] ];
3690                         if(nnz==1 && h->mb[i*16])
3691                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3693                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3698                 for(i=0; i<16; i++){
3699                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3700                         uint8_t * const ptr= dest_y + block_offset[i];
3701                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: dequant/IDCT the 2x2 chroma DC first, then add blocks */
3707         if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3708             uint8_t *dest[2] = {dest_cb, dest_cr};
3709             if(transform_bypass){
3710                 idct_add = idct_dc_add = s->dsp.add_pixels4;
3712                 idct_add = s->dsp.h264_idct_add;
3713                 idct_dc_add = s->dsp.h264_idct_dc_add;
3714                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3715                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
/* blocks 16..19 are Cb, 20..23 Cr; (i&4)>>2 selects the plane */
3718                 for(i=16; i<16+8; i++){
3719                     if(h->non_zero_count_cache[ scan8[i] ])
3720                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3721                     else if(h->mb[i*16])
3722                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3725                 for(i=16; i<16+8; i++){
3726                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3727                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3728                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: in MBAFF mode the filter runs once per pair, after the
 * bottom MB; otherwise filter this MB immediately */
3734     if(h->deblocking_filter) {
3735         if (!simple && FRAME_MBAFF) {
3736             //FIXME try deblocking one mb at a time?
3737             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3738             const int mb_y = s->mb_y - 1;
3739             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3740             const int mb_xy= mb_x + mb_y*s->mb_stride;
3741             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3742             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3743             if (!bottom) return;
3744             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3745             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3746             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* restore filtered borders if an intra MB swapped them, else back them up */
3748             if(IS_INTRA(mb_type_top | mb_type_bottom))
3749                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3751             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3755             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3756             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3757             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3758             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3761             tprintf(h->s.avctx, "call mbaff filter_mb\n");
3762             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3763             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3764             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3766             tprintf(h->s.avctx, "call filter_mb\n");
3767             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3768             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3769             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3775 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path macroblock reconstruction: hl_decode_mb_internal() with
 * simple=1 compiles out MBAFF/PCM/SVQ3/gray handling (av_always_inline). */
3777 static void hl_decode_mb_simple(H264Context *h){
3778     hl_decode_mb_internal(h, 1);
3782 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full-featured macroblock reconstruction (interlacing, PCM, gray, SVQ3);
 * av_noinline keeps this rarely-taken path out of the hot code. */
3784 static void av_noinline hl_decode_mb_complex(H264Context *h){
3785     hl_decode_mb_internal(h, 0);
/* Reconstruct the current macroblock, dispatching to the simple fast path
 * unless any feature requiring the complex path is in effect (MBAFF, field
 * decoding, PCM, non-H.264 codec, gray-only decoding, or encoding). */
3788 static void hl_decode_mb(H264Context *h){
3789     MpegEncContext * const s = &h->s;
3790     const int mb_x= s->mb_x;
3791     const int mb_y= s->mb_y;
3792     const int mb_xy= mb_x + mb_y*s->mb_stride;
3793     const int mb_type= s->current_picture.mb_type[mb_xy];
3794     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
3800         hl_decode_mb_complex(h);
3801     else hl_decode_mb_simple(h);
3805 * fills the default_ref_list.
/* Build the default (unreordered) reference picture lists.
 * P slices: short-term refs in decoding order, then long-term refs.
 * B slices: short-term refs sorted by POC relative to the current picture
 * (L0 walks backwards from it, L1 forwards), then long-term refs; the
 * first two L1 entries are swapped if L0 and L1 would be identical. */
3807 static int fill_default_ref_list(H264Context *h){
3808     MpegEncContext * const s = &h->s;
3810     int smallest_poc_greater_than_current = -1;
3811     Picture sorted_short_ref[32];
3813     if(h->slice_type==B_TYPE){
3817         /* sort frame according to poc in B slice */
/* selection sort by ascending POC; remembers where future refs start */
3818         for(out_i=0; out_i<h->short_ref_count; out_i++){
3820             int best_poc=INT_MAX;
3822             for(i=0; i<h->short_ref_count; i++){
3823                 const int poc= h->short_ref[i]->poc;
3824                 if(poc > limit && poc < best_poc){
3830             assert(best_i != INT_MIN);
3833             sorted_short_ref[out_i]= *h->short_ref[best_i];
3834             tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3835             if (-1 == smallest_poc_greater_than_current) {
3836                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3837                     smallest_poc_greater_than_current = out_i;
3843     if(s->picture_structure == PICT_FRAME){
3844         if(h->slice_type==B_TYPE){
3846             tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3848             // find the largest poc
/* L0 (step=+1) walks past refs descending, L1 (step=-1) future refs ascending;
 * j wraps around the pivot when one side is exhausted */
3849             for(list=0; list<2; list++){
3852                 int step= list ? -1 : 1;
3854                 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3855                     while(j<0 || j>= h->short_ref_count){
3856                         if(j != -99 && step == (list ? -1 : 1))
3859                         j= smallest_poc_greater_than_current + (step>>1);
/* reference==3 means both fields are reference (frame reference) */
3861                     if(sorted_short_ref[j].reference != 3) continue;
3862                     h->default_ref_list[list][index  ]= sorted_short_ref[j];
3863                     h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* long-term refs appended in ascending long-term index order */
3866                 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3867                     if(h->long_ref[i] == NULL) continue;
3868                     if(h->long_ref[i]->reference != 3) continue;
3870                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
3871                     h->default_ref_list[ list ][index++].pic_id= i;;
3874                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3875                     // swap the two first elements of L1 when
3876                     // L0 and L1 are identical
3877                     Picture temp= h->default_ref_list[1][0];
3878                     h->default_ref_list[1][0] = h->default_ref_list[1][1];
3879                     h->default_ref_list[1][1] = temp;
3882                 if(index < h->ref_count[ list ])
3883                     memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P slice: short-term refs in decoding order, then long-term */
3887             for(i=0; i<h->short_ref_count; i++){
3888                 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3889                 h->default_ref_list[0][index  ]= *h->short_ref[i];
3890                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3892             for(i = 0; i < 16; i++){
3893                 if(h->long_ref[i] == NULL) continue;
3894                 if(h->long_ref[i]->reference != 3) continue;
3895                 h->default_ref_list[0][index  ]= *h->long_ref[i];
3896                 h->default_ref_list[0][index++].pic_id= i;;
3898             if(index < h->ref_count[0])
3899                 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3902         if(h->slice_type==B_TYPE){
3904             //FIXME second field balh
3908     for (i=0; i<h->ref_count[0]; i++) {
3909         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3911     if(h->slice_type==B_TYPE){
3912         for (i=0; i<h->ref_count[1]; i++) {
3913             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3920 static void print_short_term(H264Context *h);
3921 static void print_long_term(H264Context *h);
/* Parse ref_pic_list_reordering() from the slice header (H.264 spec 7.3.3.1
 * / 8.2.4.3) and apply it: for each reordering command, locate the named
 * short- or long-term reference and move it to the current index, shifting
 * the rest of the list down.  Returns 0 on success, -1 on bitstream error. */
3923 static int decode_ref_pic_list_reordering(H264Context *h){
3924     MpegEncContext * const s = &h->s;
3927     print_short_term(h);
3929     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
/* start from the default lists; reorder only if the flag is set */
3931     for(list=0; list<h->list_count; list++){
3932         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3934         if(get_bits1(&s->gb)){
3935             int pred= h->curr_pic_num;
3937             for(index=0; ; index++){
3938                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3939                 unsigned int pic_id;
3941                 Picture *ref = NULL;
/* idc==3 terminates the reordering command list */
3943                 if(reordering_of_pic_nums_idc==3)
3946                 if(index >= h->ref_count[list]){
3947                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3951                 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term ref addressed by a +/- frame_num difference */
3952                     if(reordering_of_pic_nums_idc<2){
3953                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3955                         if(abs_diff_pic_num >= h->max_pic_num){
3956                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3960                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3961                         else                                pred+= abs_diff_pic_num;
/* modular wrap within [0, max_pic_num) — max_pic_num is a power of two */
3962                         pred &= h->max_pic_num - 1;
3964                         for(i= h->short_ref_count-1; i>=0; i--){
3965                             ref = h->short_ref[i];
3966                             assert(ref->reference == 3);
3967                             assert(!ref->long_ref);
3968                             if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3972                             ref->pic_id= ref->frame_num;
/* idc 2: long-term ref addressed directly by long_term_pic_idx */
3974                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3976                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3979                         ref = h->long_ref[pic_id];
3981                             ref->pic_id= pic_id;
3982                             assert(ref->reference == 3);
3983                             assert(ref->long_ref);
3991                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3992                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift entries down to index, then insert the selected ref there */
3994                         for(i=index; i+1<h->ref_count[list]; i++){
3995                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3998                         for(; i > index; i--){
3999                             h->ref_list[list][i]= h->ref_list[list][i-1];
4001                         h->ref_list[list][index]= *ref;
4004                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any remaining empty entries with the current picture so
 * later stages never dereference a NULL frame */
4010     for(list=0; list<h->list_count; list++){
4011         for(index= 0; index < h->ref_count[list]; index++){
4012             if(!h->ref_list[list][index].data[0])
4013                 h->ref_list[list][index]= s->current_picture;
4017     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4018         direct_dist_scale_factor(h);
4019     direct_ref_list_init(h);
/* Derive per-field reference entries for MBAFF decoding: for each frame
 * reference i, entries 16+2*i (top field) and 16+2*i+1 (bottom field) get
 * doubled linesizes and the bottom field's data offset by one line.  The
 * weighted/implicit prediction tables are replicated the same way. */
4023 static void fill_mbaff_ref_list(H264Context *h){
4025     for(list=0; list<2; list++){ //FIXME try list_count
4026         for(i=0; i<h->ref_count[list]; i++){
4027             Picture *frame = &h->ref_list[list][i];
4028             Picture *field = &h->ref_list[list][16+2*i];
4031                 field[0].linesize[j] <<= 1;
4032             field[1] = field[0];
/* bottom field starts one (frame) line below the top field */
4034                 field[1].data[j] += frame->linesize[j];
4036             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4037             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4039                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4040                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
4044     for(j=0; j<h->ref_count[1]; j++){
4045         for(i=0; i<h->ref_count[0]; i++)
4046             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4047         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4048         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header (H.264 spec 7.3.3.2):
 * explicit luma/chroma weights and offsets per reference.  References
 * without a weight flag get the neutral default (1<<denom, offset 0).
 * h->use_weight / use_weight_chroma record whether any non-default
 * weight was seen.  L1 is parsed only for B slices. */
4052 static int pred_weight_table(H264Context *h){
4053     MpegEncContext * const s = &h->s;
4055     int luma_def, chroma_def;
4058     h->use_weight_chroma= 0;
4059     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4060     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4061     luma_def = 1<<h->luma_log2_weight_denom;
4062     chroma_def = 1<<h->chroma_log2_weight_denom;
4064     for(list=0; list<2; list++){
4065         for(i=0; i<h->ref_count[list]; i++){
4066             int luma_weight_flag, chroma_weight_flag;
4068             luma_weight_flag= get_bits1(&s->gb);
4069             if(luma_weight_flag){
4070                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4071                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4072                 if(   h->luma_weight[list][i] != luma_def
4073                    || h->luma_offset[list][i] != 0)
4076                 h->luma_weight[list][i]= luma_def;
4077                 h->luma_offset[list][i]= 0;
4080             chroma_weight_flag= get_bits1(&s->gb);
4081             if(chroma_weight_flag){
/* one weight/offset pair per chroma plane (Cb, Cr) */
4084                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4085                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4086                     if(   h->chroma_weight[list][i][j] != chroma_def
4087                        || h->chroma_offset[list][i][j] != 0)
4088                         h->use_weight_chroma= 1;
4093                     h->chroma_weight[list][i][j]= chroma_def;
4094                     h->chroma_offset[list][i][j]= 0;
4098         if(h->slice_type != B_TYPE) break;
4100     h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute the implicit weighted-prediction table for B slices
 * (H.264 spec 8.4.2.3.2): weights derived from POC distances of each
 * (ref0, ref1) pair.  A pair equidistant from the current picture, or one
 * with an out-of-range scale factor, falls back to equal weights (32/32).
 * Fast-exits with weighting disabled for the symmetric single-ref case. */
4104 static void implicit_weight_table(H264Context *h){
4105     MpegEncContext * const s = &h->s;
4107     int cur_poc = s->current_picture_ptr->poc;
4109     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
4110        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4112         h->use_weight_chroma= 0;
/* use_weight==2 marks implicit (as opposed to explicit) weighting */
4117     h->use_weight_chroma= 2;
4118     h->luma_log2_weight_denom= 5;
4119     h->chroma_log2_weight_denom= 5;
4121     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4122         int poc0 = h->ref_list[0][ref0].poc;
4123         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4124             int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor per the spec's fixed-point derivation */
4125             int td = av_clip(poc1 - poc0, -128, 127);
4127                 int tb = av_clip(cur_poc - poc0, -128, 127);
4128                 int tx = (16384 + (FFABS(td) >> 1)) / td;
4129                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4130                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4131                     h->implicit_weight[ref0][ref1] = 32;
4133                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4135                 h->implicit_weight[ref0][ref1] = 32;
/**
 * Marks a picture as no longer referenced.
 * NOTE(review): body is truncated in this excerpt — presumably it clears
 * pic->reference unless the picture is still pending output in the
 * delayed lists; confirm against the full source.
 */
4140 static inline void unreference_pic(H264Context *h, Picture *pic){
4143     if(pic == h->delayed_output_pic)
4146         for(i = 0; h->delayed_pic[i]; i++)
4147             if(pic == h->delayed_pic[i]){
 * instantaneous decoder refresh: drop every short-term and long-term
 * reference picture, as required when an IDR slice is decoded.
4157 static void idr(H264Context *h){
     /* release all 16 possible long-term reference slots */
4160     for(i=0; i<16; i++){
4161         if (h->long_ref[i] != NULL) {
4162             unreference_pic(h, h->long_ref[i]);
4163             h->long_ref[i]= NULL;
4166     h->long_ref_count=0;
     /* release all short-term references */
4168     for(i=0; i<h->short_ref_count; i++){
4169         unreference_pic(h, h->short_ref[i]);
4170         h->short_ref[i]= NULL;
4172     h->short_ref_count=0;
/* forget old pics after a seek: drop delayed-output state and the
 * reference flag of the in-flight picture so nothing stale is output.
 * Installed as the codec's flush() callback. */
4176 static void flush_dpb(AVCodecContext *avctx){
4177     H264Context *h= avctx->priv_data;
4179     for(i=0; i<16; i++) {
4180         if(h->delayed_pic[i])
4181             h->delayed_pic[i]->reference= 0;
4182         h->delayed_pic[i]= NULL;
4184     if(h->delayed_output_pic)
4185         h->delayed_output_pic->reference= 0;
4186     h->delayed_output_pic= NULL;
     /* NOTE(review): excerpt likely omits an idr(h) / buffer flush call
      * between these lines — confirm against the full source */
4188     if(h->s.current_picture_ptr)
4189         h->s.current_picture_ptr->reference= 0;
 * Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array.
 * @return the removed picture or NULL if an error occurs
4196 static Picture * remove_short(H264Context *h, int frame_num){
4197     MpegEncContext * const s = &h->s;
4200     if(s->avctx->debug&FF_DEBUG_MMCO)
4201         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
     /* linear search — short_ref_count is at most 16, so this is cheap */
4203     for(i=0; i<h->short_ref_count; i++){
4204         Picture *pic= h->short_ref[i];
4205         if(s->avctx->debug&FF_DEBUG_MMCO)
4206             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4207         if(pic->frame_num == frame_num){
4208             h->short_ref[i]= NULL;
             /* close the gap left by the removed entry */
4209             memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4210             h->short_ref_count--;
 * Clears long-term reference slot i.
 * @return the removed picture or NULL if an error occurs (slot empty)
4221 static Picture * remove_long(H264Context *h, int i){
4224     pic= h->long_ref[i];
4225     h->long_ref[i]= NULL;
     /* only decrement the count when the slot was actually occupied */
4226     if(pic) h->long_ref_count--;
 * print short term list (debug aid, only active with FF_DEBUG_MMCO)
4234 static void print_short_term(H264Context *h) {
4236     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4237         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4238         for(i=0; i<h->short_ref_count; i++){
4239             Picture *pic= h->short_ref[i];
4240             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * print long term list (debug aid, only active with FF_DEBUG_MMCO);
 * iterates all 16 slots, skipping empty ones (NULL check elided in excerpt)
4248 static void print_long_term(H264Context *h) {
4250     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4251         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4252         for(i = 0; i < 16; i++){
4253             Picture *pic= h->long_ref[i];
4255                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * Executes the reference picture marking (memory management control operations),
 * i.e. applies the decoded MMCO list to the short/long-term reference sets
 * (H.264 8.2.5). With no MMCOs, sliding-window marking inserts the current
 * picture at the head of the short-term list.
4264 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4265     MpegEncContext * const s = &h->s;
4267     int current_is_long=0;
4270     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4271         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4273     for(i=0; i<mmco_count; i++){
4274         if(s->avctx->debug&FF_DEBUG_MMCO)
4275             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4277         switch(mmco[i].opcode){
         /* mark a short-term picture as unused for reference */
4278         case MMCO_SHORT2UNUSED:
4279             pic= remove_short(h, mmco[i].short_frame_num);
4281                 unreference_pic(h, pic);
4282             else if(s->avctx->debug&FF_DEBUG_MMCO)
4283                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
         /* move a short-term picture into a long-term slot; any previous
          * occupant of that slot is released first */
4285         case MMCO_SHORT2LONG:
4286             pic= remove_long(h, mmco[i].long_index);
4287             if(pic) unreference_pic(h, pic);
4289             h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4290             if (h->long_ref[ mmco[i].long_index ]){
4291                 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4292                 h->long_ref_count++;
4295         case MMCO_LONG2UNUSED:
4296             pic= remove_long(h, mmco[i].long_index);
4298                 unreference_pic(h, pic);
4299             else if(s->avctx->debug&FF_DEBUG_MMCO)
4300                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
         /* MMCO_LONG: store the *current* picture long-term (case label
          * itself not visible in this excerpt) */
4303             pic= remove_long(h, mmco[i].long_index);
4304             if(pic) unreference_pic(h, pic);
4306             h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4307             h->long_ref[ mmco[i].long_index ]->long_ref=1;
4308             h->long_ref_count++;
4312         case MMCO_SET_MAX_LONG:
4313             assert(mmco[i].long_index <= 16);
4314             // just remove the long term which index is greater than new max
4315             for(j = mmco[i].long_index; j<16; j++){
4316                 pic = remove_long(h, j);
4317                 if (pic) unreference_pic(h, pic);
         /* MMCO_RESET path: drop every reference (label elided in excerpt) */
4321             while(h->short_ref_count){
4322                 pic= remove_short(h, h->short_ref[0]->frame_num);
4323                 if(pic) unreference_pic(h, pic);
4325             for(j = 0; j < 16; j++) {
4326                 pic= remove_long(h, j);
4327                 if(pic) unreference_pic(h, pic);
     /* sliding-window: prepend the current picture to the short-term list
      * unless an MMCO already stored it long-term */
4334     if(!current_is_long){
4335         pic= remove_short(h, s->current_picture_ptr->frame_num);
4337             unreference_pic(h, pic);
4338             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4341         if(h->short_ref_count)
4342             memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4344         h->short_ref[0]= s->current_picture_ptr;
4345         h->short_ref[0]->long_ref=0;
4346         h->short_ref_count++;
4349     print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices only no_output_of_prior_pics / long_term_reference flags
 * exist; otherwise an optional adaptive MMCO list is read, and when the
 * DPB is full a sliding-window SHORT2UNUSED operation is synthesized.
 */
4354 static int decode_ref_pic_marking(H264Context *h){
4355     MpegEncContext * const s = &h->s;
4358     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4359         s->broken_link= get_bits1(&s->gb) -1;
4360         h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4361         if(h->mmco[0].long_index == -1)
4364             h->mmco[0].opcode= MMCO_LONG;
4368         if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4369             for(i= 0; i<MAX_MMCO_COUNT; i++) {
4370                 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4372                 h->mmco[i].opcode= opcode;
4373                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                 /* difference_of_pic_nums_minus1 → absolute frame_num,
                  * wrapped to log2_max_frame_num bits */
4374                     h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4375 /*                    if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4376                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4380                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4381                     unsigned int long_index= get_ue_golomb(&s->gb);
                 /* long-term indices are limited to the 16 DPB slots */
4382                     if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4383                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4386                     h->mmco[i].long_index= long_index;
4389                 if(opcode > (unsigned)MMCO_LONG){
4390                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4393                 if(opcode == MMCO_END)
         /* sliding-window mode: when the reference buffer is full, evict
          * the oldest short-term picture */
4398             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4400             if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4401                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4402                 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count of the current picture for all three
 * POC modes (pic_order_cnt_type 0/1/2, H.264 8.2.1) and stores the
 * per-field and frame POCs in the current Picture.
 */
4412 static int init_poc(H264Context *h){
4413     MpegEncContext * const s = &h->s;
4414     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4417     if(h->nal_unit_type == NAL_IDR_SLICE){
4418         h->frame_num_offset= 0;
     /* frame_num wrapped around → advance the offset by one period */
4420         if(h->frame_num < h->prev_frame_num)
4421             h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4423             h->frame_num_offset= h->prev_frame_num_offset;
     /* --- poc_type 0: explicit poc_lsb with msb tracking --- */
4426     if(h->sps.poc_type==0){
4427         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4429         if(h->nal_unit_type == NAL_IDR_SLICE){
         /* detect lsb wraparound in either direction to pick poc_msb */
4434         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4435             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4436         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4437             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4439             h->poc_msb = h->prev_poc_msb;
4440 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4442         field_poc[1] = h->poc_msb + h->poc_lsb;
4443         if(s->picture_structure == PICT_FRAME)
4444             field_poc[1] += h->delta_poc_bottom;
     /* --- poc_type 1: poc derived from frame_num and an SPS offset cycle --- */
4445     }else if(h->sps.poc_type==1){
4446         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4449         if(h->sps.poc_cycle_length != 0)
4450             abs_frame_num = h->frame_num_offset + h->frame_num;
4454         if(h->nal_ref_idc==0 && abs_frame_num > 0)
4457         expected_delta_per_poc_cycle = 0;
4458         for(i=0; i < h->sps.poc_cycle_length; i++)
4459             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4461         if(abs_frame_num > 0){
4462             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4463             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4465             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4466             for(i = 0; i <= frame_num_in_poc_cycle; i++)
4467                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4471         if(h->nal_ref_idc == 0)
4472             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4474         field_poc[0] = expectedpoc + h->delta_poc[0];
4475         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4477         if(s->picture_structure == PICT_FRAME)
4478             field_poc[1] += h->delta_poc[1];
     /* --- poc_type 2: poc follows decoding order (2*frame_num, minus one
      * for non-reference pictures) --- */
4481         if(h->nal_unit_type == NAL_IDR_SLICE){
4484             if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4485             else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
     /* store the derived POCs on the current picture */
4491     if(s->picture_structure != PICT_BOTTOM_FIELD)
4492         s->current_picture_ptr->field_poc[0]= field_poc[0];
4493     if(s->picture_structure != PICT_TOP_FIELD)
4494         s->current_picture_ptr->field_poc[1]= field_poc[1];
4495     if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4496         s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
 * decodes a slice header.
 * this will allso call MPV_common_init() and frame_start() as needed
 * Parses slice_type, PPS/SPS references, picture geometry, frame_num,
 * POC fields, reference counts/reordering, weight tables, ref pic
 * marking, cabac_init_idc, QP and deblocking parameters.
4505 static int decode_slice_header(H264Context *h){
4506     MpegEncContext * const s = &h->s;
4507     unsigned int first_mb_in_slice;
4508     unsigned int pps_id;
4509     int num_ref_idx_active_override_flag;
4510     static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4511     unsigned int slice_type, tmp;
4512     int default_ref_list_done = 0;
4514     s->current_picture.reference= h->nal_ref_idc != 0;
4515     s->dropable= h->nal_ref_idc == 0;
4517     first_mb_in_slice= get_ue_golomb(&s->gb);
     /* with CODEC_FLAG2_CHUNKS, a new first slice resets the frame state */
4519     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
4521         s->current_picture_ptr= NULL;
4524     slice_type= get_ue_golomb(&s->gb);
4526         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
     /* slice_type >= 5 means "all slices in this picture share this type" */
4531         h->slice_type_fixed=1;
4533         h->slice_type_fixed=0;
4535     slice_type= slice_type_map[ slice_type ];
4536     if (slice_type == I_TYPE
4537         || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4538         default_ref_list_done = 1;
4540     h->slice_type= slice_type;
4542     s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
     /* --- look up the referenced PPS and its SPS --- */
4544     pps_id= get_ue_golomb(&s->gb);
4545     if(pps_id>=MAX_PPS_COUNT){
4546         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4549     h->pps= h->pps_buffer[pps_id];
4550     if(h->pps.slice_group_count == 0){
4551         av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4555     h->sps= h->sps_buffer[ h->pps.sps_id ];
4556     if(h->sps.log2_max_frame_num == 0){
4557         av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
     /* dequant tables depend on the PPS; rebuild them on a PPS switch */
4561     if(h->dequant_coeff_pps != pps_id){
4562         h->dequant_coeff_pps = pps_id;
4563         init_dequant_tables(h);
     /* --- derive picture geometry from the SPS --- */
4566     s->mb_width= h->sps.mb_width;
4567     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4569     h->b_stride=  s->mb_width*4;
4570     h->b8_stride= s->mb_width*2;
4572     s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4573     if(h->sps.frame_mbs_only_flag)
4574         s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
4576         s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
4578     if (s->context_initialized
4579         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
     /* --- one-time context init: scan tables depend on the idct in use --- */
4583     if (!s->context_initialized) {
4584         if (MPV_common_init(s) < 0)
4587         if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4588             memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4589             memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
         /* permute 4x4 scans when a non-reference idct with transposed
          * coefficient layout is used */
4592             for(i=0; i<16; i++){
4593 #define T(x) (x>>2) | ((x<<2) & 0xF)
4594                 h->zigzag_scan[i] = T(zigzag_scan[i]);
4595                 h-> field_scan[i] = T( field_scan[i]);
4599         if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4600             memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
4601             memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4602             memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
4603             memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
4606             for(i=0; i<64; i++){
4607 #define T(x) (x>>3) | ((x&7)<<3)
4608                 h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
4609                 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4610                 h->field_scan8x8[i]        = T(field_scan8x8[i]);
4611                 h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
         /* qp==0 with transform bypass uses the unpermuted scan tables */
4615         if(h->sps.transform_bypass){ //FIXME same ugly
4616             h->zigzag_scan_q0          = zigzag_scan;
4617             h->zigzag_scan8x8_q0       = zigzag_scan8x8;
4618             h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4619             h->field_scan_q0           = field_scan;
4620             h->field_scan8x8_q0        = field_scan8x8;
4621             h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
4623             h->zigzag_scan_q0          = h->zigzag_scan;
4624             h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
4625             h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4626             h->field_scan_q0           = h->field_scan;
4627             h->field_scan8x8_q0        = h->field_scan8x8;
4628             h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
     /* propagate geometry / SAR / frame rate to the AVCodecContext */
4633         s->avctx->width = s->width;
4634         s->avctx->height = s->height;
4635         s->avctx->sample_aspect_ratio= h->sps.sar;
4636         if(!s->avctx->sample_aspect_ratio.den)
4637             s->avctx->sample_aspect_ratio.den = 1;
4639         if(h->sps.timing_info_present_flag){
4640             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
         /* old x264 builds wrote timestamps in field units — compensate */
4641             if(h->x264_build > 0 && h->x264_build < 44)
4642                 s->avctx->time_base.den *= 2;
4643             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4644                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4648     if(h->slice_num == 0){
4649         if(frame_start(h) < 0)
4653     s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4654     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4657     h->mb_aff_frame = 0;
4658     if(h->sps.frame_mbs_only_flag){
4659         s->picture_structure= PICT_FRAME;
4661         if(get_bits1(&s->gb)) { //field_pic_flag
4662             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4663             av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4665             s->picture_structure= PICT_FRAME;
4666             h->mb_aff_frame = h->sps.mb_aff;
4669     assert(s->mb_num == s->mb_width * s->mb_height);
4670     if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4671        first_mb_in_slice                    >= s->mb_num){
4672         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4675     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4676     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4677     assert(s->mb_y < s->mb_height);
     /* fields use doubled pic numbers and one extra frame_num bit */
4679     if(s->picture_structure==PICT_FRAME){
4680         h->curr_pic_num=   h->frame_num;
4681         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4683         h->curr_pic_num= 2*h->frame_num;
4684         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4687     if(h->nal_unit_type == NAL_IDR_SLICE){
4688         get_ue_golomb(&s->gb); /* idr_pic_id */
     /* --- POC-related slice header fields --- */
4691     if(h->sps.poc_type==0){
4692         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4694         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4695             h->delta_poc_bottom= get_se_golomb(&s->gb);
4699     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4700         h->delta_poc[0]= get_se_golomb(&s->gb);
4702         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4703             h->delta_poc[1]= get_se_golomb(&s->gb);
4708     if(h->pps.redundant_pic_cnt_present){
4709         h->redundant_pic_count= get_ue_golomb(&s->gb);
4712     //set defaults, might be overriden a few line later
4713     h->ref_count[0]= h->pps.ref_count[0];
4714     h->ref_count[1]= h->pps.ref_count[1];
4716     if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4717         if(h->slice_type == B_TYPE){
4718             h->direct_spatial_mv_pred= get_bits1(&s->gb);
4719             if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4720                 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4722         num_ref_idx_active_override_flag= get_bits1(&s->gb);
4724         if(num_ref_idx_active_override_flag){
4725             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4726             if(h->slice_type==B_TYPE)
4727                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
         /* unsigned wrap trick: catches both 0 and >32 in one compare */
4729             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4730                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4731                 h->ref_count[0]= h->ref_count[1]= 1;
4735     if(h->slice_type == B_TYPE)
4742     if(!default_ref_list_done){
4743         fill_default_ref_list(h);
4746     if(decode_ref_pic_list_reordering(h) < 0)
     /* explicit weights for P/SP (weighted_pred) and B with bipred idc 1;
      * implicit weights for B with bipred idc 2 */
4749     if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4750        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4751         pred_weight_table(h);
4752     else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4753         implicit_weight_table(h);
4757     if(s->current_picture.reference)
4758         decode_ref_pic_marking(h);
4761         fill_mbaff_ref_list(h);
4763     if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4764         tmp = get_ue_golomb(&s->gb);
4766             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4769         h->cabac_init_idc= tmp;
4772     h->last_qscale_diff = 0;
4773     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4775         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4779     h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4780     //FIXME qscale / qp ... stuff
4781     if(h->slice_type == SP_TYPE){
4782         get_bits1(&s->gb); /* sp_for_switch_flag */
4784     if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4785         get_se_golomb(&s->gb); /* slice_qs_delta */
     /* --- deblocking filter parameters --- */
4788     h->deblocking_filter = 1;
4789     h->slice_alpha_c0_offset = 0;
4790     h->slice_beta_offset = 0;
4791     if( h->pps.deblocking_filter_parameters_present ) {
4792         tmp= get_ue_golomb(&s->gb);
4794             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4797         h->deblocking_filter= tmp;
     /* bitstream idc 0 = filter on, 1 = off → invert to a boolean */
4798         if(h->deblocking_filter < 2)
4799             h->deblocking_filter^= 1; // 1<->0
4801         if( h->deblocking_filter ) {
4802             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4803             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4806     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
4807        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4808        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type == B_TYPE)
4809        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4810         h->deblocking_filter= 0;
     /* NOTE(review): the '?' below is a placeholder for the FMO
      * slice_group_change_cycle bit count — presumably inside #if 0 /
      * commented out in the full source; confirm before compiling. */
4813     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4814         slice_group_change_cycle= get_bits(&s->gb, ?);
4819     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4820     h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4822     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4823         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4825                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4827                av_get_pict_type_char(h->slice_type),
4828                pps_id, h->frame_num,
4829                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4830                h->ref_count[0], h->ref_count[1],
4832                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4834                h->use_weight==1 && h->use_weight_chroma ? "c" : ""
     /* non-reference pictures may use the cheaper 2-tap qpel filters */
4838     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4839         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4840         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4842         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4843         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before
 * the first 1 in the bitstream (unary code), using the raw bit-reader
 * macros for speed.
 */
4852 static inline int get_level_prefix(GetBitContext *gb){
4856     OPEN_READER(re, gb);
4857     UPDATE_CACHE(re, gb);
4858     buf=GET_CACHE(re, gb);
     /* position of the first set bit, counted from the MSB */
4860     log= 32 - av_log2(buf);
4862     print_bin(buf>>(32-log), log);
4863     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4866     LAST_SKIP_BITS(re, gb, log);
4867     CLOSE_READER(re, gb);
/**
 * Returns whether the 8x8 transform may be used for the current MB:
 * every sub-partition must be 8x8 (or direct with 8x8 inference).
 * NOTE(review): loop header and return elided in this excerpt.
 */
4872 static inline int get_dct8x8_allowed(H264Context *h){
4875         if(!IS_SUB_8X8(h->sub_mb_type[i])
4876            || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
 * decodes a residual block (CAVLC entropy coding, H.264 9.2):
 * coeff_token, trailing-one signs, level prefix/suffix pairs, total_zeros
 * and run_before, writing dequantized coefficients into block[].
 * @param n block index
 * @param scantable scantable
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occured
4889 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4890     MpegEncContext * const s = &h->s;
4891     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4893     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4895     //FIXME put trailing_onex into the context
     /* coeff_token VLC choice depends on the predicted non-zero count of
      * neighbouring blocks (chroma DC has its own table) */
4897     if(n == CHROMA_DC_BLOCK_INDEX){
4898         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4899         total_coeff= coeff_token>>2;
4901         if(n == LUMA_DC_BLOCK_INDEX){
4902             total_coeff= pred_non_zero_count(h, 0);
4903             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4904             total_coeff= coeff_token>>2;
4906             total_coeff= pred_non_zero_count(h, n);
4907             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4908             total_coeff= coeff_token>>2;
4909             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4913     //FIXME set last_non_zero?
4917     if(total_coeff > (unsigned)max_coeff) {
4918         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
     /* low two bits of coeff_token = number of trailing +/-1 coefficients */
4922     trailing_ones= coeff_token&3;
4923     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4924     assert(total_coeff<=16);
     /* trailing ones carry only a sign bit each */
4926     for(i=0; i<trailing_ones; i++){
4927         level[i]= 1 - 2*get_bits1(gb);
4931         int level_code, mask;
4932         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4933         int prefix= get_level_prefix(gb);
4935         //first coefficient has suffix_length equal to 0 or 1
4936         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4938                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4940                 level_code= (prefix<<suffix_length); //part
4941         }else if(prefix==14){
4943                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4945                 level_code= prefix + get_bits(gb, 4); //part
4946         }else if(prefix==15){
4947             level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4948             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4950             av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4954         if(trailing_ones < 3) level_code += 2;
     /* zig-zag mapping: even codes are positive, odd negative */
4959         mask= -(level_code&1);
4960         level[i]= (((2+level_code)>>1) ^ mask) - mask;
4963         //remaining coefficients have suffix_length > 0
4964         for(;i<total_coeff;i++) {
4965             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4966             prefix = get_level_prefix(gb);
4968                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4969             }else if(prefix==15){
4970                 level_code =  (prefix<<suffix_length) + get_bits(gb, 12);
4972                 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4975             mask= -(level_code&1);
4976             level[i]= (((2+level_code)>>1) ^ mask) - mask;
         /* adaptively grow suffix_length as level magnitudes increase */
4977             if(level_code > suffix_limit[suffix_length])
     /* a full block has no zeros interleaved, so skip total_zeros */
4982     if(total_coeff == max_coeff)
4985         if(n == CHROMA_DC_BLOCK_INDEX)
4986             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4988             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     /* first output path: no dequant (qmul unused) — presumably the DC /
      * bypass case; second path applies (level*qmul+32)>>6 */
4991     coeff_num = zeros_left + total_coeff - 1;
4992     j = scantable[coeff_num];
4994         block[j] = level[0];
4995         for(i=1;i<total_coeff;i++) {
4998             else if(zeros_left < 7){
4999                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5001                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5003             zeros_left -= run_before;
5004             coeff_num -= 1 + run_before;
5005             j= scantable[ coeff_num ];
5010         block[j] = (level[0] * qmul[j] + 32)>>6;
5011         for(i=1;i<total_coeff;i++) {
5014             else if(zeros_left < 7){
5015                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5017                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5019             zeros_left -= run_before;
5020             coeff_num -= 1 + run_before;
5021             j= scantable[ coeff_num ];
5023             block[j]= (level[i] * qmul[j] + 32)>>6;
     /* more runs consumed than zeros were signalled → corrupt stream */
5028         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * For skipped MBAFF macroblock pairs the field/frame decoding flag is not
 * coded; predict it from the left neighbour if it belongs to this slice,
 * else from the top neighbour, else default to frame (0).
 */
5035 static void predict_field_decoding_flag(H264Context *h){
5036     MpegEncContext * const s = &h->s;
5037     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5038     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5039                 ? s->current_picture.mb_type[mb_xy-1]
5040                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5041                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5043     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
 * decodes a P_SKIP or B_SKIP macroblock: no residual, motion is fully
 * predicted (direct mode for B, pskip median prediction for P).
5049 static void decode_mb_skip(H264Context *h){
5050     MpegEncContext * const s = &h->s;
5051     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     /* skipped MBs have no coded coefficients at all */
5054     memset(h->non_zero_count[mb_xy], 0, 16);
5055     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5058         mb_type|= MB_TYPE_INTERLACED;
5060     if( h->slice_type == B_TYPE )
5062         // just for fill_caches. pred_direct_motion will set the real mb_type
5063         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5065         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5066         pred_direct_motion(h, &mb_type);
5067         mb_type|= MB_TYPE_SKIP;
     /* P_SKIP: 16x16 partition with the predicted pskip motion vector */
5072         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5074         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5075         pred_pskip_motion(h, &mx, &my);
5076         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5077         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5080     write_back_motion(h, mb_type);
5081     s->current_picture.mb_type[mb_xy]= mb_type;
5082     s->current_picture.qscale_table[mb_xy]= s->qscale;
5083     h->slice_table[ mb_xy ]= h->slice_num;
5084     h->prev_mb_skipped= 1;
5088 * decodes a macroblock
5089 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5091 static int decode_mb_cavlc(H264Context *h){
5092 MpegEncContext * const s = &h->s;
5093 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5094 int partition_count;
5095 unsigned int mb_type, cbp;
5096 int dct8x8_allowed= h->pps.transform_8x8_mode;
5098 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5100 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5101 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5103 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5104 if(s->mb_skip_run==-1)
5105 s->mb_skip_run= get_ue_golomb(&s->gb);
5107 if (s->mb_skip_run--) {
5108 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5109 if(s->mb_skip_run==0)
5110 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5112 predict_field_decoding_flag(h);
5119 if( (s->mb_y&1) == 0 )
5120 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5122 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5124 h->prev_mb_skipped= 0;
5126 mb_type= get_ue_golomb(&s->gb);
5127 if(h->slice_type == B_TYPE){
5129 partition_count= b_mb_type_info[mb_type].partition_count;
5130 mb_type= b_mb_type_info[mb_type].type;
5133 goto decode_intra_mb;
5135 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5137 partition_count= p_mb_type_info[mb_type].partition_count;
5138 mb_type= p_mb_type_info[mb_type].type;
5141 goto decode_intra_mb;
5144 assert(h->slice_type == I_TYPE);
5147 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5151 cbp= i_mb_type_info[mb_type].cbp;
5152 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5153 mb_type= i_mb_type_info[mb_type].type;
5157 mb_type |= MB_TYPE_INTERLACED;
5159 h->slice_table[ mb_xy ]= h->slice_num;
5161 if(IS_INTRA_PCM(mb_type)){
5164 // we assume these blocks are very rare so we dont optimize it
5165 align_get_bits(&s->gb);
5167 // The pixels are stored in the same order as levels in h->mb array.
5168 for(y=0; y<16; y++){
5169 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5170 for(x=0; x<16; x++){
5171 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5172 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5176 const int index= 256 + 4*(y&3) + 32*(y>>2);
5178 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5179 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5183 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5185 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5186 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5190 // In deblocking, the quantizer is 0
5191 s->current_picture.qscale_table[mb_xy]= 0;
5192 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5193 // All coeffs are present
5194 memset(h->non_zero_count[mb_xy], 16, 16);
5196 s->current_picture.mb_type[mb_xy]= mb_type;
5201 h->ref_count[0] <<= 1;
5202 h->ref_count[1] <<= 1;
5205 fill_caches(h, mb_type, 0);
5208 if(IS_INTRA(mb_type)){
5210 // init_top_left_availability(h);
5211 if(IS_INTRA4x4(mb_type)){
5214 if(dct8x8_allowed && get_bits1(&s->gb)){
5215 mb_type |= MB_TYPE_8x8DCT;
5219 // fill_intra4x4_pred_table(h);
5220 for(i=0; i<16; i+=di){
5221 int mode= pred_intra_mode(h, i);
5223 if(!get_bits1(&s->gb)){
5224 const int rem_mode= get_bits(&s->gb, 3);
5225 mode = rem_mode + (rem_mode >= mode);
5229 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5231 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5233 write_back_intra_pred_mode(h);
5234 if( check_intra4x4_pred_mode(h) < 0)
5237 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5238 if(h->intra16x16_pred_mode < 0)
5242 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
5245 h->chroma_pred_mode= pred_mode;
5246 }else if(partition_count==4){
5247 int i, j, sub_partition_count[4], list, ref[2][4];
5249 if(h->slice_type == B_TYPE){
5251 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5252 if(h->sub_mb_type[i] >=13){
5253 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5256 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5257 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5259 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5260 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5261 pred_direct_motion(h, &mb_type);
5262 h->ref_cache[0][scan8[4]] =
5263 h->ref_cache[1][scan8[4]] =
5264 h->ref_cache[0][scan8[12]] =
5265 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5268 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5270 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5271 if(h->sub_mb_type[i] >=4){
5272 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5275 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5276 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5280 for(list=0; list<h->list_count; list++){
5281 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5283 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5284 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5285 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5287 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
5299 dct8x8_allowed = get_dct8x8_allowed(h);
5301 for(list=0; list<h->list_count; list++){
5302 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5305 if(IS_DIRECT(h->sub_mb_type[i])) {
5306 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5309 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5310 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5312 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5313 const int sub_mb_type= h->sub_mb_type[i];
5314 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5315 for(j=0; j<sub_partition_count[i]; j++){
5317 const int index= 4*i + block_width*j;
5318 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5319 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5320 mx += get_se_golomb(&s->gb);
5321 my += get_se_golomb(&s->gb);
5322 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5324 if(IS_SUB_8X8(sub_mb_type)){
5326 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5328 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5329 }else if(IS_SUB_8X4(sub_mb_type)){
5330 mv_cache[ 1 ][0]= mx;
5331 mv_cache[ 1 ][1]= my;
5332 }else if(IS_SUB_4X8(sub_mb_type)){
5333 mv_cache[ 8 ][0]= mx;
5334 mv_cache[ 8 ][1]= my;
5336 mv_cache[ 0 ][0]= mx;
5337 mv_cache[ 0 ][1]= my;
5340 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5346 }else if(IS_DIRECT(mb_type)){
5347 pred_direct_motion(h, &mb_type);
5348 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5350 int list, mx, my, i;
5351 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5352 if(IS_16X16(mb_type)){
5353 for(list=0; list<h->list_count; list++){
5355 if(IS_DIR(mb_type, 0, list)){
5356 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5357 if(val >= h->ref_count[list]){
5358 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5362 val= LIST_NOT_USED&0xFF;
5363 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5365 for(list=0; list<h->list_count; list++){
5367 if(IS_DIR(mb_type, 0, list)){
5368 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5369 mx += get_se_golomb(&s->gb);
5370 my += get_se_golomb(&s->gb);
5371 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5373 val= pack16to32(mx,my);
5376 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
5379 else if(IS_16X8(mb_type)){
5380 for(list=0; list<h->list_count; list++){
5383 if(IS_DIR(mb_type, i, list)){
5384 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5385 if(val >= h->ref_count[list]){
5386 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5390 val= LIST_NOT_USED&0xFF;
5391 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5394 for(list=0; list<h->list_count; list++){
5397 if(IS_DIR(mb_type, i, list)){
5398 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5399 mx += get_se_golomb(&s->gb);
5400 my += get_se_golomb(&s->gb);
5401 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5403 val= pack16to32(mx,my);
5406 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
5410 assert(IS_8X16(mb_type));
5411 for(list=0; list<h->list_count; list++){
5414 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5415 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5416 if(val >= h->ref_count[list]){
5417 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5421 val= LIST_NOT_USED&0xFF;
5422 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5425 for(list=0; list<h->list_count; list++){
5428 if(IS_DIR(mb_type, i, list)){
5429 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5430 mx += get_se_golomb(&s->gb);
5431 my += get_se_golomb(&s->gb);
5432 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5434 val= pack16to32(mx,my);
5437 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
5443 if(IS_INTER(mb_type))
5444 write_back_motion(h, mb_type);
5446 if(!IS_INTRA16x16(mb_type)){
5447 cbp= get_ue_golomb(&s->gb);
5449 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5453 if(IS_INTRA4x4(mb_type))
5454 cbp= golomb_to_intra4x4_cbp[cbp];
5456 cbp= golomb_to_inter_cbp[cbp];
5460 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5461 if(get_bits1(&s->gb))
5462 mb_type |= MB_TYPE_8x8DCT;
5464 s->current_picture.mb_type[mb_xy]= mb_type;
5466 if(cbp || IS_INTRA16x16(mb_type)){
5467 int i8x8, i4x4, chroma_idx;
5468 int chroma_qp, dquant;
5469 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5470 const uint8_t *scan, *scan8x8, *dc_scan;
5472 // fill_non_zero_count_cache(h);
5474 if(IS_INTERLACED(mb_type)){
5475 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5476 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5477 dc_scan= luma_dc_field_scan;
5479 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5480 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5481 dc_scan= luma_dc_zigzag_scan;
5484 dquant= get_se_golomb(&s->gb);
5486 if( dquant > 25 || dquant < -26 ){
5487 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5491 s->qscale += dquant;
5492 if(((unsigned)s->qscale) > 51){
5493 if(s->qscale<0) s->qscale+= 52;
5494 else s->qscale-= 52;
5497 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5498 if(IS_INTRA16x16(mb_type)){
5499 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5500 return -1; //FIXME continue if partitioned and other return -1 too
5503 assert((cbp&15) == 0 || (cbp&15) == 15);
5506 for(i8x8=0; i8x8<4; i8x8++){
5507 for(i4x4=0; i4x4<4; i4x4++){
5508 const int index= i4x4 + 4*i8x8;
5509 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5515 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5518 for(i8x8=0; i8x8<4; i8x8++){
5519 if(cbp & (1<<i8x8)){
5520 if(IS_8x8DCT(mb_type)){
5521 DCTELEM *buf = &h->mb[64*i8x8];
5523 for(i4x4=0; i4x4<4; i4x4++){
5524 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5525 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5528 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5529 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5531 for(i4x4=0; i4x4<4; i4x4++){
5532 const int index= i4x4 + 4*i8x8;
5534 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5540 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5541 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5547 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5548 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5554 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5555 for(i4x4=0; i4x4<4; i4x4++){
5556 const int index= 16 + 4*chroma_idx + i4x4;
5557 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5563 uint8_t * const nnz= &h->non_zero_count_cache[0];
5564 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5565 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5568 uint8_t * const nnz= &h->non_zero_count_cache[0];
5569 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5570 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5571 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5573 s->current_picture.qscale_table[mb_xy]= s->qscale;
5574 write_back_non_zero_count(h);
5577 h->ref_count[0] >>= 1;
5578 h->ref_count[1] >>= 1;
/* Decode the CABAC mb_field_decoding_flag for the current MB pair.
 * ctx (0..2) counts how many of the left (A) and top (B) neighbouring MB
 * pairs, belonging to the same slice, are coded as field (interlaced);
 * the bin is then read from context state 70+ctx. */
5584 static int decode_cabac_field_decoding_flag(H264Context *h) {
5585 MpegEncContext * const s = &h->s;
5586 const int mb_x = s->mb_x;
/* mb_y of the top MB of the current MB pair */
5587 const int mb_y = s->mb_y & ~1;
/* left neighbour pair (A) and top neighbour pair (B) indices */
5588 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5589 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5591 unsigned int ctx = 0;
/* neighbour only counts if it is in the same slice and field-coded */
5593 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5596 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5600 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode an intra mb_type with CABAC, shared by I, P and B slices.
 * ctx_base selects the context family for the slice type; intra_slice
 * shifts the state indices used for the I16x16 sub-decisions.
 * Returns 0 for I4x4, 25 for PCM, or 1..24 for an I16x16 variant whose
 * value encodes cbp_luma (+12), cbp_chroma (+4/+8) and the prediction
 * mode (+0..3). */
5603 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5604 uint8_t *state= &h->cabac_state[ctx_base];
5608 MpegEncContext * const s = &h->s;
5609 const int mba_xy = h->left_mb_xy[0];
5610 const int mbb_xy = h->top_mb_xy;
/* ctx increments (elided here) for each same-slice neighbour that is
 * not intra-4x4 coded */
5612 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5614 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5616 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5617 return 0; /* I4x4 */
5620 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5621 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM from the I16x16 family */
5624 if( get_cabac_terminate( &h->cabac ) )
5625 return 25; /* PCM */
5627 mb_type = 1; /* I16x16 */
5628 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5629 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5630 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* two final bins give the 16x16 intra prediction mode (0..3) */
5631 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5632 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode mb_type with CABAC, dispatching on the slice type.
 * I slices delegate directly to decode_cabac_intra_mb_type; P and B
 * slices first decode the inter partitions and fall back to the intra
 * decoder (with an offset) when the "intra" prefix bin is set. */
5636 static int decode_cabac_mb_type( H264Context *h ) {
5637 MpegEncContext * const s = &h->s;
5639 if( h->slice_type == I_TYPE ) {
5640 return decode_cabac_intra_mb_type(h, 3, 1);
5641 } else if( h->slice_type == P_TYPE ) {
/* state[14]==0 selects the inter P types */
5642 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5644 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5645 /* P_L0_D16x16, P_8x8 */
5646 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5648 /* P_L0_D8x16, P_L0_D16x8 */
5649 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra type inside a P slice: values shifted past the 5 inter types */
5652 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5654 } else if( h->slice_type == B_TYPE ) {
5655 const int mba_xy = h->left_mb_xy[0];
5656 const int mbb_xy = h->top_mb_xy;
/* ctx counts same-slice neighbours that are NOT direct-coded */
5660 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5662 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5665 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5666 return 0; /* B_Direct_16x16 */
5668 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5669 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix read MSB first; decoded below into the remaining types */
5672 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5673 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5674 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5675 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5677 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5678 else if( bits == 13 ) {
/* intra type inside a B slice: values shifted past the 23 inter types */
5679 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5680 } else if( bits == 14 )
5681 return 11; /* B_L1_L0_8x16 */
5682 else if( bits == 15 )
5683 return 22; /* B_8x8 */
/* one extra bin extends the suffix for the two-list 16x8/8x16 types */
5685 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5686 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5688 /* TODO SI/SP frames? */
/* Decode the CABAC mb_skip_flag for the MB at (mb_x, mb_y).
 * ctx (0..2) counts same-slice left/top neighbours that are NOT skipped;
 * B slices use a different context family (offset applied before the
 * final read from state 11+ctx). In MBAFF mode the neighbour indices are
 * adjusted for field/frame pairing. */
5693 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5694 MpegEncContext * const s = &h->s;
5698 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5699 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* left neighbour: step down to the bottom MB of the pair when the
 * field/frame coding of A matches the current MB */
5702 && h->slice_table[mba_xy] == h->slice_num
5703 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5704 mba_xy += s->mb_stride;
5706 mbb_xy = mb_xy - s->mb_stride;
5708 && h->slice_table[mbb_xy] == h->slice_num
5709 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5710 mbb_xy -= s->mb_stride;
5712 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5714 int mb_xy = mb_x + mb_y*s->mb_stride;
5716 mbb_xy = mb_xy - s->mb_stride;
5719 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5721 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5724 if( h->slice_type == B_TYPE )
5726 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decode one intra 4x4 prediction mode. State 68 is the
 * prev_intra4x4_pred_mode_flag; if set, the predicted mode (pred_mode)
 * is used as-is. Otherwise three bins from state 69 build rem_mode
 * (LSB first), and modes >= pred_mode are shifted up by one. */
5729 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5732 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5735 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5736 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5737 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5739 if( mode >= pred_mode )
/* Decode the chroma intra prediction mode (0..3). ctx counts same-slice
 * neighbours whose stored chroma_pred_mode is nonzero; the remaining
 * bins use the fixed context 64+3 to unary-code the mode. */
5745 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5746 const int mba_xy = h->left_mb_xy[0];
5747 const int mbb_xy = h->top_mb_xy;
5751 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5752 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5755 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5758 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5761 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5763 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Map a 4x4 luma block index (0..15, in decode order) to its x/y
 * position inside the macroblock; block_idx_xy is the inverse mapping
 * from (x, y) back to the block index. Used by the cbp_luma context
 * derivation below. */
5769 static const uint8_t block_idx_x[16] = {
5770 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5772 static const uint8_t block_idx_y[16] = {
5773 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5775 static const uint8_t block_idx_xy[4][4] = {
/* Decode the 4-bit luma coded_block_pattern, one bin per 8x8 block.
 * For each 8x8 block the context (73+ctx) is derived from whether the
 * left and top neighbouring 8x8 blocks (possibly in the left/top MB,
 * via left_cbp/top_cbp) had coefficients. */
5782 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5787 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5789 tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
5792 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* position of the current 8x8 block inside the MB */
5797 x = block_idx_x[4*i8x8];
5798 y = block_idx_y[4*i8x8];
5802 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5803 cbp_a = h->left_cbp;
5804 tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
5810 /* No need to test for skip as we put 0 for skip block */
5811 /* No need to test for IPCM as we put 1 for IPCM block */
/* wrap-around indexing (&0x03) selects the neighbour block whether it
 * lies in this MB or in the adjacent one */
5813 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5814 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5819 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5820 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5824 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode the chroma coded_block_pattern (0, 1 or 2). The neighbours'
 * chroma cbp (bits 4-5 of left_cbp/top_cbp) drive the context for the
 * first bin (77+ctx); a second bin distinguishes "DC only" (1) from
 * "DC+AC" (2). */
5830 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5834 cbp_a = (h->left_cbp>>4)&0x03;
5835 cbp_b = (h-> top_cbp>>4)&0x03;
5838 if( cbp_a > 0 ) ctx++;
5839 if( cbp_b > 0 ) ctx += 2;
5840 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5844 if( cbp_a == 2 ) ctx++;
5845 if( cbp_b == 2 ) ctx += 2;
5846 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta. The first bin's context depends on whether the
 * previous MB (in raster order, wrapping across rows) changed QP; the
 * value is unary-coded (contexts 60+ctx) and then mapped to a signed
 * delta: even counts positive, odd counts negative. */
5848 static int decode_cabac_mb_dqp( H264Context *h) {
5849 MpegEncContext * const s = &h->s;
5855 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
/* previous MB is last of the previous row when at column 0 */
5857 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5859 if( h->last_qscale_diff != 0 )
5862 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5868 if(val > 102) //prevent infinite loop
5875 return -(val + 1)/2;
/* Decode one P-slice sub_mb_type (8x8, 8x4, 4x8 or 4x4) using a
 * three-bin tree over context states 21..23. */
5877 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5878 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5880 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5882 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode one B-slice sub_mb_type (0..12) with a bin tree over context
 * states 36..39: direct first, then the L0/L1 8x8 pair, then the
 * remaining partition shapes and prediction-list combinations. */
5886 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5888 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5889 return 0; /* B_Direct_8x8 */
5890 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5891 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5893 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5894 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5895 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5898 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5899 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; the context (399 + 0..2) is the
 * number of neighbouring MBs that use the 8x8 transform, as cached in
 * h->neighbor_transform_size. */
5903 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5904 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of the given list. The initial context is
 * derived from the left/top cached ref indices (in B slices a neighbour
 * only counts when it is not direct-coded); the value itself is
 * unary-coded over contexts 54+ctx. Returns 0 (clamped) on overflow. */
5907 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5908 int refa = h->ref_cache[list][scan8[n] - 1];
5909 int refb = h->ref_cache[list][scan8[n] - 8];
5913 if( h->slice_type == B_TYPE) {
5914 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5916 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5925 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound to avoid runaway unary decode on corrupt streams */
5931 if(ref >= 32 /*h->ref_list[list]*/){
5932 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5933 return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion vector difference component (l: 0=x with ctxbase
 * 40, 1=y with ctxbase 47). The initial context comes from the summed
 * magnitudes of the neighbours' mvd cache entries; magnitudes are
 * unary-coded up to 9, then continued with an exp-Golomb-style bypass
 * suffix; the sign is a final bypass bin. */
5939 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5940 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5941 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5942 int ctxbase = (l == 0) ? 40 : 47;
5947 else if( amvd > 32 )
5952 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5957 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix for large magnitudes */
5965 while( get_cabac_bypass( &h->cabac ) ) {
5969 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5974 if( get_cabac_bypass( &h->cabac ) )
5978 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Derive the coded_block_flag context for block category 'cat' and
 * block index 'idx': look up whether the left (nza) and top (nzb)
 * neighbouring blocks of the same category had coefficients, from
 * either the cbp tables (DC categories) or the non_zero_count cache
 * (AC/4x4 categories), then return ctx + 4*cat. */
5981 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* cat 0: luma DC — flag stored in bit 8 of the neighbour cbp */
5986 nza = h->left_cbp&0x100;
5987 nzb = h-> top_cbp&0x100;
5988 } else if( cat == 1 || cat == 2 ) {
5989 nza = h->non_zero_count_cache[scan8[idx] - 1];
5990 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5991 } else if( cat == 3 ) {
/* chroma DC — per-plane flags in bits 6..7 of the neighbour cbp */
5992 nza = (h->left_cbp>>(6+idx))&0x01;
5993 nzb = (h-> top_cbp>>(6+idx))&0x01;
5996 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5997 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6006 return ctx + 4 * cat;
/* Context offsets for the last_significant_coeff_flag of 8x8 blocks,
 * indexed by scan position (0..62); used by DECODE_SIGNIFICANCE in
 * decode_cabac_residual. Marked used because references may be compiled
 * out by the x86 assembly path. */
6009 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
6010 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6011 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6012 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6013 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one CABAC-coded residual block into 'block':
 * 1. coded_block_flag (context 85 + get_cabac_cbf_ctx) — early out if 0;
 * 2. significance map (significant/last flags) collecting the scan
 *    positions of nonzero coefficients into index[];
 * 3. coefficient magnitudes and signs, decoded in reverse scan order,
 *    with optional dequantization via qmul (skipped when qmul==NULL,
 *    i.e. for DC blocks dequantized later).
 * The CABAC state is copied to a stack-local context (CABAC_ON_STACK)
 * for the duration of the decode and written back on every exit path.
 * MB_FIELD selects the frame or field variants of the context tables. */
6016 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6017 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* [0]=frame, [1]=field context-base offsets, indexed by cat */
6018 static const int significant_coeff_flag_offset[2][6] = {
6019 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6020 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6022 static const int last_coeff_flag_offset[2][6] = {
6023 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6024 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6026 static const int coeff_abs_level_m1_offset[6] = {
6027 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position context offsets for 8x8 significance, frame/field */
6029 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6030 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6031 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6032 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6033 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6034 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6035 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6036 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6037 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6043 int coeff_count = 0;
6046 int abslevelgt1 = 0;
6048 uint8_t *significant_coeff_ctx_base;
6049 uint8_t *last_coeff_ctx_base;
6050 uint8_t *abs_level_m1_ctx_base;
6053 #define CABAC_ON_STACK
6055 #ifdef CABAC_ON_STACK
/* copy the decoder state into a local so the compiler can keep it in
 * registers during this hot loop */
6058 cc.range = h->cabac.range;
6059 cc.low = h->cabac.low;
6060 cc.bytestream= h->cabac.bytestream;
6062 #define CC &h->cabac
6066 /* cat: 0-> DC 16x16 n = 0
6067 * 1-> AC 16x16 n = luma4x4idx
6068 * 2-> Luma4x4 n = luma4x4idx
6069 * 3-> DC Chroma n = iCbCr
6070 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6071 * 5-> Luma8x8 n = 4 * luma8x8idx
6074 /* read coded block flag */
6076 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: zero the nnz cache entry and restore CABAC state */
6077 if( cat == 1 || cat == 2 )
6078 h->non_zero_count_cache[scan8[n]] = 0;
6080 h->non_zero_count_cache[scan8[16+n]] = 0;
6081 #ifdef CABAC_ON_STACK
6082 h->cabac.range = cc.range ;
6083 h->cabac.low = cc.low ;
6084 h->cabac.bytestream= cc.bytestream;
6090 significant_coeff_ctx_base = h->cabac_state
6091 + significant_coeff_flag_offset[MB_FIELD][cat];
6092 last_coeff_ctx_base = h->cabac_state
6093 + last_coeff_flag_offset[MB_FIELD][cat];
6094 abs_level_m1_ctx_base = h->cabac_state
6095 + coeff_abs_level_m1_offset[cat];
/* significance-map scan: records each significant position; a set
 * last_coeff flag terminates the scan early */
6098 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6099 for(last= 0; last < coefs; last++) { \
6100 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6101 if( get_cabac( CC, sig_ctx )) { \
6102 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6103 index[coeff_count++] = last; \
6104 if( get_cabac( CC, last_ctx ) ) { \
6110 if( last == max_coeff -1 ) {\
6111 index[coeff_count++] = last;\
6113 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written x86 asm fast paths when available */
6114 #if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(CONFIG_EBX_AVAILABLE)
6115 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6117 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6119 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6121 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6124 assert(coeff_count > 0);
/* record nonzero-coefficient info where the category stores it */
6127 h->cbp_table[mb_xy] |= 0x100;
6128 else if( cat == 1 || cat == 2 )
6129 h->non_zero_count_cache[scan8[n]] = coeff_count;
6131 h->cbp_table[mb_xy] |= 0x40 << n;
6133 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
/* 8x8 block: one count covers all four 4x4 nnz cache cells */
6136 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels, decoded from the highest scan position downwards */
6139 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6140 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6141 int j= scantable[index[coeff_count]];
6143 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign only */
6145 block[j] = get_cabac_bypass_sign( CC, -1);
/* NOTE(review): stray extra ';' below — harmless empty statement */
6147 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6153 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6154 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* magnitudes >= 15 continue with a bypass exp-Golomb suffix */
6158 if( coeff_abs >= 15 ) {
6160 while( get_cabac_bypass( CC ) ) {
6166 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6172 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6173 else block[j] = coeff_abs;
6175 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6176 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6182 #ifdef CABAC_ON_STACK
/* write the local CABAC state back before returning */
6183 h->cabac.range = cc.range ;
6184 h->cabac.low = cc.low ;
6185 h->cabac.bytestream= cc.bytestream;
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * The simple case is top = mb_xy - stride, left = mb_xy - 1; in MBAFF
 * mode the indices are corrected for field/frame mismatches between the
 * current MB and its neighbouring MB pairs. */
6190 static void inline compute_mb_neighbors(H264Context *h)
6192 MpegEncContext * const s = &h->s;
6193 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6194 h->top_mb_xy = mb_xy - s->mb_stride;
6195 h->left_mb_xy[0] = mb_xy - 1;
6197 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6198 const int top_pair_xy = pair_xy - s->mb_stride;
/* frame/field status of the top and left MB pairs and the current MB */
6199 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6200 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6201 const int curr_mb_frame_flag = !MB_FIELD;
6202 const int bottom = (s->mb_y & 1);
6204 ? !curr_mb_frame_flag // bottom macroblock
6205 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6207 h->top_mb_xy -= s->mb_stride;
6209 if (left_mb_frame_flag != curr_mb_frame_flag) {
6210 h->left_mb_xy[0] = pair_xy - 1;
6217 * decodes a macroblock
6218 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6220 static int decode_mb_cabac(H264Context *h) {
6221 MpegEncContext * const s = &h->s;
6222 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6223 int mb_type, partition_count, cbp = 0;
6224 int dct8x8_allowed= h->pps.transform_8x8_mode;
6226 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6228 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6229 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6231 /* a skipped mb needs the aff flag from the following mb */
6232 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6233 predict_field_decoding_flag(h);
6234 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6235 skip = h->next_mb_skipped;
6237 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6238 /* read skip flags */
6240 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6241 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6242 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6243 if(h->next_mb_skipped)
6244 predict_field_decoding_flag(h);
6246 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6251 h->cbp_table[mb_xy] = 0;
6252 h->chroma_pred_mode_table[mb_xy] = 0;
6253 h->last_qscale_diff = 0;
6260 if( (s->mb_y&1) == 0 )
6262 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6264 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6266 h->prev_mb_skipped = 0;
6268 compute_mb_neighbors(h);
6269 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6270 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6274 if( h->slice_type == B_TYPE ) {
6276 partition_count= b_mb_type_info[mb_type].partition_count;
6277 mb_type= b_mb_type_info[mb_type].type;
6280 goto decode_intra_mb;
6282 } else if( h->slice_type == P_TYPE ) {
6284 partition_count= p_mb_type_info[mb_type].partition_count;
6285 mb_type= p_mb_type_info[mb_type].type;
6288 goto decode_intra_mb;
6291 assert(h->slice_type == I_TYPE);
6293 partition_count = 0;
6294 cbp= i_mb_type_info[mb_type].cbp;
6295 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6296 mb_type= i_mb_type_info[mb_type].type;
6299 mb_type |= MB_TYPE_INTERLACED;
6301 h->slice_table[ mb_xy ]= h->slice_num;
6303 if(IS_INTRA_PCM(mb_type)) {
6307 // We assume these blocks are very rare so we dont optimize it.
6308 // FIXME The two following lines get the bitstream position in the cabac
6309 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6310 ptr= h->cabac.bytestream;
6311 if(h->cabac.low&0x1) ptr--;
6313 if(h->cabac.low&0x1FF) ptr--;
6316 // The pixels are stored in the same order as levels in h->mb array.
6317 for(y=0; y<16; y++){
6318 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6319 for(x=0; x<16; x++){
6320 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
6321 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6325 const int index= 256 + 4*(y&3) + 32*(y>>2);
6327 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6328 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6332 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6334 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6335 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6339 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6341 // All blocks are present
6342 h->cbp_table[mb_xy] = 0x1ef;
6343 h->chroma_pred_mode_table[mb_xy] = 0;
6344 // In deblocking, the quantizer is 0
6345 s->current_picture.qscale_table[mb_xy]= 0;
6346 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6347 // All coeffs are present
6348 memset(h->non_zero_count[mb_xy], 16, 16);
6349 s->current_picture.mb_type[mb_xy]= mb_type;
6354 h->ref_count[0] <<= 1;
6355 h->ref_count[1] <<= 1;
6358 fill_caches(h, mb_type, 0);
6360 if( IS_INTRA( mb_type ) ) {
6362 if( IS_INTRA4x4( mb_type ) ) {
6363 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6364 mb_type |= MB_TYPE_8x8DCT;
6365 for( i = 0; i < 16; i+=4 ) {
6366 int pred = pred_intra_mode( h, i );
6367 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6368 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6371 for( i = 0; i < 16; i++ ) {
6372 int pred = pred_intra_mode( h, i );
6373 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6375 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6378 write_back_intra_pred_mode(h);
6379 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6381 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6382 if( h->intra16x16_pred_mode < 0 ) return -1;
6384 h->chroma_pred_mode_table[mb_xy] =
6385 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6387 pred_mode= check_intra_pred_mode( h, pred_mode );
6388 if( pred_mode < 0 ) return -1;
6389 h->chroma_pred_mode= pred_mode;
6390 } else if( partition_count == 4 ) {
6391 int i, j, sub_partition_count[4], list, ref[2][4];
6393 if( h->slice_type == B_TYPE ) {
6394 for( i = 0; i < 4; i++ ) {
6395 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6396 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6397 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6399 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6400 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6401 pred_direct_motion(h, &mb_type);
6402 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6403 for( i = 0; i < 4; i++ )
6404 if( IS_DIRECT(h->sub_mb_type[i]) )
6405 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6409 for( i = 0; i < 4; i++ ) {
6410 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6411 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6412 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6416 for( list = 0; list < h->list_count; list++ ) {
6417 for( i = 0; i < 4; i++ ) {
6418 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6419 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6420 if( h->ref_count[list] > 1 )
6421 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6427 h->ref_cache[list][ scan8[4*i]+1 ]=
6428 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6433 dct8x8_allowed = get_dct8x8_allowed(h);
6435 for(list=0; list<h->list_count; list++){
6437 if(IS_DIRECT(h->sub_mb_type[i])){
6438 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6441 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6443 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6444 const int sub_mb_type= h->sub_mb_type[i];
6445 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6446 for(j=0; j<sub_partition_count[i]; j++){
6449 const int index= 4*i + block_width*j;
6450 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6451 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6452 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6454 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6455 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6456 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6458 if(IS_SUB_8X8(sub_mb_type)){
6460 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6462 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6465 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6467 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6468 }else if(IS_SUB_8X4(sub_mb_type)){
6469 mv_cache[ 1 ][0]= mx;
6470 mv_cache[ 1 ][1]= my;
6472 mvd_cache[ 1 ][0]= mx - mpx;
6473 mvd_cache[ 1 ][1]= my - mpy;
6474 }else if(IS_SUB_4X8(sub_mb_type)){
6475 mv_cache[ 8 ][0]= mx;
6476 mv_cache[ 8 ][1]= my;
6478 mvd_cache[ 8 ][0]= mx - mpx;
6479 mvd_cache[ 8 ][1]= my - mpy;
6481 mv_cache[ 0 ][0]= mx;
6482 mv_cache[ 0 ][1]= my;
6484 mvd_cache[ 0 ][0]= mx - mpx;
6485 mvd_cache[ 0 ][1]= my - mpy;
6488 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6489 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6490 p[0] = p[1] = p[8] = p[9] = 0;
6491 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6495 } else if( IS_DIRECT(mb_type) ) {
6496 pred_direct_motion(h, &mb_type);
6497 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6498 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6499 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6501 int list, mx, my, i, mpx, mpy;
6502 if(IS_16X16(mb_type)){
6503 for(list=0; list<h->list_count; list++){
6504 if(IS_DIR(mb_type, 0, list)){
6505 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6506 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6508 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
6510 for(list=0; list<h->list_count; list++){
6511 if(IS_DIR(mb_type, 0, list)){
6512 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6514 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6515 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6516 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6518 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6521 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6524 else if(IS_16X8(mb_type)){
6525 for(list=0; list<h->list_count; list++){
6527 if(IS_DIR(mb_type, i, list)){
6528 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6529 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6531 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6534 for(list=0; list<h->list_count; list++){
6536 if(IS_DIR(mb_type, i, list)){
6537 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6538 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6539 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6540 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6542 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6543 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6545 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6546 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6551 assert(IS_8X16(mb_type));
6552 for(list=0; list<h->list_count; list++){
6554 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6555 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6556 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6558 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6561 for(list=0; list<h->list_count; list++){
6563 if(IS_DIR(mb_type, i, list)){
6564 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6565 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6566 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6568 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6569 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6570 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6572 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6573 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6580 if( IS_INTER( mb_type ) ) {
6581 h->chroma_pred_mode_table[mb_xy] = 0;
6582 write_back_motion( h, mb_type );
6585 if( !IS_INTRA16x16( mb_type ) ) {
6586 cbp = decode_cabac_mb_cbp_luma( h );
6587 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6590 h->cbp_table[mb_xy] = h->cbp = cbp;
6592 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6593 if( decode_cabac_mb_transform_size( h ) )
6594 mb_type |= MB_TYPE_8x8DCT;
6596 s->current_picture.mb_type[mb_xy]= mb_type;
6598 if( cbp || IS_INTRA16x16( mb_type ) ) {
6599 const uint8_t *scan, *scan8x8, *dc_scan;
6602 if(IS_INTERLACED(mb_type)){
6603 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6604 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6605 dc_scan= luma_dc_field_scan;
6607 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6608 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6609 dc_scan= luma_dc_zigzag_scan;
6612 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6613 if( dqp == INT_MIN ){
6614 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6618 if(((unsigned)s->qscale) > 51){
6619 if(s->qscale<0) s->qscale+= 52;
6620 else s->qscale-= 52;
6622 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6624 if( IS_INTRA16x16( mb_type ) ) {
6626 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6627 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6630 for( i = 0; i < 16; i++ ) {
6631 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6632 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6636 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6640 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6641 if( cbp & (1<<i8x8) ) {
6642 if( IS_8x8DCT(mb_type) ) {
6643 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6644 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6647 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6648 const int index = 4*i8x8 + i4x4;
6649 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6651 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6653 //STOP_TIMER("decode_residual")
6656 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6657 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6664 for( c = 0; c < 2; c++ ) {
6665 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6666 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6673 for( c = 0; c < 2; c++ ) {
6674 for( i = 0; i < 4; i++ ) {
6675 const int index = 16 + 4 * c + i;
6676 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6677 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6682 uint8_t * const nnz= &h->non_zero_count_cache[0];
6683 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6684 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6687 uint8_t * const nnz= &h->non_zero_count_cache[0];
6688 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6689 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6690 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6691 h->last_qscale_diff = 0;
6694 s->current_picture.qscale_table[mb_xy]= s->qscale;
6695 write_back_non_zero_count(h);
6698 h->ref_count[0] >>= 1;
6699 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge.
 * pix points at the first sample right of the edge: the p-side samples are
 * pix[-1..-4], the q-side samples pix[0..3]; bS holds one boundary-strength
 * value per 4-row group, qp is the (already averaged) luma quantizer.
 */
6706 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6708 const int index_a = qp + h->slice_alpha_c0_offset;
/* threshold tables are biased by +52 so negative slice-level offsets still
 * index into them safely */
6709 const int alpha = (alpha_table+52)[index_a];
6710 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* bS < 4 path: clipped filtering via the DSP helper; tc = -1 marks a
 * 4-row group with bS == 0 that must be left untouched */
6715 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6716 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6718 /* 16px edge length, because bS=4 is triggered by being at
6719 * the edge of an intra MB, so all 4 bS are the same */
6720 for( d = 0; d < 16; d++ ) {
6721 const int p0 = pix[-1];
6722 const int p1 = pix[-2];
6723 const int p2 = pix[-3];
6725 const int q0 = pix[0];
6726 const int q1 = pix[1];
6727 const int q2 = pix[2];
/* filter only where the step across the edge looks like a blocking
 * artifact rather than a real image edge */
6729 if( FFABS( p0 - q0 ) < alpha &&
6730 FFABS( p1 - p0 ) < beta &&
6731 FFABS( q1 - q0 ) < beta ) {
/* small p0/q0 step: strong (deep, 3-sample) filtering is allowed */
6733 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6734 if( FFABS( p2 - p0 ) < beta)
6736 const int p3 = pix[-4];
6738 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6739 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6740 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6743 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6745 if( FFABS( q2 - q0 ) < beta)
6747 const int q3 = pix[3];
6749 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6750 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6751 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6754 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only smooth the two samples adjacent to the edge */
6758 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6759 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6761 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblock one vertical chroma edge via the DSP loop-filter helpers.
 * Same edge geometry as filter_mb_edgev but on a chroma plane.
 */
6767 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6769 const int index_a = qp + h->slice_alpha_c0_offset;
6770 const int alpha = (alpha_table+52)[index_a];
6771 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma clipping value is tc0 + 1; a value of 0 disables the group
 * (note the different sentinel from luma's -1) */
6776 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6777 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong filtering through the intra chroma variant */
6779 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the vertical left-edge of an MBAFF macroblock pair, one luma row
 * at a time.  Because neighbouring rows can belong to different left
 * macroblocks, there are 8 boundary strengths and 2 quantizers instead of
 * the usual 4 and 1, and filtering is done per row in scalar C rather than
 * through a DSP helper.
 */
6783 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6785 for( i = 0; i < 16; i++, pix += stride) {
6791 int bS_index = (i >> 1);
6794 bS_index |= (i & 1);
6797 if( bS[bS_index] == 0 ) {
/* field MBs: qp selected by top/bottom half (i>>3); frame MBs: the two
 * left neighbours alternate row by row (i&1) */
6801 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6802 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6803 alpha = (alpha_table+52)[index_a];
6804 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal clipped filtering of p0/q0 (and optionally p1/q1) */
6806 if( bS[bS_index] < 4 ) {
6807 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6808 const int p0 = pix[-1];
6809 const int p1 = pix[-2];
6810 const int p2 = pix[-3];
6811 const int q0 = pix[0];
6812 const int q1 = pix[1];
6813 const int q2 = pix[2];
6815 if( FFABS( p0 - q0 ) < alpha &&
6816 FFABS( p1 - p0 ) < beta &&
6817 FFABS( q1 - q0 ) < beta ) {
6821 if( FFABS( p2 - p0 ) < beta ) {
6822 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6825 if( FFABS( q2 - q0 ) < beta ) {
6826 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6830 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6831 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6832 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6833 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong intra-edge filtering (same sample math as
 * filter_mb_edgev, applied to a single row) */
6836 const int p0 = pix[-1];
6837 const int p1 = pix[-2];
6838 const int p2 = pix[-3];
6840 const int q0 = pix[0];
6841 const int q1 = pix[1];
6842 const int q2 = pix[2];
6844 if( FFABS( p0 - q0 ) < alpha &&
6845 FFABS( p1 - p0 ) < beta &&
6846 FFABS( q1 - q0 ) < beta ) {
6848 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6849 if( FFABS( p2 - p0 ) < beta)
6851 const int p3 = pix[-4];
6853 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6854 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6855 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6858 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6860 if( FFABS( q2 - q0 ) < beta)
6862 const int q3 = pix[3];
6864 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6865 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6866 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6869 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6873 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6874 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6876 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Chroma counterpart of filter_mb_mbaff_edgev: deblock the vertical
 * left-edge of an MBAFF pair on a chroma plane, 8 rows, per-row scalar
 * filtering with 2 quantizers.
 */
6881 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6883 for( i = 0; i < 8; i++, pix += stride) {
6891 if( bS[bS_index] == 0 ) {
/* field MBs: qp by top/bottom half (i>>2, chroma has 8 rows);
 * frame MBs: alternate left neighbours row by row (i&1) */
6895 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6896 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6897 alpha = (alpha_table+52)[index_a];
6898 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: clipped delta applied to p0/q0 only; chroma tc is tc0 + 1 */
6900 if( bS[bS_index] < 4 ) {
6901 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6902 const int p0 = pix[-1];
6903 const int p1 = pix[-2];
6904 const int q0 = pix[0];
6905 const int q1 = pix[1];
6907 if( FFABS( p0 - q0 ) < alpha &&
6908 FFABS( p1 - p0 ) < beta &&
6909 FFABS( q1 - q0 ) < beta ) {
6910 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6912 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6913 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6914 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong averaging filter on the edge-adjacent samples */
6917 const int p0 = pix[-1];
6918 const int p1 = pix[-2];
6919 const int q0 = pix[0];
6920 const int q1 = pix[1];
6922 if( FFABS( p0 - q0 ) < alpha &&
6923 FFABS( p1 - p0 ) < beta &&
6924 FFABS( q1 - q0 ) < beta ) {
6926 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6927 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6928 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge: same sample math as filter_mb_edgev,
 * but samples across the edge are addressed with +-pix_next (== stride)
 * instead of +-1, and the DSP vertical-loop-filter helper is used.
 */
6934 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6936 const int index_a = qp + h->slice_alpha_c0_offset;
6937 const int alpha = (alpha_table+52)[index_a];
6938 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6939 const int pix_next = stride;
/* bS < 4: clipped filtering via DSP; tc = -1 disables a bS==0 group */
6944 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6945 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6947 /* 16px edge length, see filter_mb_edgev */
6948 for( d = 0; d < 16; d++ ) {
6949 const int p0 = pix[-1*pix_next];
6950 const int p1 = pix[-2*pix_next];
6951 const int p2 = pix[-3*pix_next];
6952 const int q0 = pix[0];
6953 const int q1 = pix[1*pix_next];
6954 const int q2 = pix[2*pix_next];
6956 if( FFABS( p0 - q0 ) < alpha &&
6957 FFABS( p1 - p0 ) < beta &&
6958 FFABS( q1 - q0 ) < beta ) {
6960 const int p3 = pix[-4*pix_next];
6961 const int q3 = pix[ 3*pix_next];
/* small p0/q0 step: strong (deep) filtering permitted */
6963 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6964 if( FFABS( p2 - p0 ) < beta) {
6966 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6967 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6968 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6971 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6973 if( FFABS( q2 - q0 ) < beta) {
6975 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6976 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6977 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6980 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only smooth the two edge-adjacent samples */
6984 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6985 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6987 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one horizontal chroma edge via the DSP vertical-chroma helpers.
 * Horizontal counterpart of filter_mb_edgecv.
 */
6994 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6996 const int index_a = qp + h->slice_alpha_c0_offset;
6997 const int alpha = (alpha_table+52)[index_a];
6998 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma tc is tc0 + 1, 0 disables a group */
7003 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
7004 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4: strong intra chroma filtering */
7006 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking for one macroblock of a progressive frame.
 * Falls back to the full filter_mb() at the picture border (no left/top
 * neighbour shortcuts possible) or when the DSP boundary-strength helper
 * is unavailable.  MBAFF frames must never reach this path (assert below).
 */
7010 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7011 MpegEncContext * const s = &h->s;
7013 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
7015 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7016 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7019 assert(!FRAME_MBAFF);
7021 mb_xy = mb_x + mb_y*s->mb_stride;
7022 mb_type = s->current_picture.mb_type[mb_xy];
/* gather qp of this MB and of the left/top neighbours, then average across
 * each shared edge ((a+b+1)>>1) for both luma and chroma */
7023 qp = s->current_picture.qscale_table[mb_xy];
7024 qp0 = s->current_picture.qscale_table[mb_xy-1];
7025 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7026 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7027 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7028 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7029 qp0 = (qp + qp0 + 1) >> 1;
7030 qp1 = (qp + qp1 + 1) >> 1;
7031 qpc0 = (qpc + qpc0 + 1) >> 1;
7032 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this qp the alpha/beta thresholds make the filter a no-op, so the
 * whole macroblock can be skipped */
7033 qp_thresh = 15 - h->slice_alpha_c0_offset;
7034 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7035 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs: fixed strengths — 4 on the MB border, 3 on internal edges;
 * with 8x8 DCT only every other internal luma edge exists */
7038 if( IS_INTRA(mb_type) ) {
7039 int16_t bS4[4] = {4,4,4,4};
7040 int16_t bS3[4] = {3,3,3,3};
7041 if( IS_8x8DCT(mb_type) ) {
7042 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7043 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7044 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7045 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7047 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7048 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7049 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7050 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7051 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7052 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7053 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7054 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
7056 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7057 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7058 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7059 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7060 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7061 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7062 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7063 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MBs: compute bS[dir][edge][group] with the SIMD-friendly DSP
 * helper; bSv aliases the array as uint64_t per edge for fast
 * constant fills and all-zero tests */
7066 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7067 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 DCT with all luma 8x8 blocks coded: every active edge gets bS=2 */
7069 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7071 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the helper which edges can skip the mv-based
 * recheck, depending on the partitioning of this MB and its left
 * neighbour */
7073 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7074 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7075 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7076 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7078 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7079 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7080 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7081 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the shared outer edge */
7083 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7084 bSv[0][0] = 0x0004000400040004ULL;
7085 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7086 bSv[1][0] = 0x0004000400040004ULL;
/* apply one edge in direction dir (0=vertical, 1=horizontal); edge 0
 * uses the neighbour-averaged qp, inner edges use this MB's qp */
7088 #define FILTER(hv,dir,edge)\
7089 if(bSv[dir][edge]) {\
7090 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7092 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7093 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7099 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Full deblocking of one macroblock (luma + both chroma planes), including
 * the MBAFF special cases the fast path cannot handle: the mixed
 * field/frame left edge (8 bS, 2 qp) and the top edge of a frame MB above
 * a field pair, which must be filtered once per field.
 */
7118 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7119 MpegEncContext * const s = &h->s;
7120 const int mb_xy= mb_x + mb_y*s->mb_stride;
7121 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical mv threshold (2 instead of 4) */
7122 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7123 int first_vertical_edge_done = 0;
7125 /* FIXME: A given frame may occupy more than one position in
7126 * the reference list. So ref2frm should be populated with
7127 * frame numbers, not indices. */
/* indexed as ref2frm[ref+2]: the +2 bias maps the negative "unused /
 * unavailable" ref_cache values onto the leading -1 entries */
7128 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7129 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7131 //for sufficiently low qp, filtering wouldn't do anything
7132 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7134 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7135 int qp = s->current_picture.qscale_table[mb_xy];
7137 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7138 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7144 // left mb is in picture
7145 && h->slice_table[mb_xy-1] != 255
7146 // and current and left pair do not have the same interlaced type
7147 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7148 // and left mb is in the same slice if deblocking_filter == 2
7149 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7150 /* First vertical edge is different in MBAFF frames
7151 * There are 8 different bS to compute and 2 different Qp
7153 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7154 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7158 int mb_qp, mbn0_qp, mbn1_qp;
7160 first_vertical_edge_done = 1;
7162 if( IS_INTRA(mb_type) )
7163 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* per-row bS: pick the left neighbour for this row, then test intra /
 * non-zero coefficients */
7165 for( i = 0; i < 8; i++ ) {
7166 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7168 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7170 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7171 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7172 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* one averaged qp pair (luma + chroma) per left neighbour */
7179 mb_qp = s->current_picture.qscale_table[mb_xy];
7180 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7181 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7182 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7183 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7184 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7185 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7186 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7187 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7190 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7191 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7192 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7193 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7194 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7196 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7197 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbour across edge 0 (left for dir 0, top for dir 1);
 * start==1 skips edge 0 when that neighbour is outside any slice */
7200 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7201 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7202 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7204 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7205 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7206 // how often to recheck mv-based bS when iterating between edges
7207 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7208 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7209 // how often to recheck mv-based bS when iterating along each edge
7210 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the MBAFF left edge was already filtered above; skip its edge 0 */
7212 if (first_vertical_edge_done) {
7214 first_vertical_edge_done = 0;
7217 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7220 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7221 && !IS_INTERLACED(mb_type)
7222 && IS_INTERLACED(mbm_type)
7224 // This is a special case in the norm where the filtering must
7225 // be done twice (one each of the field) even if we are in a
7226 // frame macroblock.
7228 static const int nnz_idx[4] = {4,5,6,3};
7229 unsigned int tmp_linesize = 2 * linesize;
7230 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7231 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* once per field: derive bS against each field MB of the pair above */
7236 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7237 if( IS_INTRA(mb_type) ||
7238 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7239 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7241 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7242 for( i = 0; i < 4; i++ ) {
7243 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7244 mbn_nnz[nnz_idx[i]] != 0 )
7250 // Do not use s->qscale as luma quantizer because it has not the same
7251 // value in IPCM macroblocks.
7252 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7253 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7254 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7255 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7256 chroma_qp = ( h->chroma_qp +
7257 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7258 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7259 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular edge loop: edge 0 borders mbm, edges 1..3 are internal */
7266 for( edge = start; edge < edges; edge++ ) {
7267 /* mbn_xy: neighbor macroblock */
7268 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7269 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with 8x8 DCT only even-numbered internal edges exist */
7273 if( (edge&1) && IS_8x8DCT(mb_type) )
7276 if( IS_INTRA(mb_type) ||
7277 IS_INTRA(mbn_type) ) {
7280 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7281 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7290 bS[0] = bS[1] = bS[2] = bS[3] = value;
7295 if( edge & mask_edge ) {
7296 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7299 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7300 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* coarse path: large partitions let one ref/mv comparison decide the
 * bS of the whole edge */
7303 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7304 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7305 int bn_idx= b_idx - (dir ? 8:1);
7307 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7308 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7309 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7310 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7312 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* fine path: derive bS separately for each 4x4 block pair on the edge,
 * first from coded coefficients, then from ref/mv differences */
7318 for( i = 0; i < 4; i++ ) {
7319 int x = dir == 0 ? edge : i;
7320 int y = dir == 0 ? i : edge;
7321 int b_idx= 8 + 4 + x + 8*y;
7322 int bn_idx= b_idx - (dir ? 8:1);
7324 if( h->non_zero_count_cache[b_idx] != 0 ||
7325 h->non_zero_count_cache[bn_idx] != 0 ) {
7331 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7332 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7333 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7334 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7342 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7347 // Do not use s->qscale as luma quantizer because it has not the same
7348 // value in IPCM macroblocks.
7349 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7350 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7351 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7352 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* apply: luma on every edge, chroma only on even edges (half resolution) */
7354 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7355 if( (edge&1) == 0 ) {
7356 int chroma_qp = ( h->chroma_qp +
7357 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7358 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7359 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7362 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7363 if( (edge&1) == 0 ) {
7364 int chroma_qp = ( h->chroma_qp +
7365 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7366 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7367 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7374 static int decode_slice(H264Context *h){
7375 MpegEncContext * const s = &h->s;
7376 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7380 if( h->pps.cabac ) {
7384 align_get_bits( &s->gb );
7387 ff_init_cabac_states( &h->cabac);
7388 ff_init_cabac_decoder( &h->cabac,
7389 s->gb.buffer + get_bits_count(&s->gb)/8,
7390 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7391 /* calculate pre-state */
7392 for( i= 0; i < 460; i++ ) {
7394 if( h->slice_type == I_TYPE )
7395 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7397 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7400 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7402 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7407 int ret = decode_mb_cabac(h);
7409 //STOP_TIMER("decode_mb_cabac")
7411 if(ret>=0) hl_decode_mb(h);
7413 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7416 if(ret>=0) ret = decode_mb_cabac(h);
7418 if(ret>=0) hl_decode_mb(h);
7421 eos = get_cabac_terminate( &h->cabac );
7423 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7424 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7425 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7429 if( ++s->mb_x >= s->mb_width ) {
7431 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7438 if( eos || s->mb_y >= s->mb_height ) {
7439 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7440 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7447 int ret = decode_mb_cavlc(h);
7449 if(ret>=0) hl_decode_mb(h);
7451 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7453 ret = decode_mb_cavlc(h);
7455 if(ret>=0) hl_decode_mb(h);
7460 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7461 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7466 if(++s->mb_x >= s->mb_width){
7468 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7473 if(s->mb_y >= s->mb_height){
7474 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7476 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7477 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7481 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7488 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7489 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7490 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7491 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7495 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7504 for(;s->mb_y < s->mb_height; s->mb_y++){
7505 for(;s->mb_x < s->mb_width; s->mb_x++){
7506 int ret= decode_mb(h);
7511 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7512 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7517 if(++s->mb_x >= s->mb_width){
7519 if(++s->mb_y >= s->mb_height){
7520 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7521 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7525 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7532 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7533 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7534 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7538 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7545 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7548 return -1; //not reached
/* Parses a "user data unregistered" SEI payload.
 * Reads up to sizeof(user_data)-1 bytes from the bitstream, then scans the
 * text (past the 16-byte UUID) for an x264 version string so the decoder can
 * record h->x264_build and work around known encoder bugs. */
7551 static int decode_unregistered_user_data(H264Context *h, int size){
7552 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
7553 uint8_t user_data[16+256];
7559 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7560 user_data[i]= get_bits(&s->gb, 8);
/* user_data+16 skips the UUID; only the leading "x264 - core %d" is matched */
7564 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7565 if(e==1 && build>=0)
7566 h->x264_build= build;
7568 if(s->avctx->debug & FF_DEBUG_BUGS)
7569 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
7572 skip_bits(&s->gb, 8);
/* Parses SEI NAL units. Each SEI message carries a type and a size, both
 * coded as sequences of 0xFF bytes plus a terminator byte (values of 255
 * extend the running total). Unregistered user data is decoded; other
 * payload types are skipped. */
7577 static int decode_sei(H264Context *h){
7578 MpegEncContext * const s = &h->s;
7580 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payloadType: 255 means "add and continue" */
7585 type+= show_bits(&s->gb, 8);
7586 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payloadSize the same way */
7590 size+= show_bits(&s->gb, 8);
7591 }while(get_bits(&s->gb, 8) == 255);
7595 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled SEI payload: skip it wholesale */
7599 skip_bits(&s->gb, 8*size);
7602 //FIXME check bits here
7603 align_get_bits(&s->gb);
/* Parses HRD (hypothetical reference decoder) parameters from the VUI
 * (H.264 Annex E). All fields are read to keep the bitstream position
 * correct but none are stored — the decoder does not use HRD timing. */
7609 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7610 MpegEncContext * const s = &h->s;
7612 cpb_count = get_ue_golomb(&s->gb) + 1;
7613 get_bits(&s->gb, 4); /* bit_rate_scale */
7614 get_bits(&s->gb, 4); /* cpb_size_scale */
7615 for(i=0; i<cpb_count; i++){
7616 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7617 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7618 get_bits1(&s->gb); /* cbr_flag */
7620 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7621 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7622 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7623 get_bits(&s->gb, 5); /* time_offset_length */
/* Parses VUI (video usability information) from an SPS (H.264 Annex E).
 * Stores sample aspect ratio, timing info, and bitstream-restriction data
 * (num_reorder_frames) into *sps; most other fields are parsed and
 * discarded to keep the bit position correct. */
7626 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7627 MpegEncContext * const s = &h->s;
7628 int aspect_ratio_info_present_flag;
7629 unsigned int aspect_ratio_idc;
7630 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7632 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7634 if( aspect_ratio_info_present_flag ) {
7635 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7636 if( aspect_ratio_idc == EXTENDED_SAR ) {
7637 sps->sar.num= get_bits(&s->gb, 16);
7638 sps->sar.den= get_bits(&s->gb, 16);
/* otherwise look up one of the predefined aspect ratios */
7639 }else if(aspect_ratio_idc < 14){
7640 sps->sar= pixel_aspect[aspect_ratio_idc];
7642 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7649 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7651 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7652 get_bits1(&s->gb); /* overscan_appropriate_flag */
7655 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7656 get_bits(&s->gb, 3); /* video_format */
7657 get_bits1(&s->gb); /* video_full_range_flag */
7658 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7659 get_bits(&s->gb, 8); /* colour_primaries */
7660 get_bits(&s->gb, 8); /* transfer_characteristics */
7661 get_bits(&s->gb, 8); /* matrix_coefficients */
7665 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7666 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7667 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7670 sps->timing_info_present_flag = get_bits1(&s->gb);
7671 if(sps->timing_info_present_flag){
7672 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7673 sps->time_scale = get_bits_long(&s->gb, 32);
7674 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7677 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7678 if(nal_hrd_parameters_present_flag)
7679 decode_hrd_parameters(h, sps);
7680 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7681 if(vcl_hrd_parameters_present_flag)
7682 decode_hrd_parameters(h, sps);
7683 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7684 get_bits1(&s->gb); /* low_delay_hrd_flag */
7685 get_bits1(&s->gb); /* pic_struct_present_flag */
7687 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7688 if(sps->bitstream_restriction_flag){
7689 unsigned int num_reorder_frames;
7690 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7691 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7692 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7693 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7694 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7695 num_reorder_frames= get_ue_golomb(&s->gb);
7696 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity check before storing: >16 reorder frames is invalid */
7698 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7699 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7703 sps->num_reorder_frames= num_reorder_frames;
/* Parses one quantizer scaling list of 'size' (16 or 64) entries.
 * If the "present" bit is 0, the predicted list 'fallback_list' is used.
 * Otherwise deltas are read in zig-zag order; a first delta that makes the
 * first coefficient 0 selects the JVT default matrix 'jvt_list', and a
 * delta of 0 elsewhere repeats the previous value. */
7709 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7710 const uint8_t *jvt_list, const uint8_t *fallback_list){
7711 MpegEncContext * const s = &h->s;
7712 int i, last = 8, next = 8;
/* coefficients are transmitted in zig-zag scan order */
7713 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7714 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7715 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7717 for(i=0;i<size;i++){
/* delta is signed Exp-Golomb, accumulated modulo 256 */
7719 next = (last + get_se_golomb(&s->gb)) & 0xff;
7720 if(!i && !next){ /* matrix not written, we use the preset one */
7721 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value" for the rest of the run */
7724 last = factors[scan[i]] = next ? next : last;
/* Parses the full set of scaling matrices for an SPS (is_sps=1) or PPS
 * (is_sps=0). The fallback[] table implements the spec's prediction rules:
 * when a PPS omits a matrix and the SPS supplied one, the SPS matrix is
 * inherited; otherwise the JVT defaults apply. Within a set, each chroma
 * list falls back to the previously decoded list of the same class. */
7728 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7729 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7730 MpegEncContext * const s = &h->s;
/* PPS may inherit from the SPS only if the SPS actually carried matrices */
7731 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7732 const uint8_t *fallback[4] = {
7733 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7734 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7735 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7736 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7738 if(get_bits1(&s->gb)){
/* remember that this SPS carried matrices, for later PPS prediction */
7739 sps->scaling_matrix_present |= is_sps;
7740 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7741 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7742 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7743 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7744 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7745 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS or when the PPS enables 8x8 transform */
7746 if(is_sps || pps->transform_8x8_mode){
7747 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7748 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7750 } else if(fallback_sps) {
/* no matrices in this PPS: copy the SPS set wholesale */
7751 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7752 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parses a sequence parameter set (SPS) NAL unit into h->sps_buffer[sps_id].
 * Validates sps_id, POC cycle length, reference-frame count and picture
 * dimensions before storing them; invalid values are logged as errors. */
7756 static inline int decode_seq_parameter_set(H264Context *h){
7757 MpegEncContext * const s = &h->s;
7758 int profile_idc, level_idc;
7759 unsigned int sps_id, tmp, mb_width, mb_height;
7763 profile_idc= get_bits(&s->gb, 8);
7764 get_bits1(&s->gb); //constraint_set0_flag
7765 get_bits1(&s->gb); //constraint_set1_flag
7766 get_bits1(&s->gb); //constraint_set2_flag
7767 get_bits1(&s->gb); //constraint_set3_flag
7768 get_bits(&s->gb, 4); // reserved
7769 level_idc= get_bits(&s->gb, 8);
7770 sps_id= get_ue_golomb(&s->gb);
7772 if (sps_id >= MAX_SPS_COUNT){
7773 // ok it has gone out of hand, someone is sending us bad stuff.
7774 av_log(h->s.avctx, AV_LOG_ERROR, "illegal sps_id (%d)\n", sps_id);
7778 sps= &h->sps_buffer[ sps_id ];
7779 sps->profile_idc= profile_idc;
7780 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices */
7782 if(sps->profile_idc >= 100){ //high profile
7783 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7784 get_bits1(&s->gb); //residual_color_transform_flag
7785 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7786 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7787 sps->transform_bypass = get_bits1(&s->gb);
7788 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7790 sps->scaling_matrix_present = 0;
7792 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7793 sps->poc_type= get_ue_golomb(&s->gb);
/* picture order count: type 0 uses poc_lsb, type 1 uses explicit offsets */
7795 if(sps->poc_type == 0){ //FIXME #define
7796 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7797 } else if(sps->poc_type == 1){//FIXME #define
7798 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7799 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7800 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7801 tmp= get_ue_golomb(&s->gb);
/* bound check against the fixed-size offset_for_ref_frame array */
7803 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7804 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7807 sps->poc_cycle_length= tmp;
7809 for(i=0; i<sps->poc_cycle_length; i++)
7810 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7811 }else if(sps->poc_type != 2){
7812 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7816 tmp= get_ue_golomb(&s->gb);
7817 if(tmp > MAX_PICTURE_COUNT-2){
7818 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7820 sps->ref_frame_count= tmp;
7821 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7822 mb_width= get_ue_golomb(&s->gb) + 1;
7823 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_width/height against integer overflow before use */
7824 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7825 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7826 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7829 sps->mb_width = mb_width;
7830 sps->mb_height= mb_height;
7832 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7833 if(!sps->frame_mbs_only_flag)
7834 sps->mb_aff= get_bits1(&s->gb);
7838 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7840 #ifndef ALLOW_INTERLACE
7842 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7844 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7845 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7847 sps->crop= get_bits1(&s->gb);
7849 sps->crop_left = get_ue_golomb(&s->gb);
7850 sps->crop_right = get_ue_golomb(&s->gb);
7851 sps->crop_top = get_ue_golomb(&s->gb);
7852 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping shifts the picture origin, which is only partially supported */
7853 if(sps->crop_left || sps->crop_top){
7854 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7860 sps->crop_bottom= 0;
7863 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7864 if( sps->vui_parameters_present_flag )
7865 decode_vui_parameters(h, sps);
7867 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7868 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7869 sps_id, sps->profile_idc, sps->level_idc,
7871 sps->ref_frame_count,
7872 sps->mb_width, sps->mb_height,
7873 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7874 sps->direct_8x8_inference_flag ? "8B8" : "",
7875 sps->crop_left, sps->crop_right,
7876 sps->crop_top, sps->crop_bottom,
7877 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parses a picture parameter set (PPS) NAL unit into h->pps_buffer[pps_id].
 * bit_length is the RBSP size in bits, used to detect the optional trailing
 * High-profile fields (8x8 transform, scaling matrices, second chroma QP).
 * FMO (slice_group_count > 1) is recognized but not supported; the relevant
 * spec syntax tables are quoted inline below as a reminder. */
7883 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7884 MpegEncContext * const s = &h->s;
7885 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7888 if(pps_id>=MAX_PPS_COUNT){
7889 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
7892 pps = &h->pps_buffer[pps_id];
7894 tmp= get_ue_golomb(&s->gb);
7895 if(tmp>=MAX_SPS_COUNT){
7896 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7901 pps->cabac= get_bits1(&s->gb);
7902 pps->pic_order_present= get_bits1(&s->gb);
7903 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7904 if(pps->slice_group_count > 1 ){
7905 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7906 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7907 switch(pps->mb_slice_group_map_type){
7910 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7911 | run_length[ i ] |1 |ue(v) |
7916 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7918 | top_left_mb[ i ] |1 |ue(v) |
7919 | bottom_right_mb[ i ] |1 |ue(v) |
7927 | slice_group_change_direction_flag |1 |u(1) |
7928 | slice_group_change_rate_minus1 |1 |ue(v) |
7933 | slice_group_id_cnt_minus1 |1 |ue(v) |
7934 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7936 | slice_group_id[ i ] |1 |u(v) |
7941 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7942 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* clamp corrupt reference counts back to a safe value of 1 */
7943 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7944 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7945 pps->ref_count[0]= pps->ref_count[1]= 1;
7949 pps->weighted_pred= get_bits1(&s->gb);
7950 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7951 pps->init_qp= get_se_golomb(&s->gb) + 26;
7952 pps->init_qs= get_se_golomb(&s->gb) + 26;
7953 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7954 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7955 pps->constrained_intra_pred= get_bits1(&s->gb);
7956 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7958 pps->transform_8x8_mode= 0;
7959 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default scaling matrices: all-16 == flat (no scaling) */
7960 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7961 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* bits remain -> optional High-profile extension fields are present */
7963 if(get_bits_count(&s->gb) < bit_length){
7964 pps->transform_8x8_mode= get_bits1(&s->gb);
7965 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7966 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7969 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7970 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7971 pps_id, pps->sps_id,
7972 pps->cabac ? "CABAC" : "CAVLC",
7973 pps->slice_group_count,
7974 pps->ref_count[0], pps->ref_count[1],
7975 pps->weighted_pred ? "weighted" : "",
7976 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7977 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7978 pps->constrained_intra_pred ? "CONSTR" : "",
7979 pps->redundant_pic_cnt_present ? "REDU" : "",
7980 pps->transform_8x8_mode ? "8x8DCT" : ""
7988 * finds the end of the current frame in the bitstream.
7989 * @return the position of the first byte of the next frame, or -1
/* State-machine scan for NAL start codes across buffer boundaries:
 * pc->state persists between calls so a start code split over two input
 * chunks is still detected. A slice NAL (type 1/2/5) starts a frame;
 * SPS/PPS/SEI (type 7/8/9) after a started frame marks the frame end. */
7991 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7994 ParseContext *pc = &(h->s.parse_context);
7995 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7996 // mb_addr= pc->mb_addr - 1;
8001 for(i=0; i<buf_size; i++){
8003 for(; i<buf_size; i++){
/* start-code tracker: small-integer state machine over 0x00 runs and 0x01 */
8010 if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5
8011 else if(buf[i]) state = 7;
8012 else state>>=1; //2->1, 1->0, 0->0
/* low 5 bits of the NAL header byte = nal_unit_type */
8014 int v= buf[i] & 0x1F;
8015 if(v==7 || v==8 || v==9){
8016 if(pc->frame_start_found){
8020 pc->frame_start_found= 0;
8023 }else if(v==1 || v==2 || v==5){
8024 if(pc->frame_start_found){
8028 pc->frame_start_found = 1;
8038 return END_NOT_FOUND;
8041 #ifdef CONFIG_H264_PARSER
/* AVCodecParser callback: splits the input stream into complete frames.
 * With PARSER_FLAG_COMPLETE_FRAMES the input is passed through unchanged;
 * otherwise find_frame_end()/ff_combine_frame() assemble a full frame,
 * possibly spanning multiple calls. */
8042 static int h264_parse(AVCodecParserContext *s,
8043 AVCodecContext *avctx,
8044 uint8_t **poutbuf, int *poutbuf_size,
8045 const uint8_t *buf, int buf_size)
8047 H264Context *h = s->priv_data;
8048 ParseContext *pc = &h->s.parse_context;
8051 if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
8054 next= find_frame_end(h, buf, buf_size);
/* frame not complete yet: buffer the data and output nothing this call */
8056 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
/* frame boundary fell inside the combined buffer: replay those bytes
 * through find_frame_end so its persistent state stays consistent */
8062 if(next<0 && next != END_NOT_FOUND){
8063 assert(pc->last_index + next >= 0 );
8064 find_frame_end(h, &pc->buffer[pc->last_index + next], -next); //update state
8068 *poutbuf = (uint8_t *)buf;
8069 *poutbuf_size = buf_size;
/* Locates the split point between the stream's extradata (SPS/PPS headers)
 * and the first frame data, by scanning start codes with a 32-bit shift
 * register. Returns the offset of the first non-header NAL start code. */
8073 static int h264_split(AVCodecContext *avctx,
8074 const uint8_t *buf, int buf_size)
/* state holds the last 4 bytes seen; -1 == no start code possible yet */
8077 uint32_t state = -1;
8080 for(i=0; i<=buf_size; i++){
/* 0x107 == start code + NAL type 7 (SPS) */
8081 if((state&0xFFFFFF1F) == 0x107)
8083 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* any start code that is not SPS(7)/PPS(8)/SEI(9) ends the header area */
8085 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero padding preceding the start code */
8087 while(i>4 && buf[i-5]==0) i--;
8092 state= (state<<8) | buf[i];
/* Iterates over all NAL units in buf and dispatches each by nal_unit_type:
 * slices, data partitions (A/B/C), SEI, SPS, PPS, etc. Handles both AVC
 * (length-prefixed, h->is_avc) and Annex-B (start-code) framing.
 * Returns the number of bytes consumed. */
8098 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8099 MpegEncContext * const s = &h->s;
8100 AVCodecContext * const avctx= s->avctx;
8104 for(i=0; i<50; i++){
8105 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8108 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
8110 s->current_picture_ptr= NULL;
8121 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix */
8123 for(i = 0; i < h->nal_length_size; i++)
8124 nalsize = (nalsize << 8) | buf[buf_index++];
8125 if(nalsize <= 1 || nalsize > buf_size){
8130 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8135 // start code prefix search
8136 for(; buf_index + 3 < buf_size; buf_index++){
8137 // This should always succeed in the first iteration.
8138 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8142 if(buf_index+3 >= buf_size) break;
/* unescape the NAL (remove emulation-prevention bytes) into ptr */
8147 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8148 if (ptr==NULL || dst_length < 0){
8151 while(ptr[dst_length - 1] == 0 && dst_length > 0)
8153 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
8155 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8156 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8159 if (h->is_avc && (nalsize != consumed))
8160 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8162 buf_index += consumed;
/* optionally discard non-reference NALs when hurrying/skipping */
8164 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8165 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8168 switch(h->nal_unit_type){
8170 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8172 init_get_bits(&s->gb, ptr, bit_length);
8174 h->inter_gb_ptr= &s->gb;
8175 s->data_partitioning = 0;
8177 if(decode_slice_header(h) < 0){
8178 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8181 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* decode the slice only if it survives all skip/hurry filters */
8182 if(h->redundant_pic_count==0 && s->hurry_up < 5
8183 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8184 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8185 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8186 && avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + motion info */
8190 init_get_bits(&s->gb, ptr, bit_length);
8192 h->inter_gb_ptr= NULL;
8193 s->data_partitioning = 1;
8195 if(decode_slice_header(h) < 0){
8196 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* data partition B: intra residuals */
8200 init_get_bits(&h->intra_gb, ptr, bit_length);
8201 h->intra_gb_ptr= &h->intra_gb;
/* data partition C: inter residuals */
8204 init_get_bits(&h->inter_gb, ptr, bit_length);
8205 h->inter_gb_ptr= &h->inter_gb;
8207 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8208 && s->context_initialized
8210 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8211 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8212 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8213 && avctx->skip_frame < AVDISCARD_ALL)
8217 init_get_bits(&s->gb, ptr, bit_length);
8221 init_get_bits(&s->gb, ptr, bit_length);
8222 decode_seq_parameter_set(h);
8224 if(s->flags& CODEC_FLAG_LOW_DELAY)
8227 if(avctx->has_b_frames < 2)
8228 avctx->has_b_frames= !s->low_delay;
8231 init_get_bits(&s->gb, ptr, bit_length);
8233 decode_picture_parameter_set(h, bit_length);
8237 case NAL_END_SEQUENCE:
8238 case NAL_END_STREAM:
8239 case NAL_FILLER_DATA:
8241 case NAL_AUXILIARY_SLICE:
8244 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8252 * returns the number of bytes consumed for building the current frame
8254 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* in truncated mode part of the data came from the parse buffer, not buf */
8255 if(s->flags&CODEC_FLAG_TRUNCATED){
8256 pos -= s->parse_context.last_index;
8257 if(pos<0) pos=0; // FIXME remove (unneeded?)
8261 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8262 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback. Handles: flushing buffered (delayed)
 * pictures on buf_size==0, truncated-stream reassembly, one-time parsing of
 * avcC extradata for AVC-framed streams, the actual NAL decode via
 * decode_nal_units(), and finally reordering of decoded pictures into
 * display (POC) order before output. Returns bytes consumed or negative. */
8268 static int decode_frame(AVCodecContext *avctx,
8269 void *data, int *data_size,
8270 uint8_t *buf, int buf_size)
8272 H264Context *h = avctx->priv_data;
8273 MpegEncContext *s = &h->s;
8274 AVFrame *pict = data;
8277 s->flags= avctx->flags;
8278 s->flags2= avctx->flags2;
8280 /* no supplementary picture */
8281 if (buf_size == 0) {
8285 //FIXME factorize this with the output code below
/* end of stream: drain the delayed-picture queue, lowest POC first */
8286 out = h->delayed_pic[0];
8288 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8289 if(h->delayed_pic[i]->poc < out->poc){
8290 out = h->delayed_pic[i];
8294 for(i=out_idx; h->delayed_pic[i]; i++)
8295 h->delayed_pic[i] = h->delayed_pic[i+1];
8298 *data_size = sizeof(AVFrame);
8299 *pict= *(AVFrame*)out;
/* truncated input: accumulate until a full frame is available */
8305 if(s->flags&CODEC_FLAG_TRUNCATED){
8306 int next= find_frame_end(h, buf, buf_size);
8308 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8310 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* first call on an AVC stream: parse SPS/PPS out of the avcC extradata */
8313 if(h->is_avc && !h->got_avcC) {
8314 int i, cnt, nalsize;
8315 unsigned char *p = avctx->extradata;
8316 if(avctx->extradata_size < 7) {
8317 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8321 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8324 /* sps and pps in the avcC always have length coded with 2 bytes,
8325 so put a fake nal_length_size = 2 while parsing them */
8326 h->nal_length_size = 2;
8327 // Decode sps from avcC
8328 cnt = *(p+5) & 0x1f; // Number of sps
8330 for (i = 0; i < cnt; i++) {
8331 nalsize = AV_RB16(p) + 2;
8332 if(decode_nal_units(h, p, nalsize) < 0) {
8333 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8338 // Decode pps from avcC
8339 cnt = *(p++); // Number of pps
8340 for (i = 0; i < cnt; i++) {
8341 nalsize = AV_RB16(p) + 2;
8342 if(decode_nal_units(h, p, nalsize) != nalsize) {
8343 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8348 // Now store right nal length size, that will be use to parse all other nals
8349 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8350 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata: parse it once */
8354 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
8355 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8359 buf_index=decode_nal_units(h, buf, buf_size);
8363 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
8364 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
8368 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
8369 Picture *out = s->current_picture_ptr;
8370 Picture *cur = s->current_picture_ptr;
8371 Picture *prev = h->delayed_output_pic;
8372 int i, pics, cross_idr, out_of_order, out_idx;
8376 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8377 s->current_picture_ptr->pict_type= s->pict_type;
/* carry POC/frame_num state to the next picture */
8379 h->prev_frame_num_offset= h->frame_num_offset;
8380 h->prev_frame_num= h->frame_num;
8381 if(s->current_picture_ptr->reference){
8382 h->prev_poc_msb= h->poc_msb;
8383 h->prev_poc_lsb= h->poc_lsb;
8385 if(s->current_picture_ptr->reference)
8386 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8392 //FIXME do something with unavailable reference frames
8394 #if 0 //decode order
8395 *data_size = sizeof(AVFrame);
8397 /* Sort B-frames into display order */
8399 if(h->sps.bitstream_restriction_flag
8400 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8401 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8406 while(h->delayed_pic[pics]) pics++;
8408 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
8410 h->delayed_pic[pics++] = cur;
8411 if(cur->reference == 0)
8415 for(i=0; h->delayed_pic[i]; i++)
8416 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* candidate for output: the delayed picture with the lowest POC */
8419 out = h->delayed_pic[0];
8421 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8422 if(h->delayed_pic[i]->poc < out->poc){
8423 out = h->delayed_pic[i];
/* heuristics: grow has_b_frames when out-of-order output is detected */
8427 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8428 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8430 else if(prev && pics <= s->avctx->has_b_frames)
8432 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8434 ((!cross_idr && prev && out->poc > prev->poc + 2)
8435 || cur->pict_type == B_TYPE)))
8438 s->avctx->has_b_frames++;
8441 else if(out_of_order)
8444 if(out_of_order || pics > s->avctx->has_b_frames){
8445 for(i=out_idx; h->delayed_pic[i]; i++)
8446 h->delayed_pic[i] = h->delayed_pic[i+1];
8452 *data_size = sizeof(AVFrame);
8453 if(prev && prev != out && prev->reference == 1)
8454 prev->reference = 0;
8455 h->delayed_output_pic = out;
8459 *pict= *(AVFrame*)out;
8461 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8464 assert(pict->data[0] || !*data_size);
8465 ff_print_debug_info(s, pict);
8466 //printf("out %d\n", (int)pict->data[0]);
8469 /* Return the Picture timestamp as the frame number */
8470 /* we substract 1 because it is added on utils.c */
8471 avctx->frame_number = s->picture_number - 1;
8473 return get_consumed_bytes(s, buf_index, buf_size);
/* Fills h->mb_avail[] with availability flags for the macroblocks
 * neighbouring the current one (top-left, top, top-right, left). A
 * neighbour is available only if it exists inside the picture AND belongs
 * to the same slice (same h->slice_num in the slice_table). */
8476 static inline void fill_mb_avail(H264Context *h){
8477 MpegEncContext * const s = &h->s;
8478 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* top row neighbours: top-left, top, top-right */
8481 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8482 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8483 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* left neighbour */
8489 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8490 h->mb_avail[4]= 1; //FIXME move out
8491 h->mb_avail[5]= 0; //FIXME move out
8497 #define SIZE (COUNT*40)
8503 // int int_temp[10000];
8505 AVCodecContext avctx;
8507 dsputil_init(&dsp, &avctx);
8509 init_put_bits(&pb, temp, SIZE);
8510 printf("testing unsigned exp golomb\n");
8511 for(i=0; i<COUNT; i++){
8513 set_ue_golomb(&pb, i);
8514 STOP_TIMER("set_ue_golomb");
8516 flush_put_bits(&pb);
8518 init_get_bits(&gb, temp, 8*SIZE);
8519 for(i=0; i<COUNT; i++){
8522 s= show_bits(&gb, 24);
8525 j= get_ue_golomb(&gb);
8527 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8530 STOP_TIMER("get_ue_golomb");
8534 init_put_bits(&pb, temp, SIZE);
8535 printf("testing signed exp golomb\n");
8536 for(i=0; i<COUNT; i++){
8538 set_se_golomb(&pb, i - COUNT/2);
8539 STOP_TIMER("set_se_golomb");
8541 flush_put_bits(&pb);
8543 init_get_bits(&gb, temp, 8*SIZE);
8544 for(i=0; i<COUNT; i++){
8547 s= show_bits(&gb, 24);
8550 j= get_se_golomb(&gb);
8551 if(j != i - COUNT/2){
8552 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8555 STOP_TIMER("get_se_golomb");
8558 printf("testing 4x4 (I)DCT\n");
8561 uint8_t src[16], ref[16];
8562 uint64_t error= 0, max_error=0;
8564 for(i=0; i<COUNT; i++){
8566 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8567 for(j=0; j<16; j++){
8568 ref[j]= random()%255;
8569 src[j]= random()%255;
8572 h264_diff_dct_c(block, src, ref, 4);
8575 for(j=0; j<16; j++){
8576 // printf("%d ", block[j]);
8577 block[j]= block[j]*4;
8578 if(j&1) block[j]= (block[j]*4 + 2)/5;
8579 if(j&4) block[j]= (block[j]*4 + 2)/5;
8583 s->dsp.h264_idct_add(ref, block, 4);
8584 /* for(j=0; j<16; j++){
8585 printf("%d ", ref[j]);
8589 for(j=0; j<16; j++){
8590 int diff= FFABS(src[j] - ref[j]);
8593 max_error= FFMAX(max_error, diff);
8596 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8598 printf("testing quantizer\n");
8599 for(qp=0; qp<52; qp++){
8601 src1_block[i]= src2_block[i]= random()%255;
8605 printf("Testing NAL layer\n");
8607 uint8_t bitstream[COUNT];
8608 uint8_t nal[COUNT*2];
8610 memset(&h, 0, sizeof(H264Context));
8612 for(i=0; i<COUNT; i++){
8620 for(j=0; j<COUNT; j++){
8621 bitstream[j]= (random() % 255) + 1;
8624 for(j=0; j<zeros; j++){
8625 int pos= random() % COUNT;
8626 while(bitstream[pos] == 0){
8635 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8637 printf("encoding failed\n");
8641 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8645 if(out_length != COUNT){
8646 printf("incorrect length %d %d\n", out_length, COUNT);
8650 if(consumed != nal_length){
8651 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8655 if(memcmp(bitstream, out, COUNT)){
8656 printf("missmatch\n");
8661 printf("Testing RBSP\n");
/* AVCodec close callback: frees the RBSP unescape buffer and the
 * per-context decode tables. */
8669 static int decode_end(AVCodecContext *avctx)
8671 H264Context *h = avctx->priv_data;
8672 MpegEncContext *s = &h->s;
8674 av_freep(&h->rbsp_buffer);
8675 free_tables(h); //FIXME cleanup init stuff perhaps
8678 // memset(h, 0, sizeof(H264Context));
/* Decoder registration: ties the H264Context and its callbacks into
 * libavcodec. Capabilities: direct rendering, truncated-input handling,
 * and delayed (reordered) output. */
8684 AVCodec h264_decoder = {
8688 sizeof(H264Context),
8693 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8697 #ifdef CONFIG_H264_PARSER
8698 AVCodecParser h264_parser = {
8700 sizeof(H264Context),