/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
 */
32 #include "mpegvideo.h"

// Poison MpegEncContext field names that must not be used directly in this
// codec: the intra/interlaced information is carried in mb_type instead.
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type

// Block indices of the separate luma/chroma DC coefficient blocks
// (blocks 0..15 are luma 4x4, 16..23 chroma 4x4 — assumed; TODO confirm).
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26

// Table sizes (in bits) for the CAVLC lookup tables built at init time.
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6

// Maximum number of parameter sets that can be stored; used to size
// sps_buffer[] / pps_buffer[] in H264Context below.
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256

// Maximum number of memory management control operations per slice header;
// sizes the mmco[] array in H264Context.
57 #define MAX_MMCO_COUNT 66

59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
// When interlace support is compiled in, these macros read the live
// per-macroblock flags from the decoder context.
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
// NOTE(review): the #else branch of this conditional is not visible in this
// extract; the line below belongs to the interlace-disabled branch where the
// macros collapse to constants so the compiler can remove the MBAFF paths.
72 #define IS_INTERLACED(mb_type) 0
// NOTE(review): the "typedef struct SPS{" opener and the closing "}SPS;" are
// not visible in this extract; the lines below are the field list only.
// Field names follow the seq_parameter_set_rbsp() syntax element names of the
// H.264 specification, as the existing ///< comments indicate.
76 * Sequence parameter set
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
// VUI timing information (only meaningful if timing_info_present_flag is set)
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
// Quantization scaling matrices parsed from the SPS (4x4 and 8x8 lists)
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16];
113 uint8_t scaling_matrix8[2][64];
// NOTE(review): the "typedef struct PPS{" opener and the closing "}PPS;" are
// not visible in this extract; the lines below are the field list only.
// Field names follow the pic_parameter_set_rbsp() syntax element names.
117 * Picture parameter set
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
// Scaling matrices: per-PPS copies (may override the SPS ones — assumed;
// TODO confirm against the parser, which is not visible here).
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
// NOTE(review): the enumerator list of MMCOOpcode and the MMCO struct body
// are not visible in this extract — only the doc fragments and the enum
// opener survive. The MMCO type is used below for h->mmco[MAX_MMCO_COUNT].
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
// Main per-decoder state. NOTE(review): many fields and the closing
// "}H264Context;" are elided in this extract; comments below only describe
// what the visible declarations themselves establish.
164 typedef struct H264Context{
// NAL unit payload after emulation-prevention-byte removal (assumed from the
// name "rbsp" — TODO confirm against the unescape code, not visible here).
168 uint8_t *rbsp_buffer;
169 unsigned int rbsp_buffer_size;
172 * Used to parse AVC variant of h264
174 int is_avc; ///< this flag is != 0 if codec is avc1
175 int got_avcC; ///< flag used to parse avcC data only once
176 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
// Intra prediction state for the current macroblock
184 int chroma_pred_mode;
185 int intra16x16_pred_mode;
// 5x8 cache layout: current MB's 4x4 modes plus one row/column of neighbours
190 int8_t intra4x4_pred_mode_cache[5*8];
191 int8_t (*intra4x4_pred_mode)[8];
// Intra prediction function pointer tables, indexed by prediction mode
192 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
193 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
194 void (*pred8x8 [4+3])(uint8_t *src, int stride);
195 void (*pred16x16[4+3])(uint8_t *src, int stride);
// Bitmasks of which neighbouring samples are available for intra prediction
// (set up in fill_caches() below)
196 unsigned int topleft_samples_available;
197 unsigned int top_samples_available;
198 unsigned int topright_samples_available;
199 unsigned int left_samples_available;
200 uint8_t (*top_borders[2])[16+2*8];
201 uint8_t left_border[2*(17+2*9)];
204 * non zero coeff count cache.
205 * is 64 if not available.
207 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
208 uint8_t (*non_zero_count)[16];
211 * Motion vector cache.
213 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
214 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
// Sentinel ref_cache values (see fill_caches): -1 = list unused by neighbour,
// -2 = neighbour partition not available at all.
215 #define LIST_NOT_USED -1 //FIXME rename?
216 #define PART_NOT_AVAILABLE -2
219 * is 1 if the specific list MV&references are set to 0,0,-2.
221 int mv_cache_clean[2];
224 * number of neighbors (top and/or left) that used 8x8 dct
226 int neighbor_transform_size;
229 * block_offset[ 0..23] for frame macroblocks
230 * block_offset[24..47] for field macroblocks
232 int block_offset[2*(16+8)];
// Macroblock-index -> motion-vector-grid-index remap table
234 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
236 int b_stride; //FIXME use s->b4_stride
239 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
248 int unknown_svq3_flag;
249 int next_slice_index;
// Stored parameter sets, indexed by their id; sps/pps hold the active copies
251 SPS sps_buffer[MAX_SPS_COUNT];
252 SPS sps; ///< current sps
254 PPS pps_buffer[MAX_PPS_COUNT];
258 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
// Dequantization tables: one set of 16/64 coefficients per QP (0..51)
260 uint32_t dequant4_buffer[6][52][16];
261 uint32_t dequant8_buffer[2][52][64];
262 uint32_t (*dequant4_coeff[6])[16];
263 uint32_t (*dequant8_coeff[2])[64];
264 int dequant_coeff_pps; ///< reinit tables when pps changes
// Per-macroblock slice number, with a border so neighbour lookups of
// out-of-picture MBs are safe (see the +2*mb_stride+1 offset)
267 uint8_t *slice_table_base;
268 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
270 int slice_type_fixed;
272 //interlacing specific flags
274 int mb_field_decoding_flag;
275 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
277 unsigned int sub_mb_type[4];
// Picture order count (POC) derivation state
282 int delta_poc_bottom;
285 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
286 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
287 int frame_num_offset; ///< for POC type 2
288 int prev_frame_num_offset; ///< for POC type 2
289 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
292 * frame_num for frames or 2*frame_num for field pics.
297 * max_frame_num or 2*max_frame_num for field pics.
301 //Weighted pred stuff
303 int use_weight_chroma;
304 int luma_log2_weight_denom;
305 int chroma_log2_weight_denom;
// [list][ref index] weight/offset pairs; 48 = 16 frame + 32 field refs,
// matching ref_list[2][48] below
306 int luma_weight[2][48];
307 int luma_offset[2][48];
308 int chroma_weight[2][48][2];
309 int chroma_offset[2][48][2];
310 int implicit_weight[48][48];
// Deblocking filter parameters from the slice header
313 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
314 int slice_alpha_c0_offset;
315 int slice_beta_offset;
317 int redundant_pic_count;
// B-frame direct mode state
319 int direct_spatial_mv_pred;
320 int dist_scale_factor[16];
321 int dist_scale_factor_field[32];
322 int map_col_to_list0[2][16];
323 int map_col_to_list0_field[2][32];
326 * num_ref_idx_l0/1_active_minus1 + 1
328 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
329 unsigned int list_count;
// Reference picture management
330 Picture *short_ref[32];
331 Picture *long_ref[32];
332 Picture default_ref_list[2][32];
333 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
334 Picture *delayed_pic[18]; //FIXME size?
335 Picture *delayed_output_pic;
338 * memory management control operations buffer.
340 MMCO mmco[MAX_MMCO_COUNT];
343 int long_ref_count; ///< number of actual long term references
344 int short_ref_count; ///< number of actual short term references
// Separate bit readers for data-partitioned streams (intra/inter partitions)
347 GetBitContext intra_gb;
348 GetBitContext inter_gb;
349 GetBitContext *intra_gb_ptr;
350 GetBitContext *inter_gb_ptr;
// Residual coefficients for one macroblock; mb_padding guards overreads when
// mb is addressed through a uint8_t scantable (see comment below)
352 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
353 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb
// CABAC context state (460 contexts per the H.264 spec tables — assumed;
// TODO confirm)
359 uint8_t cabac_state[460];
362 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
367 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
368 uint8_t *chroma_pred_mode_table;
369 int last_qscale_diff;
// Motion vector differences (CABAC needs neighbouring mvd values)
370 int16_t (*mvd_table[2])[2];
371 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
372 uint8_t *direct_table;
373 uint8_t direct_cache[5*8];
// Coefficient scan orders; the _q0 pointers select the tables used when the
// scaling matrices are flat (qp==0 variants — assumed; TODO confirm)
375 uint8_t zigzag_scan[16];
376 uint8_t zigzag_scan8x8[64];
377 uint8_t zigzag_scan8x8_cavlc[64];
378 uint8_t field_scan[16];
379 uint8_t field_scan8x8[64];
380 uint8_t field_scan8x8_cavlc[64];
381 const uint8_t *zigzag_scan_q0;
382 const uint8_t *zigzag_scan8x8_q0;
383 const uint8_t *zigzag_scan8x8_cavlc_q0;
384 const uint8_t *field_scan_q0;
385 const uint8_t *field_scan8x8_q0;
386 const uint8_t *field_scan8x8_cavlc_q0;
// CAVLC code tables, shared by all decoder instances (presumably built once
// by an init function not visible in this extract — TODO confirm).
391 static VLC coeff_token_vlc[4];
392 static VLC chroma_dc_coeff_token_vlc;
394 static VLC total_zeros_vlc[15];
395 static VLC chroma_dc_total_zeros_vlc[3];
397 static VLC run_vlc[6];
// Forward declarations: SVQ3 variant transforms and the two deblocking-filter
// entry points (filter_mb_fast is presumably the common fast path, filter_mb
// the full/slow path — TODO confirm; definitions are not in this extract).
400 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
401 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
402 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
403 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Packs two 16-bit values into one uint32_t such that 'a' lands in the first
// 16 bits in memory and 'b' in the second, independent of host endianness
// (hence the swapped operand roles in the big-endian branch).
// NOTE(review): the "#else", "#endif" and closing "}" lines are not visible
// in this extract.
405 static av_always_inline uint32_t pack16to32(int a, int b){
406 #ifdef WORDS_BIGENDIAN
407 return (b&0xFFFF) + (a<<16);
409 return (a&0xFFFF) + (b<<16);
// Lookup tables for QP arithmetic over the valid range 0..51:
// ff_rem6[i] == i % 6 and ff_div6[i] == i / 6 (verified from the values),
// avoiding runtime division in the dequant paths.
// NOTE(review): the closing "};" of each initializer is not visible here.
413 const uint8_t ff_rem6[52]={
414 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
417 const uint8_t ff_div6[52]={
418 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Fills a w x h rectangle (w in units of 'size' bytes per element) with the
// value 'val', replicating it across bytes when size==1. Used to batch-set
// the 8-wide prediction caches (mv_cache, ref_cache, ...).
// NOTE(review): the selector lines (if(size==2), if(w==2/4/8/16), the h==N
// height checks, #else/#endif and the closing brace) are elided from this
// extract; only the store bodies of each branch remain visible.
// NOTE(review): the casts below type-pun through uint16/32/64 pointers —
// pre-dates strict-aliasing-clean idioms; alignment is guarded by the asserts.
424 * @param h height of the rectangle, should be a constant
425 * @param w width of the rectangle, should be a constant
426 * @param size the size of val (1 or 4), should be a constant
428 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
429 uint8_t *p= (uint8_t*)vp;
430 assert(size==1 || size==4);
// Destination must be aligned and stride a multiple of the row width so that
// the wide stores below cannot straddle rows.
436 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
437 assert((stride&(w-1))==0);
// --- 2-byte-wide rows: replicate a byte value into a uint16 if needed ---
439 const uint16_t v= size==4 ? val : val*0x0101;
440 *(uint16_t*)(p + 0*stride)= v;
442 *(uint16_t*)(p + 1*stride)= v;
444 *(uint16_t*)(p + 2*stride)=
445 *(uint16_t*)(p + 3*stride)= v;
// --- 4-byte-wide rows ---
447 const uint32_t v= size==4 ? val : val*0x01010101;
448 *(uint32_t*)(p + 0*stride)= v;
450 *(uint32_t*)(p + 1*stride)= v;
452 *(uint32_t*)(p + 2*stride)=
453 *(uint32_t*)(p + 3*stride)= v;
// --- 8-byte-wide rows: use 64-bit stores only where they are cheap ---
455 //gcc can't optimize 64bit math on x86_32
456 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
457 const uint64_t v= val*0x0100000001ULL;
458 *(uint64_t*)(p + 0*stride)= v;
460 *(uint64_t*)(p + 1*stride)= v;
462 *(uint64_t*)(p + 2*stride)=
463 *(uint64_t*)(p + 3*stride)= v;
// --- 16-byte-wide rows, 64-bit variant ---
465 const uint64_t v= val*0x0100000001ULL;
466 *(uint64_t*)(p + 0+0*stride)=
467 *(uint64_t*)(p + 8+0*stride)=
468 *(uint64_t*)(p + 0+1*stride)=
469 *(uint64_t*)(p + 8+1*stride)= v;
471 *(uint64_t*)(p + 0+2*stride)=
472 *(uint64_t*)(p + 8+2*stride)=
473 *(uint64_t*)(p + 0+3*stride)=
474 *(uint64_t*)(p + 8+3*stride)= v;
// --- 8-byte-wide rows, 32-bit fallback for targets without cheap 64-bit ---
476 *(uint32_t*)(p + 0+0*stride)=
477 *(uint32_t*)(p + 4+0*stride)= val;
479 *(uint32_t*)(p + 0+1*stride)=
480 *(uint32_t*)(p + 4+1*stride)= val;
482 *(uint32_t*)(p + 0+2*stride)=
483 *(uint32_t*)(p + 4+2*stride)=
484 *(uint32_t*)(p + 0+3*stride)=
485 *(uint32_t*)(p + 4+3*stride)= val;
// --- 16-byte-wide rows, 32-bit fallback ---
487 *(uint32_t*)(p + 0+0*stride)=
488 *(uint32_t*)(p + 4+0*stride)=
489 *(uint32_t*)(p + 8+0*stride)=
490 *(uint32_t*)(p +12+0*stride)=
491 *(uint32_t*)(p + 0+1*stride)=
492 *(uint32_t*)(p + 4+1*stride)=
493 *(uint32_t*)(p + 8+1*stride)=
494 *(uint32_t*)(p +12+1*stride)= val;
496 *(uint32_t*)(p + 0+2*stride)=
497 *(uint32_t*)(p + 4+2*stride)=
498 *(uint32_t*)(p + 8+2*stride)=
499 *(uint32_t*)(p +12+2*stride)=
500 *(uint32_t*)(p + 0+3*stride)=
501 *(uint32_t*)(p + 4+3*stride)=
502 *(uint32_t*)(p + 8+3*stride)=
503 *(uint32_t*)(p +12+3*stride)= val;
// fill_caches(): loads all neighbour information (intra modes, nnz counts,
// motion vectors, reference indices, mvd, direct flags) for the current
// macroblock into the 8-wide *_cache arrays, so the per-block decode code
// can index neighbours uniformly via scan8[]. for_deblock selects a reduced
// fill used by the deblocking filter.
// NOTE(review): many lines of this function (branch headers, MBAFF pairing
// cases, closing braces) are elided from this extract.
510 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
511 MpegEncContext * const s = &h->s;
512 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
513 int topleft_xy, top_xy, topright_xy, left_xy[2];
514 int topleft_type, top_type, topright_type, left_type[2];
518 //FIXME deblocking could skip the intra and nnz parts.
// Fast path for deblocking when the top neighbour is in the same slice
// (the elided branch presumably returns early here — TODO confirm).
519 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
522 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default (progressive) neighbour MB addresses.
524 top_xy = mb_xy - s->mb_stride;
525 topleft_xy = top_xy - 1;
526 topright_xy= top_xy + 1;
527 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF case: neighbours are derived per macroblock *pair*; each flag says
// whether the corresponding pair is coded frame (1) or field (0).
537 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
538 const int top_pair_xy = pair_xy - s->mb_stride;
539 const int topleft_pair_xy = top_pair_xy - 1;
540 const int topright_pair_xy = top_pair_xy + 1;
541 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
542 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
543 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
544 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
545 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
546 const int bottom = (s->mb_y & 1);
547 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// Each ?: below decides whether the neighbour address must move up one MB
// row, depending on frame/field pairing (the "if(...){" headers are elided).
549 ? !curr_mb_frame_flag // bottom macroblock
550 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
552 top_xy -= s->mb_stride;
555 ? !curr_mb_frame_flag // bottom macroblock
556 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
558 topleft_xy -= s->mb_stride;
561 ? !curr_mb_frame_flag // bottom macroblock
562 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
564 topright_xy -= s->mb_stride;
// Left neighbours: when pairing differs, both left_xy entries address the
// neighbouring pair and are then adjusted per field parity.
566 if (left_mb_frame_flag != curr_mb_frame_flag) {
567 left_xy[1] = left_xy[0] = pair_xy - 1;
568 if (curr_mb_frame_flag) {
589 left_xy[1] += s->mb_stride;
// Publish resolved neighbour addresses for later use (e.g. deblocking).
602 h->top_mb_xy = top_xy;
603 h->left_mb_xy[0] = left_xy[0];
604 h->left_mb_xy[1] = left_xy[1];
// Deblock variant: a slice_table value of 255 marks "no MB decoded there".
608 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
609 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
610 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// MBAFF deblock path: reload the *current* MB's own nnz bits and motion data
// from the frame-wide arrays into the caches (they were stored by a previous
// pass and may need frame<->field rescaling later).
612 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
// Luma nnz flags were packed into 16 bits at non_zero_count[mb_xy][14]
// by write_back_non_zero_count() below.
614 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
616 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
617 for(list=0; list<h->list_count; list++){
618 if(USES_LIST(mb_type,list)){
619 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
620 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
621 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
// Copy 4 rows of 4 MVs from the b-grid into the 8-wide cache rows.
622 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
// Replicate each 8x8-block ref index across its 2x2 cache cells
// (*0x0101 duplicates the packed pair vertically).
628 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
629 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
631 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
632 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
// List not used by this MB: clear MVs and mark refs as LIST_NOT_USED.
634 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
635 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Non-deblock path: a neighbour only counts if it belongs to the same slice.
640 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
641 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
642 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
643 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
644 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra part: compute which neighbouring sample groups are available for
// intra prediction (bitmask per 4x4 position) and load the neighbours'
// intra4x4 prediction modes into the cache.
647 if(IS_INTRA(mb_type)){
// Start from "everything available" and clear bits per missing neighbour.
648 h->topleft_samples_available=
649 h->top_samples_available=
650 h->left_samples_available= 0xFFFF;
651 h->topright_samples_available= 0xEEEA;
// With constrained_intra_pred, inter-coded neighbours also count as missing.
653 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
654 h->topleft_samples_available= 0xB3FF;
655 h->top_samples_available= 0x33FF;
656 h->topright_samples_available= 0x26EA;
// (elided loop over i for the two left half-MBs)
659 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
660 h->topleft_samples_available&= 0xDF5F;
661 h->left_samples_available&= 0x5F5F;
665 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
666 h->topleft_samples_available&= 0x7FFF;
668 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
669 h->topright_samples_available&= 0xFBFF;
// Load the top neighbour's bottom row of intra4x4 modes into cache row 0.
671 if(IS_INTRA4x4(mb_type)){
672 if(IS_INTRA4x4(top_type)){
673 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
674 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
675 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
676 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
// Top neighbour unusable: fill with a default pred value (computed in an
// elided line, presumably DC_PRED or -1 — TODO confirm).
679 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
684 h->intra4x4_pred_mode_cache[4+8*0]=
685 h->intra4x4_pred_mode_cache[5+8*0]=
686 h->intra4x4_pred_mode_cache[6+8*0]=
687 h->intra4x4_pred_mode_cache[7+8*0]= pred;
// Same for the left neighbours, via the left_block[] remap (handles MBAFF).
690 if(IS_INTRA4x4(left_type[i])){
691 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
692 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
695 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
700 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
701 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Non-zero-coefficient-count cache: copy the relevant edge entries of the
// top/left neighbours' nnz arrays into the 8-wide cache; unavailable
// neighbours get 0 (CABAC inter) or 64 ("not available" marker, CAVLC).
716 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Top neighbour available: bottom luma row + chroma entries.
718 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
719 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
720 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
721 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
723 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
724 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
726 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
727 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// Top neighbour missing: fill with the 0-or-64 default.
730 h->non_zero_count_cache[4+8*0]=
731 h->non_zero_count_cache[5+8*0]=
732 h->non_zero_count_cache[6+8*0]=
733 h->non_zero_count_cache[7+8*0]=
735 h->non_zero_count_cache[1+8*0]=
736 h->non_zero_count_cache[2+8*0]=
738 h->non_zero_count_cache[1+8*3]=
739 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Left neighbours (two halves for MBAFF), remapped through left_block[].
743 for (i=0; i<2; i++) {
745 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
746 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
747 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
748 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
750 h->non_zero_count_cache[3+8*1 + 2*8*i]=
751 h->non_zero_count_cache[3+8*2 + 2*8*i]=
752 h->non_zero_count_cache[0+8*1 + 8*i]=
753 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CABAC coded-block-pattern context from the neighbours (elided branch
// headers select between stored cbp_table values and intra defaults).
760 h->top_cbp = h->cbp_table[top_xy];
761 } else if(IS_INTRA(mb_type)) {
768 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
769 } else if(IS_INTRA(mb_type)) {
// Merge per-half luma cbp bits from the (possibly field) left neighbours.
775 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
778 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter part: populate mv_cache/ref_cache with the neighbours' motion
// vectors and reference indices for each active reference list.
783 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
785 for(list=0; list<h->list_count; list++){
// List unused and not needed for direct/deblock: skip (elided 'continue').
786 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
787 /*if(!h->mv_cache_clean[list]){
788 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
789 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
790 h->mv_cache_clean[list]= 1;
794 h->mv_cache_clean[list]= 0;
// Top neighbour: copy its bottom row of 4 MVs and 2 ref indices.
796 if(USES_LIST(top_type, list)){
797 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
798 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
799 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
800 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
803 h->ref_cache[list][scan8[0] + 0 - 1*8]=
804 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
805 h->ref_cache[list][scan8[0] + 2 - 1*8]=
806 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
// Top neighbour absent/unused: zero MVs, sentinel refs across the row.
808 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
809 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
812 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbours: two entries per half (i loop header elided).
816 int cache_idx = scan8[0] - 1 + i*2*8;
817 if(USES_LIST(left_type[i], list)){
818 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
819 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
820 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
821 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
822 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
823 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
825 *(uint32_t*)h->mv_cache [list][cache_idx ]=
826 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
827 h->ref_cache[list][cache_idx ]=
828 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Topleft/topright only matter for MV prediction, not deblock/temporal-direct.
832 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
835 if(USES_LIST(topleft_type, list)){
836 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
837 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
838 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
839 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
841 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
842 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
845 if(USES_LIST(topright_type, list)){
846 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
847 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
848 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
849 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
851 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
852 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Extra inner-edge cache cells used by the prediction code; initialised
// to "not available" here instead of per block.
855 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
858 h->ref_cache[list][scan8[5 ]+1] =
859 h->ref_cache[list][scan8[7 ]+1] =
860 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
861 h->ref_cache[list][scan8[4 ]] =
862 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
863 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
864 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
865 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
866 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
867 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC-only part (elided "if(h->pps.cabac)" header presumed above — TODO
// confirm): load neighbours' motion vector differences, mirroring the
// mv_cache fill above, plus the B-slice direct-mode flags.
870 /* XXX beurk, Load mvd */
871 if(USES_LIST(top_type, list)){
872 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
873 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
874 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
875 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
876 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
878 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
879 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
880 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
881 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
883 if(USES_LIST(left_type[0], list)){
884 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
885 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
886 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
888 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
889 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
891 if(USES_LIST(left_type[1], list)){
892 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
893 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
894 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
896 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
897 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
// Same extra inner-edge cells as for mv_cache, zeroed once here.
899 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
900 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
901 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
902 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
903 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache the per-8x8 "direct" flags of the neighbours.
905 if(h->slice_type == B_TYPE){
906 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
908 if(IS_DIRECT(top_type)){
909 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
910 }else if(IS_8X8(top_type)){
911 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
912 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
913 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
915 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
918 if(IS_DIRECT(left_type[0]))
919 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
920 else if(IS_8X8(left_type[0]))
921 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
923 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
925 if(IS_DIRECT(left_type[1]))
926 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
927 else if(IS_8X8(left_type[1]))
928 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
930 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame<->field rescaling of the cached neighbour data. MAP_MVS
// (opener elided) applies MAP_F2F to every neighbour cache cell; MAP_F2F is
// then defined twice — once for frame-coded current MB with field-coded
// neighbour (refs doubled, vertical MV/MVD halved) and once for the inverse.
936 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
937 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
938 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
939 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
940 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
941 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
942 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
943 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
944 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
945 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// Field -> frame direction: neighbour is frame-coded, current MB is field.
947 #define MAP_F2F(idx, mb_type)\
948 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
949 h->ref_cache[list][idx] <<= 1;\
950 h->mv_cache[list][idx][1] /= 2;\
951 h->mvd_cache[list][idx][1] /= 2;\
// Frame -> field direction: neighbour is field-coded, current MB is frame.
956 #define MAP_F2F(idx, mb_type)\
957 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
958 h->ref_cache[list][idx] >>= 1;\
959 h->mv_cache[list][idx][1] <<= 1;\
960 h->mvd_cache[list][idx][1] <<= 1;\
// Count how many of the top/left neighbours used the 8x8 transform; used to
// pick the CABAC context for transform_size_8x8_flag.
970 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Stores the current MB's 4x4 intra prediction modes from the 8-wide cache
// back into the frame-wide intra4x4_pred_mode array (right column and bottom
// row only — that is all later macroblocks need as neighbours).
// NOTE(review): the final store (index [7]) and closing brace are elided.
973 static inline void write_back_intra_pred_mode(H264Context *h){
974 MpegEncContext * const s = &h->s;
975 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
977 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
978 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
979 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
980 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
981 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
982 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
983 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
987 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Remaps each cached intra4x4 mode through the top[]/left[] tables when the
// corresponding neighbour samples are missing: -1 in the table means the
// mode is illegal without that neighbour (error logged, presumably returns
// -1 in an elided line — TODO confirm); other values substitute a legal mode.
989 static inline int check_intra4x4_pred_mode(H264Context *h){
990 MpegEncContext * const s = &h->s;
991 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
992 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Top row unavailable: fix the 4 modes in cache row 0 (loop header elided).
995 if(!(h->top_samples_available&0x8000)){
997 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
999 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1002 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Left column unavailable: fix the 4 modes in cache column 0.
1007 if(!(h->left_samples_available&0x8000)){
1009 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1011 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1014 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1020 } //FIXME cleanup like next
1023 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Same idea as check_intra4x4_pred_mode but for a single whole-MB (16x16 /
// chroma 8x8) prediction mode; returns the corrected mode, or an error
// (elided return lines — TODO confirm the exact return value).
1025 static inline int check_intra_pred_mode(H264Context *h, int mode){
1026 MpegEncContext * const s = &h->s;
1027 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1028 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
// Elided: range check on 'mode' precedes this log line.
1031 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1035 if(!(h->top_samples_available&0x8000)){
1038 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1043 if(!(h->left_samples_available&0x8000)){
1046 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1055 * gets the predicted intra4x4 prediction mode.
// H.264 rule: the predicted mode is min(left, top); if either neighbour is
// unavailable (negative cache value) the prediction falls back to DC_PRED.
// NOTE(review): the "return min;" and closing brace are elided here.
1057 static inline int pred_intra_mode(H264Context *h, int n){
1058 const int index8= scan8[n];
1059 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1060 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1061 const int min= FFMIN(left, top);
1063 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
1065 if(min<0) return DC_PRED;
/**
 * Copies the per-4x4-block non-zero coefficient counts from the decode-order
 * cache (non_zero_count_cache, 8-wide layout) back into the per-macroblock
 * non_zero_count[] array used by neighboring macroblocks.
 * NOTE(review): several lines (blank separators, FRAME_MBAFF guard, the
 * loop header for the deblocking bitmask) are elided from this chunk.
 */
1069 static inline void write_back_non_zero_count(H264Context *h){
1070 MpegEncContext * const s = &h->s;
1071 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Right column and bottom row of the luma cache (needed by the MB to the
 * right and below). */
1073 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1074 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1075 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1076 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1077 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1078 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1079 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma entries, cb then cr. */
1081 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1082 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1083 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1085 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1086 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1087 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1090 // store all luma nnzs, for deblocking
/* One bit per 4x4 luma block, packed into a 16-bit mask at index 14/15. */
1093 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1094 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1099 * gets the predicted number of non zero coefficients.
1100 * @param n block index
1102 static inline int pred_non_zero_count(H264Context *h, int n){
1103 const int index8= scan8[n];
1104 const int left= h->non_zero_count_cache[index8 - 1];
1105 const int top = h->non_zero_count_cache[index8 - 8];
1108 if(i<64) i= (i+1)>>1;
1110 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "C" (top-right diagonal) motion vector predictor for block i,
 * falling back to the top-left neighbor when top-right is unavailable.
 * Contains MBAFF special cases that rescale MVs between field and frame
 * macroblocks via SET_DIAG_MV.
 * NOTE(review): this chunk is elided; several enclosing conditions and the
 * MB_FIELD/mv declarations between the visible lines are not shown.
 * @return the reference index belonging to the returned *C
 */
1115 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1116 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1117 MpegEncContext *s = &h->s;
1119 /* there is no consistent mapping of mvs to neighboring locations that will
1120 * make mbaff happy, so we can't move all this logic to fill_caches */
1122 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds a rescaled MV so *C can point at it. */
1124 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1125 *C = h->mv_cache[list][scan8[0]-2];
1128 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1129 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1130 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: load an MV straight from the colocated picture data and
 * rescale its y component / ref index between field and frame units. */
1131 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1132 const int x4 = X4, y4 = Y4;\
1133 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1134 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1135 return LIST_NOT_USED;\
1136 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1137 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1138 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1139 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1141 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1144 if(topright_ref == PART_NOT_AVAILABLE
1145 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1146 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1148 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1149 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1152 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1153 && i >= scan8[0]+8){
1154 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1155 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use top-right if available, else top-left. */
1161 if(topright_ref != PART_NOT_AVAILABLE){
1162 *C= h->mv_cache[list][ i - 8 + part_width ];
1163 return topright_ref;
1165 tprintf(s->avctx, "topright MV not available\n");
1167 *C= h->mv_cache[list][ i - 8 - 1 ];
1168 return h->ref_cache[list][ i - 8 - 1 ];
1173 * gets the predicted MV.
1174 * @param n the block index
1175 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1176 * @param mx the x component of the predicted motion vector
1177 * @param my the y component of the predicted motion vector
/**
 * Gets the predicted MV for block n as the median (or sole match) of the
 * left (A), top (B) and diagonal (C) neighbor MVs, per the standard
 * median-MV prediction rule.
 * NOTE(review): this chunk is elided; the single-match branches and the
 * closing braces between the visible lines are not shown.
 * @param n the block index
 * @param part_width partition width in 4x4 units (1, 2 or 4)
 * @param mx/my out: predicted motion vector components
 */
1179 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1180 const int index8= scan8[n];
1181 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1182 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1183 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1184 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1186 int diagonal_ref, match_count;
1188 assert(part_width==1 || part_width==2 || part_width==4);
1198 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbors use the same reference picture. */
1199 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1200 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
1201 if(match_count > 1){ //most common
1202 *mx= mid_pred(A[0], B[0], C[0]);
1203 *my= mid_pred(A[1], B[1], C[1]);
1204 }else if(match_count==1){
1208 }else if(top_ref==ref){
/* No match: median of all three, unless only the left MB exists. */
1216 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1220 *mx= mid_pred(A[0], B[0], C[0]);
1221 *my= mid_pred(A[1], B[1], C[1]);
1225 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1229 * gets the directionally predicted 16x8 MV.
1230 * @param n the block index
1231 * @param mx the x component of the predicted motion vector
1232 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted 16x8 MV: the top (for the upper
 * partition) or left (for the lower partition) neighbor is used directly
 * when it has the same reference; otherwise falls back to pred_motion().
 * NOTE(review): the branch selecting on n and the direct-copy assignments
 * are elided from this chunk.
 * @param mx/my out: predicted motion vector components
 */
1234 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1236 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1237 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1239 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1247 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1248 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1250 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1252 if(left_ref == ref){
/* Fallback: generic median prediction over the full-width partition. */
1260 pred_motion(h, n, 4, list, ref, mx, my);
1264 * gets the directionally predicted 8x16 MV.
1265 * @param n the block index
1266 * @param mx the x component of the predicted motion vector
1267 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted 8x16 MV: the left (for the left
 * partition) or diagonal (for the right partition) neighbor is used
 * directly when it has the same reference; else falls back to pred_motion().
 * NOTE(review): the branch selecting on n and the direct-copy assignments
 * are elided from this chunk.
 * @param mx/my out: predicted motion vector components
 */
1269 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1271 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1272 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1274 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1276 if(left_ref == ref){
/* Right partition: its predictor comes from the top-right diagonal. */
1285 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1287 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1289 if(diagonal_ref == ref){
1297 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Gets the MV for a P-skip macroblock: zero when either neighbor is
 * unavailable or is a zero-MV block referencing picture 0, otherwise the
 * regular median prediction for a 16x16 partition with ref 0.
 * NOTE(review): the zero-assignment branch body is elided from this chunk.
 * @param mx/my out: predicted motion vector components
 */
1300 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1301 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1302 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1304 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1306 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1307 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1308 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1314 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factors, one per list-0
 * reference: dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023)
 * with tb/td the clipped POC distances (H.264 8.4.1.2.3).
 * NOTE(review): the FRAME_MBAFF guard around the field duplication loop is
 * elided from this chunk.
 */
1319 static inline void direct_dist_scale_factor(H264Context * const h){
1320 const int poc = h->s.current_picture_ptr->poc;
1321 const int poc1 = h->ref_list[1][0].poc;
1323 for(i=0; i<h->ref_count[0]; i++){
1324 int poc0 = h->ref_list[0][i].poc;
1325 int td = av_clip(poc1 - poc0, -128, 127);
1326 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1327 h->dist_scale_factor[i] = 256;
1329 int tb = av_clip(poc - poc0, -128, 127);
1330 int tx = (16384 + (FFABS(td) >> 1)) / td;
1331 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Duplicate each entry for the two fields of a field pair. */
1335 for(i=0; i<h->ref_count[0]; i++){
1336 h->dist_scale_factor_field[2*i] =
1337 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Stores the current picture's ref counts/POCs and, for temporal direct
 * mode, builds map_col_to_list0[]: for each reference of the colocated
 * (list-1[0]) picture, the index of the list-0 reference with matching POC.
 * NOTE(review): loop variable declarations, some braces and the FRAME_MBAFF
 * guard around the field mapping are elided from this chunk.
 */
1341 static inline void direct_ref_list_init(H264Context * const h){
1342 MpegEncContext * const s = &h->s;
1343 Picture * const ref1 = &h->ref_list[1][0];
1344 Picture * const cur = s->current_picture_ptr;
1346 if(cur->pict_type == I_TYPE)
1347 cur->ref_count[0] = 0;
1348 if(cur->pict_type != B_TYPE)
1349 cur->ref_count[1] = 0;
1350 for(list=0; list<2; list++){
1351 cur->ref_count[list] = h->ref_count[list];
1352 for(j=0; j<h->ref_count[list]; j++)
1353 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* Spatial direct (or non-B) needs no colocated-ref mapping. */
1355 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1357 for(list=0; list<2; list++){
1358 for(i=0; i<ref1->ref_count[list]; i++){
1359 const int poc = ref1->ref_poc[list][i];
1360 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1361 for(j=0; j<h->ref_count[list]; j++)
1362 if(h->ref_list[list][j].poc == poc){
1363 h->map_col_to_list0[list][i] = j;
/* Field variant: each frame index maps to a top/bottom field pair. */
1369 for(list=0; list<2; list++){
1370 for(i=0; i<ref1->ref_count[list]; i++){
1371 j = h->map_col_to_list0[list][i];
1372 h->map_col_to_list0_field[list][2*i] = 2*j;
1373 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives the motion vectors and reference indices for a B-direct
 * macroblock (or its direct 8x8 sub-blocks), filling mv_cache/ref_cache
 * and updating *mb_type / h->sub_mb_type.
 * Two modes: spatial direct (min-of-neighbors refs, colocated zero-MV
 * special cases) and temporal direct (colocated MVs scaled by
 * dist_scale_factor, with frame<->field rescaling for MBAFF).
 * NOTE(review): this chunk is heavily elided (blank lines, closing braces,
 * several declarations and conditions are missing); comments below annotate
 * only what the visible lines establish.
 */
1379 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1380 MpegEncContext * const s = &h->s;
1381 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1382 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1383 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* Colocated macroblock data from the first list-1 reference picture. */
1384 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1385 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1386 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1387 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1388 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1389 const int is_b8x8 = IS_8X8(*mb_type);
1390 unsigned int sub_mb_type;
1393 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Pick the partition sizes this direct MB will use, based on the
 * colocated MB type and direct_8x8_inference_flag. */
1394 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1395 /* FIXME save sub mb types from previous frames (or derive from MVs)
1396 * so we know exactly what block size to use */
1397 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1398 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1399 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1400 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1401 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1403 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1404 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1407 *mb_type |= MB_TYPE_DIRECT2;
1409 *mb_type |= MB_TYPE_INTERLACED;
1411 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1413 if(h->direct_spatial_mv_pred){
1418 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1420 /* ref = min(neighbors) */
1421 for(list=0; list<2; list++){
1422 int refa = h->ref_cache[list][scan8[0] - 1];
1423 int refb = h->ref_cache[list][scan8[0] - 8];
1424 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1426 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1428 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1430 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* Neither list has a valid neighbor ref: use ref 0 with zero MVs. */
1436 if(ref[0] < 0 && ref[1] < 0){
1437 ref[0] = ref[1] = 0;
1438 mv[0][0] = mv[0][1] =
1439 mv[1][0] = mv[1][1] = 0;
1441 for(list=0; list<2; list++){
1443 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1445 mv[list][0] = mv[list][1] = 0;
/* Drop the unused list from the (sub-)MB type flags. */
1450 *mb_type &= ~MB_TYPE_P0L1;
1451 sub_mb_type &= ~MB_TYPE_P0L1;
1452 }else if(ref[0] < 0){
1453 *mb_type &= ~MB_TYPE_P0L0;
1454 sub_mb_type &= ~MB_TYPE_P0L0;
1457 if(IS_16X16(*mb_type)){
1460 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1461 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* Colocated zero-MV special case (with x264 build <=33 workaround). */
1462 if(!IS_INTRA(mb_type_col)
1463 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1464 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1465 && (h->x264_build>33 || !h->x264_build)))){
1467 a= pack16to32(mv[0][0],mv[0][1]);
1469 b= pack16to32(mv[1][0],mv[1][1]);
1471 a= pack16to32(mv[0][0],mv[0][1]);
1472 b= pack16to32(mv[1][0],mv[1][1]);
1474 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1475 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 sub-block path of spatial direct. */
1477 for(i8=0; i8<4; i8++){
1478 const int x8 = i8&1;
1479 const int y8 = i8>>1;
1481 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1483 h->sub_mb_type[i8] = sub_mb_type;
1485 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1486 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1487 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1488 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1491 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1492 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1493 && (h->x264_build>33 || !h->x264_build)))){
1494 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1495 if(IS_SUB_8X8(sub_mb_type)){
1496 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1497 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1499 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1501 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1504 for(i4=0; i4<4; i4++){
1505 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1506 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1508 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1510 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1516 }else{ /* direct temporal mv pred */
1517 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1518 const int *dist_scale_factor = h->dist_scale_factor;
1521 if(IS_INTERLACED(*mb_type)){
1522 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1523 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1524 dist_scale_factor = h->dist_scale_factor_field;
/* MBAFF: current MB and colocated MB differ in field/frame coding,
 * so the colocated data must be rescaled. */
1526 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1527 /* FIXME assumes direct_8x8_inference == 1 */
1528 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1529 int mb_types_col[2];
1532 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1533 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1534 | (*mb_type & MB_TYPE_INTERLACED);
1535 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1537 if(IS_INTERLACED(*mb_type)){
1538 /* frame to field scaling */
1539 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1540 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1542 l1ref0 -= 2*h->b8_stride;
1543 l1ref1 -= 2*h->b8_stride;
1544 l1mv0 -= 4*h->b_stride;
1545 l1mv1 -= 4*h->b_stride;
1549 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1550 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1552 *mb_type |= MB_TYPE_16x8;
1554 *mb_type |= MB_TYPE_8x8;
1556 /* field to frame scaling */
1557 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1558 * but in MBAFF, top and bottom POC are equal */
1559 int dy = (s->mb_y&1) ? 1 : 2;
1561 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1562 l1ref0 += dy*h->b8_stride;
1563 l1ref1 += dy*h->b8_stride;
1564 l1mv0 += 2*dy*h->b_stride;
1565 l1mv1 += 2*dy*h->b_stride;
1568 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1570 *mb_type |= MB_TYPE_16x16;
1572 *mb_type |= MB_TYPE_8x8;
/* Per-8x8 temporal scaling with y_shift field/frame compensation. */
1575 for(i8=0; i8<4; i8++){
1576 const int x8 = i8&1;
1577 const int y8 = i8>>1;
1579 const int16_t (*l1mv)[2]= l1mv0;
1581 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1583 h->sub_mb_type[i8] = sub_mb_type;
1585 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1586 if(IS_INTRA(mb_types_col[y8])){
1587 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1588 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1589 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1593 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1595 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1597 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1600 scale = dist_scale_factor[ref0];
1601 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1604 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1605 int my_col = (mv_col[1]<<y_shift)/2;
1606 int mx = (scale * mv_col[0] + 128) >> 8;
1607 int my = (scale * my_col + 128) >> 8;
1608 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1609 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1616 /* one-to-one mv scaling */
/* Same-parity case: scale colocated MVs directly, no y rescaling. */
1618 if(IS_16X16(*mb_type)){
1621 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1622 if(IS_INTRA(mb_type_col)){
1625 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1626 : map_col_to_list0[1][l1ref1[0]];
1627 const int scale = dist_scale_factor[ref0];
1628 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1630 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1631 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1633 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1634 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1636 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1637 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1638 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1640 for(i8=0; i8<4; i8++){
1641 const int x8 = i8&1;
1642 const int y8 = i8>>1;
1644 const int16_t (*l1mv)[2]= l1mv0;
1646 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1648 h->sub_mb_type[i8] = sub_mb_type;
1649 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1650 if(IS_INTRA(mb_type_col)){
1651 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1652 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1653 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1657 ref0 = l1ref0[x8 + y8*h->b8_stride];
1659 ref0 = map_col_to_list0[0][ref0];
1661 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1664 scale = dist_scale_factor[ref0];
1666 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1667 if(IS_SUB_8X8(sub_mb_type)){
1668 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1669 int mx = (scale * mv_col[0] + 128) >> 8;
1670 int my = (scale * mv_col[1] + 128) >> 8;
1671 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1672 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1674 for(i4=0; i4<4; i4++){
1675 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1676 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1677 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1678 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1679 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1680 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Writes the cached motion vectors, MVD values (CABAC), reference indices
 * and (for B CABAC) the direct-mode flags of the current macroblock back
 * into the picture-wide arrays.
 * NOTE(review): loop headers, some guards and closing braces are elided
 * from this chunk.
 */
1687 static inline void write_back_motion(H264Context *h, int mb_type){
1688 MpegEncContext * const s = &h->s;
1689 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1690 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* List 0 unused: mark its ref indices so the deblocker sees them. */
1693 if(!USES_LIST(mb_type, 0))
1694 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1696 for(list=0; list<h->list_count; list++){
1698 if(!USES_LIST(mb_type, list))
/* Copy 4 MVs (two 64-bit stores) per row from the cache. */
1702 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1703 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1705 if( h->pps.cabac ) {
1706 if(IS_SKIP(mb_type))
1707 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1710 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1711 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One ref index per 8x8 quadrant. */
1716 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1717 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1718 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1719 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1720 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1724 if(h->slice_type == B_TYPE && h->pps.cabac){
1725 if(IS_8X8(mb_type)){
1726 uint8_t *direct_table = &h->direct_table[b8_xy];
1727 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1728 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1729 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1735 * Decodes a network abstraction layer unit.
1736 * @param consumed is the number of bytes used as input
1737 * @param length is the length of the array
1738 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1739 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a NAL unit: parses nal_ref_idc / nal_unit_type from the header
 * byte, then removes 00 00 03 emulation-prevention escapes into
 * h->rbsp_buffer (or returns src+1 directly when no escapes exist).
 * NOTE(review): the escape-scan loop structure and several statements are
 * elided from this chunk; the allocation result of av_fast_realloc is not
 * visibly checked here.
 * @param consumed out: number of input bytes used
 * @param length length of the input array
 * @param dst_length out: number of decoded bytes
 * @return pointer to the decoded (unescaped) bytes
 */
1741 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1745 // src[0]&0x80; //forbidden bit
1746 h->nal_ref_idc= src[0]>>5;
1747 h->nal_unit_type= src[0]&0x1F;
1751 for(i=0; i<length; i++)
1752 printf("%2X ", src[i]);
/* First pass: find the first 00 00 {00,01,02,03} pattern, stepping by 2. */
1754 for(i=0; i+1<length; i+=2){
1755 if(src[i]) continue;
1756 if(i>0 && src[i-1]==0) i--;
1757 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1759 /* startcode, so we must be past the end */
1766 if(i>=length-1){ //no escaped 0
1767 *dst_length= length;
1768 *consumed= length+1; //+1 for the header
1772 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1773 dst= h->rbsp_buffer;
1779 //printf("decoding esc\n");
/* Second pass: copy while dropping the 0x03 emulation bytes. */
1782 //remove escapes (very rare 1:2^22)
1783 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1784 if(src[si+2]==3){ //escape
1789 }else //next start code
1793 dst[di++]= src[si++];
1797 *consumed= si + 1;//+1 for the header
1798 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1803 * identifies the exact end of the bitstream
1804 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream by locating the rbsp trailing
 * bits in the last byte.
 * NOTE(review): the body of this function is almost entirely elided from
 * this chunk; only the signature and a trace line are visible.
 * @return the length of the trailing bits, or 0 if damaged (per the
 *         original doc comment above)
 */
1806 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1810 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1820 * idct transforms the 16 dc values and dequantizes them.
1821 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequantization of the 16 luma DC values,
 * done as a column pass into temp[] followed by a row pass writing the
 * scaled results back to the strided block layout.
 * NOTE(review): the loop headers and the temp[] stores of the first pass
 * are elided from this chunk.
 * @param qmul dequantization multiplier; results are (x*qmul + 128) >> 8
 */
1823 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1826 int temp[16]; //FIXME check if this is a good idea
/* DC positions inside the strided 16x16 coefficient layout. */
1827 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1828 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1830 //memset(block, 64, 2*256);
/* Pass 1 (columns): butterfly into z0..z3. */
1833 const int offset= y_offset[i];
1834 const int z0= block[offset+stride*0] + block[offset+stride*4];
1835 const int z1= block[offset+stride*0] - block[offset+stride*4];
1836 const int z2= block[offset+stride*1] - block[offset+stride*5];
1837 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Pass 2 (rows): butterfly temp[] and write dequantized DCs back. */
1846 const int offset= x_offset[i];
1847 const int z0= temp[4*0+i] + temp[4*2+i];
1848 const int z1= temp[4*0+i] - temp[4*2+i];
1849 const int z2= temp[4*1+i] - temp[4*3+i];
1850 const int z3= temp[4*1+i] + temp[4*3+i];
1852 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1853 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1854 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1855 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1861 * dct transforms the 16 dc values.
1862 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side):
 * column pass into temp[], row pass writing (x)>>1 results back into the
 * strided block layout. No quantization is applied here.
 * NOTE(review): the loop headers and the temp[] stores of the first pass
 * are elided from this chunk.
 */
1864 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1865 // const int qmul= dequant_coeff[qp][0];
1867 int temp[16]; //FIXME check if this is a good idea
1868 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1869 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Pass 1 (columns). */
1872 const int offset= y_offset[i];
1873 const int z0= block[offset+stride*0] + block[offset+stride*4];
1874 const int z1= block[offset+stride*0] - block[offset+stride*4];
1875 const int z2= block[offset+stride*1] - block[offset+stride*5];
1876 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Pass 2 (rows), with >>1 normalization. */
1885 const int offset= x_offset[i];
1886 const int z0= temp[4*0+i] + temp[4*2+i];
1887 const int z1= temp[4*0+i] - temp[4*2+i];
1888 const int z2= temp[4*1+i] - temp[4*3+i];
1889 const int z3= temp[4*1+i] + temp[4*3+i];
1891 block[stride*0 +offset]= (z0 + z3)>>1;
1892 block[stride*2 +offset]= (z1 + z2)>>1;
1893 block[stride*8 +offset]= (z1 - z2)>>1;
1894 block[stride*10+offset]= (z0 - z3)>>1;
1902 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1903 const int stride= 16*2;
1904 const int xStride= 16;
1907 a= block[stride*0 + xStride*0];
1908 b= block[stride*0 + xStride*1];
1909 c= block[stride*1 + xStride*0];
1910 d= block[stride*1 + xStride*1];
1917 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1918 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1919 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1920 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1924 static void chroma_dc_dct_c(DCTELEM *block){
1925 const int stride= 16*2;
1926 const int xStride= 16;
1929 a= block[stride*0 + xStride*0];
1930 b= block[stride*0 + xStride*1];
1931 c= block[stride*1 + xStride*0];
1932 d= block[stride*1 + xStride*1];
1939 block[stride*0 + xStride*0]= (a+c);
1940 block[stride*0 + xStride*1]= (e+b);
1941 block[stride*1 + xStride*0]= (a-c);
1942 block[stride*1 + xStride*1]= (e-b);
1947 * gets the chroma qp.
1949 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1951 return chroma_qp[av_clip(qscale + chroma_qp_index_offset, 0, 51)];
1954 //FIXME need to check that this doesn't overflow signed 32 bit for low qp; I am not sure, it's very close
1955 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of coefficients (encoder side) using quant_coeff
 * tables with intra/inter dead-zone bias; the unsigned threshold compare
 * zeroes small levels in a single branch. Returns the index of the last
 * non-zero coefficient in scan order.
 * NOTE(review): loop headers, the separate_dc branch structure and closing
 * braces are elided from this chunk.
 * @param separate_dc nonzero to quantize block[0] with a DC-specific shift
 */
1956 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1958 const int * const quant_table= quant_coeff[qscale];
/* Dead-zone bias: 1/3 of a step intra, 1/6 inter. */
1959 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1960 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1961 const unsigned int threshold2= (threshold1<<1);
/* Luma DC path: wider effective step via QUANT_SHIFT-2. */
1967 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1968 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1969 const unsigned int dc_threshold2= (dc_threshold1<<1);
1971 int level= block[0]*quant_coeff[qscale+18][0];
1972 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1974 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1977 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1980 // last_non_zero = i;
/* Chroma DC path: narrower effective step via QUANT_SHIFT+1. */
1985 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1986 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1987 const unsigned int dc_threshold2= (dc_threshold1<<1);
1989 int level= block[0]*quant_table[0];
1990 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1992 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1995 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1998 // last_non_zero = i;
/* AC coefficients, visited in scan order. */
2011 const int j= scantable[i];
2012 int level= block[j]*quant_table[j];
2014 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2015 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2016 if(((unsigned)(level+threshold1))>threshold2){
2018 level= (bias + level)>>QUANT_SHIFT;
2021 level= (bias - level)>>QUANT_SHIFT;
2030 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicates the 4 samples directly above
 * the block into all 4 rows with single 32-bit loads/stores.
 * NOTE(review): the closing brace was lost in this chunk's truncation and
 * is restored here.
 * @param src      top-left sample of the 4x4 block; src[-stride..] must be
 *                 readable and src 32-bit aligned (file-wide convention)
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    ((uint32_t*)(src+0*stride))[0]= a;
    ((uint32_t*)(src+1*stride))[0]= a;
    ((uint32_t*)(src+2*stride))[0]= a;
    ((uint32_t*)(src+3*stride))[0]= a;
}
/**
 * 4x4 horizontal intra prediction: each row is filled with the sample to
 * its immediate left, splatted across 4 bytes via a 0x01010101 multiply.
 * Fix: the multiplier is now unsigned (0x01010101U) — with a signed int
 * constant, left samples >= 128 overflow signed int, which is undefined
 * behavior; pred4x4_128_dc_c below already uses the unsigned form.
 * Also restores the closing brace lost in this chunk's truncation.
 * @param src      top-left sample of the 4x4 block; src[-1+...] must be
 *                 readable and src 32-bit aligned
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 DC intra prediction: fills the block with the rounded average of the
 * 4 top and 4 left neighbor samples.
 * Fix: the splat multiplier is now unsigned (0x01010101U) — dc can reach
 * 255, and 255*0x01010101 overflows signed int (undefined behavior);
 * pred4x4_128_dc_c below already uses the unsigned form. Also restores the
 * closing brace lost in this chunk's truncation.
 * @param src      top-left sample of the 4x4 block; top row and left column
 *                 neighbors must be readable, src 32-bit aligned
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 left-DC intra prediction: fills the block with the rounded average
 * of the 4 left neighbor samples (used when the top edge is unavailable).
 * Fix: unsigned splat multiplier (0x01010101U) to avoid signed-int
 * overflow UB when dc >= 128, matching pred4x4_128_dc_c below. Also
 * restores the closing brace lost in this chunk's truncation.
 * @param src      top-left sample of the 4x4 block; left column must be
 *                 readable, src 32-bit aligned
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 top-DC intra prediction: fills the block with the rounded average
 * of the 4 top neighbor samples (used when the left edge is unavailable).
 * Fix: unsigned splat multiplier (0x01010101U) to avoid signed-int
 * overflow UB when dc >= 128, matching pred4x4_128_dc_c below. Also
 * restores the closing brace lost in this chunk's truncation.
 * @param src      top-left sample of the 4x4 block; top row must be
 *                 readable, src 32-bit aligned
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 DC intra prediction with no available neighbors: fills the block
 * with the mid-gray value 128 (already using an unsigned splat constant).
 * NOTE(review): the closing brace was lost in this chunk's truncation and
 * is restored here.
 * @param src      top-left sample of the 4x4 block; must be 32-bit aligned
 * @param topright unused for this mode
 * @param stride   line size in bytes
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
}
/* Neighbor-loading helpers for the 4x4 spatial predictors below: each
 * declares four const ints (t4..t7 top-right, l0..l3 left, t0..t3 top)
 * from the samples around src.
 * NOTE(review): the blank separator lines after each macro were lost in
 * this chunk, so the trailing backslashes run the macros together as
 * shown; no comments are inserted between the continuation lines. */
2084 #define LOAD_TOP_RIGHT_EDGE\
2085 const int t4= topright[0];\
2086 const int t5= topright[1];\
2087 const int t6= topright[2];\
2088 const int t7= topright[3];\
2090 #define LOAD_LEFT_EDGE\
2091 const int l0= src[-1+0*stride];\
2092 const int l1= src[-1+1*stride];\
2093 const int l2= src[-1+2*stride];\
2094 const int l3= src[-1+3*stride];\
2096 #define LOAD_TOP_EDGE\
2097 const int t0= src[ 0-1*stride];\
2098 const int t1= src[ 1-1*stride];\
2099 const int t2= src[ 2-1*stride];\
2100 const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal-down-right intra prediction: each anti-diagonal is a
 * 3-tap (1,2,1)/4 filtered value of the left/top-left/top neighbors.
 * NOTE(review): the LOAD_LEFT_EDGE/LOAD_TOP_EDGE invocations and the
 * duplicated diagonal stores are elided from this chunk; l0..l3/t0..t3
 * come from the LOAD_* macros above.
 */
2102 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2103 const int lt= src[-1-1*stride];
2107 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2109 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2112 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2116 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2119 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2121 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2122 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/**
 * 4x4 diagonal-down-left intra prediction: (1,2,1)/4 filtered values of
 * the top and top-right neighbors along each diagonal.
 * NOTE(review): the LOAD_TOP_EDGE/LOAD_TOP_RIGHT_EDGE invocations and the
 * duplicated diagonal stores are elided from this chunk; t0..t7 come from
 * the LOAD_* macros above.
 */
2125 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2130 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2132 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2135 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2139 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2142 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2144 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2145 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/**
 * 4x4 vertical-right intra prediction: mixes 2-tap averages (>>1) and
 * (1,2,1)/4 filtered values of the top-left, top and left neighbors.
 * NOTE(review): the LOAD_* invocations and the duplicated-store left-hand
 * sides are elided from this chunk.
 */
2148 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2149 const int lt= src[-1-1*stride];
2152 const __attribute__((unused)) int unu= l3;
2155 src[1+2*stride]=(lt + t0 + 1)>>1;
2157 src[2+2*stride]=(t0 + t1 + 1)>>1;
2159 src[3+2*stride]=(t1 + t2 + 1)>>1;
2160 src[3+0*stride]=(t2 + t3 + 1)>>1;
2162 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2164 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2166 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2167 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2168 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2169 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2172 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2175 const __attribute__((unused)) int unu= t7;
2177 src[0+0*stride]=(t0 + t1 + 1)>>1;
2179 src[0+2*stride]=(t1 + t2 + 1)>>1;
2181 src[1+2*stride]=(t2 + t3 + 1)>>1;
2183 src[2+2*stride]=(t3 + t4+ 1)>>1;
2184 src[3+2*stride]=(t4 + t5+ 1)>>1;
2185 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2187 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2189 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2191 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2192 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
2195 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2198 src[0+0*stride]=(l0 + l1 + 1)>>1;
2199 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2201 src[0+1*stride]=(l1 + l2 + 1)>>1;
2203 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2205 src[0+2*stride]=(l2 + l3 + 1)>>1;
2207 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
2216 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2217 const int lt= src[-1-1*stride];
2220 const __attribute__((unused)) int unu= t3;
2223 src[2+1*stride]=(lt + l0 + 1)>>1;
2225 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2226 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2227 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2229 src[2+2*stride]=(l0 + l1 + 1)>>1;
2231 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2233 src[2+3*stride]=(l1 + l2+ 1)>>1;
2235 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2236 src[0+3*stride]=(l2 + l3 + 1)>>1;
2237 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: replicate the row above the block
 * into all 16 rows, copied 4 bytes at a time. */
void ff_pred16x16_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
    }
}
/* 16x16 horizontal intra prediction: each row is flat-filled with the
 * pixel immediately to its left (splatted via 0x01010101). */
void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
    }
}
/* 16x16 DC intra prediction: fill the block with the rounded average of
 * the 16 left-column and 16 top-row neighbours ((sum+16)>>5). */
void ff_pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[-1+i*stride];
    }

    for(i=0; i<16; i++){
        dc+= src[i-stride];
    }

    dc= 0x01010101*((dc + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/* 16x16 left-DC intra prediction: average of the 16 left-column
 * neighbours only (top row unavailable). */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[-1+i*stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/* 16x16 top-DC intra prediction: average of the 16 top-row
 * neighbours only (left column unavailable). */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[i-stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/* 16x16 DC prediction with no available neighbours: flat fill with 128. */
void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
    }
}
2331 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2334 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2335 const uint8_t * const src0 = src+7-stride;
2336 const uint8_t *src1 = src+8*stride-1;
2337 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2338 int H = src0[1] - src0[-1];
2339 int V = src1[0] - src2[ 0];
2340 for(k=2; k<=8; ++k) {
2341 src1 += stride; src2 -= stride;
2342 H += k*(src0[k] - src0[-k]);
2343 V += k*(src1[0] - src2[ 0]);
2346 H = ( 5*(H/4) ) / 16;
2347 V = ( 5*(V/4) ) / 16;
2349 /* required for 100% accuracy */
2350 i = H; H = V; V = i;
2352 H = ( 5*H+32 ) >> 6;
2353 V = ( 5*V+32 ) >> 6;
2356 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2357 for(j=16; j>0; --j) {
2360 for(i=-16; i<0; i+=4) {
2361 src[16+i] = cm[ (b ) >> 5 ];
2362 src[17+i] = cm[ (b+ H) >> 5 ];
2363 src[18+i] = cm[ (b+2*H) >> 5 ];
2364 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Standard H.264 16x16 plane prediction (non-SVQ3 variant). */
void ff_pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
/* 8x8 (chroma) vertical intra prediction: replicate the row above. */
void ff_pred8x8_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
    }
}
/* 8x8 (chroma) horizontal intra prediction: flat-fill each row with the
 * pixel to its left. */
void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
    }
}
/* 8x8 DC prediction with no available neighbours: flat fill with 128. */
void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
    }
}
/* 8x8 left-DC intra prediction: top half averages left rows 0-3,
 * bottom half averages left rows 4-7 (top neighbours unavailable). */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc2;

    dc0=dc2=0;
    for(i=0; i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/* 8x8 top-DC intra prediction: left half averages top columns 0-3,
 * right half averages top columns 4-7 (left neighbours unavailable). */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1;

    dc0=dc1=0;
    for(i=0; i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc1= 0x01010101*((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/* 8x8 (chroma) DC intra prediction, per quadrant:
 *   top-left    : mean of top cols 0-3 and left rows 0-3 (dc0)
 *   top-right   : mean of top cols 4-7                   (dc1)
 *   bottom-left : mean of left rows 4-7                  (dc2)
 *   bottom-right: mean of dc1+dc2 source sums            (dc3) */
void ff_pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2, dc3;

    dc0=dc1=dc2=0;
    for(i=0; i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
    dc0= 0x01010101*((dc0 + 4)>>3);
    dc1= 0x01010101*((dc1 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2474 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2477 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2478 const uint8_t * const src0 = src+3-stride;
2479 const uint8_t *src1 = src+4*stride-1;
2480 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2481 int H = src0[1] - src0[-1];
2482 int V = src1[0] - src2[ 0];
2483 for(k=2; k<=4; ++k) {
2484 src1 += stride; src2 -= stride;
2485 H += k*(src0[k] - src0[-k]);
2486 V += k*(src1[0] - src2[ 0]);
2488 H = ( 17*H+16 ) >> 5;
2489 V = ( 17*V+16 ) >> 5;
2491 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2492 for(j=8; j>0; --j) {
2495 src[0] = cm[ (b ) >> 5 ];
2496 src[1] = cm[ (b+ H) >> 5 ];
2497 src[2] = cm[ (b+2*H) >> 5 ];
2498 src[3] = cm[ (b+3*H) >> 5 ];
2499 src[4] = cm[ (b+4*H) >> 5 ];
2500 src[5] = cm[ (b+5*H) >> 5 ];
2501 src[6] = cm[ (b+6*H) >> 5 ];
2502 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma predictors. SRC addresses a pixel
 * relative to the block origin; the LOAD_* macros declare filtered
 * neighbour locals (l0-l7, t0-t15, lt), substituting the nearest
 * available sample when has_topleft/has_topright is 0. */
#define SRC(x,y) src[(x)+(y)*stride]

#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += stride; \
    }
/* 8x8 luma DC prediction with no available neighbours: flat fill with 128. */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_DC(0x80808080);
}
2549 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2551 PREDICT_8x8_LOAD_LEFT;
2552 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2555 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2557 PREDICT_8x8_LOAD_TOP;
2558 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2561 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2563 PREDICT_8x8_LOAD_LEFT;
2564 PREDICT_8x8_LOAD_TOP;
2565 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2566 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2569 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2571 PREDICT_8x8_LOAD_LEFT;
2572 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2573 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2574 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2577 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2580 PREDICT_8x8_LOAD_TOP;
2589 for( y = 1; y < 8; y++ )
2590 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2592 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2594 PREDICT_8x8_LOAD_TOP;
2595 PREDICT_8x8_LOAD_TOPRIGHT;
2596 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2597 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2598 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2599 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2600 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2601 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2602 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2603 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2604 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2605 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2606 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2607 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2608 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2609 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2610 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2612 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2614 PREDICT_8x8_LOAD_TOP;
2615 PREDICT_8x8_LOAD_LEFT;
2616 PREDICT_8x8_LOAD_TOPLEFT;
2617 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2618 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2619 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2620 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2621 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2622 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2623 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2624 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2625 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2626 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2627 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2628 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2629 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2630 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2631 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2634 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2636 PREDICT_8x8_LOAD_TOP;
2637 PREDICT_8x8_LOAD_LEFT;
2638 PREDICT_8x8_LOAD_TOPLEFT;
2639 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2640 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2641 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2642 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2643 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2644 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2645 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2646 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2647 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2648 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2649 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2650 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2651 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2652 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2653 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2654 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2655 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2656 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2657 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2658 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2659 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2660 SRC(7,0)= (t6 + t7 + 1) >> 1;
2662 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2664 PREDICT_8x8_LOAD_TOP;
2665 PREDICT_8x8_LOAD_LEFT;
2666 PREDICT_8x8_LOAD_TOPLEFT;
2667 SRC(0,7)= (l6 + l7 + 1) >> 1;
2668 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2669 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2670 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2671 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2672 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2673 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2674 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2675 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2676 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2677 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2678 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2679 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2680 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2681 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2682 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2683 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2684 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2685 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2686 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2687 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2688 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2690 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2692 PREDICT_8x8_LOAD_TOP;
2693 PREDICT_8x8_LOAD_TOPRIGHT;
2694 SRC(0,0)= (t0 + t1 + 1) >> 1;
2695 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2696 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2697 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2698 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2699 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2700 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2701 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2702 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2703 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2704 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2705 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2706 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2707 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2708 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2709 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2710 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2711 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2712 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2713 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2714 SRC(7,6)= (t10 + t11 + 1) >> 1;
2715 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2717 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2719 PREDICT_8x8_LOAD_LEFT;
2720 SRC(0,0)= (l0 + l1 + 1) >> 1;
2721 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2722 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2723 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2724 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2725 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2726 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2727 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2728 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2729 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2730 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2731 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2732 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2733 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2734 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2735 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2736 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2737 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2739 #undef PREDICT_8x8_LOAD_LEFT
2740 #undef PREDICT_8x8_LOAD_TOP
2741 #undef PREDICT_8x8_LOAD_TOPLEFT
2742 #undef PREDICT_8x8_LOAD_TOPRIGHT
2743 #undef PREDICT_8x8_DC
2749 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2750 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2751 int src_x_offset, int src_y_offset,
2752 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2753 MpegEncContext * const s = &h->s;
2754 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2755 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2756 const int luma_xy= (mx&3) + ((my&3)<<2);
2757 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2758 uint8_t * src_cb, * src_cr;
2759 int extra_width= h->emu_edge_width;
2760 int extra_height= h->emu_edge_height;
2762 const int full_mx= mx>>2;
2763 const int full_my= my>>2;
2764 const int pic_width = 16*s->mb_width;
2765 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2767 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
2770 if(mx&7) extra_width -= 3;
2771 if(my&7) extra_height -= 3;
2773 if( full_mx < 0-extra_width
2774 || full_my < 0-extra_height
2775 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2776 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2777 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2778 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2782 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2784 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2787 if(s->flags&CODEC_FLAG_GRAY) return;
2790 // chroma offset when predicting from a field of opposite parity
2791 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2792 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2794 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2795 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2798 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2799 src_cb= s->edge_emu_buffer;
2801 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2804 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2805 src_cr= s->edge_emu_buffer;
2807 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2810 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2811 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2812 int x_offset, int y_offset,
2813 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2814 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2815 int list0, int list1){
2816 MpegEncContext * const s = &h->s;
2817 qpel_mc_func *qpix_op= qpix_put;
2818 h264_chroma_mc_func chroma_op= chroma_put;
2820 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2821 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2822 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2823 x_offset += 8*s->mb_x;
2824 y_offset += 8*(s->mb_y >> MB_MBAFF);
2827 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2828 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2829 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2830 qpix_op, chroma_op);
2833 chroma_op= chroma_avg;
2837 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2838 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2839 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2840 qpix_op, chroma_op);
2844 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2845 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2846 int x_offset, int y_offset,
2847 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2848 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2849 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2850 int list0, int list1){
2851 MpegEncContext * const s = &h->s;
2853 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2854 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2855 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2856 x_offset += 8*s->mb_x;
2857 y_offset += 8*(s->mb_y >> MB_MBAFF);
2860 /* don't optimize for luma-only case, since B-frames usually
2861 * use implicit weights => chroma too. */
2862 uint8_t *tmp_cb = s->obmc_scratchpad;
2863 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2864 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2865 int refn0 = h->ref_cache[0][ scan8[n] ];
2866 int refn1 = h->ref_cache[1][ scan8[n] ];
2868 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2869 dest_y, dest_cb, dest_cr,
2870 x_offset, y_offset, qpix_put, chroma_put);
2871 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2872 tmp_y, tmp_cb, tmp_cr,
2873 x_offset, y_offset, qpix_put, chroma_put);
2875 if(h->use_weight == 2){
2876 int weight0 = h->implicit_weight[refn0][refn1];
2877 int weight1 = 64 - weight0;
2878 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2879 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2880 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2882 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2883 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2884 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2885 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2886 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2887 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2888 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2889 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2890 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2893 int list = list1 ? 1 : 0;
2894 int refn = h->ref_cache[list][ scan8[n] ];
2895 Picture *ref= &h->ref_list[list][refn];
2896 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2897 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2898 qpix_put, chroma_put);
2900 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2901 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2902 if(h->use_weight_chroma){
2903 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2904 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2905 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2906 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2911 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2912 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2913 int x_offset, int y_offset,
2914 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2915 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2916 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2917 int list0, int list1){
2918 if((h->use_weight==2 && list0 && list1
2919 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2920 || h->use_weight==1)
2921 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2922 x_offset, y_offset, qpix_put, chroma_put,
2923 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2925 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2926 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2929 static inline void prefetch_motion(H264Context *h, int list){
2930 /* fetch pixels for estimated mv 4 macroblocks ahead
2931 * optimized for 64byte cache lines */
2932 MpegEncContext * const s = &h->s;
2933 const int refn = h->ref_cache[list][scan8[0]];
2935 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2936 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2937 uint8_t **src= h->ref_list[list][refn].data;
2938 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2939 s->dsp.prefetch(src[0]+off, s->linesize, 4);
2940 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2941 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
2945 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2946 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2947 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2948 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2949 MpegEncContext * const s = &h->s;
2950 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2951 const int mb_type= s->current_picture.mb_type[mb_xy];
2953 assert(IS_INTER(mb_type));
2955 prefetch_motion(h, 0);
2957 if(IS_16X16(mb_type)){
2958 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2959 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2960 &weight_op[0], &weight_avg[0],
2961 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2962 }else if(IS_16X8(mb_type)){
2963 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2964 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2965 &weight_op[1], &weight_avg[1],
2966 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2967 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2968 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2969 &weight_op[1], &weight_avg[1],
2970 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2971 }else if(IS_8X16(mb_type)){
2972 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2973 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2974 &weight_op[2], &weight_avg[2],
2975 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2976 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2977 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2978 &weight_op[2], &weight_avg[2],
2979 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2983 assert(IS_8X8(mb_type));
2986 const int sub_mb_type= h->sub_mb_type[i];
2988 int x_offset= (i&1)<<2;
2989 int y_offset= (i&2)<<1;
2991 if(IS_SUB_8X8(sub_mb_type)){
2992 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2993 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2994 &weight_op[3], &weight_avg[3],
2995 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2996 }else if(IS_SUB_8X4(sub_mb_type)){
2997 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2998 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2999 &weight_op[4], &weight_avg[4],
3000 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3001 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3002 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3003 &weight_op[4], &weight_avg[4],
3004 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3005 }else if(IS_SUB_4X8(sub_mb_type)){
3006 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3007 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3008 &weight_op[5], &weight_avg[5],
3009 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3010 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3011 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3012 &weight_op[5], &weight_avg[5],
3013 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3016 assert(IS_SUB_4X4(sub_mb_type));
3018 int sub_x_offset= x_offset + 2*(j&1);
3019 int sub_y_offset= y_offset + (j&2);
3020 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3021 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3022 &weight_op[6], &weight_avg[6],
3023 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3029 prefetch_motion(h, 1);
3032 static void decode_init_vlc(void){
3033 static int done = 0;
3039 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3040 &chroma_dc_coeff_token_len [0], 1, 1,
3041 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3044 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3045 &coeff_token_len [i][0], 1, 1,
3046 &coeff_token_bits[i][0], 1, 1, 1);
3050 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3051 &chroma_dc_total_zeros_len [i][0], 1, 1,
3052 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3054 for(i=0; i<15; i++){
3055 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3056 &total_zeros_len [i][0], 1, 1,
3057 &total_zeros_bits[i][0], 1, 1, 1);
3061 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3062 &run_len [i][0], 1, 1,
3063 &run_bits[i][0], 1, 1, 1);
3065 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3066 &run_len [6][0], 1, 1,
3067 &run_bits[6][0], 1, 1, 1);
3072 * Sets the intra prediction function pointers.
3074 static void init_pred_ptrs(H264Context *h){
3075 // MpegEncContext * const s = &h->s;
3077 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3078 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3079 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3080 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3081 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3082 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3083 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3084 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3085 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3086 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3087 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3088 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3090 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3091 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3092 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3093 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3094 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3095 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3096 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3097 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3098 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3099 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3100 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3101 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3103 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
3104 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
3105 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
3106 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
3107 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3108 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3109 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
3111 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
3112 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
3113 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
3114 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
3115 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3116 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3117 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
3120 static void free_tables(H264Context *h){
3121 av_freep(&h->intra4x4_pred_mode);
3122 av_freep(&h->chroma_pred_mode_table);
3123 av_freep(&h->cbp_table);
3124 av_freep(&h->mvd_table[0]);
3125 av_freep(&h->mvd_table[1]);
3126 av_freep(&h->direct_table);
3127 av_freep(&h->non_zero_count);
3128 av_freep(&h->slice_table_base);
3129 av_freep(&h->top_borders[1]);
3130 av_freep(&h->top_borders[0]);
3131 h->slice_table= NULL;
3133 av_freep(&h->mb2b_xy);
3134 av_freep(&h->mb2b8_xy);
3136 av_freep(&h->s.obmc_scratchpad);
// Builds h->dequant8_coeff[list][qp][coef]: per-QP dequantization factors
// for the 8x8 transform, derived from the PPS scaling matrices.
// NOTE(review): several interior lines of this function are not visible in
// this chunk; comments describe only what is shown.
3139 static void init_dequant8_coeff_table(H264Context *h){
// Store the table transposed when a non-C IDCT implementation is in use.
3141 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3142 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3143 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3145 for(i=0; i<2; i++ ){
// If both 8x8 scaling matrices are identical, alias list 1 to list 0
// instead of recomputing a second table.
3146 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3147 h->dequant8_coeff[1] = h->dequant8_buffer[0];
// One table row per QP (0..51): ff_div6[q]/ff_rem6[q] split q into a
// left-shift amount and a base-coefficient row index.
3151 for(q=0; q<52; q++){
3152 int shift = ff_div6[q];
3153 int idx = ff_rem6[q];
3155 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3156 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3157 h->pps.scaling_matrix8[i][x]) << shift;
// Builds h->dequant4_coeff[list][qp][coef] for the six 4x4 scaling lists,
// sharing buffers between lists whose PPS scaling matrices compare equal.
// NOTE(review): interior lines of this function are hidden in this chunk.
3162 static void init_dequant4_coeff_table(H264Context *h){
// Store transposed for non-C IDCT implementations.
3164 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3165 for(i=0; i<6; i++ ){
3166 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// Reuse an earlier list's buffer when the scaling matrices match.
3168 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3169 h->dequant4_coeff[i] = h->dequant4_buffer[j];
// One row per QP; the +2 biases the shift relative to the 8x8 case.
3176 for(q=0; q<52; q++){
3177 int shift = ff_div6[q] + 2;
3178 int idx = ff_rem6[q];
3180 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3181 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3182 h->pps.scaling_matrix4[i][x]) << shift;
// (Re)derives all dequant tables.  With lossless transform bypass the
// QP==0 entries are forced to 1<<6 -- presumably the neutral fixed-point
// scale of the dequant multiply; confirm against the IDCT code.
// NOTE(review): loop headers around lines 3193-3198 are hidden here.
3187 static void init_dequant_tables(H264Context *h){
3189 init_dequant4_coeff_table(h);
// The 8x8 tables are only needed when the PPS enables the 8x8 transform.
3190 if(h->pps.transform_8x8_mode)
3191 init_dequant8_coeff_table(h);
3192 if(h->sps.transform_bypass){
3195 h->dequant4_coeff[i][0][x] = 1<<6;
3196 if(h->pps.transform_8x8_mode)
3199 h->dequant8_coeff[i][0][x] = 1<<6;
3206 * Allocates the decoder's per-macroblock side tables; requires the
 * picture width/height (macroblock geometry) to already be set.
// Allocates all per-macroblock side tables; requires mb_width/mb_height
// (i.e. picture dimensions) to already be set.  Returns 0 on success;
// CHECKED_ALLOCZ (defined elsewhere) presumably jumps to a hidden
// failure path on OOM -- confirm at the macro definition.
3208 static int alloc_tables(H264Context *h){
3209 MpegEncContext * const s = &h->s;
// One extra MB row beyond mb_height, so edge neighbours can be addressed.
3210 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3213 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3215 CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3216 CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3217 CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3218 CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3219 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC decoding needs extra per-MB state (mvd, direct, chroma pred mode).
3221 if( h->pps.cabac ) {
3222 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3223 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3224 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3225 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// 0xFF (== (uint8_t)-1) marks "no slice"; slice_table is offset past the
// guard row/column so neighbour lookups never index below the base.
3228 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3229 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// Precompute MB index -> motion-vector array index mappings (4x4 and 8x8
// granularity respectively).
3231 CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3232 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3233 for(y=0; y<s->mb_height; y++){
3234 for(x=0; x<s->mb_width; x++){
3235 const int mb_xy= x + y*s->mb_stride;
3236 const int b_xy = 4*x + 4*y*h->b_stride;
3237 const int b8_xy= 2*x + 2*y*h->b8_stride;
3239 h->mb2b_xy [mb_xy]= b_xy;
3240 h->mb2b8_xy[mb_xy]= b8_xy;
// Scratchpad is allocated lazily in frame_start() (needs linesize).
3244 s->obmc_scratchpad = NULL;
3246 if(!h->dequant4_coeff[0])
3247 init_dequant_tables(h);
// Initialization shared by the decoder (and encoder) setup paths: copies
// geometry from the AVCodecContext and installs flat default scaling
// matrices.
3255 static void common_init(H264Context *h){
3256 MpegEncContext * const s = &h->s;
3258 s->width = s->avctx->width;
3259 s->height = s->avctx->height;
3260 s->codec_id= s->avctx->codec->id;
// -1 marks the dequant tables as not yet derived from any PPS.
3264 h->dequant_coeff_pps= -1;
3265 s->unrestricted_mv=1;
3266 s->decode=1; //FIXME
// Flat default scaling matrices (all 16 == no frequency weighting),
// used until an SPS/PPS provides real ones.
3268 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3269 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// AVCodec.init() callback: sets MpegEncContext defaults, output pixel
// format and workaround flags.  NOTE(review): the function continues past
// the lines visible in this chunk.
3272 static int decode_init(AVCodecContext *avctx){
3273 H264Context *h= avctx->priv_data;
3274 MpegEncContext * const s = &h->s;
3276 MPV_decode_defaults(s);
3281 s->out_format = FMT_H264;
3282 s->workaround_bugs= avctx->workaround_bugs;
3285 // s->decode_mb= ff_h263_decode_mb;
3287 avctx->pix_fmt= PIX_FMT_YUV420P;
// extradata whose first byte is 1 indicates AVCC-style (length-prefixed)
// configuration rather than Annex-B start codes -- TODO confirm at the
// hidden handling code below.
3291 if(avctx->extradata_size > 0 && avctx->extradata &&
3292 *(char *)avctx->extradata == 1){
// Per-frame setup: starts MPV / error-resilience bookkeeping and derives
// the per-block pixel offsets used during macroblock reconstruction.
// NOTE(review): a few interior lines (error return, chroma loop header)
// are hidden in this chunk.
3302 static int frame_start(H264Context *h){
3303 MpegEncContext * const s = &h->s;
3306 if(MPV_frame_start(s, s->avctx) < 0)
3308 ff_er_frame_start(s);
// Offsets below are derived from linesize, so it must be known by now.
3310 assert(s->linesize && s->uvlinesize);
// block_offset[0..23] are the frame-mode offsets; [24..47] are the field
// (MBAFF) variants with doubled line stride.
3312 for(i=0; i<16; i++){
3313 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3314 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3317 h->block_offset[16+i]=
3318 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3319 h->block_offset[24+16+i]=
3320 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3323 /* can't be in alloc_tables because linesize isn't known there.
3324 * FIXME: redo bipred weight to not require extra buffer? */
3325 if(!s->obmc_scratchpad)
3326 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3328 /* some macroblocks will be accessed before they're available */
// 0xFF == "no slice"; see alloc_tables for the same convention.
3330 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3332 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
// Saves this macroblock's right-most column (x==15) into h->left_border and
// its bottom row into h->top_borders[0], so neighbouring macroblocks can
// later read the pre-deblocking samples.  NOTE(review): the initial src_y
// adjustment lines are hidden in this chunk.
3336 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3337 MpegEncContext * const s = &h->s;
3341 src_cb -= uvlinesize;
3342 src_cr -= uvlinesize;
3344 // There are two lines saved, the line above the top macroblock of a pair,
3345 // and the line above the bottom macroblock
// Corner sample first, then the 16 luma column samples.
3346 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3347 for(i=1; i<17; i++){
3348 h->left_border[i]= src_y[15+i* linesize];
// Bottom luma row, copied as two 64-bit loads (16 pixels).
3351 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3352 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// Chroma borders are only needed when chroma is actually decoded.
3354 if(!(s->flags&CODEC_FLAG_GRAY)){
3355 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3356 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3358 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3359 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3361 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3362 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Exchanges (or one-way copies) the saved border samples with the current
// macroblock's edges, so intra prediction can see the pre-deblocking
// neighbour samples.  The XCHG macro body is hidden here; its 4th
// argument appears to select swap (xchg) vs. copy (1) -- confirm at the
// macro definition below.
3366 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3367 MpegEncContext * const s = &h->s;
// No left/top neighbour at picture edges; skip those exchanges.
3370 int deblock_left = (s->mb_x > 0);
3371 int deblock_top  = (s->mb_y > 0);
// Step back one row and one column so [0] addresses the neighbour samples.
3373 src_y  -= linesize + 1;
3374 src_cb -= uvlinesize + 1;
3375 src_cr -= uvlinesize + 1;
3377 #define XCHG(a,b,t,xchg)\
3384 for(i = !deblock_top; i<17; i++){
3385 XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
3390 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3391 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// The top-right neighbour only exists when not in the last MB column.
3392 if(s->mb_x+1 < s->mb_width){
3393 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3397 if(!(s->flags&CODEC_FLAG_GRAY)){
3399 for(i = !deblock_top; i<9; i++){
3400 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3401 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3405 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3406 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border: saves the borders of a whole
// macroblock *pair* (two rows into top_borders[0]/[1], 32+2 column
// samples into left_border).
3411 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3412 MpegEncContext * const s = &h->s;
3415 src_y  -= 2 *   linesize;
3416 src_cb -= 2 * uvlinesize;
3417 src_cr -= 2 * uvlinesize;
3419 // There are two lines saved, the line above the top macroblock of a pair,
3420 // and the line above the bottom macroblock
3421 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3422 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma column samples for the pair.
3423 for(i=2; i<34; i++){
3424 h->left_border[i]= src_y[15+i*  linesize];
// Two bottom luma rows, one per top_borders plane.
3427 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3428 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3429 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3430 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3432 if(!(s->flags&CODEC_FLAG_GRAY)){
3433 h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3434 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3435 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3436 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3437 for(i=2; i<18; i++){
3438 h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3439 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3441 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3442 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3443 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3444 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF variant of xchg_mb_border: exchanges/copies the saved borders of a
// macroblock pair.  The XCHG macro body is hidden in this chunk.
3448 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3449 MpegEncContext * const s = &h->s;
3452 int deblock_left = (s->mb_x > 0);
// > 1 because a pair spans two MB rows.
3453 int deblock_top  = (s->mb_y > 1);
3455 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3457 src_y  -= 2 *   linesize + 1;
3458 src_cb -= 2 * uvlinesize + 1;
3459 src_cr -= 2 * uvlinesize + 1;
3461 #define XCHG(a,b,t,xchg)\
// (!deblock_top)<<1 skips the two saved top-row samples at the picture edge.
3468 for(i = (!deblock_top)<<1; i<34; i++){
3469 XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
3474 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3475 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3476 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3477 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3478 if(s->mb_x+1 < s->mb_width){
3479 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3480 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3484 if(!(s->flags&CODEC_FLAG_GRAY)){
3486 for(i = (!deblock_top) << 1; i<18; i++){
3487 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3488 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3492 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3493 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3494 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3495 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// Reconstructs one macroblock: intra prediction or motion compensation,
// residual IDCT-add, then deblocking.  'simple' selects a fast path in
// which interlace / grayscale / non-H264 (SVQ3) branches compile away.
// NOTE(review): many interior lines are hidden in this chunk; comments
// only describe the visible statements.
3500 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
3501 MpegEncContext * const s = &h->s;
3502 const int mb_x= s->mb_x;
3503 const int mb_y= s->mb_y;
3504 const int mb_xy= mb_x + mb_y*s->mb_stride;
3505 const int mb_type= s->current_picture.mb_type[mb_xy];
3506 uint8_t *dest_y, *dest_cb, *dest_cr;
3507 int linesize, uvlinesize /*dct_offset*/;
3509 int *block_offset = &h->block_offset[0];
// 'bottom' == this MB is the bottom one of an MBAFF pair.
3510 const unsigned int bottom = mb_y & 1;
3511 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
3512 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3513 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for this MB in the current picture planes.
3515 dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3516 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3517 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3519 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3520 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// Field macroblocks use doubled strides and the field block offsets; for
// the bottom field, rewind dest to the pair's top line.
3522 if (!simple && MB_FIELD) {
3523 linesize   = h->mb_linesize   = s->linesize * 2;
3524 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3525 block_offset = &h->block_offset[24];
3526 if(mb_y&1){ //FIXME move out of this func?
3527 dest_y -= s->linesize*15;
3528 dest_cb-= s->uvlinesize*7;
3529 dest_cr-= s->uvlinesize*7;
// Remap ref_cache entries to the field-reference numbering (16+...),
// matching the fill_mbaff_ref_list layout.
3533 for(list=0; list<h->list_count; list++){
3534 if(!USES_LIST(mb_type, list))
3536 if(IS_16X16(mb_type)){
3537 int8_t *ref = &h->ref_cache[list][scan8[0]];
3538 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3540 for(i=0; i<16; i+=4){
3541 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3542 int ref = h->ref_cache[list][scan8[i]];
3544 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3550 linesize   = h->mb_linesize   = s->linesize;
3551 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3552 //        dct_offset = s->linesize * 16;
// Select the IDCT-add routines: lossless add, 8x8 or 4x4 transform.
3555 if(transform_bypass){
3557 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3558 }else if(IS_8x8DCT(mb_type)){
3559 idct_dc_add = s->dsp.h264_idct8_dc_add;
3560 idct_add    = s->dsp.h264_idct8_add;
3562 idct_dc_add = s->dsp.h264_idct_dc_add;
3563 idct_add    = s->dsp.h264_idct_add;
// MBAFF + deblocking + intra: temporarily restore pre-deblock borders of
// the MB pair above before predicting from them.
3566 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3567 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3568 int mbt_y = mb_y&~1;
3569 uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3570 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3571 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3572 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
// PCM macroblocks: samples are transmitted raw in h->mb; just copy them out.
3575 if (!simple && IS_INTRA_PCM(mb_type)) {
3578 // The pixels are stored in h->mb array in the same order as levels,
3579 // copy them in output in the correct order.
3580 for(i=0; i<16; i++) {
3581 for (y=0; y<4; y++) {
3582 for (x=0; x<4; x++) {
3583 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3587 for(i=16; i<16+4; i++) {
3588 for (y=0; y<4; y++) {
3589 for (x=0; x<4; x++) {
3590 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3594 for(i=20; i<20+4; i++) {
3595 for (y=0; y<4; y++) {
3596 for (x=0; x<4; x++) {
3597 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblocks: predict chroma, then luma (4x4/8x8 per-block or 16x16).
3602 if(IS_INTRA(mb_type)){
3603 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3604 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3606 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3607 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3608 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3611 if(IS_INTRA4x4(mb_type)){
3612 if(simple || !s->encoding){
3613 if(IS_8x8DCT(mb_type)){
3614 for(i=0; i<16; i+=4){
3615 uint8_t * const ptr= dest_y + block_offset[i];
3616 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3617 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3618 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3619 (h->topright_samples_available<<i)&0x4000, linesize);
// nnz==1 with only the DC coefficient set allows the cheap DC-only add.
3621 if(nnz == 1 && h->mb[i*16])
3622 idct_dc_add(ptr, h->mb + i*16, linesize);
3624 idct_add(ptr, h->mb + i*16, linesize);
3628 for(i=0; i<16; i++){
3629 uint8_t * const ptr= dest_y + block_offset[i];
3631 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Down-left / vertical-left prediction needs top-right samples; when they
// are unavailable, replicate the last available top sample.
3634 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3635 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3636 assert(mb_y || linesize <= block_offset[i]);
3637 if(!topright_avail){
3638 tr= ptr[3 - linesize]*0x01010101;
3639 topright= (uint8_t*) &tr;
3641 topright= ptr + 4 - linesize;
3645 h->pred4x4[ dir ](ptr, topright, linesize);
3646 nnz = h->non_zero_count_cache[ scan8[i] ];
3649 if(nnz == 1 && h->mb[i*16])
3650 idct_dc_add(ptr, h->mb + i*16, linesize);
3652 idct_add(ptr, h->mb + i*16, linesize);
// Non-H264 (SVQ3) path uses its own residual add.
3654 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
3659 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3661 if(!transform_bypass)
3662 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3664 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3666 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
3667 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblocks: motion compensation into the destination planes.
3669 hl_motion(h, dest_y, dest_cb, dest_cr,
3670 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3671 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3672 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add the luma residuals (intra-4x4 already added them block-by-block).
3676 if(!IS_INTRA4x4(mb_type)){
3678 if(IS_INTRA16x16(mb_type)){
3679 for(i=0; i<16; i++){
3680 if(h->non_zero_count_cache[ scan8[i] ])
3681 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3682 else if(h->mb[i*16])
3683 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3686 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3687 for(i=0; i<16; i+=di){
3688 int nnz = h->non_zero_count_cache[ scan8[i] ];
3690 if(nnz==1 && h->mb[i*16])
3691 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3693 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3698 for(i=0; i<16; i++){
3699 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3700 uint8_t * const ptr= dest_y + block_offset[i];
3701 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residuals: dequant the 2x2 DC blocks, then add per 4x4 block.
3707 if(simple || !(s->flags&CODEC_FLAG_GRAY)){
3708 uint8_t *dest[2] = {dest_cb, dest_cr};
3709 if(transform_bypass){
3710 idct_add = idct_dc_add = s->dsp.add_pixels4;
3712 idct_add = s->dsp.h264_idct_add;
3713 idct_dc_add = s->dsp.h264_idct_dc_add;
3714 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3715 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3718 for(i=16; i<16+8; i++){
3719 if(h->non_zero_count_cache[ scan8[i] ])
3720 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3721 else if(h->mb[i*16])
3722 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3725 for(i=16; i<16+8; i++){
3726 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3727 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3728 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.  In MBAFF mode both MBs of a pair are filtered together once
// the bottom MB has been reconstructed.
3734 if(h->deblocking_filter) {
3735 if (!simple && FRAME_MBAFF) {
3736 //FIXME try deblocking one mb at a time?
3737 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3738 const int mb_y = s->mb_y - 1;
3739 uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3740 const int mb_xy= mb_x + mb_y*s->mb_stride;
3741 const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3742 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3743 if (!bottom) return;
3744 pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3745 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3746 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3748 if(IS_INTRA(mb_type_top | mb_type_bottom))
3749 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3751 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3755 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3756 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3757 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3758 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3761 tprintf(h->s.avctx, "call mbaff filter_mb\n");
3762 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3763 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3764 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3766 tprintf(h->s.avctx, "call filter_mb\n");
3767 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3768 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3769 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3775 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast path: simple=1 lets the always-inlined hl_decode_mb_internal drop
// the interlace/gray/SVQ3 branches at compile time.
3777 static void hl_decode_mb_simple(H264Context *h){
3778 hl_decode_mb_internal(h, 1);
3782 * Process a macroblock; this handles edge cases, such as interlacing.
// Slow path (simple=0); av_noinline keeps this rarely-taken expansion out
// of the caller.
3784 static void av_noinline hl_decode_mb_complex(H264Context *h){
3785 hl_decode_mb_internal(h, 0);
// Dispatches reconstruction of the current MB to the simple or complex
// path based on per-MB features.  NOTE(review): the branch between
// is_complex and the calls below is partly hidden in this chunk.
3788 static void hl_decode_mb(H264Context *h){
3789 MpegEncContext * const s = &h->s;
3790 const int mb_x= s->mb_x;
3791 const int mb_y= s->mb_y;
3792 const int mb_xy= mb_x + mb_y*s->mb_stride;
3793 const int mb_type= s->current_picture.mb_type[mb_xy];
// Anything interlaced, PCM, non-H264, grayscale or encoding-side needs
// the complex path.
3794 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
3800 hl_decode_mb_complex(h);
3801 else hl_decode_mb_simple(h);
3805 * fills the default_ref_list.
// Builds the default reference picture lists: for B slices, short-term
// refs sorted by POC around the current picture; for P slices, short-term
// refs in decoding order.  Long-term refs are appended in both cases.
// NOTE(review): several interior lines (loop headers, index resets) are
// hidden in this chunk.
3807 static int fill_default_ref_list(H264Context *h){
3808 MpegEncContext * const s = &h->s;
3810 int smallest_poc_greater_than_current = -1;
3811 Picture sorted_short_ref[32];
3813 if(h->slice_type==B_TYPE){
3817 /* sort frame according to poc in B slice */
// Selection sort: repeatedly pick the smallest POC above the last limit.
3818 for(out_i=0; out_i<h->short_ref_count; out_i++){
3820 int best_poc=INT_MAX;
3822 for(i=0; i<h->short_ref_count; i++){
3823 const int poc= h->short_ref[i]->poc;
3824 if(poc > limit && poc < best_poc){
3830 assert(best_i != INT_MIN);
3833 sorted_short_ref[out_i]= *h->short_ref[best_i];
3834 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember where the future (>= current POC) pictures start.
3835 if (-1 == smallest_poc_greater_than_current) {
3836 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3837 smallest_poc_greater_than_current = out_i;
3843 if(s->picture_structure == PICT_FRAME){
3844 if(h->slice_type==B_TYPE){
3846 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3848 // find the largest poc
// L0 walks past pictures first, L1 walks future pictures first; the
// hidden wrap-around logic below switches direction at the list ends.
3849 for(list=0; list<2; list++){
3852 int step= list ? -1 : 1;
3854 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3855 while(j<0 || j>= h->short_ref_count){
3856 if(j != -99 && step == (list ? -1 : 1))
3859 j= smallest_poc_greater_than_current + (step>>1);
// Only frame references (reference==3 means both fields) qualify here.
3861 if(sorted_short_ref[j].reference != 3) continue;
3862 h->default_ref_list[list][index       ]= sorted_short_ref[j];
3863 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Append long-term references after the short-term ones.
3866 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3867 if(h->long_ref[i] == NULL) continue;
3868 if(h->long_ref[i]->reference != 3) continue;
3870 h->default_ref_list[ list ][index       ]= *h->long_ref[i];
3871 h->default_ref_list[ list ][index++].pic_id= i;;
3874 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3875 // swap the two first elements of L1 when
3876 // L0 and L1 are identical
3877 Picture temp= h->default_ref_list[1][0];
3878 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3879 h->default_ref_list[1][1] = temp;
// Zero any unused tail entries so stale pictures cannot be referenced.
3882 if(index < h->ref_count[ list ])
3883 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slices: short-term refs in decoding order, then long-term refs.
3887 for(i=0; i<h->short_ref_count; i++){
3888 if(h->short_ref[i]->reference != 3) continue; //FIXME handle reference-field cases
3889 h->default_ref_list[0][index       ]= *h->short_ref[i];
3890 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3892 for(i = 0; i < 16; i++){
3893 if(h->long_ref[i] == NULL) continue;
3894 if(h->long_ref[i]->reference != 3) continue;
3895 h->default_ref_list[0][index       ]= *h->long_ref[i];
3896 h->default_ref_list[0][index++].pic_id= i;;
3898 if(index < h->ref_count[0])
3899 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3902 if(h->slice_type==B_TYPE){
3904 //FIXME second field handling
// Debug dump of the constructed lists (tprintf is a no-op unless tracing).
3908 for (i=0; i<h->ref_count[0]; i++) {
3909 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3911 if(h->slice_type==B_TYPE){
3912 for (i=0; i<h->ref_count[1]; i++) {
3913 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3920 static void print_short_term(H264Context *h);
3921 static void print_long_term(H264Context *h);
// Parses ref_pic_list_reordering() from the slice header and applies the
// reordering commands to h->ref_list.  Returns 0 on success, -1 on a
// bitstream error.  NOTE(review): some interior lines (error returns,
// range checks) are hidden in this chunk.
3923 static int decode_ref_pic_list_reordering(H264Context *h){
3924 MpegEncContext * const s = &h->s;
3927 print_short_term(h);
// Intra slices carry no reference lists.
3929 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3931 for(list=0; list<h->list_count; list++){
// Start from the default list; reordering commands then modify it.
3932 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
3934 if(get_bits1(&s->gb)){
3935 int pred= h->curr_pic_num;
3937 for(index=0; ; index++){
3938 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3939 unsigned int pic_id;
3941 Picture *ref = NULL;
// idc 3 terminates the reordering command list.
3943 if(reordering_of_pic_nums_idc==3)
3946 if(index >= h->ref_count[list]){
3947 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3951 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term reorder via abs_diff_pic_num (subtract/add).
3952 if(reordering_of_pic_nums_idc<2){
3953 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3955 if(abs_diff_pic_num >= h->max_pic_num){
3956 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3960 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3961 else                                pred+= abs_diff_pic_num;
// Wrap modulo max_pic_num (a power of two, hence the mask).
3962 pred &= h->max_pic_num - 1;
3964 for(i= h->short_ref_count-1; i>=0; i--){
3965 ref = h->short_ref[i];
3966 assert(ref->reference == 3);
3967 assert(!ref->long_ref);
3968 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non-existing pictures by testing data[0] pointer
3972 ref->pic_id= ref->frame_num;
// idc 2: long-term reorder via long_term_pic_idx.
3974 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3976 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3979 ref = h->long_ref[pic_id];
3981 ref->pic_id= pic_id;
3982 assert(ref->reference == 3);
3983 assert(ref->long_ref);
3991 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3992 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the tail of the list down and insert the selected ref at 'index';
// a later duplicate of the same picture (if any) is overwritten.
3994 for(i=index; i+1<h->ref_count[list]; i++){
3995 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3998 for(; i > index; i--){
3999 h->ref_list[list][i]= h->ref_list[list][i-1];
4001 h->ref_list[list][index]= *ref;
4004 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Replace any remaining holes with the current picture (error resilience).
4010 for(list=0; list<h->list_count; list++){
4011 for(index= 0; index < h->ref_count[list]; index++){
4012 if(!h->ref_list[list][index].data[0])
4013 h->ref_list[list][index]= s->current_picture;
// Temporal direct mode needs the distance scale factors precomputed.
4017 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4018 direct_dist_scale_factor(h);
4019 direct_ref_list_init(h);
// For MBAFF, derives per-field reference entries at indices 16+2*i /
// 16+2*i+1 from each frame reference i: halved height via doubled
// linesize, with the second field offset by one original line.  Weights
// and offsets are duplicated for both fields.  NOTE(review): loop headers
// over j are hidden in this chunk.
4023 static void fill_mbaff_ref_list(H264Context *h){
4025 for(list=0; list<2; list++){ //FIXME try list_count
4026 for(i=0; i<h->ref_count[list]; i++){
4027 Picture *frame = &h->ref_list[list][i];
4028 Picture *field = &h->ref_list[list][16+2*i];
4031 field[0].linesize[j] <<= 1;
4032 field[1] = field[0];
// Second field starts one (original) line into the frame.
4034 field[1].data[j] += frame->linesize[j];
4036 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4037 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4039 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4040 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// Implicit weights are likewise replicated for the field entries.
4044 for(j=0; j<h->ref_count[1]; j++){
4045 for(i=0; i<h->ref_count[0]; i++)
4046 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4047 memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4048 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses pred_weight_table() from the slice header (explicit weighted
// prediction): per-reference luma/chroma weights and offsets, falling
// back to the defaults (1<<denom, offset 0) when the per-ref flag is
// unset.  NOTE(review): a few interior lines (use_weight updates, loop
// headers over j) are hidden in this chunk.
4052 static int pred_weight_table(H264Context *h){
4053 MpegEncContext * const s = &h->s;
4055 int luma_def, chroma_def;
4058 h->use_weight_chroma= 0;
4059 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4060 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight == 1.0 in the fixed-point scale set by the denominators.
4061 luma_def = 1<<h->luma_log2_weight_denom;
4062 chroma_def = 1<<h->chroma_log2_weight_denom;
4064 for(list=0; list<2; list++){
4065 for(i=0; i<h->ref_count[list]; i++){
4066 int luma_weight_flag, chroma_weight_flag;
4068 luma_weight_flag= get_bits1(&s->gb);
4069 if(luma_weight_flag){
4070 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4071 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only a non-default weight/offset actually enables weighting.
4072 if(   h->luma_weight[list][i] != luma_def
4073 || h->luma_offset[list][i] != 0)
4076 h->luma_weight[list][i]= luma_def;
4077 h->luma_offset[list][i]= 0;
4080 chroma_weight_flag= get_bits1(&s->gb);
4081 if(chroma_weight_flag){
4084 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4085 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4086 if(   h->chroma_weight[list][i][j] != chroma_def
4087 || h->chroma_offset[list][i][j] != 0)
4088 h->use_weight_chroma= 1;
4093 h->chroma_weight[list][i][j]= chroma_def;
4094 h->chroma_offset[list][i][j]= 0;
// P slices only carry the L0 table.
4098 if(h->slice_type != B_TYPE) break;
4100 h->use_weight= h->use_weight || h->use_weight_chroma;
// Derives implicit B-prediction weights from the POC distances between the
// current picture and each (ref0, ref1) pair, clamped to 32 (== equal
// weighting) when outside the valid range.  NOTE(review): a few interior
// lines (early-return branch, use_weight assignments) are hidden here.
4104 static void implicit_weight_table(H264Context *h){
4105 MpegEncContext * const s = &h->s;
4107 int cur_poc = s->current_picture_ptr->poc;
// Single symmetric reference pair: weighting would be a no-op, skip it.
4109 if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
4110 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4112 h->use_weight_chroma= 0;
4117 h->use_weight_chroma= 2;
4118 h->luma_log2_weight_denom= 5;
4119 h->chroma_log2_weight_denom= 5;
4121 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4122 int poc0 = h->ref_list[0][ref0].poc;
4123 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4124 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation.
4125 int td = av_clip(poc1 - poc0, -128, 127);
4127 int tb = av_clip(cur_poc - poc0, -128, 127);
4128 int tx = (16384 + (FFABS(td) >> 1)) / td;
4129 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4130 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4131 h->implicit_weight[ref0][ref1] = 32;
4133 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4135 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the decoder's reference claim on a picture, but keeps it
 * referenced while it is still queued for (delayed) output.
 * NOTE(review): the body is elided in this listing; only the
 * delayed-output/delayed-pic checks are visible.
 */
4140 static inline void unreference_pic(H264Context *h, Picture *pic){
4143 if(pic == h->delayed_output_pic)
4146 for(i = 0; h->delayed_pic[i]; i++)
4147 if(pic == h->delayed_pic[i]){
 * instantaneous decoder refresh: empties both reference picture lists,
 * as required when an IDR slice is decoded.
4157 static void idr(H264Context *h){
/* release all long-term references */
4160 for(i=0; i<16; i++){
4161 if (h->long_ref[i] != NULL) {
4162 unreference_pic(h, h->long_ref[i]);
4163 h->long_ref[i]= NULL;
4166 h->long_ref_count=0;
/* release all short-term references */
4168 for(i=0; i<h->short_ref_count; i++){
4169 unreference_pic(h, h->short_ref[i]);
4170 h->short_ref[i]= NULL;
4172 h->short_ref_count=0;
/* forget old pics after a seek: clears the delayed-output queue and
 * drops the reference flag on any pending pictures so their buffers
 * can be reused (AVCodec.flush callback). */
4176 static void flush_dpb(AVCodecContext *avctx){
4177 H264Context *h= avctx->priv_data;
4179 for(i=0; i<16; i++) {
4180 if(h->delayed_pic[i])
4181 h->delayed_pic[i]->reference= 0;
4182 h->delayed_pic[i]= NULL;
4184 if(h->delayed_output_pic)
4185 h->delayed_output_pic->reference= 0;
4186 h->delayed_output_pic= NULL;
4188 if(h->s.current_picture_ptr)
4189 h->s.current_picture_ptr->reference= 0;
 * Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the list.
 * @return the removed picture or NULL if an error occurs
4196 static Picture * remove_short(H264Context *h, int frame_num){
4197 MpegEncContext * const s = &h->s;
4200 if(s->avctx->debug&FF_DEBUG_MMCO)
4201 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4203 for(i=0; i<h->short_ref_count; i++){
4204 Picture *pic= h->short_ref[i];
4205 if(s->avctx->debug&FF_DEBUG_MMCO)
4206 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4207 if(pic->frame_num == frame_num){
4208 h->short_ref[i]= NULL;
/* close the gap left by the removed entry */
4209 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4210 h->short_ref_count--;
 * Removes the long-term reference at slot i (if any).
 * @return the removed picture or NULL if an error occurs
4221 static Picture * remove_long(H264Context *h, int i){
4224 pic= h->long_ref[i];
4225 h->long_ref[i]= NULL;
/* slot may legitimately be empty; only count actual removals */
4226 if(pic) h->long_ref_count--;
 * print short term list (debug aid, active only with FF_DEBUG_MMCO)
4234 static void print_short_term(H264Context *h) {
4236 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4237 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4238 for(i=0; i<h->short_ref_count; i++){
4239 Picture *pic= h->short_ref[i];
4240 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * print long term list (debug aid, active only with FF_DEBUG_MMCO)
4248 static void print_long_term(H264Context *h) {
4250 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4251 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
/* all 16 long-term slots are scanned; empty slots are skipped
 * (the NULL check line is elided in this listing) */
4252 for(i = 0; i < 16; i++){
4253 Picture *pic= h->long_ref[i];
4255 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * Executes the reference picture marking (memory management control operations),
 * i.e. applies the decoded MMCO opcodes to the short/long-term reference
 * lists, then inserts the current picture as the newest short-term
 * reference (sliding-window behaviour) unless it was marked long-term.
4264 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4265 MpegEncContext * const s = &h->s;
4267 int current_is_long=0;
4270 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4271 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4273 for(i=0; i<mmco_count; i++){
4274 if(s->avctx->debug&FF_DEBUG_MMCO)
4275 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4277 switch(mmco[i].opcode){
4278 case MMCO_SHORT2UNUSED:
4279 pic= remove_short(h, mmco[i].short_frame_num);
4281 unreference_pic(h, pic);
4282 else if(s->avctx->debug&FF_DEBUG_MMCO)
4283 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
/* move a short-term picture into a long-term slot; the slot's
 * previous occupant (if any) is released first */
4285 case MMCO_SHORT2LONG:
4286 pic= remove_long(h, mmco[i].long_index);
4287 if(pic) unreference_pic(h, pic);
4289 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4290 if (h->long_ref[ mmco[i].long_index ]){
4291 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4292 h->long_ref_count++;
4295 case MMCO_LONG2UNUSED:
4296 pic= remove_long(h, mmco[i].long_index);
4298 unreference_pic(h, pic);
4299 else if(s->avctx->debug&FF_DEBUG_MMCO)
4300 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (MMCO_LONG, case label elided): mark the CURRENT picture long-term */
4303 pic= remove_long(h, mmco[i].long_index);
4304 if(pic) unreference_pic(h, pic);
4306 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4307 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4308 h->long_ref_count++;
4312 case MMCO_SET_MAX_LONG:
4313 assert(mmco[i].long_index <= 16);
4314 // just remove the long term which index is greater than new max
4315 for(j = mmco[i].long_index; j<16; j++){
4316 pic = remove_long(h, j);
4317 if (pic) unreference_pic(h, pic);
/* (MMCO_RESET, case label elided): drop everything */
4321 while(h->short_ref_count){
4322 pic= remove_short(h, h->short_ref[0]->frame_num);
4323 if(pic) unreference_pic(h, pic);
4325 for(j = 0; j < 16; j++) {
4326 pic= remove_long(h, j);
4327 if(pic) unreference_pic(h, pic);
/* unless marked long-term above, the current picture becomes the
 * newest short-term reference (prepended to short_ref[]) */
4334 if(!current_is_long){
4335 pic= remove_short(h, s->current_picture_ptr->frame_num);
4337 unreference_pic(h, pic);
4338 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4341 if(h->short_ref_count)
4342 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4344 h->short_ref[0]= s->current_picture_ptr;
4345 h->short_ref[0]->long_ref=0;
4346 h->short_ref_count++;
4349 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices, reads no_output_of_prior_pics / long_term_reference
 * flags; otherwise reads the adaptive MMCO opcode list, or synthesizes
 * a sliding-window SHORT2UNUSED when the reference buffer is full.
 */
4354 static int decode_ref_pic_marking(H264Context *h){
4355 MpegEncContext * const s = &h->s;
4358 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4359 s->broken_link= get_bits1(&s->gb) -1;
4360 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4361 if(h->mmco[0].long_index == -1)
4364 h->mmco[0].opcode= MMCO_LONG;
4368 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4369 for(i= 0; i<MAX_MMCO_COUNT; i++) {
/* NOTE(review): stray second ';' below — harmless empty statement */
4370 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4372 h->mmco[i].opcode= opcode;
4373 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute frame_num, modulo wrap */
4374 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4375 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4376 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4380 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4381 unsigned int long_index= get_ue_golomb(&s->gb);
4382 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4383 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4386 h->mmco[i].long_index= long_index;
4389 if(opcode > (unsigned)MMCO_LONG){
4390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4393 if(opcode == MMCO_END)
/* no adaptive marking: sliding window — evict the oldest short-term
 * reference once the buffer reaches ref_frame_count */
4398 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4400 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4401 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4402 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types of the H.264 spec (8.2.1), and stores the per-field
 * and frame POC in the current Picture.
 */
4412 static int init_poc(H264Context *h){
4413 MpegEncContext * const s = &h->s;
4414 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4417 if(h->nal_unit_type == NAL_IDR_SLICE){
4418 h->frame_num_offset= 0;
/* frame_num wrapped -> advance the offset by one modulo period */
4420 if(h->frame_num < h->prev_frame_num)
4421 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4423 h->frame_num_offset= h->prev_frame_num_offset;
/* POC type 0: explicit poc_lsb in the bitstream, MSB tracked here */
4426 if(h->sps.poc_type==0){
4427 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4429 if(h->nal_unit_type == NAL_IDR_SLICE){
/* detect lsb wrap-around in either direction to update poc_msb */
4434 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4435 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4436 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4437 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4439 h->poc_msb = h->prev_poc_msb;
4440 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4442 field_poc[1] = h->poc_msb + h->poc_lsb;
4443 if(s->picture_structure == PICT_FRAME)
4444 field_poc[1] += h->delta_poc_bottom;
/* POC type 1: derived from frame_num and the SPS offset tables */
4445 }else if(h->sps.poc_type==1){
4446 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4449 if(h->sps.poc_cycle_length != 0)
4450 abs_frame_num = h->frame_num_offset + h->frame_num;
4454 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4457 expected_delta_per_poc_cycle = 0;
4458 for(i=0; i < h->sps.poc_cycle_length; i++)
4459 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4461 if(abs_frame_num > 0){
4462 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4463 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4465 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4466 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4467 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4471 if(h->nal_ref_idc == 0)
4472 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4474 field_poc[0] = expectedpoc + h->delta_poc[0];
4475 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4477 if(s->picture_structure == PICT_FRAME)
4478 field_poc[1] += h->delta_poc[1];
/* POC type 2 (else branch, header elided): POC follows decoding order;
 * non-reference pictures get odd values one less than the next ref */
4481 if(h->nal_unit_type == NAL_IDR_SLICE){
4484 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4485 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4491 if(s->picture_structure != PICT_BOTTOM_FIELD)
4492 s->current_picture_ptr->field_poc[0]= field_poc[0];
4493 if(s->picture_structure != PICT_TOP_FIELD)
4494 s->current_picture_ptr->field_poc[1]= field_poc[1];
4495 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4496 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4502 * decodes a slice header.
4503 * this will also call MPV_common_init() and frame_start() as needed
4505 static int decode_slice_header(H264Context *h){
4506 MpegEncContext * const s = &h->s;
4507 unsigned int first_mb_in_slice;
4508 unsigned int pps_id;
4509 int num_ref_idx_active_override_flag;
4510 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4511 unsigned int slice_type, tmp;
4512 int default_ref_list_done = 0;
4514 s->current_picture.reference= h->nal_ref_idc != 0;
4515 s->dropable= h->nal_ref_idc == 0;
4517 first_mb_in_slice= get_ue_golomb(&s->gb);
4519 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
4521 s->current_picture_ptr= NULL;
/* slice_type 5..9 means "fixed for the whole picture" (value - 5) */
4524 slice_type= get_ue_golomb(&s->gb);
4526 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4531 h->slice_type_fixed=1;
4533 h->slice_type_fixed=0;
4535 slice_type= slice_type_map[ slice_type ];
4536 if (slice_type == I_TYPE
4537 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4538 default_ref_list_done = 1;
4540 h->slice_type= slice_type;
4542 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
/* activate the referenced PPS and its SPS; zero-valued sentinel fields
 * indicate the parameter set was never parsed */
4544 pps_id= get_ue_golomb(&s->gb);
4545 if(pps_id>=MAX_PPS_COUNT){
4546 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4549 h->pps= h->pps_buffer[pps_id];
4550 if(h->pps.slice_group_count == 0){
4551 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4555 h->sps= h->sps_buffer[ h->pps.sps_id ];
4556 if(h->sps.log2_max_frame_num == 0){
4557 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4561 if(h->dequant_coeff_pps != pps_id){
4562 h->dequant_coeff_pps = pps_id;
4563 init_dequant_tables(h);
/* derive picture geometry from the SPS (cropping applied to width/height) */
4566 s->mb_width= h->sps.mb_width;
4567 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4569 h->b_stride= s->mb_width*4;
4570 h->b8_stride= s->mb_width*2;
4572 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4573 if(h->sps.frame_mbs_only_flag)
4574 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4576 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4578 if (s->context_initialized
4579 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
/* first slice ever: initialize the MPV context and the scan tables.
 * If the IDCT permutes coefficients, the scan tables are permuted to
 * match (the T() macros below), otherwise plain copies are used. */
4583 if (!s->context_initialized) {
4584 if (MPV_common_init(s) < 0)
4587 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4588 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4589 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4592 for(i=0; i<16; i++){
4593 #define T(x) (x>>2) | ((x<<2) & 0xF)
4594 h->zigzag_scan[i] = T(zigzag_scan[i]);
4595 h-> field_scan[i] = T( field_scan[i]);
4599 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4600 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4601 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4602 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4603 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4606 for(i=0; i<64; i++){
4607 #define T(x) (x>>3) | ((x&7)<<3)
4608 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4609 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4610 h->field_scan8x8[i] = T(field_scan8x8[i]);
4611 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* q0 tables: used at qp==0 with transform bypass, must stay unpermuted */
4615 if(h->sps.transform_bypass){ //FIXME same ugly
4616 h->zigzag_scan_q0 = zigzag_scan;
4617 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4618 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4619 h->field_scan_q0 = field_scan;
4620 h->field_scan8x8_q0 = field_scan8x8;
4621 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4623 h->zigzag_scan_q0 = h->zigzag_scan;
4624 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4625 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4626 h->field_scan_q0 = h->field_scan;
4627 h->field_scan8x8_q0 = h->field_scan8x8;
4628 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4633 s->avctx->width = s->width;
4634 s->avctx->height = s->height;
4635 s->avctx->sample_aspect_ratio= h->sps.sar;
4636 if(!s->avctx->sample_aspect_ratio.den)
4637 s->avctx->sample_aspect_ratio.den = 1;
4639 if(h->sps.timing_info_present_flag){
4640 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* workaround for old x264 builds that wrote a halved time_scale */
4641 if(h->x264_build > 0 && h->x264_build < 44)
4642 s->avctx->time_base.den *= 2;
4643 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4644 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4648 if(h->slice_num == 0){
4649 if(frame_start(h) < 0)
4653 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4654 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4657 h->mb_aff_frame = 0;
4658 if(h->sps.frame_mbs_only_flag){
4659 s->picture_structure= PICT_FRAME;
4661 if(get_bits1(&s->gb)) { //field_pic_flag
4662 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4663 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4665 s->picture_structure= PICT_FRAME;
4666 h->mb_aff_frame = h->sps.mb_aff;
4669 assert(s->mb_num == s->mb_width * s->mb_height);
4670 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4671 first_mb_in_slice >= s->mb_num){
4672 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4675 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4676 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4677 assert(s->mb_y < s->mb_height);
/* pic-num space is doubled when decoding a single field */
4679 if(s->picture_structure==PICT_FRAME){
4680 h->curr_pic_num= h->frame_num;
4681 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4683 h->curr_pic_num= 2*h->frame_num;
4684 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4687 if(h->nal_unit_type == NAL_IDR_SLICE){
4688 get_ue_golomb(&s->gb); /* idr_pic_id */
4691 if(h->sps.poc_type==0){
4692 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4694 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4695 h->delta_poc_bottom= get_se_golomb(&s->gb);
4699 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4700 h->delta_poc[0]= get_se_golomb(&s->gb);
4702 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4703 h->delta_poc[1]= get_se_golomb(&s->gb);
4708 if(h->pps.redundant_pic_cnt_present){
4709 h->redundant_pic_count= get_ue_golomb(&s->gb);
4712 //set defaults, might be overriden a few line later
4713 h->ref_count[0]= h->pps.ref_count[0];
4714 h->ref_count[1]= h->pps.ref_count[1];
4716 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4717 if(h->slice_type == B_TYPE){
4718 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4719 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4720 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4722 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4724 if(num_ref_idx_active_override_flag){
4725 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4726 if(h->slice_type==B_TYPE)
4727 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned trick: catches both 0 and >32 in one comparison */
4729 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4730 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4731 h->ref_count[0]= h->ref_count[1]= 1;
4735 if(h->slice_type == B_TYPE)
4742 if(!default_ref_list_done){
4743 fill_default_ref_list(h);
4746 if(decode_ref_pic_list_reordering(h) < 0)
/* explicit weighting for P/SP (weighted_pred) or B with idc==1;
 * implicit (POC-derived) weighting for B with idc==2 */
4749 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4750 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4751 pred_weight_table(h);
4752 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4753 implicit_weight_table(h);
4757 if(s->current_picture.reference)
4758 decode_ref_pic_marking(h);
4761 fill_mbaff_ref_list(h);
4763 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4764 tmp = get_ue_golomb(&s->gb);
4766 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4769 h->cabac_init_idc= tmp;
4772 h->last_qscale_diff = 0;
4773 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4775 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4779 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4780 //FIXME qscale / qp ... stuff
4781 if(h->slice_type == SP_TYPE){
4782 get_bits1(&s->gb); /* sp_for_switch_flag */
4784 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4785 get_se_golomb(&s->gb); /* slice_qs_delta */
4788 h->deblocking_filter = 1;
4789 h->slice_alpha_c0_offset = 0;
4790 h->slice_beta_offset = 0;
4791 if( h->pps.deblocking_filter_parameters_present ) {
4792 tmp= get_ue_golomb(&s->gb);
4794 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc 0 means "filter on", 1 means "off": swap 0<->1 so that
 * h->deblocking_filter is a plain enable flag */
4797 h->deblocking_filter= tmp;
4798 if(h->deblocking_filter < 2)
4799 h->deblocking_filter^= 1; // 1<->0
4801 if( h->deblocking_filter ) {
4802 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4803 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4806 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4807 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4808 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4809 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4810 h->deblocking_filter= 0;
/* NOTE(review): the '?' below is not valid C as written — presumably
 * this line sits inside a disabled (#if 0 style) region in the full
 * file, with the bit count still to be determined; confirm upstream. */
4813 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4814 slice_group_change_cycle= get_bits(&s->gb, ?);
4819 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4820 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4822 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4823 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4825 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4827 av_get_pict_type_char(h->slice_type),
4828 pps_id, h->frame_num,
4829 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4830 h->ref_count[0], h->ref_count[1],
4832 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4834 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* fast mode: cheaper 2-tap qpel interpolation for non-reference frames */
4838 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4839 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4840 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4842 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4843 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: counts leading zero bits up to and
 * including the terminating 1, via the raw bitstream-reader cache.
 * (The return statement is elided from this listing.)
 */
4852 static inline int get_level_prefix(GetBitContext *gb){
4856 OPEN_READER(re, gb);
4857 UPDATE_CACHE(re, gb);
4858 buf=GET_CACHE(re, gb);
/* position of the first set bit in the 32-bit cache */
4860 log= 32 - av_log2(buf);
4862 print_bin(buf>>(32-log), log);
4863 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4866 LAST_SKIP_BITS(re, gb, log);
4867 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current
 * macroblock: every 8x8 sub-partition must be SUB_8X8, and direct
 * sub-partitions require direct_8x8_inference_flag.
 * (Loop header and return are elided from this listing.)
 */
4872 static inline int get_dct8x8_allowed(H264Context *h){
4875 if(!IS_SUB_8X8(h->sub_mb_type[i])
4876 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
 * decodes a residual block (CAVLC entropy coding).
 * @param n block index
 * @param scantable scantable
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
4889 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4890 MpegEncContext * const s = &h->s;
4891 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4893 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4895 //FIXME put trailing_onex into the context
/* coeff_token: packed as (total_coeff<<2)|trailing_ones; the VLC table
 * is selected from the predicted non-zero count of neighbouring blocks */
4897 if(n == CHROMA_DC_BLOCK_INDEX){
4898 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4899 total_coeff= coeff_token>>2;
4901 if(n == LUMA_DC_BLOCK_INDEX){
4902 total_coeff= pred_non_zero_count(h, 0);
4903 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4904 total_coeff= coeff_token>>2;
4906 total_coeff= pred_non_zero_count(h, n);
4907 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4908 total_coeff= coeff_token>>2;
4909 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4913 //FIXME set last_non_zero?
4917 if(total_coeff > (unsigned)max_coeff) {
4918 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4922 trailing_ones= coeff_token&3;
4923 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4924 assert(total_coeff<=16);
/* trailing ones are coded as sign bits only: 0 -> +1, 1 -> -1 */
4926 for(i=0; i<trailing_ones; i++){
4927 level[i]= 1 - 2*get_bits1(gb);
4931 int level_code, mask;
4932 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4933 int prefix= get_level_prefix(gb);
4935 //first coefficient has suffix_length equal to 0 or 1
4936 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4938 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4940 level_code= (prefix<<suffix_length); //part
4941 }else if(prefix==14){
4943 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4945 level_code= prefix + get_bits(gb, 4); //part
4946 }else if(prefix==15){
4947 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4948 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4950 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4954 if(trailing_ones < 3) level_code += 2;
/* map even codes to positive levels, odd to negative (branchless) */
4959 mask= -(level_code&1);
4960 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4963 //remaining coefficients have suffix_length > 0
4964 for(;i<total_coeff;i++) {
4965 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4966 prefix = get_level_prefix(gb);
4968 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4969 }else if(prefix==15){
4970 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4972 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4975 mask= -(level_code&1);
4976 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adaptively grow suffix_length as level magnitudes increase */
4977 if(level_code > suffix_limit[suffix_length])
/* total_zeros is only coded when the block is not completely full */
4982 if(total_coeff == max_coeff)
4985 if(n == CHROMA_DC_BLOCK_INDEX)
4986 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4988 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place levels back-to-front using run_before codes; the qmul branch
 * additionally dequantizes ((level*qmul+32)>>6) while placing */
4991 coeff_num = zeros_left + total_coeff - 1;
4992 j = scantable[coeff_num];
4994 block[j] = level[0];
4995 for(i=1;i<total_coeff;i++) {
4998 else if(zeros_left < 7){
4999 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5001 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5003 zeros_left -= run_before;
5004 coeff_num -= 1 + run_before;
5005 j= scantable[ coeff_num ];
5010 block[j] = (level[0] * qmul[j] + 32)>>6;
5011 for(i=1;i<total_coeff;i++) {
5014 else if(zeros_left < 7){
5015 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5017 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5019 zeros_left -= run_before;
5020 coeff_num -= 1 + run_before;
5021 j= scantable[ coeff_num ];
5023 block[j]= (level[i] * qmul[j] + 32)>>6;
5028 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair
 * from the left neighbour if it belongs to this slice, else from the
 * above neighbour (spec 7.4.4: inferred flag for skipped pairs).
 */
5035 static void predict_field_decoding_flag(H264Context *h){
5036 MpegEncContext * const s = &h->s;
5037 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5038 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5039 ? s->current_picture.mb_type[mb_xy-1]
5040 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5041 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5043 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
 * decodes a P_SKIP or B_SKIP macroblock: no residual, motion inferred
 * (direct prediction for B skip, pskip median prediction for P skip).
5049 static void decode_mb_skip(H264Context *h){
5050 MpegEncContext * const s = &h->s;
5051 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* skipped MB has no coefficients anywhere */
5054 memset(h->non_zero_count[mb_xy], 0, 16);
5055 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5058 mb_type|= MB_TYPE_INTERLACED;
5060 if( h->slice_type == B_TYPE )
5062 // just for fill_caches. pred_direct_motion will set the real mb_type
5063 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5065 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5066 pred_direct_motion(h, &mb_type);
5067 mb_type|= MB_TYPE_SKIP;
/* (else branch, P skip) single list-0 16x16 partition with ref 0 */
5072 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5074 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5075 pred_pskip_motion(h, &mx, &my);
5076 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5077 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5080 write_back_motion(h, mb_type);
5081 s->current_picture.mb_type[mb_xy]= mb_type;
5082 s->current_picture.qscale_table[mb_xy]= s->qscale;
5083 h->slice_table[ mb_xy ]= h->slice_num;
5084 h->prev_mb_skipped= 1;
5088 * decodes a macroblock
5089 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5091 static int decode_mb_cavlc(H264Context *h){
5092 MpegEncContext * const s = &h->s;
5093 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5094 int partition_count;
5095 unsigned int mb_type, cbp;
5096 int dct8x8_allowed= h->pps.transform_8x8_mode;
5098 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5100 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5101 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5103 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5104 if(s->mb_skip_run==-1)
5105 s->mb_skip_run= get_ue_golomb(&s->gb);
5107 if (s->mb_skip_run--) {
5108 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5109 if(s->mb_skip_run==0)
5110 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5112 predict_field_decoding_flag(h);
5119 if( (s->mb_y&1) == 0 )
5120 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5122 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5124 h->prev_mb_skipped= 0;
5126 mb_type= get_ue_golomb(&s->gb);
5127 if(h->slice_type == B_TYPE){
5129 partition_count= b_mb_type_info[mb_type].partition_count;
5130 mb_type= b_mb_type_info[mb_type].type;
5133 goto decode_intra_mb;
5135 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5137 partition_count= p_mb_type_info[mb_type].partition_count;
5138 mb_type= p_mb_type_info[mb_type].type;
5141 goto decode_intra_mb;
5144 assert(h->slice_type == I_TYPE);
5147 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5151 cbp= i_mb_type_info[mb_type].cbp;
5152 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5153 mb_type= i_mb_type_info[mb_type].type;
5157 mb_type |= MB_TYPE_INTERLACED;
5159 h->slice_table[ mb_xy ]= h->slice_num;
5161 if(IS_INTRA_PCM(mb_type)){
5164 // we assume these blocks are very rare so we dont optimize it
5165 align_get_bits(&s->gb);
5167 // The pixels are stored in the same order as levels in h->mb array.
5168 for(y=0; y<16; y++){
5169 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5170 for(x=0; x<16; x++){
5171 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5172 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5176 const int index= 256 + 4*(y&3) + 32*(y>>2);
5178 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5179 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5183 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5185 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5186 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5190 // In deblocking, the quantizer is 0
5191 s->current_picture.qscale_table[mb_xy]= 0;
5192 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5193 // All coeffs are present
5194 memset(h->non_zero_count[mb_xy], 16, 16);
5196 s->current_picture.mb_type[mb_xy]= mb_type;
5201 h->ref_count[0] <<= 1;
5202 h->ref_count[1] <<= 1;
5205 fill_caches(h, mb_type, 0);
5208 if(IS_INTRA(mb_type)){
5210 // init_top_left_availability(h);
5211 if(IS_INTRA4x4(mb_type)){
5214 if(dct8x8_allowed && get_bits1(&s->gb)){
5215 mb_type |= MB_TYPE_8x8DCT;
5219 // fill_intra4x4_pred_table(h);
5220 for(i=0; i<16; i+=di){
5221 int mode= pred_intra_mode(h, i);
5223 if(!get_bits1(&s->gb)){
5224 const int rem_mode= get_bits(&s->gb, 3);
5225 mode = rem_mode + (rem_mode >= mode);
5229 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5231 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5233 write_back_intra_pred_mode(h);
5234 if( check_intra4x4_pred_mode(h) < 0)
5237 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5238 if(h->intra16x16_pred_mode < 0)
5242 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
5245 h->chroma_pred_mode= pred_mode;
5246 }else if(partition_count==4){
5247 int i, j, sub_partition_count[4], list, ref[2][4];
5249 if(h->slice_type == B_TYPE){
5251 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5252 if(h->sub_mb_type[i] >=13){
5253 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5256 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5257 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5259 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5260 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5261 pred_direct_motion(h, &mb_type);
5262 h->ref_cache[0][scan8[4]] =
5263 h->ref_cache[1][scan8[4]] =
5264 h->ref_cache[0][scan8[12]] =
5265 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5268 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5270 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5271 if(h->sub_mb_type[i] >=4){
5272 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5275 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5276 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5280 for(list=0; list<h->list_count; list++){
5281 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5283 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5284 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5285 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5287 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
5299 dct8x8_allowed = get_dct8x8_allowed(h);
5301 for(list=0; list<h->list_count; list++){
5303 if(IS_DIRECT(h->sub_mb_type[i])) {
5304 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5307 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5308 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5310 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5311 const int sub_mb_type= h->sub_mb_type[i];
5312 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5313 for(j=0; j<sub_partition_count[i]; j++){
5315 const int index= 4*i + block_width*j;
5316 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5317 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5318 mx += get_se_golomb(&s->gb);
5319 my += get_se_golomb(&s->gb);
5320 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5322 if(IS_SUB_8X8(sub_mb_type)){
5324 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5326 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5327 }else if(IS_SUB_8X4(sub_mb_type)){
5328 mv_cache[ 1 ][0]= mx;
5329 mv_cache[ 1 ][1]= my;
5330 }else if(IS_SUB_4X8(sub_mb_type)){
5331 mv_cache[ 8 ][0]= mx;
5332 mv_cache[ 8 ][1]= my;
5334 mv_cache[ 0 ][0]= mx;
5335 mv_cache[ 0 ][1]= my;
5338 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5344 }else if(IS_DIRECT(mb_type)){
5345 pred_direct_motion(h, &mb_type);
5346 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5348 int list, mx, my, i;
5349 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5350 if(IS_16X16(mb_type)){
5351 for(list=0; list<h->list_count; list++){
5353 if(IS_DIR(mb_type, 0, list)){
5354 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5355 if(val >= h->ref_count[list]){
5356 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5360 val= LIST_NOT_USED&0xFF;
5361 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5363 for(list=0; list<h->list_count; list++){
5365 if(IS_DIR(mb_type, 0, list)){
5366 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5367 mx += get_se_golomb(&s->gb);
5368 my += get_se_golomb(&s->gb);
5369 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5371 val= pack16to32(mx,my);
5374 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
5377 else if(IS_16X8(mb_type)){
5378 for(list=0; list<h->list_count; list++){
5381 if(IS_DIR(mb_type, i, list)){
5382 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5383 if(val >= h->ref_count[list]){
5384 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5388 val= LIST_NOT_USED&0xFF;
5389 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5392 for(list=0; list<h->list_count; list++){
5395 if(IS_DIR(mb_type, i, list)){
5396 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5397 mx += get_se_golomb(&s->gb);
5398 my += get_se_golomb(&s->gb);
5399 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5401 val= pack16to32(mx,my);
5404 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
5408 assert(IS_8X16(mb_type));
5409 for(list=0; list<h->list_count; list++){
5412 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5413 val= get_te0_golomb(&s->gb, h->ref_count[list]);
5414 if(val >= h->ref_count[list]){
5415 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5419 val= LIST_NOT_USED&0xFF;
5420 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5423 for(list=0; list<h->list_count; list++){
5426 if(IS_DIR(mb_type, i, list)){
5427 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5428 mx += get_se_golomb(&s->gb);
5429 my += get_se_golomb(&s->gb);
5430 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5432 val= pack16to32(mx,my);
5435 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
5441 if(IS_INTER(mb_type))
5442 write_back_motion(h, mb_type);
5444 if(!IS_INTRA16x16(mb_type)){
5445 cbp= get_ue_golomb(&s->gb);
5447 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5451 if(IS_INTRA4x4(mb_type))
5452 cbp= golomb_to_intra4x4_cbp[cbp];
5454 cbp= golomb_to_inter_cbp[cbp];
5458 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5459 if(get_bits1(&s->gb))
5460 mb_type |= MB_TYPE_8x8DCT;
5462 s->current_picture.mb_type[mb_xy]= mb_type;
5464 if(cbp || IS_INTRA16x16(mb_type)){
5465 int i8x8, i4x4, chroma_idx;
5466 int chroma_qp, dquant;
5467 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5468 const uint8_t *scan, *scan8x8, *dc_scan;
5470 // fill_non_zero_count_cache(h);
5472 if(IS_INTERLACED(mb_type)){
5473 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5474 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5475 dc_scan= luma_dc_field_scan;
5477 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5478 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5479 dc_scan= luma_dc_zigzag_scan;
5482 dquant= get_se_golomb(&s->gb);
5484 if( dquant > 25 || dquant < -26 ){
5485 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5489 s->qscale += dquant;
5490 if(((unsigned)s->qscale) > 51){
5491 if(s->qscale<0) s->qscale+= 52;
5492 else s->qscale-= 52;
5495 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5496 if(IS_INTRA16x16(mb_type)){
5497 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5498 return -1; //FIXME continue if partitioned and other return -1 too
5501 assert((cbp&15) == 0 || (cbp&15) == 15);
5504 for(i8x8=0; i8x8<4; i8x8++){
5505 for(i4x4=0; i4x4<4; i4x4++){
5506 const int index= i4x4 + 4*i8x8;
5507 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5513 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5516 for(i8x8=0; i8x8<4; i8x8++){
5517 if(cbp & (1<<i8x8)){
5518 if(IS_8x8DCT(mb_type)){
5519 DCTELEM *buf = &h->mb[64*i8x8];
5521 for(i4x4=0; i4x4<4; i4x4++){
5522 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5523 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5526 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5527 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5529 for(i4x4=0; i4x4<4; i4x4++){
5530 const int index= i4x4 + 4*i8x8;
5532 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5538 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5539 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5545 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5546 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5552 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5553 for(i4x4=0; i4x4<4; i4x4++){
5554 const int index= 16 + 4*chroma_idx + i4x4;
5555 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5561 uint8_t * const nnz= &h->non_zero_count_cache[0];
5562 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5563 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5566 uint8_t * const nnz= &h->non_zero_count_cache[0];
5567 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5568 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5569 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5571 s->current_picture.qscale_table[mb_xy]= s->qscale;
5572 write_back_non_zero_count(h);
5575 h->ref_count[0] >>= 1;
5576 h->ref_count[1] >>= 1;
5582 static int decode_cabac_field_decoding_flag(H264Context *h) {
5583 MpegEncContext * const s = &h->s;
5584 const int mb_x = s->mb_x;
5585 const int mb_y = s->mb_y & ~1;
5586 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5587 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5589 unsigned int ctx = 0;
5591 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5594 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5598 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type with CABAC.
 *
 * @param ctx_base    offset of this slice type's mb_type context models
 *                    within cabac_state
 * @param intra_slice nonzero in I slices: the first bin then uses a
 *                    neighbour-derived context, and the follow-up bins use
 *                    shifted context indices (see the +intra_slice offsets)
 * @return 0 for I4x4, 25 for PCM, or an I16x16 code built from
 *         12*(cbp_luma!=0) + chroma-cbp and prediction-mode bins.
 */
static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
    uint8_t *state= &h->cabac_state[ctx_base];
    MpegEncContext * const s = &h->s;
    const int mba_xy = h->left_mb_xy[0];
    const int mbb_xy = h->top_mb_xy;
    /* ctx = number of available same-slice neighbours that are not I4x4 */
    if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
    if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
        return 0; /* I4x4 */
    /* non-intra slice: fixed context for the intra/I4x4 decision */
    if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
        return 0; /* I4x4 */
    /* terminate bin distinguishes PCM from I16x16 */
    if( get_cabac_terminate( &h->cabac ) )
        return 25; /* PCM */
    mb_type = 1; /* I16x16 */
    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
    /* two bins for the intra16x16 prediction mode */
    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * The returned index is later mapped through the i_/p_/b_mb_type_info
 * tables by the caller.
 * @return slice-type dependent mb_type index; SI/SP slices are not handled
 *         here (see the TODO at the end).
 */
static int decode_cabac_mb_type( H264Context *h ) {
    MpegEncContext * const s = &h->s;
    if( h->slice_type == I_TYPE ) {
        return decode_cabac_intra_mb_type(h, 3, 1);
    } else if( h->slice_type == P_TYPE ) {
        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
            /* inter mb type: small binary tree over contexts 15..17 */
            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
                /* P_L0_D16x16, P_8x8 */
                return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
                /* P_L0_D8x16, P_L0_D16x8 */
                return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
        /* intra mb in a P slice */
        return decode_cabac_intra_mb_type(h, 17, 0) + 5;
    } else if( h->slice_type == B_TYPE ) {
        const int mba_xy = h->left_mb_xy[0];
        const int mbb_xy = h->top_mb_xy;
        /* ctx = number of available same-slice neighbours that are not direct */
        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
            return 0; /* B_Direct_16x16 */
        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
        /* 4-bin suffix selects among the remaining B types */
        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
        else if( bits == 13 ) {
            /* intra mb in a B slice */
            return decode_cabac_intra_mb_type(h, 32, 0) + 23;
        } else if( bits == 14 )
            return 11; /* B_L1_L0_8x16 */
        else if( bits == 15 )
            return 22; /* B_8x8 */
        /* one extra bin disambiguates the remaining 8x16/16x8 bi-pred types */
        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
    /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag for the MB at (mb_x, mb_y).
 * The context depends on whether the left and above neighbours (same slice)
 * are themselves non-skipped; in MBAFF the neighbour addresses must first be
 * adjusted for field/frame pairing. B slices use a separate context set
 * (the offset added for B_TYPE is elided in this excerpt).
 */
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
    MpegEncContext * const s = &h->s;
    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
        /* left neighbour: step to the bottom MB of the pair when the
         * field/frame coding of the pair matches the current MB */
            && h->slice_table[mba_xy] == h->slice_num
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
            mba_xy += s->mb_stride;
            /* top neighbour within the same (field) pair */
            mbb_xy = mb_xy - s->mb_stride;
            && h->slice_table[mbb_xy] == h->slice_num
            && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
                mbb_xy -= s->mb_stride;
            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
        /* progressive (non-MBAFF) neighbour addressing */
        int mb_xy = mb_x + mb_y*s->mb_stride;
        mbb_xy = mb_xy - s->mb_stride;
    /* ctx = number of available neighbours that are NOT skipped */
    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
    if( h->slice_type == B_TYPE )
    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one intra4x4 prediction mode with CABAC.
 * If the "use predicted mode" bin (context 68) is set, the predicted
 * pred_mode is kept (return elided here); otherwise a 3-bit remaining
 * mode is read via context 69 and bumped past the predicted mode so all
 * 9 modes stay reachable.
 */
static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
    /* rem_intra4x4_pred_mode: three bins, LSB first */
    mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
    mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
    mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
    /* skip over the predicted mode so rem values map onto the other 8 modes */
    if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode (0..3) with CABAC.
 * The first bin's context counts available neighbours whose stored chroma
 * pred mode is nonzero; the remaining (truncated unary) bins use context
 * 64+3.
 */
static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
    const int mba_xy = h->left_mb_xy[0];
    const int mbb_xy = h->top_mb_xy;
    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
    /* truncated unary suffix: each further bin selects the next mode */
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* x coordinate (in 4x4-block units) of the i-th luma 4x4 block in coding
 * order; used with block_idx_y/block_idx_xy for cbp context derivation. */
static const uint8_t block_idx_x[16] = {
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* y coordinate of the i-th luma 4x4 block, same indexing as block_idx_x */
static const uint8_t block_idx_y[16] = {
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* inverse map: (x,y) 4x4-block coordinates -> coding-order index
 * (table rows elided in this excerpt) */
static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC.
 * For each 8x8 block (coding order) the context is derived from whether
 * the left and above 8x8 neighbours — inside this MB or taken from
 * left_cbp/top_cbp of the neighbouring MBs — had coded coefficients.
 */
static int decode_cabac_mb_cbp_luma( H264Context *h) {
    if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
        tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
    for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
        /* coordinates (in 4x4 units) of this 8x8 block's top-left corner */
        x = block_idx_x[4*i8x8];
        y = block_idx_y[4*i8x8];
        else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
            cbp_a = h->left_cbp;
            tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
        /* No need to test for skip as we put 0 for skip block */
        /* No need to test for IPCM as we put 1 for IPCM block */
            /* left 8x8 neighbour (wraps into cbp_a when x==0) */
            int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
            if( ((cbp_a >> i8x8a)&0x01) == 0 )
            /* above 8x8 neighbour (wraps into cbp_b when y==0) */
            int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
            if( ((cbp_b >> i8x8b)&0x01) == 0 )
        /* cbp bin for this 8x8 block, context models 73..76 */
        if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * with CABAC. Contexts for both bins are derived from the neighbours'
 * chroma cbp stored in bits 4..5 of left_cbp/top_cbp.
 */
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
    cbp_a = (h->left_cbp>>4)&0x03;
    cbp_b = (h-> top_cbp>>4)&0x03;
    /* first bin: any chroma coefficients at all? */
    if( cbp_a > 0 ) ctx++;
    if( cbp_b > 0 ) ctx += 2;
    if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
    /* second bin: DC-only (1) vs DC+AC (2); ctx from neighbours with AC */
    if( cbp_a == 2 ) ctx++;
    if( cbp_b == 2 ) ctx += 2;
    return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC as a unary-binarized value and maps it
 * back to a signed delta. The initial context depends on whether the
 * previous MB in decoding order had a nonzero delta (last_qscale_diff).
 * NOTE(review): the positive-value branch of the final sign mapping is
 * elided here; only the negative mapping -(val+1)/2 is visible.
 */
static int decode_cabac_mb_dqp( H264Context *h) {
    MpegEncContext * const s = &h->s;
    /* address of the previous MB, wrapping to the end of the previous row */
    mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
        mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
    if( h->last_qscale_diff != 0 )
    /* unary decode: count set bins over contexts 60+ctx */
    while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
    if(val > 102) //prevent infinite loop
    return -(val + 1)/2;
/**
 * Decodes one P-slice sub_mb_type (index into p_sub_mb_type_info)
 * with a small CABAC binary tree over contexts 21..23.
 * Return values for the individual leaves are elided in this excerpt.
 */
static int decode_cabac_p_mb_sub_type( H264Context *h ) {
    if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
    if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
    if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes one B-slice sub_mb_type (index into b_sub_mb_type_info)
 * with a CABAC binary tree over contexts 36..39.
 * @return 0 for B_Direct_8x8, 1/2 for B_L0_8x8/B_L1_8x8, up to 12 for the
 *         4x4 variants; intermediate codes are accumulated in 'type'.
 */
static int decode_cabac_b_mb_sub_type( H264Context *h ) {
    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
        return 0; /* B_Direct_8x8 */
    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
    /* two further bins refine the remaining sub types */
    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
    type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context is offset by
 * neighbor_transform_size (neighbours already using the 8x8 transform).
 */
static inline int decode_cabac_mb_transform_size( H264Context *h ) {
    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index (ref_idx_lX) for block n with CABAC.
 * The context is derived from the left/above cached ref indices; in B
 * slices, neighbours that are direct-coded do not count. The value itself
 * is unary-binarized over contexts 54+ctx.
 * @return the decoded ref index, clamped to 0 on overflow (see FIXME).
 */
static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
    int refa = h->ref_cache[list][scan8[n] - 1];
    int refb = h->ref_cache[list][scan8[n] - 8];
    if( h->slice_type == B_TYPE) {
        /* ignore direct-coded neighbours for the context derivation */
        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
    /* unary decode of the ref index */
    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
    if(ref >= 32 /*h->ref_list[list]*/){
        av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
        return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component (l: 0=x, 1=y) for block n.
 * The first-bin context depends on the summed magnitude of the neighbours'
 * mvd (amvd); the magnitude is a truncated unary prefix (up to 9) followed
 * by an exp-Golomb-style bypass suffix, and the sign is a bypass bin.
 * @return the signed mvd component.
 */
static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
    /* sum of |mvd| of the left and above neighbours */
    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
               abs( h->mvd_cache[list][scan8[n] - 8][l] );
    int ctxbase = (l == 0) ? 40 : 47;    // separate context sets per component
    else if( amvd > 32 )
    /* mvd == 0? */
    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
    /* truncated unary prefix of the magnitude */
    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
    /* bypass-coded exponential suffix */
    while( get_cabac_bypass( &h->cabac ) ) {
        av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
    if( get_cabac_bypass( &h->cabac ) )
    /* sign bin */
    return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Derives the coded_block_flag context for block idx of category cat.
 * nza/nzb are the "left/above neighbour had coefficients" indicators, read
 * from the source appropriate to the category: cbp bit 8 for the luma DC
 * (via left_cbp/top_cbp), the non_zero_count cache for luma/chroma AC
 * blocks, and cbp bits 6+idx for the chroma DC blocks.
 * @return context index within [0..3] + 4*cat (ctx derivation elided).
 */
static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
        /* luma DC: bit 8 of the neighbour cbp */
        nza = h->left_cbp&0x100;
        nzb = h-> top_cbp&0x100;
    } else if( cat == 1 || cat == 2 ) {
        /* luma AC / luma 4x4: cached non-zero counts */
        nza = h->non_zero_count_cache[scan8[idx] - 1];
        nzb = h->non_zero_count_cache[scan8[idx] - 8];
    } else if( cat == 3 ) {
        /* chroma DC: per-plane bits 6+idx of the neighbour cbp */
        nza = (h->left_cbp>>(6+idx))&0x01;
        nzb = (h-> top_cbp>>(6+idx))&0x01;
        /* chroma AC */
        nza = h->non_zero_count_cache[scan8[16+idx] - 1];
        nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
    return ctx + 4 * cat;
/* last_significant_coeff_flag context offset for each of the 63 possible
 * scan positions of an 8x8 block ("used" keeps it when only referenced
 * from inline asm / macros). */
static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6014 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6015 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6016 static const int significant_coeff_flag_offset[2][6] = {
6017 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6018 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6020 static const int last_coeff_flag_offset[2][6] = {
6021 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6022 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6024 static const int coeff_abs_level_m1_offset[6] = {
6025 227+0, 227+10, 227+20, 227+30, 227+39, 426
6027 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6028 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6029 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6030 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6031 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6032 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6033 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6034 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6035 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6041 int coeff_count = 0;
6044 int abslevelgt1 = 0;
6046 uint8_t *significant_coeff_ctx_base;
6047 uint8_t *last_coeff_ctx_base;
6048 uint8_t *abs_level_m1_ctx_base;
6051 #define CABAC_ON_STACK
6053 #ifdef CABAC_ON_STACK
6056 cc.range = h->cabac.range;
6057 cc.low = h->cabac.low;
6058 cc.bytestream= h->cabac.bytestream;
6060 #define CC &h->cabac
6064 /* cat: 0-> DC 16x16 n = 0
6065 * 1-> AC 16x16 n = luma4x4idx
6066 * 2-> Luma4x4 n = luma4x4idx
6067 * 3-> DC Chroma n = iCbCr
6068 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6069 * 5-> Luma8x8 n = 4 * luma8x8idx
6072 /* read coded block flag */
6074 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6075 if( cat == 1 || cat == 2 )
6076 h->non_zero_count_cache[scan8[n]] = 0;
6078 h->non_zero_count_cache[scan8[16+n]] = 0;
6079 #ifdef CABAC_ON_STACK
6080 h->cabac.range = cc.range ;
6081 h->cabac.low = cc.low ;
6082 h->cabac.bytestream= cc.bytestream;
6088 significant_coeff_ctx_base = h->cabac_state
6089 + significant_coeff_flag_offset[MB_FIELD][cat];
6090 last_coeff_ctx_base = h->cabac_state
6091 + last_coeff_flag_offset[MB_FIELD][cat];
6092 abs_level_m1_ctx_base = h->cabac_state
6093 + coeff_abs_level_m1_offset[cat];
6096 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6097 for(last= 0; last < coefs; last++) { \
6098 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6099 if( get_cabac( CC, sig_ctx )) { \
6100 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6101 index[coeff_count++] = last; \
6102 if( get_cabac( CC, last_ctx ) ) { \
6108 if( last == max_coeff -1 ) {\
6109 index[coeff_count++] = last;\
6111 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6112 #if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
6113 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6115 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6117 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6119 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6122 assert(coeff_count > 0);
6125 h->cbp_table[mb_xy] |= 0x100;
6126 else if( cat == 1 || cat == 2 )
6127 h->non_zero_count_cache[scan8[n]] = coeff_count;
6129 h->cbp_table[mb_xy] |= 0x40 << n;
6131 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6134 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6137 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6138 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6139 int j= scantable[index[coeff_count]];
6141 if( get_cabac( CC, ctx ) == 0 ) {
6143 block[j] = get_cabac_bypass_sign( CC, -1);
6145 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6151 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6152 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
6156 if( coeff_abs >= 15 ) {
6158 while( get_cabac_bypass( CC ) ) {
6164 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6170 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6171 else block[j] = coeff_abs;
6173 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6174 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6180 #ifdef CABAC_ON_STACK
6181 h->cabac.range = cc.range ;
6182 h->cabac.low = cc.low ;
6183 h->cabac.bytestream= cc.bytestream;
/**
 * Computes top_mb_xy and left_mb_xy[0] for the current MB. The simple
 * progressive addressing is then corrected for MBAFF, where a neighbour's
 * address depends on the field/frame coding of its MB pair relative to the
 * current MB.
 */
static inline void compute_mb_neighbors(H264Context *h)
    MpegEncContext * const s = &h->s;
    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
    h->top_mb_xy = mb_xy - s->mb_stride;
    h->left_mb_xy[0] = mb_xy - 1;
        /* MBAFF: reason in terms of the MB pair containing the current MB */
        const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy = pair_xy - s->mb_stride;
        const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !MB_FIELD;
        const int bottom = (s->mb_y & 1);
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            /* top neighbour is the upper MB of the pair above */
            h->top_mb_xy -= s->mb_stride;
        /* mismatched pairing: left neighbour is the top MB of the left pair */
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            h->left_mb_xy[0] = pair_xy - 1;
6215 * decodes a macroblock
6216 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6218 static int decode_mb_cabac(H264Context *h) {
6219 MpegEncContext * const s = &h->s;
6220 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6221 int mb_type, partition_count, cbp = 0;
6222 int dct8x8_allowed= h->pps.transform_8x8_mode;
6224 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6226 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6227 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6229 /* a skipped mb needs the aff flag from the following mb */
6230 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6231 predict_field_decoding_flag(h);
6232 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6233 skip = h->next_mb_skipped;
6235 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6236 /* read skip flags */
6238 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6239 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6240 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6241 if(h->next_mb_skipped)
6242 predict_field_decoding_flag(h);
6244 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6249 h->cbp_table[mb_xy] = 0;
6250 h->chroma_pred_mode_table[mb_xy] = 0;
6251 h->last_qscale_diff = 0;
6258 if( (s->mb_y&1) == 0 )
6260 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6262 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6264 h->prev_mb_skipped = 0;
6266 compute_mb_neighbors(h);
6267 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6268 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6272 if( h->slice_type == B_TYPE ) {
6274 partition_count= b_mb_type_info[mb_type].partition_count;
6275 mb_type= b_mb_type_info[mb_type].type;
6278 goto decode_intra_mb;
6280 } else if( h->slice_type == P_TYPE ) {
6282 partition_count= p_mb_type_info[mb_type].partition_count;
6283 mb_type= p_mb_type_info[mb_type].type;
6286 goto decode_intra_mb;
6289 assert(h->slice_type == I_TYPE);
6291 partition_count = 0;
6292 cbp= i_mb_type_info[mb_type].cbp;
6293 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6294 mb_type= i_mb_type_info[mb_type].type;
6297 mb_type |= MB_TYPE_INTERLACED;
6299 h->slice_table[ mb_xy ]= h->slice_num;
6301 if(IS_INTRA_PCM(mb_type)) {
6305 // We assume these blocks are very rare so we dont optimize it.
6306 // FIXME The two following lines get the bitstream position in the cabac
6307 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6308 ptr= h->cabac.bytestream;
6309 if(h->cabac.low&0x1) ptr--;
6311 if(h->cabac.low&0x1FF) ptr--;
6314 // The pixels are stored in the same order as levels in h->mb array.
6315 for(y=0; y<16; y++){
6316 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6317 for(x=0; x<16; x++){
6318 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
6319 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6323 const int index= 256 + 4*(y&3) + 32*(y>>2);
6325 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6326 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6330 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6332 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6333 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6337 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6339 // All blocks are present
6340 h->cbp_table[mb_xy] = 0x1ef;
6341 h->chroma_pred_mode_table[mb_xy] = 0;
6342 // In deblocking, the quantizer is 0
6343 s->current_picture.qscale_table[mb_xy]= 0;
6344 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6345 // All coeffs are present
6346 memset(h->non_zero_count[mb_xy], 16, 16);
6347 s->current_picture.mb_type[mb_xy]= mb_type;
6352 h->ref_count[0] <<= 1;
6353 h->ref_count[1] <<= 1;
6356 fill_caches(h, mb_type, 0);
6358 if( IS_INTRA( mb_type ) ) {
6360 if( IS_INTRA4x4( mb_type ) ) {
6361 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6362 mb_type |= MB_TYPE_8x8DCT;
6363 for( i = 0; i < 16; i+=4 ) {
6364 int pred = pred_intra_mode( h, i );
6365 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6366 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6369 for( i = 0; i < 16; i++ ) {
6370 int pred = pred_intra_mode( h, i );
6371 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6373 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6376 write_back_intra_pred_mode(h);
6377 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6379 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6380 if( h->intra16x16_pred_mode < 0 ) return -1;
6382 h->chroma_pred_mode_table[mb_xy] =
6383 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6385 pred_mode= check_intra_pred_mode( h, pred_mode );
6386 if( pred_mode < 0 ) return -1;
6387 h->chroma_pred_mode= pred_mode;
6388 } else if( partition_count == 4 ) {
6389 int i, j, sub_partition_count[4], list, ref[2][4];
6391 if( h->slice_type == B_TYPE ) {
6392 for( i = 0; i < 4; i++ ) {
6393 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6394 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6395 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6397 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6398 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6399 pred_direct_motion(h, &mb_type);
6400 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6401 for( i = 0; i < 4; i++ )
6402 if( IS_DIRECT(h->sub_mb_type[i]) )
6403 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6407 for( i = 0; i < 4; i++ ) {
6408 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6409 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6410 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6414 for( list = 0; list < h->list_count; list++ ) {
6415 for( i = 0; i < 4; i++ ) {
6416 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6417 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6418 if( h->ref_count[list] > 1 )
6419 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6425 h->ref_cache[list][ scan8[4*i]+1 ]=
6426 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6431 dct8x8_allowed = get_dct8x8_allowed(h);
6433 for(list=0; list<h->list_count; list++){
6435 if(IS_DIRECT(h->sub_mb_type[i])){
6436 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6439 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6441 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6442 const int sub_mb_type= h->sub_mb_type[i];
6443 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6444 for(j=0; j<sub_partition_count[i]; j++){
6447 const int index= 4*i + block_width*j;
6448 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6449 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6450 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6452 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6453 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6454 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6456 if(IS_SUB_8X8(sub_mb_type)){
6458 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6460 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6463 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6465 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6466 }else if(IS_SUB_8X4(sub_mb_type)){
6467 mv_cache[ 1 ][0]= mx;
6468 mv_cache[ 1 ][1]= my;
6470 mvd_cache[ 1 ][0]= mx - mpx;
6471 mvd_cache[ 1 ][1]= my - mpy;
6472 }else if(IS_SUB_4X8(sub_mb_type)){
6473 mv_cache[ 8 ][0]= mx;
6474 mv_cache[ 8 ][1]= my;
6476 mvd_cache[ 8 ][0]= mx - mpx;
6477 mvd_cache[ 8 ][1]= my - mpy;
6479 mv_cache[ 0 ][0]= mx;
6480 mv_cache[ 0 ][1]= my;
6482 mvd_cache[ 0 ][0]= mx - mpx;
6483 mvd_cache[ 0 ][1]= my - mpy;
6486 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6487 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6488 p[0] = p[1] = p[8] = p[9] = 0;
6489 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6493 } else if( IS_DIRECT(mb_type) ) {
6494 pred_direct_motion(h, &mb_type);
6495 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6496 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6497 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6499 int list, mx, my, i, mpx, mpy;
6500 if(IS_16X16(mb_type)){
6501 for(list=0; list<h->list_count; list++){
6502 if(IS_DIR(mb_type, 0, list)){
6503 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6504 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6506 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
6508 for(list=0; list<h->list_count; list++){
6509 if(IS_DIR(mb_type, 0, list)){
6510 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6512 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6513 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6514 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6516 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6517 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6519 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6522 else if(IS_16X8(mb_type)){
6523 for(list=0; list<h->list_count; list++){
6525 if(IS_DIR(mb_type, i, list)){
6526 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6527 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6529 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6532 for(list=0; list<h->list_count; list++){
6534 if(IS_DIR(mb_type, i, list)){
6535 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6536 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6537 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6538 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6540 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6541 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6543 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6544 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6549 assert(IS_8X16(mb_type));
6550 for(list=0; list<h->list_count; list++){
6552 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6553 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6554 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6556 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6559 for(list=0; list<h->list_count; list++){
6561 if(IS_DIR(mb_type, i, list)){
6562 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6563 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6564 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6566 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
6567 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6568 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6570 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6571 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6578 if( IS_INTER( mb_type ) ) {
6579 h->chroma_pred_mode_table[mb_xy] = 0;
6580 write_back_motion( h, mb_type );
6583 if( !IS_INTRA16x16( mb_type ) ) {
6584 cbp = decode_cabac_mb_cbp_luma( h );
6585 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6588 h->cbp_table[mb_xy] = h->cbp = cbp;
6590 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6591 if( decode_cabac_mb_transform_size( h ) )
6592 mb_type |= MB_TYPE_8x8DCT;
6594 s->current_picture.mb_type[mb_xy]= mb_type;
6596 if( cbp || IS_INTRA16x16( mb_type ) ) {
6597 const uint8_t *scan, *scan8x8, *dc_scan;
6600 if(IS_INTERLACED(mb_type)){
6601 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6602 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6603 dc_scan= luma_dc_field_scan;
6605 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6606 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6607 dc_scan= luma_dc_zigzag_scan;
6610 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6611 if( dqp == INT_MIN ){
6612 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6616 if(((unsigned)s->qscale) > 51){
6617 if(s->qscale<0) s->qscale+= 52;
6618 else s->qscale-= 52;
6620 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6622 if( IS_INTRA16x16( mb_type ) ) {
6624 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6625 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6628 for( i = 0; i < 16; i++ ) {
6629 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6630 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6634 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6638 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6639 if( cbp & (1<<i8x8) ) {
6640 if( IS_8x8DCT(mb_type) ) {
6641 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6642 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6645 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6646 const int index = 4*i8x8 + i4x4;
6647 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6649 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6651 //STOP_TIMER("decode_residual")
6654 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6655 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6662 for( c = 0; c < 2; c++ ) {
6663 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6664 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6671 for( c = 0; c < 2; c++ ) {
6672 for( i = 0; i < 4; i++ ) {
6673 const int index = 16 + 4 * c + i;
6674 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6675 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6680 uint8_t * const nnz= &h->non_zero_count_cache[0];
6681 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6682 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6685 uint8_t * const nnz= &h->non_zero_count_cache[0];
6686 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6687 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6688 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6689 h->last_qscale_diff = 0;
6692 s->current_picture.qscale_table[mb_xy]= s->qscale;
6693 write_back_non_zero_count(h);
6696 h->ref_count[0] >>= 1;
6697 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall).
 * For bS < 4 the per-line tc0 values are looked up and the work is delegated
 * to the DSP routine h264_h_loop_filter_luma; the scalar loop below handles
 * the bS == 4 (strong, intra-edge) case.
 * NOTE(review): this chunk is decimated -- the tc[] declaration, the
 * bS<4/bS==4 branch structure and several closing braces are elided here;
 * comments hedge accordingly. */
6704 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* index_a/alpha/beta: clipping thresholds from the spec tables, biased by +52
 * so negative qp+offset indices stay in range. */
6706 const int index_a = qp + h->slice_alpha_c0_offset;
6707 const int alpha = (alpha_table+52)[index_a];
6708 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc = -1 marks "no filtering" for this 4-pixel group in the DSP routine. */
6713 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6714 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6716 /* 16px edge length, because bS=4 is triggered by being at
6717 * the edge of an intra MB, so all 4 bS are the same */
6718 for( d = 0; d < 16; d++ ) {
/* p0..p2 are left of the edge, q0..q2 right of it (horizontal neighbours). */
6719 const int p0 = pix[-1];
6720 const int p1 = pix[-2];
6721 const int p2 = pix[-3];
6723 const int q0 = pix[0];
6724 const int q1 = pix[1];
6725 const int q2 = pix[2];
/* Edge-activity test: only filter where the step looks like blocking
 * artifact, not a real image edge. */
6727 if( FFABS( p0 - q0 ) < alpha &&
6728 FFABS( p1 - p0 ) < beta &&
6729 FFABS( q1 - q0 ) < beta ) {
/* Strong-filter eligibility threshold (alpha/4 + 2). */
6731 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6732 if( FFABS( p2 - p0 ) < beta)
6734 const int p3 = pix[-4];
/* Strong 4/5-tap smoothing of the p side. */
6736 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6737 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6738 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* Fallback 3-tap filter when the p-side smoothness test fails. */
6741 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6743 if( FFABS( q2 - q0 ) < beta)
6745 const int q3 = pix[3];
/* Strong smoothing of the q side, mirror of the p side above. */
6747 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6748 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6749 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6752 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak path (strong-eligibility failed): only p0/q0 are touched. */
6756 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6757 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6759 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge. Chroma filtering is entirely delegated
 * to DSP routines; note tc0+1 here versus the raw tc0 used for luma
 * (per the spec's chroma clipping rule), with 0 meaning "skip group".
 * NOTE(review): decimated chunk -- the bS<4 / intra branch structure and
 * closing braces are elided between the visible lines. */
6765 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6767 const int index_a = qp + h->slice_alpha_c0_offset;
6768 const int alpha = (alpha_table+52)[index_a];
6769 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6774 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6775 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 path: strong intra chroma filter, no tc clipping needed. */
6777 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the left vertical luma edge of an MBAFF macroblock pair.
 * Unlike filter_mb_edgev this takes 8 bS values and 2 qp values because the
 * two field/frame neighbours can differ, and works scalar per row (no DSP
 * fast path).
 * NOTE(review): decimated chunk -- variable declarations, the MB_FIELD
 * branch around bS_index, and most closing braces are elided. */
6781 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6783 for( i = 0; i < 16; i++, pix += stride) {
/* bS_index selection depends on field/frame pairing of the row
 * (elided MB_FIELD conditional chooses between these two forms). */
6789 int bS_index = (i >> 1);
6792 bS_index |= (i & 1);
6795 if( bS[bS_index] == 0 ) {
/* Per-row qp: which of the two neighbour qps applies depends on
 * whether this MB is a field MB. */
6799 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6800 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6801 alpha = (alpha_table+52)[index_a];
6802 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (bS < 4) filtering: clip the delta to +/-tc. */
6804 if( bS[bS_index] < 4 ) {
6805 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6806 const int p0 = pix[-1];
6807 const int p1 = pix[-2];
6808 const int p2 = pix[-3];
6809 const int q0 = pix[0];
6810 const int q1 = pix[1];
6811 const int q2 = pix[2];
6813 if( FFABS( p0 - q0 ) < alpha &&
6814 FFABS( p1 - p0 ) < beta &&
6815 FFABS( q1 - q0 ) < beta ) {
/* Optional p1/q1 adjustment widens the effective tc (elided
 * tc bookkeeping between these lines). */
6819 if( FFABS( p2 - p0 ) < beta ) {
6820 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6823 if( FFABS( q2 - q0 ) < beta ) {
6824 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6828 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6829 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6830 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6831 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong filter, same structure as filter_mb_edgev. */
6834 const int p0 = pix[-1];
6835 const int p1 = pix[-2];
6836 const int p2 = pix[-3];
6838 const int q0 = pix[0];
6839 const int q1 = pix[1];
6840 const int q2 = pix[2];
6842 if( FFABS( p0 - q0 ) < alpha &&
6843 FFABS( p1 - p0 ) < beta &&
6844 FFABS( q1 - q0 ) < beta ) {
6846 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6847 if( FFABS( p2 - p0 ) < beta)
6849 const int p3 = pix[-4];
6851 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6852 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6853 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6856 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6858 if( FFABS( q2 - q0 ) < beta)
6860 const int q3 = pix[3];
6862 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6863 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6864 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6867 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6871 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6872 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6874 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the left vertical chroma edge of an MBAFF macroblock pair.
 * 8 rows (chroma is half-height), 8 bS values, 2 qp values; scalar per row.
 * NOTE(review): decimated chunk -- the bS_index computation and several
 * braces are elided between the visible lines. */
6879 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6881 for( i = 0; i < 8; i++, pix += stride) {
6889 if( bS[bS_index] == 0 ) {
/* Chroma uses i>>2 (not i>>3) for the field split: 8 rows, not 16. */
6893 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6894 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6895 alpha = (alpha_table+52)[index_a];
6896 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal path: chroma tc is tc0 + 1 (spec chroma clipping rule). */
6898 if( bS[bS_index] < 4 ) {
6899 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6900 const int p0 = pix[-1];
6901 const int p1 = pix[-2];
6902 const int q0 = pix[0];
6903 const int q1 = pix[1];
6905 if( FFABS( p0 - q0 ) < alpha &&
6906 FFABS( p1 - p0 ) < beta &&
6907 FFABS( q1 - q0 ) < beta ) {
6908 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6910 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6911 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6912 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4 path: strong chroma filter, only p0/q0 are rewritten. */
6915 const int p0 = pix[-1];
6916 const int p1 = pix[-2];
6917 const int q0 = pix[0];
6918 const int q1 = pix[1];
6920 if( FFABS( p0 - q0 ) < alpha &&
6921 FFABS( p1 - p0 ) < beta &&
6922 FFABS( q1 - q0 ) < beta ) {
6924 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6925 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6926 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixels wide): the row-direction twin
 * of filter_mb_edgev. Neighbouring samples are addressed via pix_next
 * (= stride) instead of +/-1.
 * NOTE(review): decimated chunk -- tc[] declaration and the bS<4/bS==4
 * branch skeleton are elided between the visible lines. */
6932 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6934 const int index_a = qp + h->slice_alpha_c0_offset;
6935 const int alpha = (alpha_table+52)[index_a];
6936 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6937 const int pix_next = stride;
/* bS < 4: delegate to the DSP vertical-pass luma filter; tc == -1 skips. */
6942 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6943 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6945 /* 16px edge length, see filter_mb_edgev */
6946 for( d = 0; d < 16; d++ ) {
/* p* above the edge, q* below it. */
6947 const int p0 = pix[-1*pix_next];
6948 const int p1 = pix[-2*pix_next];
6949 const int p2 = pix[-3*pix_next];
6950 const int q0 = pix[0];
6951 const int q1 = pix[1*pix_next];
6952 const int q2 = pix[2*pix_next];
6954 if( FFABS( p0 - q0 ) < alpha &&
6955 FFABS( p1 - p0 ) < beta &&
6956 FFABS( q1 - q0 ) < beta ) {
6958 const int p3 = pix[-4*pix_next];
6959 const int q3 = pix[ 3*pix_next];
/* Strong-filter eligibility, same threshold as the vertical case. */
6961 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6962 if( FFABS( p2 - p0 ) < beta) {
6964 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6965 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6966 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6969 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6971 if( FFABS( q2 - q0 ) < beta) {
6973 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6974 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6975 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6978 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak path: only the samples adjacent to the edge change. */
6982 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6983 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6985 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: row-direction twin of
 * filter_mb_edgecv, fully delegated to DSP routines (tc0+1, 0 == skip).
 * NOTE(review): decimated chunk -- branch skeleton and braces elided. */
6992 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6994 const int index_a = qp + h->slice_alpha_c0_offset;
6995 const int alpha = (alpha_table+52)[index_a];
6996 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
7001 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
7002 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 path: strong intra chroma filter. */
7004 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock: assumes non-MBAFF and that the
 * left/top neighbours exist, precomputes averaged qps, and computes all
 * boundary strengths via the SIMD-friendly h264_loop_filter_strength DSP
 * hook instead of the per-edge scalar logic in filter_mb().
 * Falls back to filter_mb() on the picture border or when the DSP hook is
 * absent.
 * NOTE(review): decimated chunk -- early returns, some declarations
 * (mb_xy, mb_type, edges) and the FILTER() invocation list are elided. */
7008 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7009 MpegEncContext * const s = &h->s;
7011 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Border MBs and missing DSP support take the general path. */
7013 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7014 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7017 assert(!FRAME_MBAFF);
7019 mb_xy = mb_x + mb_y*s->mb_stride;
7020 mb_type = s->current_picture.mb_type[mb_xy];
7021 qp = s->current_picture.qscale_table[mb_xy];
7022 qp0 = s->current_picture.qscale_table[mb_xy-1];
7023 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7024 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7025 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7026 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* Boundary qps are the rounded average of the two adjacent MBs' qps. */
7027 qp0 = (qp + qp0 + 1) >> 1;
7028 qp1 = (qp + qp1 + 1) >> 1;
7029 qpc0 = (qpc + qpc0 + 1) >> 1;
7030 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this qp the filter tables are all zero, so nothing would change. */
7031 qp_thresh = 15 - h->slice_alpha_c0_offset;
7032 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7033 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: bS is constant (4 on the MB border, 3 inside), so skip the
 * strength computation entirely. */
7036 if( IS_INTRA(mb_type) ) {
7037 int16_t bS4[4] = {4,4,4,4};
7038 int16_t bS3[4] = {3,3,3,3};
/* 8x8 DCT: only every second internal edge is filtered. */
7039 if( IS_8x8DCT(mb_type) ) {
7040 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7041 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7042 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7043 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7045 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7046 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7047 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7048 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7049 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7050 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7051 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7052 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: only edges 0 and 2 exist at half resolution. */
7054 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7055 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7056 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7057 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7058 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7059 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7060 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7061 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS with the DSP hook; bSv aliases bS so a whole edge
 * (4 x int16) can be tested/assigned as one 64-bit value. */
7064 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7065 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7067 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7069 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1 tell the strength routine which internal edges can skip
 * the mv-difference check given the MB partitioning. */
7071 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7072 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7073 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7074 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7076 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7077 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7078 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7079 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* Intra neighbours force bS = 4 on the shared border regardless. */
7081 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7082 bSv[0][0] = 0x0004000400040004ULL;
7083 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7084 bSv[1][0] = 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): run luma + chroma edge filters for one edge,
 * choosing the neighbour-averaged qp for edge 0. */
7086 #define FILTER(hv,dir,edge)\
7087 if(bSv[dir][edge]) {\
7088 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7090 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7091 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7097 } else if( IS_8x8DCT(mb_type) ) {
/* General (reference) deblocking of one macroblock: handles MBAFF, slice
 * boundaries, and interlaced/progressive neighbour mixing that
 * filter_mb_fast() cannot. Computes boundary strength bS per edge from
 * intra flags, non-zero coefficient counts, and ref/mv differences, then
 * calls the scalar edge filters.
 * NOTE(review): decimated chunk -- many declarations (bS[], i, dir, edge,
 * chroma_qp), several if/else skeleton lines and closing braces are elided
 * between the visible lines; comments hedge accordingly. */
7116 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7117 MpegEncContext * const s = &h->s;
7118 const int mb_xy= mb_x + mb_y*s->mb_stride;
7119 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs compare vertical mvs at half resolution (>=2 vs >=4). */
7120 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7121 int first_vertical_edge_done = 0;
7123 /* FIXME: A given frame may occupy more than one position in
7124 * the reference list. So ref2frm should be populated with
7125 * frame numbers, not indices. */
/* +2 bias so ref index -2 (unused) and -1 (PCM?) map into the table. */
7126 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7127 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7129 //for sufficiently low qp, filtering wouldn't do anything
7130 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7132 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7133 int qp = s->current_picture.qscale_table[mb_xy];
/* (elided if-head) early-out when this MB and both neighbour-averaged qps
 * are below the no-op threshold. */
7135 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7136 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case for the left edge (elided if-head above): */
7142 // left mb is in picture
7143 && h->slice_table[mb_xy-1] != 255
7144 // and current and left pair do not have the same interlaced type
7145 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7146 // and left mb is in the same slice if deblocking_filter == 2
7147 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7148 /* First vertical edge is different in MBAFF frames
7149 * There are 8 different bS to compute and 2 different Qp
7151 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7152 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7156 int mb_qp, mbn0_qp, mbn1_qp;
7158 first_vertical_edge_done = 1;
7160 if( IS_INTRA(mb_type) )
7161 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7163 for( i = 0; i < 8; i++ ) {
/* Which of the two left MBs faces row i depends on field/frame mode. */
7164 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7166 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7168 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7169 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7170 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* Two qp pairs (luma + chroma), one per left-pair MB. */
7177 mb_qp = s->current_picture.qscale_table[mb_xy];
7178 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7179 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7180 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7181 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7182 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7183 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7184 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7185 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7188 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7189 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7190 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7191 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7192 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7194 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7195 for( dir = 0; dir < 2; dir++ )
/* mbm = external neighbour MB for edge 0 (left or top). */
7198 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7199 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 marks "outside picture": skip the external edge. */
7200 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7202 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7203 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7204 // how often to recheck mv-based bS when iterating between edges
7205 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7206 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7207 // how often to recheck mv-based bS when iterating along each edge
7208 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* Left edge already handled by the MBAFF special case above. */
7210 if (first_vertical_edge_done) {
7212 first_vertical_edge_done = 0;
/* deblocking_filter == 2: do not filter across slice boundaries. */
7215 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7218 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7219 && !IS_INTERLACED(mb_type)
7220 && IS_INTERLACED(mbm_type)
7222 // This is a special case in the norm where the filtering must
7223 // be done twice (one each of the field) even if we are in a
7224 // frame macroblock.
7226 static const int nnz_idx[4] = {4,5,6,3};
7227 unsigned int tmp_linesize = 2 * linesize;
7228 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7229 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter against each of the two field MBs of the top pair. */
7234 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7235 if( IS_INTRA(mb_type) ||
7236 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7237 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7239 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7240 for( i = 0; i < 4; i++ ) {
7241 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7242 mbn_nnz[nnz_idx[i]] != 0 )
7248 // Do not use s->qscale as luma quantizer because it has not the same
7249 // value in IPCM macroblocks.
7250 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7251 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7252 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
7253 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7254 chroma_qp = ( h->chroma_qp +
7255 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7256 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7257 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Main edge loop: edge 0 is the external (mbm) edge, 1..3 are internal. */
7264 for( edge = start; edge < edges; edge++ ) {
7265 /* mbn_xy: neighbor macroblock */
7266 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7267 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* With 8x8 DCT only every second internal edge is filtered. */
7271 if( (edge&1) && IS_8x8DCT(mb_type) )
7274 if( IS_INTRA(mb_type) ||
7275 IS_INTRA(mbn_type) ) {
/* (elided) pick bS 4 vs 3 for intra edges depending on field pairing. */
7278 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7279 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7288 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter path: partition mask lets us skip the ref/mv comparison. */
7293 if( edge & mask_edge ) {
7294 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed field/frame neighbours always get bS = 1. */
7297 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7298 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole-edge shortcut: one ref/mv comparison covers all 4 positions. */
7301 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7302 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7303 int bn_idx= b_idx - (dir ? 8:1);
7305 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7306 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7307 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7308 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7310 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General path: compute bS separately for each 4-pixel group. */
7316 for( i = 0; i < 4; i++ ) {
7317 int x = dir == 0 ? edge : i;
7318 int y = dir == 0 ? i : edge;
7319 int b_idx= 8 + 4 + x + 8*y;
7320 int bn_idx= b_idx - (dir ? 8:1);
/* Residual on either side forces bS = 2 (elided assignment). */
7322 if( h->non_zero_count_cache[b_idx] != 0 ||
7323 h->non_zero_count_cache[bn_idx] != 0 ) {
7329 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7330 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7331 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7332 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* Nothing to do on this edge if all strengths are zero. */
7340 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7345 // Do not use s->qscale as luma quantizer because it has not the same
7346 // value in IPCM macroblocks.
7347 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7348 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7349 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7350 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* dir == 0: vertical edges; chroma exists only at even luma edges. */
7352 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7353 if( (edge&1) == 0 ) {
7354 int chroma_qp = ( h->chroma_qp +
7355 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7356 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7357 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* dir == 1: horizontal edges, same chroma rule. */
7360 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7361 if( (edge&1) == 0 ) {
7362 int chroma_qp = ( h->chroma_qp +
7363 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7364 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7365 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7372 static int decode_slice(H264Context *h){
7373 MpegEncContext * const s = &h->s;
7374 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7378 if( h->pps.cabac ) {
7382 align_get_bits( &s->gb );
7385 ff_init_cabac_states( &h->cabac);
7386 ff_init_cabac_decoder( &h->cabac,
7387 s->gb.buffer + get_bits_count(&s->gb)/8,
7388 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7389 /* calculate pre-state */
7390 for( i= 0; i < 460; i++ ) {
7392 if( h->slice_type == I_TYPE )
7393 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7395 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7398 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7400 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7405 int ret = decode_mb_cabac(h);
7407 //STOP_TIMER("decode_mb_cabac")
7409 if(ret>=0) hl_decode_mb(h);
7411 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7414 if(ret>=0) ret = decode_mb_cabac(h);
7416 if(ret>=0) hl_decode_mb(h);
7419 eos = get_cabac_terminate( &h->cabac );
7421 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7422 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7423 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7427 if( ++s->mb_x >= s->mb_width ) {
7429 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7436 if( eos || s->mb_y >= s->mb_height ) {
7437 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7438 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7445 int ret = decode_mb_cavlc(h);
7447 if(ret>=0) hl_decode_mb(h);
7449 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7451 ret = decode_mb_cavlc(h);
7453 if(ret>=0) hl_decode_mb(h);
7458 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7459 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7464 if(++s->mb_x >= s->mb_width){
7466 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7471 if(s->mb_y >= s->mb_height){
7472 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7474 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7475 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7479 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7486 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7487 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7488 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7489 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7493 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7502 for(;s->mb_y < s->mb_height; s->mb_y++){
7503 for(;s->mb_x < s->mb_width; s->mb_x++){
7504 int ret= decode_mb(h);
7509 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7510 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7515 if(++s->mb_x >= s->mb_width){
7517 if(++s->mb_y >= s->mb_height){
7518 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7519 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7523 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7530 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7531 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7536 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7543 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7546 return -1; //not reached
/**
 * Decodes an SEI "unregistered user data" payload.
 * Scans the payload text (after the 16-byte UUID) for the x264 version
 * banner "x264 - core N" and records the build number in h->x264_build,
 * so that known bugs of specific encoder versions can be worked around.
 */
7549 static int decode_unregistered_user_data(H264Context *h, int size){
7550 MpegEncContext * const s = &h->s;
7551 uint8_t user_data[16+256]; /* 16-byte UUID + up to 256 bytes of payload text */
/* copy the payload, bounded both by its declared size and the local buffer */
7557 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7558 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts right after the 16-byte UUID */
7562 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7563 if(e==1 && build>=0)
7564 h->x264_build= build;
7566 if(s->avctx->debug & FF_DEBUG_BUGS)
7567 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes that did not fit into user_data */
7570 skip_bits(&s->gb, 8);
/**
 * Decodes the SEI NAL unit: iterates over all SEI messages in the RBSP.
 * Per the spec, both the payload type and the payload size are coded as a
 * run of 0xFF bytes followed by a terminating byte < 255, each byte being
 * accumulated into the value.
 */
7575 static int decode_sei(H264Context *h){
7576 MpegEncContext * const s = &h->s;
/* need at least type byte + size byte remaining for another message */
7578 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7583 type+= show_bits(&s->gb, 8);
7584 }while(get_bits(&s->gb, 8) == 255); /* 255 means "more type bytes follow" */
7588 size+= show_bits(&s->gb, 8);
7589 }while(get_bits(&s->gb, 8) == 255); /* same continuation coding for the size */
7593 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled SEI payload types are skipped wholesale */
7597 skip_bits(&s->gb, 8*size);
7600 //FIXME check bits here
7601 align_get_bits(&s->gb); /* each SEI message is byte-aligned */
/**
 * Parses hrd_parameters() (Hypothetical Reference Decoder, spec Annex E).
 * All fields are read only to keep the bitstream position correct; none of
 * the values are stored or used by this decoder.
 */
7607 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7608 MpegEncContext * const s = &h->s;
7610 cpb_count = get_ue_golomb(&s->gb) + 1; /* cpb_cnt_minus1 + 1 */
7611 get_bits(&s->gb, 4); /* bit_rate_scale */
7612 get_bits(&s->gb, 4); /* cpb_size_scale */
7613 for(i=0; i<cpb_count; i++){
7614 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7615 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7616 get_bits1(&s->gb); /* cbr_flag */
7618 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7619 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7620 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7621 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses vui_parameters() (Video Usability Information, spec Annex E).
 * Stores what the decoder actually uses into *sps: sample aspect ratio,
 * timing info, and the bitstream restriction fields (notably
 * num_reorder_frames, which bounds the display reordering delay).
 * Everything else is read and discarded to keep bit position correct.
 */
7624 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7625 MpegEncContext * const s = &h->s;
7626 int aspect_ratio_info_present_flag;
7627 unsigned int aspect_ratio_idc;
7628 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7630 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7632 if( aspect_ratio_info_present_flag ) {
7633 aspect_ratio_idc= get_bits(&s->gb, 8);
7634 if( aspect_ratio_idc == EXTENDED_SAR ) {
/* EXTENDED_SAR: sample aspect ratio is coded explicitly */
7635 sps->sar.num= get_bits(&s->gb, 16);
7636 sps->sar.den= get_bits(&s->gb, 16);
7637 }else if(aspect_ratio_idc < 14){
/* table lookup for the predefined aspect ratio indices */
7638 sps->sar= pixel_aspect[aspect_ratio_idc];
7640 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7647 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7649 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7650 get_bits1(&s->gb); /* overscan_appropriate_flag */
7653 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7654 get_bits(&s->gb, 3); /* video_format */
7655 get_bits1(&s->gb); /* video_full_range_flag */
7656 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7657 get_bits(&s->gb, 8); /* colour_primaries */
7658 get_bits(&s->gb, 8); /* transfer_characteristics */
7659 get_bits(&s->gb, 8); /* matrix_coefficients */
7663 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7664 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7665 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* timing info: kept, used for frame rate derivation */
7668 sps->timing_info_present_flag = get_bits1(&s->gb);
7669 if(sps->timing_info_present_flag){
7670 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7671 sps->time_scale = get_bits_long(&s->gb, 32);
7672 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed but not used (see decode_hrd_parameters) */
7675 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7676 if(nal_hrd_parameters_present_flag)
7677 decode_hrd_parameters(h, sps);
7678 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7679 if(vcl_hrd_parameters_present_flag)
7680 decode_hrd_parameters(h, sps);
7681 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7682 get_bits1(&s->gb); /* low_delay_hrd_flag */
7683 get_bits1(&s->gb); /* pic_struct_present_flag */
7685 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7686 if(sps->bitstream_restriction_flag){
7687 unsigned int num_reorder_frames;
7688 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7689 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7690 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7691 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7692 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7693 num_reorder_frames= get_ue_golomb(&s->gb);
7694 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity limit: the DPB cannot exceed 16 frames, so reject larger values */
7696 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7697 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7701 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parses one scaling list (scaling_list() syntax, 4x4 or 8x8).
 * @param factors       output array of |size| quantization factors
 * @param size          16 for a 4x4 list, 64 for an 8x8 list
 * @param jvt_list      spec default list, used when the stream signals
 *                      "use default" (first delta yields next==0)
 * @param fallback_list list to copy when the matrix is not coded at all
 * Coded lists are delta-coded in zigzag scan order; a value repeats
 * (last is reused) once next becomes 0.
 */
7707 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7708 const uint8_t *jvt_list, const uint8_t *fallback_list){
7709 MpegEncContext * const s = &h->s;
7710 int i, last = 8, next = 8;
7711 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7712 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7713 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7715 for(i=0;i<size;i++){
7717 next = (last + get_se_golomb(&s->gb)) & 0xff; /* deltas wrap mod 256 */
7718 if(!i && !next){ /* matrix not written, we use the preset one */
7719 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* once next==0 the last explicit value is repeated for the rest of the list */
7722 last = factors[scan[i]] = next ? next : last;
/**
 * Parses the full set of scaling matrices for an SPS or PPS
 * (seq/pic_scaling_matrix_present syntax).
 * @param is_sps nonzero when called from SPS parsing; enables the 8x8
 *               lists unconditionally and records scaling_matrix_present
 * Fallback rules follow the spec: when the SPS already carried matrices,
 * a PPS list that is absent falls back to the SPS list, otherwise to the
 * flat/default lists; within a frame type, chroma lists fall back to the
 * previously parsed list (see per-call comments below).
 */
7726 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7727 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7728 MpegEncContext * const s = &h->s;
7729 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7730 const uint8_t *fallback[4] = {
7731 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7732 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7733 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7734 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7736 if(get_bits1(&s->gb)){ /* scaling_matrix_present flag */
7737 sps->scaling_matrix_present |= is_sps;
7738 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7739 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7740 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7741 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7742 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7743 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7744 if(is_sps || pps->transform_8x8_mode){
7745 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7746 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7748 } else if(fallback_sps) {
/* no PPS matrices coded: inherit the SPS matrices wholesale */
7749 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7750 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a seq_parameter_set_rbsp() NAL and stores it in
 * h->sps_buffer[sps_id]. Validates sps_id, POC cycle length, reference
 * frame count and picture dimensions before accepting the SPS.
 * @return negative on error (exact value elided from this view)
 */
7754 static inline int decode_seq_parameter_set(H264Context *h){
7755 MpegEncContext * const s = &h->s;
7756 int profile_idc, level_idc;
7757 unsigned int sps_id, tmp, mb_width, mb_height;
7761 profile_idc= get_bits(&s->gb, 8);
7762 get_bits1(&s->gb); //constraint_set0_flag
7763 get_bits1(&s->gb); //constraint_set1_flag
7764 get_bits1(&s->gb); //constraint_set2_flag
7765 get_bits1(&s->gb); //constraint_set3_flag
7766 get_bits(&s->gb, 4); // reserved
7767 level_idc= get_bits(&s->gb, 8);
7768 sps_id= get_ue_golomb(&s->gb);
7770 if (sps_id >= MAX_SPS_COUNT){
7771 // ok it has gone out of hand, someone is sending us bad stuff.
7772 av_log(h->s.avctx, AV_LOG_ERROR, "illegal sps_id (%d)\n", sps_id);
7776 sps= &h->sps_buffer[ sps_id ];
7777 sps->profile_idc= profile_idc;
7778 sps->level_idc= level_idc;
7780 if(sps->profile_idc >= 100){ //high profile
7781 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7782 get_bits1(&s->gb); //residual_color_transform_flag
7783 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7784 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7785 sps->transform_bypass = get_bits1(&s->gb);
7786 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7788 sps->scaling_matrix_present = 0;
7790 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7791 sps->poc_type= get_ue_golomb(&s->gb);
7793 if(sps->poc_type == 0){ //FIXME #define
7794 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7795 } else if(sps->poc_type == 1){//FIXME #define
7796 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7797 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7798 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7799 tmp= get_ue_golomb(&s->gb); /* num_ref_frames_in_pic_order_cnt_cycle */
/* bound check before using tmp as a loop limit / array size */
7801 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7802 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7805 sps->poc_cycle_length= tmp;
7807 for(i=0; i<sps->poc_cycle_length; i++)
7808 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7809 }else if(sps->poc_type != 2){
7810 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7814 tmp= get_ue_golomb(&s->gb); /* num_ref_frames */
7815 if(tmp > MAX_PICTURE_COUNT-2){
7816 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7818 sps->ref_frame_count= tmp;
7819 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7820 mb_width= get_ue_golomb(&s->gb) + 1;
7821 mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb computations or exceed limits */
7822 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7823 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7824 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7827 sps->mb_width = mb_width;
7828 sps->mb_height= mb_height;
7830 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7831 if(!sps->frame_mbs_only_flag)
7832 sps->mb_aff= get_bits1(&s->gb); /* mb_adaptive_frame_field_flag */
7836 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7838 #ifndef ALLOW_INTERLACE
7840 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7842 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7843 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7845 sps->crop= get_bits1(&s->gb); /* frame_cropping_flag */
7847 sps->crop_left = get_ue_golomb(&s->gb);
7848 sps->crop_right = get_ue_golomb(&s->gb);
7849 sps->crop_top = get_ue_golomb(&s->gb);
7850 sps->crop_bottom= get_ue_golomb(&s->gb);
7851 if(sps->crop_left || sps->crop_top){
7852 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7858 sps->crop_bottom= 0;
7861 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7862 if( sps->vui_parameters_present_flag )
7863 decode_vui_parameters(h, sps);
7865 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7866 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7867 sps_id, sps->profile_idc, sps->level_idc,
7869 sps->ref_frame_count,
7870 sps->mb_width, sps->mb_height,
7871 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7872 sps->direct_8x8_inference_flag ? "8B8" : "",
7873 sps->crop_left, sps->crop_right,
7874 sps->crop_top, sps->crop_bottom,
7875 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a pic_parameter_set_rbsp() NAL and stores it in
 * h->pps_buffer[pps_id]. Validates pps_id, the referenced sps_id and the
 * reference counts; FMO (slice_group_count > 1) is recognized but not
 * supported. High-profile extensions (transform_8x8_mode, PPS scaling
 * matrices, second_chroma_qp_index_offset) are parsed only when more bits
 * remain in the RBSP.
 * @param bit_length length of the RBSP in bits, used to detect the
 *                   optional high-profile trailer
 */
7881 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7882 MpegEncContext * const s = &h->s;
7883 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7886 if(pps_id>=MAX_PPS_COUNT){
7887 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
7890 pps = &h->pps_buffer[pps_id];
7892 tmp= get_ue_golomb(&s->gb); /* seq_parameter_set_id */
7893 if(tmp>=MAX_SPS_COUNT){
7894 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7899 pps->cabac= get_bits1(&s->gb); /* entropy_coding_mode_flag */
7900 pps->pic_order_present= get_bits1(&s->gb);
7901 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7902 if(pps->slice_group_count > 1 ){
7903 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7904 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7905 switch(pps->mb_slice_group_map_type){
7908 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7909 |    run_length[ i ]                                |1  |ue(v)   |
7914 |   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
7916 |    top_left_mb[ i ]                               |1  |ue(v)   |
7917 |    bottom_right_mb[ i ]                           |1  |ue(v)   |
7925 |   slice_group_change_direction_flag               |1  |u(1)    |
7926 |   slice_group_change_rate_minus1                  |1  |ue(v)   |
7931 |   slice_group_id_cnt_minus1                       |1  |ue(v)   |
7932 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7934 |    slice_group_id[ i ]                            |1  |u(v)    |
7939 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7940 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned compare also catches get_ue_golomb error values wrapping negative */
7941 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7942 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7943 pps->ref_count[0]= pps->ref_count[1]= 1;
7947 pps->weighted_pred= get_bits1(&s->gb);
7948 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7949 pps->init_qp= get_se_golomb(&s->gb) + 26; /* pic_init_qp_minus26 + 26 */
7950 pps->init_qs= get_se_golomb(&s->gb) + 26; /* pic_init_qs_minus26 + 26 */
7951 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7952 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7953 pps->constrained_intra_pred= get_bits1(&s->gb);
7954 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7956 pps->transform_8x8_mode= 0;
7957 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default: flat matrices (all 16) until scaling lists say otherwise */
7958 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7959 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* high-profile trailer is present only if bits remain in the RBSP */
7961 if(get_bits_count(&s->gb) < bit_length){
7962 pps->transform_8x8_mode= get_bits1(&s->gb);
7963 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7964 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7967 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7968 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7969 pps_id, pps->sps_id,
7970 pps->cabac ? "CABAC" : "CAVLC",
7971 pps->slice_group_count,
7972 pps->ref_count[0], pps->ref_count[1],
7973 pps->weighted_pred ? "weighted" : "",
7974 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7975 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7976 pps->constrained_intra_pred ? "CONSTR" : "",
7977 pps->redundant_pic_cnt_present ? "REDU" : "",
7978 pps->transform_8x8_mode ? "8x8DCT" : ""
7986 * finds the end of the current frame in the bitstream.
7987 * @return the position of the first byte of the next frame, or -1
7989 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7992 ParseContext *pc = &(h->s.parse_context);
7993 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7994 // mb_addr= pc->mb_addr - 1;
/* state machine scanning for 00 00 01 start codes; state persists in pc
   across calls so a start code split over two packets is still found */
7999 for(i=0; i<buf_size; i++){
8001 for(; i<buf_size; i++){
8008 if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5
8009 else if(buf[i]) state = 7;
8010 else state>>=1; //2->1, 1->0, 0->0
8012 int v= buf[i] & 0x1F; /* nal_unit_type of the NAL that just started */
8013 if(v==7 || v==8 || v==9){ /* SPS/PPS/AUD: non-VCL, ends the current frame */
8014 if(pc->frame_start_found){
8018 pc->frame_start_found= 0;
8021 }else if(v==1 || v==2 || v==5){ /* slice / partition A / IDR slice */
8022 if(pc->frame_start_found){
8026 pc->frame_start_found = 1;
8036 return END_NOT_FOUND;
8039 #ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: splits the input stream into complete frames.
 * With PARSER_FLAG_COMPLETE_FRAMES the input is passed through untouched;
 * otherwise find_frame_end()/ff_combine_frame() buffer partial frames
 * until a full one is available.
 */
8040 static int h264_parse(AVCodecParserContext *s,
8041 AVCodecContext *avctx,
8042 uint8_t **poutbuf, int *poutbuf_size,
8043 const uint8_t *buf, int buf_size)
8045 H264Context *h = s->priv_data;
8046 ParseContext *pc = &h->s.parse_context;
8049 if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
8052 next= find_frame_end(h, buf, buf_size);
8054 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
/* the frame end lies inside the buffered data from a previous call:
   rescan the tail so the start-code state machine stays consistent */
8060 if(next<0 && next != END_NOT_FOUND){
8061 assert(pc->last_index + next >= 0 );
8062 find_frame_end(h, &pc->buffer[pc->last_index + next], -next); //update state
8066 *poutbuf = (uint8_t *)buf;
8067 *poutbuf_size = buf_size;
/**
 * AVCodecParser split callback: returns the size of the leading
 * out-of-band header (SPS/PPS/...) in buf, i.e. the offset of the first
 * VCL NAL, so it can be exported as extradata.
 */
8071 static int h264_split(AVCodecContext *avctx,
8072 const uint8_t *buf, int buf_size)
8075 uint32_t state = -1; /* rolling 32-bit window over the byte stream */
8078 for(i=0; i<=buf_size; i++){
8079 if((state&0xFFFFFF1F) == 0x107) /* start code + SPS: header region begins */
8081 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* any start code whose NAL type is not SPS/PPS/AUD ends the header */
8083 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
8085 while(i>4 && buf[i-5]==0) i--; /* include trailing zero bytes of the start code */
8090 state= (state<<8) | buf[i];
8094 #endif /* CONFIG_H264_PARSER */
/**
 * Decodes all NAL units in buf, dispatching each by nal_unit_type:
 * slices (with or without data partitioning), SEI, SPS and PPS.
 * Handles both Annex-B streams (00 00 01 start codes) and AVC/mp4 style
 * streams (length-prefixed NALs, h->is_avc with h->nal_length_size).
 * @return number of bytes consumed, or negative on error
 */
8096 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8097 MpegEncContext * const s = &h->s;
8098 AVCodecContext * const avctx= s->avctx;
8102 for(i=0; i<50; i++){
8103 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8106 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
8108 s->current_picture_ptr= NULL; /* whole-frame input: start a fresh picture */
8119 if(buf_index >= buf_size) break;
/* AVC format: read the big-endian NAL length prefix */
8121 for(i = 0; i < h->nal_length_size; i++)
8122 nalsize = (nalsize << 8) | buf[buf_index++];
8123 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
8128 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8133 // start code prefix search
8134 for(; buf_index + 3 < buf_size; buf_index++){
8135 // This should always succeed in the first iteration.
8136 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8140 if(buf_index+3 >= buf_size) break;
/* unescape the NAL (remove emulation-prevention bytes) into ptr */
8145 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8146 if (ptr==NULL || dst_length < 0){
8149 while(ptr[dst_length - 1] == 0 && dst_length > 0)
8151 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
8153 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8154 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8157 if (h->is_avc && (nalsize != consumed))
8158 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8160 buf_index += consumed;
/* hurry_up / skip_frame: drop non-reference NALs on request */
8162 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8163 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8166 switch(h->nal_unit_type){
8168 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8170 init_get_bits(&s->gb, ptr, bit_length);
8172 h->inter_gb_ptr= &s->gb; /* no data partitioning: one bit reader for all */
8173 s->data_partitioning = 0;
8175 if(decode_slice_header(h) < 0){
8176 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8179 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
8180 if(h->redundant_pic_count==0 && s->hurry_up < 5
8181 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8182 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8183 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8184 && avctx->skip_frame < AVDISCARD_ALL)
8188 init_get_bits(&s->gb, ptr, bit_length);
8190 h->inter_gb_ptr= NULL; /* partition A: inter data arrives in a later NAL */
8191 s->data_partitioning = 1;
8193 if(decode_slice_header(h) < 0){
8194 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8198 init_get_bits(&h->intra_gb, ptr, bit_length); /* partition B: intra residual */
8199 h->intra_gb_ptr= &h->intra_gb;
8202 init_get_bits(&h->inter_gb, ptr, bit_length); /* partition C: inter residual */
8203 h->inter_gb_ptr= &h->inter_gb;
/* decode only once all partitions of the slice have been seen */
8205 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8206 && s->context_initialized
8208 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8209 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8210 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8211 && avctx->skip_frame < AVDISCARD_ALL)
8215 init_get_bits(&s->gb, ptr, bit_length);
8219 init_get_bits(&s->gb, ptr, bit_length);
8220 decode_seq_parameter_set(h);
8222 if(s->flags& CODEC_FLAG_LOW_DELAY)
8225 if(avctx->has_b_frames < 2)
8226 avctx->has_b_frames= !s->low_delay;
8229 init_get_bits(&s->gb, ptr, bit_length);
8231 decode_picture_parameter_set(h, bit_length);
8235 case NAL_END_SEQUENCE:
8236 case NAL_END_STREAM:
8237 case NAL_FILLER_DATA:
8239 case NAL_AUXILIARY_SLICE:
8242 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8250 * returns the number of bytes consumed for building the current frame
8252 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* with CODEC_FLAG_TRUNCATED part of the data came from the parse-context
   buffer, so subtract what was already consumed in earlier calls */
8253 if(s->flags&CODEC_FLAG_TRUNCATED){
8254 pos -= s->parse_context.last_index;
8255 if(pos<0) pos=0; // FIXME remove (unneeded?)
8259 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8260 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Main AVCodec decode callback: decodes one access unit from buf.
 * Handles flush at end of stream (buf_size==0 drains h->delayed_pic),
 * lazy avcC extradata parsing for AVC/mp4 streams, NAL decoding via
 * decode_nal_units(), and finally sorts decoded pictures into display
 * order through the delayed_pic reorder buffer.
 * @return bytes consumed, or negative on error
 */
8266 static int decode_frame(AVCodecContext *avctx,
8267 void *data, int *data_size,
8268 uint8_t *buf, int buf_size)
8270 H264Context *h = avctx->priv_data;
8271 MpegEncContext *s = &h->s;
8272 AVFrame *pict = data;
8275 s->flags= avctx->flags;
8276 s->flags2= avctx->flags2;
8278 /* no supplementary picture */
8279 if (buf_size == 0) {
8283 //FIXME factorize this with the output code below
/* flush: output the lowest-POC picture still waiting in delayed_pic */
8284 out = h->delayed_pic[0];
8286 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8287 if(h->delayed_pic[i]->poc < out->poc){
8288 out = h->delayed_pic[i];
8292 for(i=out_idx; h->delayed_pic[i]; i++)
8293 h->delayed_pic[i] = h->delayed_pic[i+1];
8296 *data_size = sizeof(AVFrame);
8297 *pict= *(AVFrame*)out;
8303 if(s->flags&CODEC_FLAG_TRUNCATED){
8304 int next= find_frame_end(h, buf, buf_size);
8306 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8308 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* first call on an AVC/mp4 stream: parse SPS/PPS out of the avcC box */
8311 if(h->is_avc && !h->got_avcC) {
8312 int i, cnt, nalsize;
8313 unsigned char *p = avctx->extradata;
8314 if(avctx->extradata_size < 7) {
8315 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8319 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8322 /* sps and pps in the avcC always have length coded with 2 bytes,
8323 so put a fake nal_length_size = 2 while parsing them */
8324 h->nal_length_size = 2;
8325 // Decode sps from avcC
8326 cnt = *(p+5) & 0x1f; // Number of sps
8328 for (i = 0; i < cnt; i++) {
8329 nalsize = AV_RB16(p) + 2;
8330 if(decode_nal_units(h, p, nalsize) < 0) {
8331 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8336 // Decode pps from avcC
8337 cnt = *(p++); // Number of pps
8338 for (i = 0; i < cnt; i++) {
8339 nalsize = AV_RB16(p) + 2;
8340 if(decode_nal_units(h, p, nalsize) != nalsize) {
8341 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8346 // Now store right nal length size, that will be use to parse all other nals
8347 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8348 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in extradata: parse once up front */
8352 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
8353 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8357 buf_index=decode_nal_units(h, buf, buf_size);
8361 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
8362 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture complete (whole-frame input, or all MB rows decoded in chunks mode) */
8366 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
8367 Picture *out = s->current_picture_ptr;
8368 Picture *cur = s->current_picture_ptr;
8369 Picture *prev = h->delayed_output_pic;
8370 int i, pics, cross_idr, out_of_order, out_idx;
8374 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8375 s->current_picture_ptr->pict_type= s->pict_type;
8377 h->prev_frame_num_offset= h->frame_num_offset;
8378 h->prev_frame_num= h->frame_num;
8379 if(s->current_picture_ptr->reference){
8380 h->prev_poc_msb= h->poc_msb;
8381 h->prev_poc_lsb= h->poc_lsb;
8383 if(s->current_picture_ptr->reference)
8384 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8390 //FIXME do something with unavailable reference frames
8392 #if 0 //decode order
8393 *data_size = sizeof(AVFrame);
8395 /* Sort B-frames into display order */
8397 if(h->sps.bitstream_restriction_flag
8398 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8399 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8404 while(h->delayed_pic[pics]) pics++;
8406 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
8408 h->delayed_pic[pics++] = cur;
8409 if(cur->reference == 0)
8413 for(i=0; h->delayed_pic[i]; i++)
8414 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the delayed picture with the smallest POC as output candidate */
8417 out = h->delayed_pic[0];
8419 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8420 if(h->delayed_pic[i]->poc < out->poc){
8421 out = h->delayed_pic[i];
/* POC comparisons only make sense within one IDR period (cross_idr) */
8425 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8426 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8428 else if(prev && pics <= s->avctx->has_b_frames)
/* reorder buffer apparently too small: grow has_b_frames heuristically */
8430 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8432 ((!cross_idr && prev && out->poc > prev->poc + 2)
8433 || cur->pict_type == B_TYPE)))
8436 s->avctx->has_b_frames++;
8439 else if(out_of_order)
8442 if(out_of_order || pics > s->avctx->has_b_frames){
8443 for(i=out_idx; h->delayed_pic[i]; i++)
8444 h->delayed_pic[i] = h->delayed_pic[i+1];
8450 *data_size = sizeof(AVFrame);
8451 if(prev && prev != out && prev->reference == 1)
8452 prev->reference = 0; /* previously output non-ref picture can be recycled */
8453 h->delayed_output_pic = out;
8457 *pict= *(AVFrame*)out;
8459 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8462 assert(pict->data[0] || !*data_size);
8463 ff_print_debug_info(s, pict);
8464 //printf("out %d\n", (int)pict->data[0]);
8467 /* Return the Picture timestamp as the frame number */
8468 /* we substract 1 because it is added on utils.c */
8469 avctx->frame_number = s->picture_number - 1;
8471 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with the availability of the neighbouring
 * macroblocks of the current one (same slice required):
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left; [4]/[5] are fixed.
 */
8474 static inline void fill_mb_avail(H264Context *h){
8475 MpegEncContext * const s = &h->s;
8476 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8479 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8480 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8481 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8487 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8488 h->mb_avail[4]= 1; //FIXME move out
8489 h->mb_avail[5]= 0; //FIXME move out
8495 #define SIZE (COUNT*40)
8501 // int int_temp[10000];
8503 AVCodecContext avctx;
8505 dsputil_init(&dsp, &avctx);
8507 init_put_bits(&pb, temp, SIZE);
8508 printf("testing unsigned exp golomb\n");
8509 for(i=0; i<COUNT; i++){
8511 set_ue_golomb(&pb, i);
8512 STOP_TIMER("set_ue_golomb");
8514 flush_put_bits(&pb);
8516 init_get_bits(&gb, temp, 8*SIZE);
8517 for(i=0; i<COUNT; i++){
8520 s= show_bits(&gb, 24);
8523 j= get_ue_golomb(&gb);
8525 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8528 STOP_TIMER("get_ue_golomb");
8532 init_put_bits(&pb, temp, SIZE);
8533 printf("testing signed exp golomb\n");
8534 for(i=0; i<COUNT; i++){
8536 set_se_golomb(&pb, i - COUNT/2);
8537 STOP_TIMER("set_se_golomb");
8539 flush_put_bits(&pb);
8541 init_get_bits(&gb, temp, 8*SIZE);
8542 for(i=0; i<COUNT; i++){
8545 s= show_bits(&gb, 24);
8548 j= get_se_golomb(&gb);
8549 if(j != i - COUNT/2){
8550 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8553 STOP_TIMER("get_se_golomb");
8556 printf("testing 4x4 (I)DCT\n");
8559 uint8_t src[16], ref[16];
8560 uint64_t error= 0, max_error=0;
8562 for(i=0; i<COUNT; i++){
8564 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8565 for(j=0; j<16; j++){
8566 ref[j]= random()%255;
8567 src[j]= random()%255;
8570 h264_diff_dct_c(block, src, ref, 4);
8573 for(j=0; j<16; j++){
8574 // printf("%d ", block[j]);
8575 block[j]= block[j]*4;
8576 if(j&1) block[j]= (block[j]*4 + 2)/5;
8577 if(j&4) block[j]= (block[j]*4 + 2)/5;
8581 s->dsp.h264_idct_add(ref, block, 4);
8582 /* for(j=0; j<16; j++){
8583 printf("%d ", ref[j]);
8587 for(j=0; j<16; j++){
8588 int diff= FFABS(src[j] - ref[j]);
8591 max_error= FFMAX(max_error, diff);
8594 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8596 printf("testing quantizer\n");
8597 for(qp=0; qp<52; qp++){
8599 src1_block[i]= src2_block[i]= random()%255;
8603 printf("Testing NAL layer\n");
8605 uint8_t bitstream[COUNT];
8606 uint8_t nal[COUNT*2];
8608 memset(&h, 0, sizeof(H264Context));
8610 for(i=0; i<COUNT; i++){
8618 for(j=0; j<COUNT; j++){
8619 bitstream[j]= (random() % 255) + 1;
8622 for(j=0; j<zeros; j++){
8623 int pos= random() % COUNT;
8624 while(bitstream[pos] == 0){
8633 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8635 printf("encoding failed\n");
8639 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8643 if(out_length != COUNT){
8644 printf("incorrect length %d %d\n", out_length, COUNT);
8648 if(consumed != nal_length){
8649 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8653 if(memcmp(bitstream, out, COUNT)){
8654 printf("missmatch\n");
8659 printf("Testing RBSP\n");
/**
 * AVCodec close callback: frees the RBSP unescape buffer and the
 * per-context tables allocated during decoding.
 */
8667 static int decode_end(AVCodecContext *avctx)
8669 H264Context *h = avctx->priv_data;
8670 MpegEncContext *s = &h->s;
8672 av_freep(&h->rbsp_buffer);
8673 free_tables(h); //FIXME cleanup init stuff perhaps
8676 // memset(h, 0, sizeof(H264Context));
/* lavc registration entry for the H.264 decoder */
8682 AVCodec h264_decoder = {
8686 sizeof(H264Context), /* priv_data_size */
/* capabilities: DR1 direct rendering, truncated input, delayed output */
8691 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8695 #ifdef CONFIG_H264_PARSER
8696 AVCodecParser h264_parser = {
8698 sizeof(H264Context),