2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
/* Poison MpegEncContext field names that must not be used directly in this
 * decoder: interlaced-ness and intra-ness are per-MB properties derived from
 * mb_type here, not from these legacy fields. */
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
/* Block indices for the luma/chroma DC coefficient blocks (blocks 0..23 are
 * the regular 4x4 residual blocks). */
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
/* Bit widths used when initializing the CAVLC VLC lookup tables below
 * (coeff_token, total_zeros, run_before). */
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
/* Capacity of the SPS/PPS id spaces; sps_buffer/pps_buffer in H264Context
 * are sized with these. */
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
/* Maximum number of memory-management-control operations buffered per slice
 * header; sizes H264Context.mmco[]. */
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
/* When interlacing is compiled in, the MBAFF/field macros read live decoder
 * state from the H264Context. */
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
/* NOTE(review): original lines 67-71 (presumably the #else branch defining
 * the progressive-only stubs, plus #endif) are not visible in this text —
 * the numbering jumps from 66 to 72; confirm against upstream h264.c.
 * In the non-interlaced build IS_INTERLACED is hardwired to 0 so the
 * compiler can dead-strip all MBAFF paths. */
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
/* NOTE(review): the `typedef struct SPS{` line and the closing `}SPS;` are
 * not visible here (numbering gaps around lines 77-81 and after 113);
 * confirm against upstream. Field names mirror the SPS syntax elements of
 * the H.264 spec, as the ///< comments indicate. */
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
/* VUI (video usability information) timing fields */
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
/* Per-SPS scaling lists: six 4x4 matrices and two 8x8 matrices */
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16];
113 uint8_t scaling_matrix8[2][64];
117 * Picture parameter set
/* NOTE(review): as with SPS above, the `typedef struct PPS{` opener and the
 * closing `}PPS;` are not visible in this text — confirm upstream. Fields
 * mirror pic_parameter_set_rbsp syntax elements. */
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
/* Per-PPS scaling lists, same layout as in SPS */
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
/* NOTE(review): many member lines and the closing `}H264Context;` are not
 * visible in this text (the embedded numbering skips repeatedly); confirm
 * the full layout against upstream h264.c before relying on offsets. */
164 typedef struct H264Context{
/* Reassembled RBSP (escape-stripped NAL payload) */
168 uint8_t *rbsp_buffer;
169 unsigned int rbsp_buffer_size;
172 * Used to parse AVC variant of h264
174 int is_avc; ///< this flag is != 0 if codec is avc1
175 int got_avcC; ///< flag used to parse avcC data only once
176 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
/* Intra prediction state for the current macroblock */
184 int chroma_pred_mode;
185 int intra16x16_pred_mode;
/* 5x8 cache layout: one row of neighbour context above 4 rows of the
 * current MB (same scan8-addressed layout used by the other *_cache
 * arrays below). */
190 int8_t intra4x4_pred_mode_cache[5*8];
191 int8_t (*intra4x4_pred_mode)[8];
/* Intra prediction function tables, indexed by prediction mode */
192 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
193 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
194 void (*pred8x8 [4+3])(uint8_t *src, int stride);
195 void (*pred16x16[4+3])(uint8_t *src, int stride);
/* Per-4x4-block availability bitmasks for intra prediction neighbours */
196 unsigned int topleft_samples_available;
197 unsigned int top_samples_available;
198 unsigned int topright_samples_available;
199 unsigned int left_samples_available;
200 uint8_t (*top_borders[2])[16+2*8];
201 uint8_t left_border[2*(17+2*9)];
204 * non zero coeff count cache.
205 * is 64 if not available.
207 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
208 uint8_t (*non_zero_count)[16];
211 * Motion vector cache.
213 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
214 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
/* Sentinel reference indices stored in ref_cache */
215 #define LIST_NOT_USED -1 //FIXME rename?
216 #define PART_NOT_AVAILABLE -2
219 * is 1 if the specific list MV&references are set to 0,0,-2.
221 int mv_cache_clean[2];
224 * number of neighbors (top and/or left) that used 8x8 dct
226 int neighbor_transform_size;
229 * block_offset[ 0..23] for frame macroblocks
230 * block_offset[24..47] for field macroblocks
232 int block_offset[2*(16+8)];
/* MB-address to motion-vector-grid address lookup */
234 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
236 int b_stride; //FIXME use s->b4_stride
239 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
248 int unknown_svq3_flag;
249 int next_slice_index;
/* Parameter sets: all received SPS/PPS plus the currently active copies */
251 SPS sps_buffer[MAX_SPS_COUNT];
252 SPS sps; ///< current sps
254 PPS pps_buffer[MAX_PPS_COUNT];
258 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
/* Dequantization tables, one set per QP (0..51); the *_coeff pointers
 * alias into the buffers (or into each other when matrices are equal) */
260 uint32_t dequant4_buffer[6][52][16];
261 uint32_t dequant8_buffer[2][52][64];
262 uint32_t (*dequant4_coeff[6])[16];
263 uint32_t (*dequant8_coeff[2])[64];
264 int dequant_coeff_pps; ///< reinit tables when pps changes
/* Per-MB slice id map, used for slice-boundary checks in fill_caches() */
267 uint8_t *slice_table_base;
268 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
270 int slice_type_fixed;
272 //interlacing specific flags
274 int mb_field_decoding_flag;
275 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
/* POC (picture order count) computation state */
282 int delta_poc_bottom;
285 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
286 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
287 int frame_num_offset; ///< for POC type 2
288 int prev_frame_num_offset; ///< for POC type 2
289 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
292 * frame_num for frames or 2*frame_num for field pics.
297 * max_frame_num or 2*max_frame_num for field pics.
301 //Weighted pred stuff
303 int use_weight_chroma;
304 int luma_log2_weight_denom;
305 int chroma_log2_weight_denom;
/* [list][ref]; 48 entries cover 16 frame refs + 32 mbaff field refs */
306 int luma_weight[2][48];
307 int luma_offset[2][48];
308 int chroma_weight[2][48][2];
309 int chroma_offset[2][48][2];
310 int implicit_weight[48][48];
/* Deblocking filter state from the slice header */
313 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
314 int slice_alpha_c0_offset;
315 int slice_beta_offset;
317 int redundant_pic_count;
/* B-frame direct-mode state */
319 int direct_spatial_mv_pred;
320 int dist_scale_factor[16];
321 int dist_scale_factor_field[32];
322 int map_col_to_list0[2][16];
323 int map_col_to_list0_field[2][32];
326 * num_ref_idx_l0/1_active_minus1 + 1
328 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
/* Reference picture lists and the delayed-output (reorder) queue */
329 Picture *short_ref[32];
330 Picture *long_ref[32];
331 Picture default_ref_list[2][32];
332 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
333 Picture *delayed_pic[16]; //FIXME size?
334 Picture *delayed_output_pic;
337 * memory management control operations buffer.
339 MMCO mmco[MAX_MMCO_COUNT];
342 int long_ref_count; ///< number of actual long term references
343 int short_ref_count; ///< number of actual short term references
/* Separate bit readers for intra/inter partitions (data partitioning) */
346 GetBitContext intra_gb;
347 GetBitContext inter_gb;
348 GetBitContext *intra_gb_ptr;
349 GetBitContext *inter_gb_ptr;
351 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
/* CABAC decoder context states */
357 uint8_t cabac_state[460];
360 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
365 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
366 uint8_t *chroma_pred_mode_table;
367 int last_qscale_diff;
/* Motion vector differences, needed for CABAC context modelling */
368 int16_t (*mvd_table[2])[2];
369 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
370 uint8_t *direct_table;
371 uint8_t direct_cache[5*8];
/* Scan orders; the *_q0 pointers select the qp==0 (transform-bypass)
 * variants where applicable */
373 uint8_t zigzag_scan[16];
374 uint8_t zigzag_scan8x8[64];
375 uint8_t zigzag_scan8x8_cavlc[64];
376 uint8_t field_scan[16];
377 uint8_t field_scan8x8[64];
378 uint8_t field_scan8x8_cavlc[64];
379 const uint8_t *zigzag_scan_q0;
380 const uint8_t *zigzag_scan8x8_q0;
381 const uint8_t *zigzag_scan8x8_cavlc_q0;
382 const uint8_t *field_scan_q0;
383 const uint8_t *field_scan8x8_q0;
384 const uint8_t *field_scan8x8_cavlc_q0;
/* CAVLC VLC tables, built once at init from the spec's code tables */
389 static VLC coeff_token_vlc[4];
390 static VLC chroma_dc_coeff_token_vlc;
392 static VLC total_zeros_vlc[15];
393 static VLC chroma_dc_total_zeros_vlc[3];
395 static VLC run_vlc[6];
/* Forward declarations: SVQ3 shares this decoder's infrastructure, and the
 * two filter_mb variants implement the in-loop deblocking filter */
398 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
399 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
400 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
401 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Pack two 16-bit values into one uint32_t such that `a` occupies the
 * memory-order-first halfword on either endianness (used to store mv pairs
 * and ref-index pairs with single 32-bit writes).
 * NOTE(review): the #else/#endif and closing brace (original lines 406,
 * 408-409) are not visible in this text — confirm against upstream. */
403 static always_inline uint32_t pack16to32(int a, int b){
404 #ifdef WORDS_BIGENDIAN
405 return (b&0xFFFF) + (a<<16);
407 return (a&0xFFFF) + (b<<16);
413 * @param h height of the rectangle, should be a constant
414 * @param w width of the rectangle, should be a constant
415 * @param size the size of val (1 or 4), should be a constant
/* Fill a w x h rectangle of `size`-byte elements at `vp` (row stride
 * `stride` bytes) with `val`, using the widest aligned stores available.
 * The branches below are specialized per (w, size) combination; the height
 * checks and the if/else ladder selecting each branch are among the lines
 * missing from this text (numbering gaps, e.g. 420-424, 427, 430, 432) —
 * NOTE(review): confirm the full branch structure against upstream. */
417 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
418 uint8_t *p= (uint8_t*)vp;
419 assert(size==1 || size==4);
/* alignment invariants: rows must be aligned to the write width */
425 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
426 assert((stride&(w-1))==0);
/* 2-byte-wide rows: replicate a byte value into a uint16 when size==1 */
428 const uint16_t v= size==4 ? val : val*0x0101;
429 *(uint16_t*)(p + 0*stride)= v;
431 *(uint16_t*)(p + 1*stride)= v;
433 *(uint16_t*)(p + 2*stride)=
434 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte-wide rows: replicate into a uint32 */
436 const uint32_t v= size==4 ? val : val*0x01010101;
437 *(uint32_t*)(p + 0*stride)= v;
439 *(uint32_t*)(p + 1*stride)= v;
441 *(uint32_t*)(p + 2*stride)=
442 *(uint32_t*)(p + 3*stride)= v;
444 //gcc can't optimize 64bit math on x86_32
445 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
/* 8-byte rows as a single 64-bit store on 64-bit word machines;
 * 0x0100000001ULL duplicates a 32-bit value into both halves */
446 const uint64_t v= val*0x0100000001ULL;
447 *(uint64_t*)(p + 0*stride)= v;
449 *(uint64_t*)(p + 1*stride)= v;
451 *(uint64_t*)(p + 2*stride)=
452 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte rows via paired 64-bit stores */
454 const uint64_t v= val*0x0100000001ULL;
455 *(uint64_t*)(p + 0+0*stride)=
456 *(uint64_t*)(p + 8+0*stride)=
457 *(uint64_t*)(p + 0+1*stride)=
458 *(uint64_t*)(p + 8+1*stride)= v;
460 *(uint64_t*)(p + 0+2*stride)=
461 *(uint64_t*)(p + 8+2*stride)=
462 *(uint64_t*)(p + 0+3*stride)=
463 *(uint64_t*)(p + 8+3*stride)= v;
/* 32-bit fallback paths for the same widths */
465 *(uint32_t*)(p + 0+0*stride)=
466 *(uint32_t*)(p + 4+0*stride)= val;
468 *(uint32_t*)(p + 0+1*stride)=
469 *(uint32_t*)(p + 4+1*stride)= val;
471 *(uint32_t*)(p + 0+2*stride)=
472 *(uint32_t*)(p + 4+2*stride)=
473 *(uint32_t*)(p + 0+3*stride)=
474 *(uint32_t*)(p + 4+3*stride)= val;
476 *(uint32_t*)(p + 0+0*stride)=
477 *(uint32_t*)(p + 4+0*stride)=
478 *(uint32_t*)(p + 8+0*stride)=
479 *(uint32_t*)(p +12+0*stride)=
480 *(uint32_t*)(p + 0+1*stride)=
481 *(uint32_t*)(p + 4+1*stride)=
482 *(uint32_t*)(p + 8+1*stride)=
483 *(uint32_t*)(p +12+1*stride)= val;
485 *(uint32_t*)(p + 0+2*stride)=
486 *(uint32_t*)(p + 4+2*stride)=
487 *(uint32_t*)(p + 8+2*stride)=
488 *(uint32_t*)(p +12+2*stride)=
489 *(uint32_t*)(p + 0+3*stride)=
490 *(uint32_t*)(p + 4+3*stride)=
491 *(uint32_t*)(p + 8+3*stride)=
492 *(uint32_t*)(p +12+3*stride)= val;
/* Populate the per-macroblock neighbour caches (intra modes, non-zero
 * coefficient counts, motion vectors, reference indices, mvd, direct flags)
 * from the neighbouring macroblocks, honouring slice boundaries,
 * constrained intra prediction and MBAFF frame/field pairing.
 * @param mb_type   type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking
 *                    filter rather than for decoding (relaxes/changes
 *                    several availability rules below)
 * NOTE(review): this text is missing many original lines (numbering gaps
 * throughout, and the function's closing brace at the end) — braces and
 * if/else ladders do not balance as shown; verify every edit against the
 * complete upstream h264.c. */
499 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
500 MpegEncContext * const s = &h->s;
501 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
502 int topleft_xy, top_xy, topright_xy, left_xy[2];
503 int topleft_type, top_type, topright_type, left_type[2];
507 //FIXME deblocking could skip the intra and nnz parts.
/* fast path: same-slice top neighbour and no MBAFF */
508 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
511 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive) neighbour addresses */
513 top_xy = mb_xy - s->mb_stride;
514 topleft_xy = top_xy - 1;
515 topright_xy= top_xy + 1;
516 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are resolved per macroblock *pair*; frame/field
 * mismatches between the current MB and each neighbour shift the
 * effective neighbour address by one MB row */
526 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
527 const int top_pair_xy = pair_xy - s->mb_stride;
528 const int topleft_pair_xy = top_pair_xy - 1;
529 const int topright_pair_xy = top_pair_xy + 1;
530 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
531 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
532 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
533 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
534 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
535 const int bottom = (s->mb_y & 1);
536 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
538 ? !curr_mb_frame_flag // bottom macroblock
539 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
541 top_xy -= s->mb_stride;
544 ? !curr_mb_frame_flag // bottom macroblock
545 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
547 topleft_xy -= s->mb_stride;
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
553 topright_xy -= s->mb_stride;
555 if (left_mb_frame_flag != curr_mb_frame_flag) {
556 left_xy[1] = left_xy[0] = pair_xy - 1;
557 if (curr_mb_frame_flag) {
578 left_xy[1] += s->mb_stride;
/* record resolved neighbour addresses for later users */
591 h->top_mb_xy = top_xy;
592 h->left_mb_xy[0] = left_xy[0];
593 h->left_mb_xy[1] = left_xy[1];
/* deblocking variant: a neighbour counts as available as long as it was
 * decoded at all (slice_table < 255), regardless of slice id */
597 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
598 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
599 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
601 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* unpack the per-4x4 luma nnz bitmask stored by
 * write_back_non_zero_count() into the cache */
603 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
605 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
606 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
607 if(USES_LIST(mb_type,list)){
608 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
609 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
610 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
611 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
/* pack16to32(ref,ref)*0x0101 replicates one ref pair across a row */
617 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
618 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
620 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
621 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
623 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
624 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* decoding variant: neighbours must belong to the same slice */
629 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
630 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
631 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
632 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
633 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra-prediction sample availability bitmasks; constrained intra
 * prediction also rules out inter-coded neighbours */
636 if(IS_INTRA(mb_type)){
637 h->topleft_samples_available=
638 h->top_samples_available=
639 h->left_samples_available= 0xFFFF;
640 h->topright_samples_available= 0xEEEA;
642 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
643 h->topleft_samples_available= 0xB3FF;
644 h->top_samples_available= 0x33FF;
645 h->topright_samples_available= 0x26EA;
648 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
649 h->topleft_samples_available&= 0xDF5F;
650 h->left_samples_available&= 0x5F5F;
654 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
655 h->topleft_samples_available&= 0x7FFF;
657 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
658 h->topright_samples_available&= 0xFBFF;
/* intra4x4 prediction-mode cache: copy the bottom row of the top
 * neighbour and the right column of the left neighbours; -1 marks
 * "unavailable", which pred_intra_mode() maps to DC */
660 if(IS_INTRA4x4(mb_type)){
661 if(IS_INTRA4x4(top_type)){
662 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
663 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
664 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
665 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
668 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
673 h->intra4x4_pred_mode_cache[4+8*0]=
674 h->intra4x4_pred_mode_cache[5+8*0]=
675 h->intra4x4_pred_mode_cache[6+8*0]=
676 h->intra4x4_pred_mode_cache[7+8*0]= pred;
679 if(IS_INTRA4x4(left_type[i])){
680 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
681 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
684 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
689 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
690 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
705 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
/* non-zero-count cache: bottom row of top neighbour (luma + chroma) */
707 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
708 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
709 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
710 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
712 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
713 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
715 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
716 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
/* no top neighbour: CABAC predicts 0, CAVLC uses sentinel 64 */
719 h->non_zero_count_cache[4+8*0]=
720 h->non_zero_count_cache[5+8*0]=
721 h->non_zero_count_cache[6+8*0]=
722 h->non_zero_count_cache[7+8*0]=
724 h->non_zero_count_cache[1+8*0]=
725 h->non_zero_count_cache[2+8*0]=
727 h->non_zero_count_cache[1+8*3]=
728 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
732 for (i=0; i<2; i++) {
734 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
735 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
736 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
737 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
739 h->non_zero_count_cache[3+8*1 + 2*8*i]=
740 h->non_zero_count_cache[3+8*2 + 2*8*i]=
741 h->non_zero_count_cache[0+8*1 + 8*i]=
742 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* coded-block-pattern of neighbours, for CABAC context modelling */
749 h->top_cbp = h->cbp_table[top_xy];
750 } else if(IS_INTRA(mb_type)) {
757 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
758 } else if(IS_INTRA(mb_type)) {
764 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
767 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* motion vector / reference index caches for inter prediction */
772 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
774 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
775 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
776 /*if(!h->mv_cache_clean[list]){
777 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
778 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
779 h->mv_cache_clean[list]= 1;
783 h->mv_cache_clean[list]= 0;
/* top neighbour row: 4 mvs + 2 ref indices */
785 if(USES_LIST(top_type, list)){
786 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
787 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
788 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
789 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
790 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
791 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
792 h->ref_cache[list][scan8[0] + 0 - 1*8]=
793 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
794 h->ref_cache[list][scan8[0] + 2 - 1*8]=
795 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
797 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
798 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
799 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
800 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
801 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
804 //FIXME unify cleanup or sth
/* left neighbours: two 8-pel halves, possibly from different MBs
 * (MBAFF); left_block[] remaps block indices for field pairing */
805 if(USES_LIST(left_type[0], list)){
806 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
807 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
808 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
809 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
810 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
811 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
813 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
814 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
815 h->ref_cache[list][scan8[0] - 1 + 0*8]=
816 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
819 if(USES_LIST(left_type[1], list)){
820 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
821 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
823 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
824 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
825 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
828 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
829 h->ref_cache[list][scan8[0] - 1 + 2*8]=
830 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
831 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright corners only needed for temporal direct/deblock */
834 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
837 if(USES_LIST(topleft_type, list)){
838 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
839 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
840 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
841 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
843 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
844 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
847 if(USES_LIST(topright_type, list)){
848 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
849 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
850 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
851 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
853 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
854 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
857 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* padding cells of the cache, marked unavailable */
860 h->ref_cache[list][scan8[5 ]+1] =
861 h->ref_cache[list][scan8[7 ]+1] =
862 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
863 h->ref_cache[list][scan8[4 ]] =
864 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
865 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
866 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
867 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
868 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
869 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
872 /* XXX beurk, Load mvd */
/* mvd cache (CABAC only): same layout as mv cache */
873 if(USES_LIST(top_type, list)){
874 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
875 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
876 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
877 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
878 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
880 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
881 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
882 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
883 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
885 if(USES_LIST(left_type[0], list)){
886 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
887 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
890 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
891 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
893 if(USES_LIST(left_type[1], list)){
894 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
895 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
896 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
898 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
899 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
901 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
902 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
903 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
904 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
905 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* direct-mode flag cache for B slices */
907 if(h->slice_type == B_TYPE){
908 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
910 if(IS_DIRECT(top_type)){
911 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
912 }else if(IS_8X8(top_type)){
913 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
914 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
915 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
917 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
920 if(IS_DIRECT(left_type[0]))
921 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
922 else if(IS_8X8(left_type[0]))
923 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
925 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
927 if(IS_DIRECT(left_type[1]))
928 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
929 else if(IS_8X8(left_type[1]))
930 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
932 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field mv/ref rescaling, applied to each cached
 * neighbour cell via the MAP_F2F macro (redefined twice below for the
 * two directions: frame-to-field halves mv.y and doubles ref, and
 * field-to-frame does the inverse) */
938 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
939 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
940 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
941 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
942 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
943 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
944 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
945 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
946 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
947 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
949 #define MAP_F2F(idx, mb_type)\
950 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
951 h->ref_cache[list][idx] <<= 1;\
952 h->mv_cache[list][idx][1] /= 2;\
953 h->mvd_cache[list][idx][1] /= 2;\
958 #define MAP_F2F(idx, mb_type)\
959 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
960 h->ref_cache[list][idx] >>= 1;\
961 h->mv_cache[list][idx][1] <<= 1;\
962 h->mvd_cache[list][idx][1] <<= 1;\
/* count of 8x8-DCT neighbours, used for transform-size CABAC context */
972 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copy the intra4x4 prediction modes of the current MB from the scan8
 * cache back to the per-picture intra4x4_pred_mode array (bottom row and
 * right column, which is all later MBs will need as context).
 * NOTE(review): index [7] and the closing brace (original lines 986-987)
 * are not visible in this text — confirm against upstream. */
975 static inline void write_back_intra_pred_mode(H264Context *h){
976 MpegEncContext * const s = &h->s;
977 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
979 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
980 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
981 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
982 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
983 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
984 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
985 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
989 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Validates/remaps each cached intra4x4 mode against top/left availability
 * using the lookup tables below; a mapped value of -1 signals an invalid
 * bitstream (logged and presumably returned as an error — the loop
 * headers, error returns and final return are among the lines missing
 * from this text; NOTE(review): confirm upstream). */
991 static inline int check_intra4x4_pred_mode(H264Context *h){
992 MpegEncContext * const s = &h->s;
993 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
994 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* 0x8000 tests the availability bit of the first block in the row/column */
997 if(!(h->top_samples_available&0x8000)){
999 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1001 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1004 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1009 if(!(h->left_samples_available&0x8000)){
1011 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1013 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1016 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1022 } //FIXME cleanup like next
1025 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a single 16x16/chroma
 * mode: remap `mode` via the tables when the top/left neighbours are
 * unavailable, rejecting out-of-range input. The remapping statements,
 * error returns and final return are not visible in this text
 * (NOTE(review): confirm upstream). */
1027 static inline int check_intra_pred_mode(H264Context *h, int mode){
1028 MpegEncContext * const s = &h->s;
1029 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1030 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1032 if(mode < 0 || mode > 6) {
1033 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1037 if(!(h->top_samples_available&0x8000)){
1040 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1045 if(!(h->left_samples_available&0x8000)){
1048 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * gets the predicted intra4x4 prediction mode.
 */
static inline int pred_intra_mode(H264Context *h, int n){
    const int index8= scan8[n];
    /* neighbouring block modes from the cache; negative = unavailable */
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min= FFMIN(left, top);

    tprintf("mode:%d %d min:%d\n", left ,top, min);

    /* either neighbour unavailable -> predict DC
     * NOTE(review): the "return min" for the normal case is elided here */
    if(min<0) return DC_PRED;
/* Copies the per-4x4-block non-zero coefficient counts from the decode cache
 * back into the frame-wide non_zero_count[] array, for use as top/left
 * context by later macroblocks. */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    /* luma: bottom row and right column of the cached 4x4 grid */
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];

    /* entries 7..12: presumably the two chroma planes — TODO confirm layout */
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];

    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];

    // store all luma nnzs, for deblocking
    /* one bit per 4x4 luma block, packed into a 16-bit mask
     * NOTE(review): the enclosing guard and loop header are elided here */
    v += (!!h->non_zero_count_cache[scan8[i]]) << i;
    *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    /* neighbouring counts from the cache */
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];
    /* NOTE(review): the combination of left/top into i is elided here;
     * the i<64 test then rounds the average up */
    if(i<64) i= (i+1)>>1;

    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" motion vector (top-right neighbour, falling back to the
 * top-left neighbour when unavailable) used for median MV prediction, and
 * returns the matching reference index.  Contains special handling for
 * MBAFF frame/field MV scaling.
 * NOTE(review): several conditionals/braces are elided in this listing. */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    MpegEncContext *s = &h->s;
    const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    /* scratch slot in the cache used to hand back a synthesized MV */
    *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
    *C = h->mv_cache[list][scan8[0]-2];

    && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
        int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
        if(IS_INTERLACED(mb_types[topright_xy])){
/* Fetches an MV from the colocated (x4,y4) position, rescaling its vertical
 * component (MV_OP) and reference index (REF_OP) between field and frame
 * coordinates; bails out early if the block does not use this list. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
            const int x4 = X4, y4 = Y4;\
            const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
            if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
                return LIST_NOT_USED;\
            mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
            h->mv_cache[list][scan8[0]-2][0] = mv[0];\
            h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
            return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

            SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

    if(topright_ref == PART_NOT_AVAILABLE
       && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
       && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
        && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
            SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
        && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
        && i >= scan8[0]+8){
            // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
            SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    /* normal (non-MBAFF-scaled) path: use the top-right neighbour directly */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
        tprintf("topright MV not available\n");
        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the single-match copy branches are elided in this listing.
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    /* A = left neighbour, B = top neighbour (refs + MVs from the cache) */
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    /* C = top-right neighbour (or top-left fallback) */
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    /* how many neighbours use the same reference picture */
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf("pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        /* component-wise median of the three neighbour MVs */
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        /* exactly one neighbour matches this ref: copy its MV directly */
    }else if(top_ref==ref){
        /* special case: only the left neighbour is available -> use A,
         * otherwise fall through to the median */
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);

    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the partition-selecting conditionals are elided here.
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* top 16x8 partition: prefer the top neighbour B if it uses this ref */
    const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
    const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

    tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

    /* bottom 16x8 partition: prefer the left neighbour A */
    const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

    tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    if(left_ref == ref){

    //RARE
    /* neither directional neighbour matched: fall back to median prediction */
    pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the partition-selecting conditionals are elided here.
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* left 8x16 partition: prefer the left neighbour A if it uses this ref */
    const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];

    tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    if(left_ref == ref){

    /* right 8x16 partition: prefer the diagonal (top-right) neighbour C */
    diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

    tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

    if(diagonal_ref == ref){

    //RARE
    /* fall back to median prediction */
    pred_motion(h, n, 2, list, ref, mx, my);
/* Computes the MV predictor for a P-skip macroblock: zero MV when a top/left
 * neighbour is missing or is a static ref-0 block, otherwise the normal
 * median prediction with ref 0. */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
        /* skip condition met -> zero MV (assignment elided in this listing) */

    pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factors (derived from the
 * POC distances tb/td) for every list-0 reference picture. */
static inline void direct_dist_scale_factor(H264Context * const h){
    const int poc = h->s.current_picture_ptr->poc;
    const int poc1 = h->ref_list[1][0].poc;
    for(i=0; i<h->ref_count[0]; i++){
        int poc0 = h->ref_list[0][i].poc;
        int td = clip(poc1 - poc0, -128, 127);
        if(td == 0 /* FIXME || pic0 is a long-term ref */){
            /* equal distance: 256 is the neutral scale (8.8 fixed point) */
            h->dist_scale_factor[i] = 256;
            int tb = clip(poc - poc0, -128, 127);
            int tx = (16384 + (FFABS(td) >> 1)) / td;
            h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
    /* duplicate entries per field — presumably for MBAFF; the enclosing
     * guard is elided in this listing */
    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor_field[2*i] =
        h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's reference lists (for later colocated lookups)
 * and builds the colocated-ref -> list0 index map used by temporal direct
 * prediction. */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    /* I pictures reference nothing; only B pictures have a list 1 */
    if(cur->pict_type == I_TYPE)
        cur->ref_count[0] = 0;
    if(cur->pict_type != B_TYPE)
        cur->ref_count[1] = 0;
    /* store this picture's ref lists (POCs) in the Picture itself */
    for(list=0; list<2; list++){
        cur->ref_count[list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
    /* the map is only needed for temporal direct B prediction */
    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            const int poc = ref1->ref_poc[list][i];
            h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
            /* find the list-0 entry with the same POC as the colocated ref */
            for(j=0; j<h->ref_count[list]; j++)
                if(h->ref_list[list][j].poc == poc){
                    h->map_col_to_list0[list][i] = j;
    /* per-field map variants — presumably MBAFF; guard elided in listing */
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            j = h->map_col_to_list0[list][i];
            h->map_col_to_list0_field[list][2*i] = 2*j;
            h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Computes direct-mode (spatial or temporal) motion vectors and reference
 * indices for a B macroblock, filling h->mv_cache / h->ref_cache and refining
 * *mb_type / the sub_mb_types.
 * NOTE(review): many lines (declarations, loop headers, braces, else arms)
 * are elided in this listing. */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    /* colocated macroblock data from the first list-1 reference picture */
    const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
    const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
    const int is_b8x8 = IS_8X8(*mb_type);

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
    /* derive this MB's partitioning from the colocated MB's type */
    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
        /* FIXME save sub mb types from previous frames (or derive from MVs)
         * so we know exactly what block size to use */
        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
        *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
    }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
        *mb_type |= MB_TYPE_DIRECT2;
        *mb_type |= MB_TYPE_INTERLACED;

    tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);

    if(h->direct_spatial_mv_pred){
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            /* top-right unavailable: fall back to top-left (guard elided) */
            refc = h->ref_cache[list][scan8[0] - 8 - 1];
            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))

        if(ref[0] < 0 && ref[1] < 0){
            /* no neighbour references at all: use ref 0 with zero MVs */
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
            for(list=0; list<2; list++){
                pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                mv[list][0] = mv[list][1] = 0;

        /* one list has no valid ref: drop it from the mb/sub-mb types */
            *mb_type &= ~MB_TYPE_P0L1;
            sub_mb_type &= ~MB_TYPE_P0L1;
        }else if(ref[0] < 0){
            *mb_type &= ~MB_TYPE_P0L0;
            sub_mb_type &= ~MB_TYPE_P0L0;

        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            /* force zero MVs when the colocated block is (near-)static */
            if(!IS_INTRA(mb_type_col)
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);

            /* 8x8 partitions: same logic per sub-block */
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                /* only overwrite blocks actually coded as direct */
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* zero MVs over (near-)static colocated 8x8 blocks */
                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
                                                  && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;

        /* field decoding uses the per-field maps and scale factors */
        if(IS_INTERLACED(*mb_type)){
            map_col_to_list0[0] = h->map_col_to_list0_field[0];
            map_col_to_list0[1] = h->map_col_to_list0_field[1];
            dist_scale_factor = h->dist_scale_factor_field;
        /* current and colocated MB differ in frame/field coding: MVs must be
         * rescaled between the two coordinate systems */
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
            /* FIXME assumes direct_8x8_inference == 1 */
            const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
            int mb_types_col[2];

            *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
                     | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
                     | (*mb_type & MB_TYPE_INTERLACED);
            sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;

            if(IS_INTERLACED(*mb_type)){
                /* frame to field scaling */
                mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
                /* rebase the colocated pointers onto the MB pair */
                l1ref0 -= 2*h->b8_stride;
                l1ref1 -= 2*h->b8_stride;
                l1mv0 -= 4*h->b_stride;
                l1mv1 -= 4*h->b_stride;

                if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
                   && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
                    *mb_type |= MB_TYPE_16x8;
                    *mb_type |= MB_TYPE_8x8;
                /* field to frame scaling */
                /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
                 * but in MBAFF, top and bottom POC are equal */
                int dy = (s->mb_y&1) ? 1 : 2;
                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
                l1ref0 += dy*h->b8_stride;
                l1ref1 += dy*h->b8_stride;
                l1mv0 += 2*dy*h->b_stride;
                l1mv1 += 2*dy*h->b_stride;

                if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
                    *mb_type |= MB_TYPE_16x16;
                    *mb_type |= MB_TYPE_8x8;

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;

                /* list-1 reference is always index 0 in temporal direct */
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_types_col[y8])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);

                /* map the colocated reference to a list-0 index */
                ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
                ref0 = map_col_to_list0[0][ref0*2>>y_shift];
                ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];

                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                /* scale the colocated MV by the POC-distance ratio */
                const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
                int my_col = (mv_col[1]<<y_shift)/2;
                int mx = (scale * mv_col[0] + 128) >> 8;
                int my = (scale * my_col + 128) >> 8;
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col)){
                /* intra colocated block: ref 0, zero MVs */
                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
                                                : map_col_to_list0[1][l1ref1[0]];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col)){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);

                ref0 = l1ref0[x8 + y8*h->b8_stride];
                ref0 = map_col_to_list0[0][ref0];
                ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];

                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                        int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                            pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the decoded MVs, MV deltas (CABAC) and reference indices from the
 * per-MB caches back into the frame-wide arrays.
 * NOTE(review): loop headers and some guards are elided in this listing. */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<2; list++){
        if(!USES_LIST(mb_type, list))
        /* copy 4 MVs (two 64-bit stores) per row from the cache */
        *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
        *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        if( h->pps.cabac ) {
            /* MV deltas are CABAC context; zero them for skipped MBs */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];

        /* one reference index per 8x8 block */
        int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
        ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
        ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
        ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
        ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];

    /* direct flags per 8x8 block (CABAC context for B slices) */
    if(h->slice_type == B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 * NOTE(review): loop headers, returns and some statements are elided in this
 * listing.
 */
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
    /* parse the one-byte NAL header */
    // src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    for(i=0; i<length; i++)
        printf("%2X ", src[i]);

    /* scan for 00 00 (00..03) patterns (start codes / emulation escapes);
     * stepping by 2 works because a zero run must cover an even index */
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            /* startcode, so we must be past the end */

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header

    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
    dst= h->rbsp_buffer;

    //printf("decoding esc\n");
    //remove escapes (very rare 1:2^22)
    if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
        if(src[si+2]==3){ //escape
    }else //next start code
        dst[di++]= src[si++];

    *consumed= si + 1;//+1 for the header
    //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
/**
 * @param src the data which should be escaped
 * @param dst the target buffer, dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @returns length of escaped data in bytes or -1 if an error occured
 * NOTE(review): several statements (counting, returns, loop headers) are
 * elided in this listing.
 */
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
    int i, escape_count, si, di;

    assert(dst_length>0);

    /* one-byte NAL header: nal_ref_idc + nal_unit_type */
    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;

    if(length==0) return 1;

    /* count 00 00 (00..03) patterns that need an emulation-prevention byte */
    for(i=0; i<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0)
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){

    /* fast path: nothing to escape, plain copy after the header */
    if(escape_count==0){
        memcpy(dst+1, src, length);

    if(length + escape_count + 1> dst_length)

    //this should be damn rare (hopefully)

    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
    temp= h->rbsp_buffer;
    //printf("encoding esc\n");

    /* escape: after each 00 00 pair followed by a small byte, an emulation-
     * prevention byte is inserted (the 0x03 write is elided in this listing) */
    if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
        temp[di++]= 0; si++;
        temp[di++]= 0; si++;
        temp[di++]= src[si++];
    temp[di++]= src[si++];

    memcpy(dst+1, temp, length+escape_count);

    assert(di == length+escape_count);
/**
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
 * NOTE(review): the stop-bit write preceding the padding is elided in this
 * listing.
 */
static void encode_rbsp_trailing(PutBitContext *pb){
    /* pad with zero bits up to the next byte boundary */
    length= (-put_bits_count(pb))&7;
    if(length) put_bits(pb, length, 0);
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 * NOTE(review): the body (reading the last byte and locating the stop bit)
 * is elided in this listing.
 */
static int decode_rbsp_trailing(uint8_t *src){
    tprintf("rbsp trailing %X\n", v);
/**
 * idct tranforms the 16 dc values and dequantize them.
 * @param qp quantization parameter
 * NOTE(review): the loop headers and the temp[] stores of the first pass are
 * elided in this listing.
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    int temp[16]; //FIXME check if this is a good idea
    /* offsets of the 4 DC positions along each axis within the block array */
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    //memset(block, 64, 2*256);

    /* first butterfly pass over the DC values */
    const int offset= y_offset[i];
    const int z0= block[offset+stride*0] + block[offset+stride*4];
    const int z1= block[offset+stride*0] - block[offset+stride*4];
    const int z2= block[offset+stride*1] - block[offset+stride*5];
    const int z3= block[offset+stride*1] + block[offset+stride*5];

    /* second pass + dequantization: multiply by qmul, round, >>8 */
    const int offset= x_offset[i];
    const int z0= temp[4*0+i] + temp[4*2+i];
    const int z1= temp[4*0+i] - temp[4*2+i];
    const int z2= temp[4*1+i] - temp[4*3+i];
    const int z3= temp[4*1+i] + temp[4*3+i];

    block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
    block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
    block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
    block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
/**
 * dct tranforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 * NOTE(review): loop headers and first-pass temp[] stores are elided in this
 * listing; this is the forward (encoder) counterpart of the dequant idct.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//  const int qmul= dequant_coeff[qp][0];
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first butterfly pass */
    const int offset= y_offset[i];
    const int z0= block[offset+stride*0] + block[offset+stride*4];
    const int z1= block[offset+stride*0] - block[offset+stride*4];
    const int z2= block[offset+stride*1] - block[offset+stride*5];
    const int z3= block[offset+stride*1] + block[offset+stride*5];

    /* second pass; halve the result (>>1) */
    const int offset= x_offset[i];
    const int z0= temp[4*0+i] + temp[4*2+i];
    const int z1= temp[4*0+i] - temp[4*2+i];
    const int z2= temp[4*1+i] - temp[4*3+i];
    const int z3= temp[4*1+i] + temp[4*3+i];

    block[stride*0 +offset]= (z0 + z3)>>1;
    block[stride*2 +offset]= (z1 + z2)>>1;
    block[stride*8 +offset]= (z1 - z2)>>1;
    block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 chroma DC inverse transform + dequantization, in place.
 * NOTE(review): the variable declarations and the intermediate butterfly
 * (e/a/b/c updates) are elided in this listing. */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    const int stride= 16*2;
    const int xStride= 16;

    /* gather the four DC values */
    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* 2x2 Hadamard result, dequantized with qmul and scaled by >>7 */
    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* 2x2 chroma DC forward transform, in place (encoder counterpart of
 * chroma_dc_dequant_idct_c).
 * NOTE(review): declarations and the intermediate butterfly are elided. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;

    /* gather the four DC values */
    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* 2x2 Hadamard transform, no scaling */
    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
2017 * gets the chroma qp.
2019 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2021 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Computes the H.264 4x4 forward integer transform of the pixel difference
 * src1 - src2, writing the coefficients into block.
 * NOTE(review): the two loop headers are elided in this listing. */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    //FIXME try int temp instead of block

    /* horizontal pass: butterfly each row of differences */
    const int d0= src1[0 + i*stride] - src2[0 + i*stride];
    const int d1= src1[1 + i*stride] - src2[1 + i*stride];
    const int d2= src1[2 + i*stride] - src2[2 + i*stride];
    const int d3= src1[3 + i*stride] - src2[3 + i*stride];
    const int z0= d0 + d3;
    const int z3= d0 - d3;
    const int z1= d1 + d2;
    const int z2= d1 - d2;

    block[0 + 4*i]= z0 + z1;
    block[1 + 4*i]= 2*z3 + z2;
    block[2 + 4*i]= z0 - z1;
    block[3 + 4*i]= z3 - 2*z2;

    /* vertical pass: butterfly each column of coefficients */
    const int z0= block[0*4 + i] + block[3*4 + i];
    const int z3= block[0*4 + i] - block[3*4 + i];
    const int z1= block[1*4 + i] + block[2*4 + i];
    const int z2= block[1*4 + i] - block[2*4 + i];

    block[0*4 + i]= z0 + z1;
    block[1*4 + i]= 2*z3 + z2;
    block[2*4 + i]= z0 - z1;
    block[3*4 + i]= z3 - 2*z2;
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Quantizes a transformed block in place, scanning via scantable; returns the
 * index of the last non-zero coefficient.
 * NOTE(review): the seperate_dc branching, loops and sign handling are
 * partially elided in this listing. */
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
    const int * const quant_table= quant_coeff[qscale];
    /* dead-zone bias: 1/3 of a step for intra, 1/6 for inter */
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
    /* thresholds let one unsigned compare handle both signs */
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
    const unsigned int threshold2= (threshold1<<1);

    /* luma DC variant: coarser shift (QUANT_SHIFT-2) and its own table row */
    const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
    const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
    const unsigned int dc_threshold2= (dc_threshold1<<1);

    int level= block[0]*quant_coeff[qscale+18][0];
    if(((unsigned)(level+dc_threshold1))>dc_threshold2){
        level= (dc_bias + level)>>(QUANT_SHIFT-2);
        level= (dc_bias - level)>>(QUANT_SHIFT-2);
        // last_non_zero = i;

    /* chroma DC variant: finer shift (QUANT_SHIFT+1) */
    const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
    const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
    const unsigned int dc_threshold2= (dc_threshold1<<1);

    int level= block[0]*quant_table[0];
    if(((unsigned)(level+dc_threshold1))>dc_threshold2){
        level= (dc_bias + level)>>(QUANT_SHIFT+1);
        level= (dc_bias - level)>>(QUANT_SHIFT+1);
        // last_non_zero = i;

    /* AC coefficients, visited in scan order */
    const int j= scantable[i];
    int level= block[j]*quant_table[j];

//  if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//     || bias-level >= (1<<(QMAT_SHIFT - 3))){
    if(((unsigned)(level+threshold1))>threshold2){
        level= (bias + level)>>QUANT_SHIFT;
        level= (bias - level)>>QUANT_SHIFT;

    return last_non_zero;
/**
 * 4x4 vertical prediction (mode 0): replicate the 4 pixels directly above
 * the block into all four rows.
 * @param src      top-left pixel of the 4x4 block; neighbours are read at
 *                 negative offsets
 * @param topright unused by this mode
 * @param stride   byte distance between vertically adjacent pixels
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    /* load the 4-byte row above once, then store it into each row */
    const uint32_t top4= ((uint32_t*)(src-stride))[0];
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= top4;
}
/**
 * 4x4 horizontal prediction (mode 1): fill each row with the pixel
 * immediately to its left.
 * @param src      top-left pixel of the 4x4 block; the left neighbours are
 *                 read at src[-1 + y*stride]
 * @param topright unused by this mode
 * @param stride   byte distance between vertically adjacent pixels
 *
 * The multiply by 0x01010101U replicates the byte into all four lanes of the
 * 32-bit store.  The constant must be unsigned: for pixel values >= 128 the
 * former signed multiply (pixel * 0x01010101) overflowed int, which is
 * undefined behavior in C.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 DC prediction (mode 2): fill the block with the rounded average of the
 * 4 top and 4 left neighbouring pixels.
 * @param src      top-left pixel of the 4x4 block; neighbours are read at
 *                 negative offsets
 * @param topright unused by this mode
 * @param stride   byte distance between vertically adjacent pixels
 *
 * The replication constant is unsigned: dc can reach 255, and a signed
 * 255 * 0x01010101 multiply would overflow int (undefined behavior).
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc*0x01010101U;
}
/**
 * 4x4 left-DC intra prediction: average of the 4 left neighbours only
 * (used when the top neighbours are unavailable).
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride]
                   + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 top-DC intra prediction: average of the 4 top neighbours only
 * (used when the left neighbours are unavailable).
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 flat-DC intra prediction: no neighbours available, fill the block
 * with the mid-grey value 128.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
}
/* Neighbour-loading helpers for the directional 4x4 predictors.
 * Each macro declares local const ints naming the relevant edge pixels;
 * they expand in a scope where `src`, `topright` and `stride` exist.
 * (Reconstructed: the dump left a dangling '\' line continuation and
 * stray line-number prefixes.) */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];

#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];
/**
 * 4x4 diagonal down-right intra prediction (H.264 mode 4).
 * Neighbour pixels are loaded inline (top-left lt, top t0..t3, left l0..l3)
 * so the function does not depend on the edge-load macros.
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
/**
 * 4x4 diagonal down-left intra prediction (H.264 mode 3).
 * Uses the top edge t0..t3 plus the 4 top-right pixels t4..t7 (loaded
 * inline from `topright`).
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int t4= topright[0];
    const int t5= topright[1];
    const int t6= topright[2];
    const int t7= topright[3];

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}
/**
 * 4x4 vertical-right intra prediction (H.264 mode 5).
 * Needs top-left lt, top t0..t3 and left l0..l2; l3 is not used by this
 * mode (the original declared it only to silence the unused warning).
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];

    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
/**
 * 4x4 vertical-left intra prediction (H.264 mode 7).
 * Needs top t0..t3 and top-right t4..t6; t7 is unused by this mode.
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int t4= topright[0];
    const int t5= topright[1];
    const int t6= topright[2];

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
/**
 * 4x4 horizontal-up intra prediction (H.264 mode 8).
 * Only the left neighbours l0..l3 are used; positions past the last
 * interpolated pair are padded with l3.
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
    src[2+2*stride]=
    src[3+2*stride]=
    src[0+3*stride]=
    src[1+3*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
/**
 * 4x4 horizontal-down intra prediction (H.264 mode 6).
 * Needs top-left lt, top t0..t2 (t3 unused by this mode) and left l0..l3.
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
/**
 * 16x16 vertical intra prediction: replicate the 16 top neighbours
 * (read as four 32-bit words) into all 16 rows.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
    }
}
/**
 * 16x16 horizontal intra prediction: each row is filled with its left
 * neighbour, byte-replicated across four 32-bit stores.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
    }
}
/**
 * 16x16 DC intra prediction: average of the 16 left and 16 top
 * neighbours.  (Reconstructed: the dump lost the top-row summation loop
 * and the loop/brace lines; restored per the H.264 spec and the (dc+16)>>5
 * rounding visible in the dump.)
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[-1+i*stride];
    }

    for(i=0; i<16; i++){
        dc+= src[i-stride];
    }

    dc= 0x01010101*((dc + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 left-DC intra prediction: average of the 16 left neighbours only.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[-1+i*stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 top-DC intra prediction: average of the 16 top neighbours only.
 * (Reconstructed: the summation loop was lost in the dump; restored per
 * the H.264 spec with the (dc+8)>>4 rounding visible in the dump.)
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++){
        dc+= src[i-stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 flat-DC intra prediction: no neighbours available, fill with 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
    }
}
2437 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2440 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2441 const uint8_t * const src0 = src+7-stride;
2442 const uint8_t *src1 = src+8*stride-1;
2443 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2444 int H = src0[1] - src0[-1];
2445 int V = src1[0] - src2[ 0];
2446 for(k=2; k<=8; ++k) {
2447 src1 += stride; src2 -= stride;
2448 H += k*(src0[k] - src0[-k]);
2449 V += k*(src1[0] - src2[ 0]);
2452 H = ( 5*(H/4) ) / 16;
2453 V = ( 5*(V/4) ) / 16;
2455 /* required for 100% accuracy */
2456 i = H; H = V; V = i;
2458 H = ( 5*H+32 ) >> 6;
2459 V = ( 5*V+32 ) >> 6;
2462 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2463 for(j=16; j>0; --j) {
2466 for(i=-16; i<0; i+=4) {
2467 src[16+i] = cm[ (b ) >> 5 ];
2468 src[17+i] = cm[ (b+ H) >> 5 ];
2469 src[18+i] = cm[ (b+2*H) >> 5 ];
2470 src[19+i] = cm[ (b+3*H) >> 5 ];
/** 16x16 plane prediction, H.264 flavour (svq3 gradient scaling off). */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
/**
 * 8x8 (chroma) vertical intra prediction: replicate the 8 top neighbours
 * into all 8 rows.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
    }
}
/**
 * 8x8 (chroma) horizontal intra prediction: each row is filled with its
 * left neighbour.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
    }
}
/** 8x8 (chroma) flat-DC intra prediction: fill with the mid-grey 128. */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
    }
}
/**
 * 8x8 (chroma) left-DC intra prediction: the top half uses the average of
 * left neighbours 0..3 (dc0), the bottom half neighbours 4..7 (dc2).
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc2;

    dc0=dc2=0;
    for(i=0; i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/**
 * 8x8 (chroma) top-DC intra prediction: the left half uses the average of
 * top neighbours 0..3 (dc0), the right half neighbours 4..7 (dc1).
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1;

    dc0=dc1=0;
    for(i=0; i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0= 0x01010101*((dc0 + 2)>>2);
    dc1= 0x01010101*((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/**
 * 8x8 (chroma) DC intra prediction, per 4x4 quadrant:
 *   top-left    = avg(left 0..3, top 0..3)      (dc0)
 *   top-right   = avg(top 4..7)                 (dc1)
 *   bottom-left = avg(left 4..7)                (dc2)
 *   bottom-right= avg(top 4..7, left 4..7)      (dc3)
 * dc3 is derived from the raw sums before dc1/dc2 are scaled.
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2, dc3;

    dc0=dc1=dc2=0;
    for(i=0; i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
    dc0= 0x01010101*((dc0 + 4)>>3);
    dc1= 0x01010101*((dc1 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2580 static void pred8x8_plane_c(uint8_t *src, int stride){
2583 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2584 const uint8_t * const src0 = src+3-stride;
2585 const uint8_t *src1 = src+4*stride-1;
2586 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2587 int H = src0[1] - src0[-1];
2588 int V = src1[0] - src2[ 0];
2589 for(k=2; k<=4; ++k) {
2590 src1 += stride; src2 -= stride;
2591 H += k*(src0[k] - src0[-k]);
2592 V += k*(src1[0] - src2[ 0]);
2594 H = ( 17*H+16 ) >> 5;
2595 V = ( 17*V+16 ) >> 5;
2597 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2598 for(j=8; j>0; --j) {
2601 src[0] = cm[ (b ) >> 5 ];
2602 src[1] = cm[ (b+ H) >> 5 ];
2603 src[2] = cm[ (b+2*H) >> 5 ];
2604 src[3] = cm[ (b+3*H) >> 5 ];
2605 src[4] = cm[ (b+4*H) >> 5 ];
2606 src[5] = cm[ (b+5*H) >> 5 ];
2607 src[6] = cm[ (b+6*H) >> 5 ];
2608 src[7] = cm[ (b+7*H) >> 5 ];
/* Helpers for the luma 8x8 predictors.  All expand in a scope providing
 * `src`, `stride`, `has_topleft` and `has_topright`.  The LOAD macros
 * declare the 3-tap-filtered edge pixels (l0..l7 / t0..t15 / lt) that the
 * H.264 8x8 modes predict from. */
#define SRC(x,y) src[(x)+(y)*stride]

#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((uint32_t*)src)[0] = \
        ((uint32_t*)src)[1] = v; \
        src += stride; \
    }
/** Luma 8x8 flat-DC prediction: fill with 128 (no neighbours available). */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_DC(0x80808080);
}
2655 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2657 PREDICT_8x8_LOAD_LEFT;
2658 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2661 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2663 PREDICT_8x8_LOAD_TOP;
2664 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2667 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2669 PREDICT_8x8_LOAD_LEFT;
2670 PREDICT_8x8_LOAD_TOP;
2671 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2672 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2675 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677 PREDICT_8x8_LOAD_LEFT;
2678 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2679 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2680 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2683 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2686 PREDICT_8x8_LOAD_TOP;
2695 for( y = 1; y < 8; y++ )
2696 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2698 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2700 PREDICT_8x8_LOAD_TOP;
2701 PREDICT_8x8_LOAD_TOPRIGHT;
2702 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2703 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2704 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2705 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2706 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2707 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2708 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2709 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2710 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2711 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2712 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2713 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2714 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2715 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2716 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2718 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2720 PREDICT_8x8_LOAD_TOP;
2721 PREDICT_8x8_LOAD_LEFT;
2722 PREDICT_8x8_LOAD_TOPLEFT;
2723 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2724 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2725 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2726 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2727 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2728 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2729 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2730 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2731 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2732 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2733 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2734 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2735 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2736 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2737 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2740 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2742 PREDICT_8x8_LOAD_TOP;
2743 PREDICT_8x8_LOAD_LEFT;
2744 PREDICT_8x8_LOAD_TOPLEFT;
2745 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2746 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2747 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2748 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2749 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2750 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2751 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2752 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2753 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2754 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2755 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2756 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2757 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2758 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2759 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2760 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2761 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2762 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2763 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2764 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2765 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2766 SRC(7,0)= (t6 + t7 + 1) >> 1;
2768 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2770 PREDICT_8x8_LOAD_TOP;
2771 PREDICT_8x8_LOAD_LEFT;
2772 PREDICT_8x8_LOAD_TOPLEFT;
2773 SRC(0,7)= (l6 + l7 + 1) >> 1;
2774 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2775 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2776 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2777 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2778 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2779 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2780 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2781 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2782 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2783 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2784 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2785 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2786 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2787 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2788 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2789 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2790 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2791 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2792 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2793 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2794 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2796 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2798 PREDICT_8x8_LOAD_TOP;
2799 PREDICT_8x8_LOAD_TOPRIGHT;
2800 SRC(0,0)= (t0 + t1 + 1) >> 1;
2801 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2802 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2803 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2804 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2805 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2806 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2807 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2808 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2809 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2810 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2811 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2812 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2813 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2814 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2815 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2816 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2817 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2818 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2819 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2820 SRC(7,6)= (t10 + t11 + 1) >> 1;
2821 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2823 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2825 PREDICT_8x8_LOAD_LEFT;
2826 SRC(0,0)= (l0 + l1 + 1) >> 1;
2827 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2828 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2829 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2830 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2831 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2832 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2833 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2834 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2835 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2836 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2837 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2838 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2839 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2840 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2841 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2842 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2843 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2845 #undef PREDICT_8x8_LOAD_LEFT
2846 #undef PREDICT_8x8_LOAD_TOP
2847 #undef PREDICT_8x8_LOAD_TOPLEFT
2848 #undef PREDICT_8x8_LOAD_TOPRIGHT
2849 #undef PREDICT_8x8_DC
/* Motion compensation of one partition for one prediction direction (`list`):
 * computes the quarter-pel luma source position, fetches samples through
 * ff_emulated_edge_mc when the MV points outside the padded picture, then
 * runs the selected qpel luma and eighth-pel chroma MC functions.
 * NOTE(review): this dump has elided lines (e.g. the declaration/reset of
 * `emu`, MB_FIELD handling and closing braces) — the visible lines are kept
 * byte-identical below. */
2855 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2856 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2857 int src_x_offset, int src_y_offset,
2858 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2859 MpegEncContext * const s = &h->s;
/* mv_cache holds MVs in quarter-pel units; the block offset is in full pel*8 */
2860 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2861 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* luma_xy selects one of the 16 quarter-pel interpolation filters */
2862 const int luma_xy= (mx&3) + ((my&3)<<2);
2863 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2864 uint8_t * src_cb, * src_cr;
2865 int extra_width= h->emu_edge_width;
2866 int extra_height= h->emu_edge_height;
2868 const int full_mx= mx>>2;
2869 const int full_my= my>>2;
2870 const int pic_width = 16*s->mb_width;
2871 const int pic_height = 16*s->mb_height >> MB_MBAFF;
/* sub-pel interpolation reads 3 extra pixels beyond the block edge */
2876 if(mx&7) extra_width -= 3;
2877 if(my&7) extra_height -= 3;
2879 if( full_mx < 0-extra_width
2880 || full_my < 0-extra_height
2881 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2882 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2883 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2884 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2888 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partitions are done as two square halves, `delta` apart */
2890 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2893 if(s->flags&CODEC_FLAG_GRAY) return;
2896 // chroma offset when predicting from a field of opposite parity
2897 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2898 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2900 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2901 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2904 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2905 src_cb= s->edge_emu_buffer;
2907 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2910 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2911 src_cr= s->edge_emu_buffer;
2913 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Standard (non-weighted) MC for one partition: predicts from list 0 with
 * the `put` functions, then — if bidirectional — averages in the list 1
 * prediction with the `avg` functions.
 * NOTE(review): the if(list0)/if(list1) guard lines and the qpix_op=qpix_avg
 * switch appear elided in this dump; visible lines kept byte-identical. */
2916 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2917 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2918 int x_offset, int y_offset,
2919 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2920 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2921 int list0, int list1){
2922 MpegEncContext * const s = &h->s;
2923 qpel_mc_func *qpix_op= qpix_put;
2924 h264_chroma_mc_func chroma_op= chroma_put;
/* move the destination pointers to this partition inside the macroblock */
2926 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2927 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2928 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* convert the partition offset to absolute (chroma-resolution) coordinates */
2929 x_offset += 8*s->mb_x;
2930 y_offset += 8*(s->mb_y >> MB_MBAFF);
2933 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2934 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2935 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2936 qpix_op, chroma_op);
/* second direction averages on top of the first prediction */
2939 chroma_op= chroma_avg;
2943 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2944 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2945 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2946 qpix_op, chroma_op);
/* Weighted-prediction MC for one partition.  Bidirectional case: predict
 * both directions into the destination and a scratchpad, then combine with
 * either implicit weights (use_weight==2) or explicit bi-weights.
 * Unidirectional case: predict, then apply the explicit per-ref weight and
 * offset in place.
 * NOTE(review): the if(list0 && list1){...}else{...} skeleton lines appear
 * elided in this dump; visible lines kept byte-identical. */
2950 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2951 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2952 int x_offset, int y_offset,
2953 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2954 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2955 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2956 int list0, int list1){
2957 MpegEncContext * const s = &h->s;
2959 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2960 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2961 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2962 x_offset += 8*s->mb_x;
2963 y_offset += 8*(s->mb_y >> MB_MBAFF);
2966 /* don't optimize for luma-only case, since B-frames usually
2967 * use implicit weights => chroma too. */
2968 uint8_t *tmp_cb = s->obmc_scratchpad;
2969 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2970 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2971 int refn0 = h->ref_cache[0][ scan8[n] ];
2972 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 goes straight to the destination, list 1 to the scratchpad */
2974 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2975 dest_y, dest_cb, dest_cr,
2976 x_offset, y_offset, qpix_put, chroma_put);
2977 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2978 tmp_y, tmp_cb, tmp_cr,
2979 x_offset, y_offset, qpix_put, chroma_put);
2981 if(h->use_weight == 2){
/* implicit weighting: weights derived from POC distances, summing to 64 */
2982 int weight0 = h->implicit_weight[refn0][refn1];
2983 int weight1 = 64 - weight0;
2984 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2985 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2986 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit bi-weighting with per-reference weights and offsets */
2988 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2989 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2990 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2991 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2992 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2993 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2994 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2995 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2996 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* unidirectional: predict then weight in place */
2999 int list = list1 ? 1 : 0;
3000 int refn = h->ref_cache[list][ scan8[n] ];
3001 Picture *ref= &h->ref_list[list][refn];
3002 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3003 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3004 qpix_put, chroma_put);
3006 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3007 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3008 if(h->use_weight_chroma){
3009 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3010 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3011 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3012 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch MC for one partition: use the weighted path when explicit
 * weighting is on (use_weight==1), or when implicit weighting is on and the
 * bidirectional implicit weight is not the trivial 32/32 split; otherwise
 * fall through to the cheaper standard put/avg path. */
3017 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3018 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3019 int x_offset, int y_offset,
3020 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3021 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3022 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3023 int list0, int list1){
3024 if((h->use_weight==2 && list0 && list1
3025 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3026 || h->use_weight==1)
3027 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3028 x_offset, y_offset, qpix_put, chroma_put,
3029 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3031 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3032 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
3035 static inline void prefetch_motion(H264Context *h, int list){
3036 /* fetch pixels for estimated mv 4 macroblocks ahead
3037 * optimized for 64byte cache lines */
3038 MpegEncContext * const s = &h->s;
3039 const int refn = h->ref_cache[list][scan8[0]];
/* NOTE(review): a guard on refn (skip when no reference) appears elided here */
3041 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3042 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3043 uint8_t **src= h->ref_list[list][refn].data;
/* luma prefetch: 4 lines; the (mb_x&3)*4 term staggers requests across MBs */
3044 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3045 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma prefetch: Cb and Cr are assumed contiguous (src[2]-src[1] stride trick) */
3046 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3047 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter-prediction driver for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16, then 8x8 sub-partitions down to 4x4) and calls
 * mc_part for each partition with the matching qpel/chroma function sizes
 * and weight-function table entries.
 * NOTE(review): loop headers over the 8x8 sub-blocks and several closing
 * braces appear elided in this dump; visible lines kept byte-identical. */
3051 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3052 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3053 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3054 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3055 MpegEncContext * const s = &h->s;
3056 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3057 const int mb_type= s->current_picture.mb_type[mb_xy];
3059 assert(IS_INTER(mb_type));
3061 prefetch_motion(h, 0);
3063 if(IS_16X16(mb_type)){
3064 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3065 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3066 &weight_op[0], &weight_avg[0],
3067 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3068 }else if(IS_16X8(mb_type)){
/* two 16x8 halves, each done as two 8x8 squares `delta`=8 apart */
3069 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3070 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3071 &weight_op[1], &weight_avg[1],
3072 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3073 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3074 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3075 &weight_op[1], &weight_avg[1],
3076 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3077 }else if(IS_8X16(mb_type)){
/* two 8x16 halves; vertical split, delta is a line offset */
3078 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3079 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3080 &weight_op[2], &weight_avg[2],
3081 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3082 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3083 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3084 &weight_op[2], &weight_avg[2],
3085 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3089 assert(IS_8X8(mb_type));
/* per 8x8 sub-block: n indexes the 4x4 scan, offsets are in chroma pixels */
3092 const int sub_mb_type= h->sub_mb_type[i];
3094 int x_offset= (i&1)<<2;
3095 int y_offset= (i&2)<<1;
3097 if(IS_SUB_8X8(sub_mb_type)){
3098 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3099 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3100 &weight_op[3], &weight_avg[3],
3101 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3102 }else if(IS_SUB_8X4(sub_mb_type)){
3103 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3104 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3105 &weight_op[4], &weight_avg[4],
3106 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3107 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3108 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3109 &weight_op[4], &weight_avg[4],
3110 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3111 }else if(IS_SUB_4X8(sub_mb_type)){
3112 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3113 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3114 &weight_op[5], &weight_avg[5],
3115 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3116 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3117 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3118 &weight_op[5], &weight_avg[5],
3119 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3122 assert(IS_SUB_4X4(sub_mb_type));
3124 int sub_x_offset= x_offset + 2*(j&1);
3125 int sub_y_offset= y_offset + (j&2);
3126 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3127 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3128 &weight_op[6], &weight_avg[6],
3129 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3135 prefetch_motion(h, 1);
/**
 * Builds the static CAVLC VLC lookup tables (coeff_token, total_zeros,
 * run_before and the chroma-DC variants) used for residual decoding.
 * The static 'done' flag makes this one-time work shared by all decoder
 * instances.  NOTE(review): several loop headers and braces fall outside
 * this excerpt; comments describe only the visible calls.
 */
3138 static void decode_init_vlc(){
3139     static int done = 0;

    /* chroma DC coeff_token: 4*5 (total_coeff, trailing_ones) combinations */
3145         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3146                  &chroma_dc_coeff_token_len [0], 1, 1,
3147                  &chroma_dc_coeff_token_bits[0], 1, 1, 1);

    /* luma coeff_token tables (one per nC context), 4*17 entries each */
3150             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3151                      &coeff_token_len [i][0], 1, 1,
3152                      &coeff_token_bits[i][0], 1, 1, 1);

    /* chroma DC total_zeros: 4 possible values per table */
3156             init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3157                      &chroma_dc_total_zeros_len [i][0], 1, 1,
3158                      &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);

    /* luma total_zeros: one table per total_coeff count 1..15 */
3160         for(i=0; i<15; i++){
3161             init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3162                      &total_zeros_len [i][0], 1, 1,
3163                      &total_zeros_bits[i][0], 1, 1, 1);

    /* run_before tables for zerosLeft 1..6 (7 codes each) */
3167             init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3168                      &run_len [i][0], 1, 1,
3169                      &run_bits[i][0], 1, 1, 1);
    /* shared table for zerosLeft > 6 (up to 16 codes) */
3171     init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3172              &run_len [6][0], 1, 1,
3173              &run_bits[6][0], 1, 1, 1);
3178 * Sets the intra prediction function pointers.
/**
 * Fills the intra prediction function-pointer tables with the C reference
 * implementations, for every mode of each block size:
 * pred4x4 (9 modes + DC fallbacks), pred8x8l (8x8 luma, high profile),
 * pred8x8 (chroma) and pred16x16.
 */
3180 static void init_pred_ptrs(H264Context *h){
3181 //    MpegEncContext * const s = &h->s;

    /* 4x4 luma intra prediction */
3183     h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
3184     h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
3185     h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
3186     h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3187     h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3188     h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
3189     h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
3190     h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
3191     h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
    /* DC fallbacks used when left/top neighbours are unavailable */
3192     h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
3193     h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
3194     h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;

    /* 8x8 luma intra prediction (transform_8x8_mode) */
3196     h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
3197     h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
3198     h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
3199     h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3200     h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3201     h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
3202     h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
3203     h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
3204     h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
3205     h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
3206     h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
3207     h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;

    /* 8x8 chroma intra prediction */
3209     h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
3210     h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
3211     h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
3212     h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
3213     h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3214     h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3215     h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;

    /* 16x16 luma intra prediction (reuses the 8x8 mode indices) */
3217     h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
3218     h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
3219     h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
3220     h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
3221     h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3222     h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3223     h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Frees all per-context dynamically allocated decoder tables.
 * av_freep() also NULLs each pointer, so a subsequent alloc_tables()
 * (or a second call here) is safe.
 */
3226 static void free_tables(H264Context *h){
3227     av_freep(&h->intra4x4_pred_mode);
3228     av_freep(&h->chroma_pred_mode_table);
3229     av_freep(&h->cbp_table);
3230     av_freep(&h->mvd_table[0]);
3231     av_freep(&h->mvd_table[1]);
3232     av_freep(&h->direct_table);
3233     av_freep(&h->non_zero_count);
3234     av_freep(&h->slice_table_base);
3235     av_freep(&h->top_borders[1]);
3236     av_freep(&h->top_borders[0]);
    /* slice_table points into slice_table_base (freed above) — invalidate it */
3237     h->slice_table= NULL;

3239     av_freep(&h->mb2b_xy);
3240     av_freep(&h->mb2b8_xy);

3242     av_freep(&h->s.obmc_scratchpad);
/**
 * Precomputes the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices.  When both 8x8 scaling matrices are identical, the
 * second table aliases the first buffer instead of being recomputed.
 * 'transpose' reorders coefficients for optimized (non-C) IDCT layouts.
 * NOTE(review): the x-loop header and some braces fall outside this excerpt.
 */
3245 static void init_dequant8_coeff_table(H264Context *h){
3247     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3248     h->dequant8_coeff[0] = h->dequant8_buffer[0];
3249     h->dequant8_coeff[1] = h->dequant8_buffer[1];

3251     for(i=0; i<2; i++ ){
        /* reuse buffer 0 when the two scaling matrices are identical */
3252         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3253             h->dequant8_coeff[1] = h->dequant8_buffer[0];

3257         for(q=0; q<52; q++){
3258             int shift = div6[q];
            /* base coeff (selected by q%6 pattern) scaled by the PPS matrix,
               shifted by q/6; optionally stored transposed */
3261                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3262                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3263                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precomputes the 4x4 dequantization tables (6 matrices: 3 intra + 3 inter
 * per Y/Cb/Cr) for all 52 QP values from the PPS scaling matrices.
 * Tables with identical scaling matrices share one buffer.
 * NOTE(review): the j/x loop headers fall outside this excerpt.
 */
3268 static void init_dequant4_coeff_table(H264Context *h){
3270     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3271     for(i=0; i<6; i++ ){
3272         h->dequant4_coeff[i] = h->dequant4_buffer[i];
            /* alias an earlier buffer when the scaling matrices match */
3274             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3275                 h->dequant4_coeff[i] = h->dequant4_buffer[j];

3282         for(q=0; q<52; q++){
3283             int shift = div6[q] + 2;
            /* base coeff scaled by PPS matrix, shifted by q/6+2;
               optionally stored transposed for optimized IDCTs */
3286                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3287                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3288                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initializes all dequant tables for the current PPS: always 4x4, and 8x8
 * only when transform_8x8_mode is enabled.  With transform_bypass
 * (lossless), the QP==0 entries are forced to the neutral value 1<<6 so
 * dequantization becomes a no-op.
 * NOTE(review): the i/x loop headers fall outside this excerpt.
 */
3293 static void init_dequant_tables(H264Context *h){
3295     init_dequant4_coeff_table(h);
3296     if(h->pps.transform_8x8_mode)
3297         init_dequant8_coeff_table(h);
3298     if(h->sps.transform_bypass){
            /* 1<<6 is the identity scale (results are >>6 after the IDCT) */
3301                 h->dequant4_coeff[i][0][x] = 1<<6;
3302         if(h->pps.transform_8x8_mode)
3305                     h->dequant8_coeff[i][0][x] = 1<<6;
3312 * needs width/height
/**
 * Allocates the per-context decoder tables; requires mb_width/mb_height
 * to be known.  Sizes include one extra MB row (big_mb_num) for edge
 * handling.  The CABAC-only tables are allocated only when the PPS enables
 * CABAC.  Also builds the mb_xy -> b_xy / b8_xy index maps.
 * Returns 0 on success (CHECKED_ALLOCZ jumps to a fail path on OOM,
 * outside this excerpt).
 */
3314 static int alloc_tables(H264Context *h){
3315     MpegEncContext * const s = &h->s;
3316     const int big_mb_num= s->mb_stride * (s->mb_height+1);

3319     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))

3321     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3322     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3323     CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3324     CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3325     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))

    /* tables used only by the CABAC entropy coder */
3327     if( h->pps.cabac ) {
3328         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3329         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3330         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3331         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));

    /* -1 marks "no slice"; slice_table is offset into the base so that
       out-of-frame neighbour accesses stay in bounds */
3334     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
3335     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    /* macroblock index -> motion-vector (b) and 8x8 (b8) index maps */
3337     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3338     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3339     for(y=0; y<s->mb_height; y++){
3340         for(x=0; x<s->mb_width; x++){
3341             const int mb_xy= x + y*s->mb_stride;
3342             const int b_xy = 4*x + 4*y*h->b_stride;
3343             const int b8_xy= 2*x + 2*y*h->b8_stride;

3345             h->mb2b_xy [mb_xy]= b_xy;
3346             h->mb2b8_xy[mb_xy]= b8_xy;

    /* allocated lazily in frame_start() once linesize is known */
3350     s->obmc_scratchpad = NULL;

3352     if(!h->dequant4_coeff[0])
3353         init_dequant_tables(h);
/**
 * Initialization shared by decoder (and encoder) setup: copies dimensions
 * and codec id from the AVCodecContext and sets flat (all-16) default
 * scaling matrices, which a PPS/SPS may later override.
 */
3361 static void common_init(H264Context *h){
3362     MpegEncContext * const s = &h->s;

3364     s->width = s->avctx->width;
3365     s->height = s->avctx->height;
3366     s->codec_id= s->avctx->codec->id;

    /* -1 = "no PPS dequant tables built yet" */
3370     h->dequant_coeff_pps= -1;
3371     s->unrestricted_mv=1;
3372     s->decode=1; //FIXME

    /* flat default scaling lists (value 16 = unity scale) */
3374     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3375     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects AVCC-style extradata (first byte
 * == 1 indicates avcC, i.e. length-prefixed NALs rather than Annex B).
 * NOTE(review): the tail of this function (including its return) falls
 * outside this excerpt.
 */
3378 static int decode_init(AVCodecContext *avctx){
3379     H264Context *h= avctx->priv_data;
3380     MpegEncContext * const s = &h->s;

3382     MPV_decode_defaults(s);

3387     s->out_format = FMT_H264;
3388     s->workaround_bugs= avctx->workaround_bugs;

3391 //    s->decode_mb= ff_h263_decode_mb;
3393     avctx->pix_fmt= PIX_FMT_YUV420P;

    /* extradata starting with 1 => avcC configuration record (MP4 style) */
3397     if(avctx->extradata_size > 0 && avctx->extradata &&
3398        *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the MPV frame and the error resilience pass,
 * precomputes the block -> pixel offset tables for luma (0..15), chroma
 * (16..23) and their MBAFF variants (24..47, using doubled line strides),
 * lazily allocates the bi-pred scratchpad, and resets slice_table.
 */
3408 static int frame_start(H264Context *h){
3409     MpegEncContext * const s = &h->s;

3412     if(MPV_frame_start(s, s->avctx) < 0)
3414     ff_er_frame_start(s);

3416     assert(s->linesize && s->uvlinesize);

    /* scan8 maps block index -> cache position; (&7,>>3) recover x,y.
       Entries 24+ use doubled strides for field (MBAFF) decoding. */
3418     for(i=0; i<16; i++){
3419         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3420         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3423         h->block_offset[16+i]=
3424         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3425         h->block_offset[24+16+i]=
3426         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);

3429     /* can't be in alloc_tables because linesize isn't known there.
3430      * FIXME: redo bipred weight to not require extra buffer? */
3431     if(!s->obmc_scratchpad)
3432         s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

3434     /* some macroblocks will be accessed before they're available */
3436     memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));

3438 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column (into left_border) and bottom row (into
 * top_borders[0]) of the just-decoded macroblock, so the deblocking
 * filter / intra prediction of neighbouring MBs can still read the
 * unfiltered samples.  Chroma is skipped in CODEC_FLAG_GRAY mode.
 */
3442 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3443     MpegEncContext * const s = &h->s;

3447     src_cb -= uvlinesize;
3448     src_cr -= uvlinesize;

3450     // There are two lines saved, the line above the top macroblock of a pair,
3451     // and the line above the bottom macroblock
3452     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3453     for(i=1; i<17; i++){
3454         h->left_border[i]= src_y[15+i*  linesize];

    /* bottom luma row, copied 8 bytes at a time */
3457     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3458     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

3460     if(!(s->flags&CODEC_FLAG_GRAY)){
3461         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3462         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3464             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3465             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
        /* bottom chroma rows at offsets 16 (Cb) and 24 (Cr) */
3467         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3468         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg=1) or restores (xchg=0) the border samples of the current
 * macroblock with the saved unfiltered copies in left_border/top_borders.
 * Used around intra prediction when deblocking is active, so prediction
 * reads unfiltered neighbours.  Borders at the frame edge are skipped
 * via deblock_left/deblock_top.
 */
3472 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3473     MpegEncContext * const s = &h->s;

3476     int deblock_left = (s->mb_x > 0);
3477     int deblock_top =  (s->mb_y > 0);

    /* step back one row/column to address the border samples */
3479     src_y  -=   linesize + 1;
3480     src_cb -= uvlinesize + 1;
3481     src_cr -= uvlinesize + 1;

    /* XCHG swaps a<->b when xchg is set, else copies b into a
       (full macro body falls outside this excerpt) */
3483 #define XCHG(a,b,t,xchg)\

3490         for(i = !deblock_top; i<17; i++){
3491             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);

3496         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3497         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        /* top-right sample needed by diagonal intra modes */
3498         if(s->mb_x+1 < s->mb_width){
3499             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);

3503     if(!(s->flags&CODEC_FLAG_GRAY)){
3505             for(i = !deblock_top; i<9; i++){
3506                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3507                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);

3511             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3512             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border: saves the borders of a macroblock
 * PAIR (32 luma rows), using top_borders[0] for the line above the top
 * MB and top_borders[1] for the line above the bottom MB.
 */
3517 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3518     MpegEncContext * const s = &h->s;

3521     src_y  -= 2 *   linesize;
3522     src_cb -= 2 * uvlinesize;
3523     src_cr -= 2 * uvlinesize;

3525     // There are two lines saved, the line above the top macroblock of a pair,
3526     // and the line above the bottom macroblock
3527     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3528     h->left_border[1]= h->top_borders[1][s->mb_x][15];
    /* right luma column of the full MB pair (32 rows) */
3529     for(i=2; i<34; i++){
3530         h->left_border[i]= src_y[15+i*  linesize];

    /* bottom two luma rows of the pair */
3533     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3534     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3535     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3536     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);

3538     if(!(s->flags&CODEC_FLAG_GRAY)){
3539         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3540         h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3541         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
3542         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
        /* right chroma columns (Cb then Cr) */
3543         for(i=2; i<18; i++){
3544             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3545             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3547         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3548         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3549         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3550         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border: swaps/restores the saved unfiltered
 * borders of a whole macroblock pair (two top lines, 32-row left column).
 * deblock_top requires mb_y > 1 because a pair spans two MB rows.
 */
3554 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3555     MpegEncContext * const s = &h->s;

3558     int deblock_left = (s->mb_x > 0);
3559     int deblock_top =  (s->mb_y > 1);

3561     tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);

3563     src_y  -= 2 *   linesize + 1;
3564     src_cb -= 2 * uvlinesize + 1;
3565     src_cr -= 2 * uvlinesize + 1;

    /* XCHG swaps a<->b when xchg is set, else copies b into a
       (full macro body falls outside this excerpt) */
3567 #define XCHG(a,b,t,xchg)\

3574         for(i = (!deblock_top)<<1; i<34; i++){
3575             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);

    /* two saved top lines, one per field/MB of the pair */
3580         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3581         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3582         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3583         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
        /* top-right samples needed by diagonal intra modes */
3584         if(s->mb_x+1 < s->mb_width){
3585             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3586             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);

3590     if(!(s->flags&CODEC_FLAG_GRAY)){
3592             for(i = (!deblock_top) << 1; i<18; i++){
3593                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3594                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);

3598             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3599             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3600             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3601             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level reconstruction of one macroblock: computes destination
 * pointers, selects IDCT variants, performs intra prediction or motion
 * compensation, adds the residual, and finally runs the deblocking
 * filter (with special MBAFF pair handling).  Shared between H.264 and
 * the SVQ3 decoder (codec_id checks below).
 * NOTE(review): many structural lines (if/else headers, braces) fall
 * outside this excerpt.
 */
3606 static void hl_decode_mb(H264Context *h){
3607     MpegEncContext * const s = &h->s;
3608     const int mb_x= s->mb_x;
3609     const int mb_y= s->mb_y;
3610     const int mb_xy= mb_x + mb_y*s->mb_stride;
3611     const int mb_type= s->current_picture.mb_type[mb_xy];
3612     uint8_t  *dest_y, *dest_cb, *dest_cr;
3613     int linesize, uvlinesize /*dct_offset*/;
3615     int *block_offset = &h->block_offset[0];
3616     const unsigned int bottom = mb_y & 1;
3617     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3618     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3619     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    /* destination pointers for this MB in the current picture */
3624     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3625     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3626     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

3628     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3629     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    /* field macroblock: double strides, use the MBAFF block offsets */
3632         linesize   = h->mb_linesize   = s->linesize * 2;
3633         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3634         block_offset = &h->block_offset[24];
3635         if(mb_y&1){ //FIXME move out of this func?
3636             dest_y -= s->linesize*15;
3637             dest_cb-= s->uvlinesize*7;
3638             dest_cr-= s->uvlinesize*7;

        /* remap ref indices to per-field references (parity-dependent) */
3642         for(list=0; list<2; list++){
3643             if(!USES_LIST(mb_type, list))
3645             if(IS_16X16(mb_type)){
3646                 int8_t *ref = &h->ref_cache[list][scan8[0]];
3647                 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3649                 for(i=0; i<16; i+=4){
3650                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3651                     int ref = h->ref_cache[list][scan8[i]];
3653                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);

    /* frame macroblock: normal strides */
3659         linesize   = h->mb_linesize   = s->linesize;
3660         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3661 //        dct_offset = s->linesize * 16;

    /* select residual-add functions: bypass (lossless), 8x8 or 4x4 IDCT */
3664     if(transform_bypass){
3666         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3667     }else if(IS_8x8DCT(mb_type)){
3668         idct_dc_add = s->dsp.h264_idct8_dc_add;
3669         idct_add = s->dsp.h264_idct8_add;
3671         idct_dc_add = s->dsp.h264_idct_dc_add;
3672         idct_add = s->dsp.h264_idct_add;

    /* MBAFF intra: temporarily restore unfiltered pair borders */
3675     if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3676        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3677         int mbt_y = mb_y&~1;
3678         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3679         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3680         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3681         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);

3684     if (IS_INTRA_PCM(mb_type)) {
3687         // The pixels are stored in h->mb array in the same order as levels,
3688         // copy them in output in the correct order.
3689         for(i=0; i<16; i++) {
3690             for (y=0; y<4; y++) {
3691                 for (x=0; x<4; x++) {
3692                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
        /* chroma PCM: blocks 16..19 are Cb, 20..23 are Cr */
3696         for(i=16; i<16+4; i++) {
3697             for (y=0; y<4; y++) {
3698                 for (x=0; x<4; x++) {
3699                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3703         for(i=20; i<20+4; i++) {
3704             for (y=0; y<4; y++) {
3705                 for (x=0; x<4; x++) {
3706                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];

3711         if(IS_INTRA(mb_type)){
            /* non-MBAFF: swap in unfiltered borders for prediction */
3712             if(h->deblocking_filter && !FRAME_MBAFF)
3713                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);

3715             if(!(s->flags&CODEC_FLAG_GRAY)){
3716                 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3717                 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);

3720             if(IS_INTRA4x4(mb_type)){
3722                 if(IS_8x8DCT(mb_type)){
                    /* 8x8 intra prediction + residual, per 8x8 block */
3723                     for(i=0; i<16; i+=4){
3724                         uint8_t * const ptr= dest_y + block_offset[i];
3725                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3726                         const int nnz = h->non_zero_count_cache[ scan8[i] ];
3727                         h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3728                                      (h->topright_samples_available<<(i+1))&0x8000, linesize);
                        /* DC-only blocks use the cheaper dc_add path */
3730                         if(nnz == 1 && h->mb[i*16])
3731                             idct_dc_add(ptr, h->mb + i*16, linesize);
3733                             idct_add(ptr, h->mb + i*16, linesize);
                /* 4x4 intra prediction + residual, per 4x4 block */
3737                 for(i=0; i<16; i++){
3738                     uint8_t * const ptr= dest_y + block_offset[i];
3740                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    /* modes needing top-right samples: replicate the last
                       available top pixel when top-right is unavailable */
3743                     if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3744                         const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3745                         assert(mb_y || linesize <= block_offset[i]);
3746                         if(!topright_avail){
3747                             tr= ptr[3 - linesize]*0x01010101;
3748                             topright= (uint8_t*) &tr;
3750                             topright= ptr + 4 - linesize;

3754                     h->pred4x4[ dir ](ptr, topright, linesize);
3755                     nnz = h->non_zero_count_cache[ scan8[i] ];
3757                         if(s->codec_id == CODEC_ID_H264){
3758                             if(nnz == 1 && h->mb[i*16])
3759                                 idct_dc_add(ptr, h->mb + i*16, linesize);
3761                                 idct_add(ptr, h->mb + i*16, linesize);
3763                             svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
            /* intra 16x16: predict whole luma MB, then luma DC transform */
3768                 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3769                 if(s->codec_id == CODEC_ID_H264){
3770                     if(!transform_bypass)
3771                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3773                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3775             if(h->deblocking_filter && !FRAME_MBAFF)
3776                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
        /* inter macroblock: motion compensation (H.264 path only) */
3777         }else if(s->codec_id == CODEC_ID_H264){
3778             hl_motion(h, dest_y, dest_cb, dest_cr,
3779                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3780                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3781                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);

        /* add luma residual (intra16x16/inter); intra4x4 already added above */
3785         if(!IS_INTRA4x4(mb_type)){
3786             if(s->codec_id == CODEC_ID_H264){
3787                 if(IS_INTRA16x16(mb_type)){
3788                     for(i=0; i<16; i++){
3789                         if(h->non_zero_count_cache[ scan8[i] ])
3790                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3791                         else if(h->mb[i*16])
3792                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                    /* step 4 blocks at a time when 8x8 DCT is used */
3795                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3796                     for(i=0; i<16; i+=di){
3797                         int nnz = h->non_zero_count_cache[ scan8[i] ];
3799                             if(nnz==1 && h->mb[i*16])
3800                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3802                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3807                 for(i=0; i<16; i++){
3808                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3809                         uint8_t * const ptr= dest_y + block_offset[i];
3810                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);

        /* chroma residual: dequant+IDCT the 2x2 chroma DC first, then add */
3816         if(!(s->flags&CODEC_FLAG_GRAY)){
3817             uint8_t *dest[2] = {dest_cb, dest_cr};
3818             if(transform_bypass){
3819                 idct_add = idct_dc_add = s->dsp.add_pixels4;
3821                 idct_add = s->dsp.h264_idct_add;
3822                 idct_dc_add = s->dsp.h264_idct_dc_add;
3823                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3824                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3826             if(s->codec_id == CODEC_ID_H264){
3827                 for(i=16; i<16+8; i++){
3828                     if(h->non_zero_count_cache[ scan8[i] ])
3829                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3830                     else if(h->mb[i*16])
3831                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3834                 for(i=16; i<16+8; i++){
3835                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3836                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3837                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);

3843     if(h->deblocking_filter) {
            /* MBAFF: filter the whole pair once the bottom MB is decoded */
3845             //FIXME try deblocking one mb at a time?
3846             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3847             const int mb_y= s->mb_y - 1;
3848             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3849             const int mb_xy= mb_x + mb_y*s->mb_stride;
3850             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3851             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3852             if (!bottom) return;
3853             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3854             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3855             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

3857             if(IS_INTRA(mb_type_top | mb_type_bottom))
3858                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);

3860             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);

            /* filter the top then the bottom MB of the pair, refreshing
               the cached chroma QP from the per-MB qscale table */
3864             tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3865             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3866             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3867             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3870             tprintf("call mbaff filter_mb\n");
3871             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3872             h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3873             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
            /* non-MBAFF: save borders then run the fast per-MB filter */
3875             tprintf("call filter_mb\n");
3876             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3877             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3878             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3884 * fills the default_ref_list.
/**
 * Builds the default (unreordered) reference picture lists.
 * For B slices: short-term refs are sorted by POC, then L0/L1 are built
 * walking outward from the current POC in opposite directions, long-term
 * refs are appended, and L1's first two entries are swapped if L0==L1.
 * For P slices: short-term refs in decoding order, then long-term refs.
 * Returns 0 (return statement falls outside this excerpt).
 */
3886 static int fill_default_ref_list(H264Context *h){
3887     MpegEncContext * const s = &h->s;
3889     int smallest_poc_greater_than_current = -1;
3890     Picture sorted_short_ref[32];

3892     if(h->slice_type==B_TYPE){
3896         /* sort frame according to poc in B slice */
3897         for(out_i=0; out_i<h->short_ref_count; out_i++){
3899             int best_poc=INT_MAX;

            /* selection sort: pick the smallest POC above 'limit' */
3901             for(i=0; i<h->short_ref_count; i++){
3902                 const int poc= h->short_ref[i]->poc;
3903                 if(poc > limit && poc < best_poc){

3909             assert(best_i != INT_MIN);

3912             sorted_short_ref[out_i]= *h->short_ref[best_i];
3913             tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
            /* remember where future (POC >= current) refs start */
3914             if (-1 == smallest_poc_greater_than_current) {
3915                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3916                     smallest_poc_greater_than_current = out_i;

3922     if(s->picture_structure == PICT_FRAME){
3923         if(h->slice_type==B_TYPE){
3925             tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);

3927             // find the largest poc
3928             for(list=0; list<2; list++){
                /* L0 walks toward the past first, L1 toward the future */
3931                 int step= list ? -1 : 1;

3933                 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
                    /* when one direction is exhausted, reverse and restart
                       from the other side of the current POC */
3934                     while(j<0 || j>= h->short_ref_count){
3935                         if(j != -99 && step == (list ? -1 : 1))
3938                         j= smallest_poc_greater_than_current + (step>>1);

3940                     if(sorted_short_ref[j].reference != 3) continue;
3941                     h->default_ref_list[list][index  ]= sorted_short_ref[j];
3942                     h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;

                /* append long-term references (pic_id = long-term index) */
3945                 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3946                     if(h->long_ref[i] == NULL) continue;
3947                     if(h->long_ref[i]->reference != 3) continue;

3949                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
                    /* NOTE(review): stray double semicolon below — harmless */
3950                     h->default_ref_list[ list ][index++].pic_id= i;;

3953                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3954                     // swap the two first elements of L1 when
3955                     // L0 and L1 are identical
3956                     Picture temp= h->default_ref_list[1][0];
3957                     h->default_ref_list[1][0] = h->default_ref_list[1][1];
3958                     h->default_ref_list[1][1] = temp;

3961                 if(index < h->ref_count[ list ])
3962                     memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));

        /* P slice: short-term refs in decoding order, then long-term */
3966             for(i=0; i<h->short_ref_count; i++){
3967                 if(h->short_ref[i]->reference != 3) continue; //FIXME handle reference fields properly
3968                 h->default_ref_list[0][index  ]= *h->short_ref[i];
3969                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3971             for(i = 0; i < 16; i++){
3972                 if(h->long_ref[i] == NULL) continue;
3973                 if(h->long_ref[i]->reference != 3) continue;
3974                 h->default_ref_list[0][index  ]= *h->long_ref[i];
3975                 h->default_ref_list[0][index++].pic_id= i;;

3977             if(index < h->ref_count[0])
3978                 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));

3981     if(h->slice_type==B_TYPE){
3983         //FIXME second field handling
3987     for (i=0; i<h->ref_count[0]; i++) {
3988         tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3990     if(h->slice_type==B_TYPE){
3991         for (i=0; i<h->ref_count[1]; i++) {
3992             tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3999 static void print_short_term(H264Context *h);
4000 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts from the default lists, then for each reordering command moves
 * the selected short-term (idc 0/1, by pic_num difference) or long-term
 * (idc 2, by index) picture to the current position.  Missing pictures
 * fill empty slots with the current picture afterwards.
 * Returns 0 on success, -1 on bitstream errors.
 */
4002 static int decode_ref_pic_list_reordering(H264Context *h){
4003     MpegEncContext * const s = &h->s;

4006     print_short_term(h);
4008     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func

4010     for(list=0; list<2; list++){
4011         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        /* ref_pic_list_reordering_flag_l0/l1 */
4013         if(get_bits1(&s->gb)){
4014             int pred= h->curr_pic_num;

4016             for(index=0; ; index++){
4017                 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4020                 Picture *ref = NULL;

                /* idc 3 terminates the reordering loop */
4022                 if(reordering_of_pic_nums_idc==3)

4025                 if(index >= h->ref_count[list]){
4026                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");

4030                 if(reordering_of_pic_nums_idc<3){
4031                     if(reordering_of_pic_nums_idc<2){
                        /* short-term: pic_num predicted by +/- diff, wrapped */
4032                         const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

4034                         if(abs_diff_pic_num >= h->max_pic_num){
4035                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");

4039                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4040                         else                                pred+= abs_diff_pic_num;
4041                         pred &= h->max_pic_num - 1;

4043                         for(i= h->short_ref_count-1; i>=0; i--){
4044                             ref = h->short_ref[i];
4045                             assert(ref->reference == 3);
4046                             assert(!ref->long_ref);
4047                             if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4051                             ref->pic_id= ref->frame_num;
                        /* long-term: direct index into long_ref[] */
4053                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4054                         ref = h->long_ref[pic_id];
4055                         ref->pic_id= pic_id;
4056                         assert(ref->reference == 3);
4057                         assert(ref->long_ref);

4062                     av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4063                     memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    /* shift entries down to make room at 'index', dropping
                       any existing occurrence of the same picture */
4065                     for(i=index; i+1<h->ref_count[list]; i++){
4066                         if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4069                     for(; i > index; i--){
4070                         h->ref_list[list][i]= h->ref_list[list][i-1];
4072                     h->ref_list[list][index]= *ref;
4075                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");

4081         if(h->slice_type!=B_TYPE) break;

    /* replace missing entries with the current picture (error concealment) */
4083     for(list=0; list<2; list++){
4084         for(index= 0; index < h->ref_count[list]; index++){
4085             if(!h->ref_list[list][index].data[0])
4086                 h->ref_list[list][index]= s->current_picture;
4088         if(h->slice_type!=B_TYPE) break;

4091     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4092         direct_dist_scale_factor(h);
4093     direct_ref_list_init(h);
/**
 * Derives per-field reference entries for MBAFF decoding: for each frame
 * reference i, builds two field pictures at slots 16+2*i (top) and
 * 16+2*i+1 (bottom) with doubled linesizes, and duplicates the explicit
 * and implicit weighted-prediction parameters for those slots.
 */
4097 static void fill_mbaff_ref_list(H264Context *h){
4099     for(list=0; list<2; list++){
4100         for(i=0; i<h->ref_count[list]; i++){
4101             Picture *frame = &h->ref_list[list][i];
4102             Picture *field = &h->ref_list[list][16+2*i];
            /* top field: same data pointers, doubled stride;
               bottom field: offset by one frame line */
4105                 field[0].linesize[j] <<= 1;
4106             field[1] = field[0];
4108                 field[1].data[j] += frame->linesize[j];

4110             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4111             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4113                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4114                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
    /* duplicate implicit bi-pred weights for the field entries */
4118     for(j=0; j<h->ref_count[1]; j++){
4119         for(i=0; i<h->ref_count[0]; i++)
4120             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4121         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4122         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header: the luma/chroma
 * log2 weight denominators, then per reference an optional explicit
 * weight/offset pair (defaulting to 1<<denom and 0).  Sets use_weight /
 * use_weight_chroma when any non-default value appears.  L1 is parsed
 * only for B slices.  Returns 0 (return falls outside this excerpt).
 */
4126 static int pred_weight_table(H264Context *h){
4127     MpegEncContext * const s = &h->s;
4129     int luma_def, chroma_def;

4132     h->use_weight_chroma= 0;
4133     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4134     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* default weight = 1.0 in fixed point (1 << denom) */
4135     luma_def = 1<<h->luma_log2_weight_denom;
4136     chroma_def = 1<<h->chroma_log2_weight_denom;

4138     for(list=0; list<2; list++){
4139         for(i=0; i<h->ref_count[list]; i++){
4140             int luma_weight_flag, chroma_weight_flag;

4142             luma_weight_flag= get_bits1(&s->gb);
4143             if(luma_weight_flag){
4144                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4145                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
                /* only flag weighting when values differ from the defaults */
4146                 if(   h->luma_weight[list][i] != luma_def
4147                    || h->luma_offset[list][i] != 0)
4150                 h->luma_weight[list][i]= luma_def;
4151                 h->luma_offset[list][i]= 0;

4154             chroma_weight_flag= get_bits1(&s->gb);
4155             if(chroma_weight_flag){
                    /* one weight/offset pair per chroma plane (Cb, Cr) */
4158                     h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4159                     h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4160                     if(   h->chroma_weight[list][i][j] != chroma_def
4161                        || h->chroma_offset[list][i][j] != 0)
4162                         h->use_weight_chroma= 1;
4167                     h->chroma_weight[list][i][j]= chroma_def;
4168                     h->chroma_offset[list][i][j]= 0;
4172         if(h->slice_type != B_TYPE) break;

4174     h->use_weight= h->use_weight || h->use_weight_chroma;
4178 static void implicit_weight_table(H264Context *h){
/* Initializes implicit (POC-distance based) bi-prediction weights for B
 * slices with weighted_bipred_idc == 2. Weight pairs sum to 64; a weight of
 * 32/32 is the unweighted average, used when the scale factor is out of the
 * range permitted by the spec.
 * NOTE(review): dump is missing interior lines (declarations, the special-case
 * early return around 4186, td==0 guard); comments cover visible code only. */
4179 MpegEncContext * const s = &h->s;
4181 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair around the current POC: no weighting */
4183 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4184 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4186 h->use_weight_chroma= 0;
/* use_weight==2 marks "implicit" mode for the motion compensation code */
4191 h->use_weight_chroma= 2;
4192 h->luma_log2_weight_denom= 5;
4193 h->chroma_log2_weight_denom= 5;
4195 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4196 int poc0 = h->ref_list[0][ref0].poc;
4197 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4198 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's 8-286..8-290 derivation */
4199 int td = clip(poc1 - poc0, -128, 127);
4201 int tb = clip(cur_poc - poc0, -128, 127);
4202 int tx = (16384 + (FFABS(td) >> 1)) / td;
4203 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4204 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4205 h->implicit_weight[ref0][ref1] = 32;
4207 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
/* fallback (td==0 / equal POC case, per missing branch): equal weights */
4209 h->implicit_weight[ref0][ref1] = 32;
/* Marks a picture as no longer referenced. Pictures still queued for output
 * (delayed_output_pic / delayed_pic[]) are handled specially — the exact
 * handling lines are missing from this dump.
 * NOTE(review): function body is incomplete here; comments cover visible code. */
4214 static inline void unreference_pic(H264Context *h, Picture *pic){
4217 if(pic == h->delayed_output_pic)
/* scan the delayed-picture queue for this picture */
4220 for(i = 0; h->delayed_pic[i]; i++)
4221 if(pic == h->delayed_pic[i]){
4229 * instantaneous decoder refresh.
4231 static void idr(H264Context *h){
/* Instantaneous decoder refresh: drop every long-term and short-term
 * reference picture, as required when an IDR slice is decoded. */
4234 for(i=0; i<16; i++){
4235 if (h->long_ref[i] != NULL) {
4236 unreference_pic(h, h->long_ref[i]);
4237 h->long_ref[i]= NULL;
4240 h->long_ref_count=0;
4242 for(i=0; i<h->short_ref_count; i++){
4243 unreference_pic(h, h->short_ref[i]);
4244 h->short_ref[i]= NULL;
4246 h->short_ref_count=0;
4249 /* forget old pics after a seek */
4250 static void flush_dpb(AVCodecContext *avctx){
/* Codec flush callback: forget all delayed/reference pictures after a seek
 * so stale frames are neither output nor used for prediction. */
4251 H264Context *h= avctx->priv_data;
4253 for(i=0; i<16; i++) {
4254 if(h->delayed_pic[i])
4255 h->delayed_pic[i]->reference= 0;
4256 h->delayed_pic[i]= NULL;
4258 if(h->delayed_output_pic)
4259 h->delayed_output_pic->reference= 0;
4260 h->delayed_output_pic= NULL;
/* the current picture (if any) must not be kept as a reference either */
4262 if(h->s.current_picture_ptr)
4263 h->s.current_picture_ptr->reference= 0;
4268 * @return the removed picture or NULL if an error occurs
4270 static Picture * remove_short(H264Context *h, int frame_num){
/* Removes the short-term reference with the given frame_num from the
 * short_ref list (compacting the array) and returns it; the "not found"
 * return path is in lines missing from this dump. */
4271 MpegEncContext * const s = &h->s;
4274 if(s->avctx->debug&FF_DEBUG_MMCO)
4275 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4277 for(i=0; i<h->short_ref_count; i++){
4278 Picture *pic= h->short_ref[i];
4279 if(s->avctx->debug&FF_DEBUG_MMCO)
4280 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4281 if(pic->frame_num == frame_num){
4282 h->short_ref[i]= NULL;
/* close the gap left by the removed entry */
4283 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4284 h->short_ref_count--;
4293 * @return the removed picture or NULL if an error occurs
4295 static Picture * remove_long(H264Context *h, int i){
/* Removes and returns the long-term reference at index i; returns NULL
 * (without touching the count) when the slot was already empty. */
4298 pic= h->long_ref[i];
4299 h->long_ref[i]= NULL;
4300 if(pic) h->long_ref_count--;
4306 * print short term list
4308 static void print_short_term(H264Context *h) {
/* Debug helper: dumps the short-term reference list when MMCO debugging
 * is enabled; no effect otherwise. */
4310 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4311 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4312 for(i=0; i<h->short_ref_count; i++){
4313 Picture *pic= h->short_ref[i];
4314 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4320 * print long term list
4322 static void print_long_term(H264Context *h) {
/* Debug helper: dumps all 16 long-term reference slots when MMCO debugging
 * is enabled (the per-slot NULL check is in a line missing from this dump). */
4324 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4325 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4326 for(i = 0; i < 16; i++){
4327 Picture *pic= h->long_ref[i];
4329 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4336 * Executes the reference picture marking (memory management control operations).
4338 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
/* Executes the decoded memory-management control operations (MMCO) against
 * the short-term/long-term reference lists, then inserts the current picture
 * as a reference (short-term unless an MMCO made it long-term).
 * NOTE(review): dump is missing interior lines (case labels such as
 * MMCO_LONG/MMCO_RESET, breaks, closing braces); comments cover visible code. */
4339 MpegEncContext * const s = &h->s;
4341 int current_is_long=0;
4344 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4345 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4347 for(i=0; i<mmco_count; i++){
4348 if(s->avctx->debug&FF_DEBUG_MMCO)
4349 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4351 switch(mmco[i].opcode){
/* mark a short-term picture as unused for reference */
4352 case MMCO_SHORT2UNUSED:
4353 pic= remove_short(h, mmco[i].short_frame_num);
4355 unreference_pic(h, pic);
4356 else if(s->avctx->debug&FF_DEBUG_MMCO)
4357 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
/* move a short-term picture into a long-term slot (evicting any occupant) */
4359 case MMCO_SHORT2LONG:
4360 pic= remove_long(h, mmco[i].long_index);
4361 if(pic) unreference_pic(h, pic);
4363 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4364 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4365 h->long_ref_count++;
/* mark a long-term picture as unused for reference */
4367 case MMCO_LONG2UNUSED:
4368 pic= remove_long(h, mmco[i].long_index);
4370 unreference_pic(h, pic);
4371 else if(s->avctx->debug&FF_DEBUG_MMCO)
4372 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (MMCO_LONG, per missing case label): make the current picture long-term */
4375 pic= remove_long(h, mmco[i].long_index);
4376 if(pic) unreference_pic(h, pic);
4378 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4379 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4380 h->long_ref_count++;
4384 case MMCO_SET_MAX_LONG:
4385 assert(mmco[i].long_index <= 16);
4386 // just remove the long term which index is greater than new max
4387 for(j = mmco[i].long_index; j<16; j++){
4388 pic = remove_long(h, j);
4389 if (pic) unreference_pic(h, pic);
/* (MMCO_RESET, per missing case label): drop every reference picture */
4393 while(h->short_ref_count){
4394 pic= remove_short(h, h->short_ref[0]->frame_num);
4395 unreference_pic(h, pic);
4397 for(j = 0; j < 16; j++) {
4398 pic= remove_long(h, j);
4399 if(pic) unreference_pic(h, pic);
/* after the MMCOs: insert the current picture as a short-term reference
 * (unless it was turned long-term above) at the head of short_ref */
4406 if(!current_is_long){
4407 pic= remove_short(h, s->current_picture_ptr->frame_num);
4409 unreference_pic(h, pic);
4410 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4413 if(h->short_ref_count)
4414 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4416 h->short_ref[0]= s->current_picture_ptr;
4417 h->short_ref[0]->long_ref=0;
4418 h->short_ref_count++;
4421 print_short_term(h);
4426 static int decode_ref_pic_marking(H264Context *h){
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[]:
 * IDR slices get the no_output_of_prior_pics/long_term_reference flags;
 * otherwise, adaptive marking reads an MMCO list, and with sliding-window
 * marking an implicit SHORT2UNUSED op is synthesized when the reference
 * buffer is full.
 * NOTE(review): dump is missing interior lines (idr() call, mmco_index
 * bookkeeping, closing braces); comments cover visible code only. */
4427 MpegEncContext * const s = &h->s;
4430 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4431 s->broken_link= get_bits1(&s->gb) -1;
/* long_term_reference_flag, remapped so -1 == "not long-term" */
4432 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4433 if(h->mmco[0].long_index == -1)
4436 h->mmco[0].opcode= MMCO_LONG;
4440 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4441 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4442 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4444 h->mmco[i].opcode= opcode;
4445 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute frame_num, wrapped to range */
4446 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4447 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4448 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4452 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4453 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4454 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4455 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4460 if(opcode > MMCO_LONG){
4461 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4464 if(opcode == MMCO_END)
/* sliding window: evict the oldest short-term ref when the buffer is full */
4469 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4471 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4472 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4473 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
4483 static int init_poc(H264Context *h){
/* Computes the picture order count (POC) of the current picture for the
 * three poc_type modes of the SPS (0: lsb/msb, 1: delta-cycle table,
 * 2: derived from frame_num), then stores the per-field and frame POCs
 * into the current picture.
 * NOTE(review): dump is missing interior lines (some else branches,
 * field_poc[0] assignment for type 0, closing braces). */
4484 MpegEncContext * const s = &h->s;
4485 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4488 if(h->nal_unit_type == NAL_IDR_SLICE){
4489 h->frame_num_offset= 0;
/* frame_num wrapped -> advance the offset by one wrap period */
4491 if(h->frame_num < h->prev_frame_num)
4492 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4494 h->frame_num_offset= h->prev_frame_num_offset;
4497 if(h->sps.poc_type==0){
4498 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4500 if(h->nal_unit_type == NAL_IDR_SLICE){
/* spec 8.2.1.1: infer poc_msb from lsb wrap direction */
4505 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4506 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4507 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4508 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4510 h->poc_msb = h->prev_poc_msb;
4511 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4513 field_poc[1] = h->poc_msb + h->poc_lsb;
4514 if(s->picture_structure == PICT_FRAME)
4515 field_poc[1] += h->delta_poc_bottom;
4516 }else if(h->sps.poc_type==1){
4517 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4520 if(h->sps.poc_cycle_length != 0)
4521 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures sit between cycle entries */
4525 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4528 expected_delta_per_poc_cycle = 0;
4529 for(i=0; i < h->sps.poc_cycle_length; i++)
4530 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4532 if(abs_frame_num > 0){
4533 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4534 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4536 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4537 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4538 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4542 if(h->nal_ref_idc == 0)
4543 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4545 field_poc[0] = expectedpoc + h->delta_poc[0];
4546 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4548 if(s->picture_structure == PICT_FRAME)
4549 field_poc[1] += h->delta_poc[1];
/* poc_type == 2 (per missing else): POC derived directly from frame_num */
4552 if(h->nal_unit_type == NAL_IDR_SLICE){
4555 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4556 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* publish the computed POCs on the current picture */
4562 if(s->picture_structure != PICT_BOTTOM_FIELD)
4563 s->current_picture_ptr->field_poc[0]= field_poc[0];
4564 if(s->picture_structure != PICT_TOP_FIELD)
4565 s->current_picture_ptr->field_poc[1]= field_poc[1];
4566 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4567 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4573 * decodes a slice header.
4574 * this will allso call MPV_common_init() and frame_start() as needed
4576 static int decode_slice_header(H264Context *h){
/* Parses one slice header: slice type, PPS/SPS selection, picture geometry
 * (with lazy MPV_common_init on first use), frame_num/POC fields, reference
 * counts and list reordering, weighted prediction tables, ref pic marking,
 * QP and deblocking parameters. Also calls frame_start() for the first slice
 * of a picture.
 * NOTE(review): this dump is missing many interior lines (error returns,
 * else branches, closing braces); comments describe visible code only. */
4577 MpegEncContext * const s = &h->s;
4578 int first_mb_in_slice, pps_id;
4579 int num_ref_idx_active_override_flag;
4580 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4582 int default_ref_list_done = 0;
/* nal_ref_idc == 0 means this picture is never used as a reference */
4584 s->current_picture.reference= h->nal_ref_idc != 0;
4585 s->dropable= h->nal_ref_idc == 0;
4587 first_mb_in_slice= get_ue_golomb(&s->gb);
4589 slice_type= get_ue_golomb(&s->gb);
4591 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type values 5..9 mean "fixed for the whole picture" */
4596 h->slice_type_fixed=1;
4598 h->slice_type_fixed=0;
4600 slice_type= slice_type_map[ slice_type ];
/* for I slices or repeated slice types the default ref list needs no rebuild */
4601 if (slice_type == I_TYPE
4602 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4603 default_ref_list_done = 1;
4605 h->slice_type= slice_type;
4607 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4609 pps_id= get_ue_golomb(&s->gb);
4611 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4614 h->pps= h->pps_buffer[pps_id];
/* slice_group_count==0 marks an unpopulated PPS slot */
4615 if(h->pps.slice_group_count == 0){
4616 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4620 h->sps= h->sps_buffer[ h->pps.sps_id ];
4621 if(h->sps.log2_max_frame_num == 0){
4622 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
/* dequant tables depend on the PPS; rebuild when the PPS changed */
4626 if(h->dequant_coeff_pps != pps_id){
4627 h->dequant_coeff_pps = pps_id;
4628 init_dequant_tables(h);
4631 s->mb_width= h->sps.mb_width;
4632 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4634 h->b_stride= s->mb_width*4;
4635 h->b8_stride= s->mb_width*2;
/* apply SPS cropping to get the display dimensions */
4637 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4638 if(h->sps.frame_mbs_only_flag)
4639 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4641 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4643 if (s->context_initialized
4644 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4648 if (!s->context_initialized) {
4649 if (MPV_common_init(s) < 0)
/* if the IDCT permutes coefficients, bake that permutation into the scans */
4652 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4653 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4654 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4657 for(i=0; i<16; i++){
4658 #define T(x) (x>>2) | ((x<<2) & 0xF)
4659 h->zigzag_scan[i] = T(zigzag_scan[i]);
4660 h-> field_scan[i] = T( field_scan[i]);
4664 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4665 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4666 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4667 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4668 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4671 for(i=0; i<64; i++){
4672 #define T(x) (x>>3) | ((x&7)<<3)
4673 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4674 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4675 h->field_scan8x8[i] = T(field_scan8x8[i]);
4676 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* lossless (transform-bypass) blocks at qp 0 use the unpermuted scans */
4680 if(h->sps.transform_bypass){ //FIXME same ugly
4681 h->zigzag_scan_q0 = zigzag_scan;
4682 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4683 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4684 h->field_scan_q0 = field_scan;
4685 h->field_scan8x8_q0 = field_scan8x8;
4686 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4688 h->zigzag_scan_q0 = h->zigzag_scan;
4689 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4690 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4691 h->field_scan_q0 = h->field_scan;
4692 h->field_scan8x8_q0 = h->field_scan8x8;
4693 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4698 s->avctx->width = s->width;
4699 s->avctx->height = s->height;
4700 s->avctx->sample_aspect_ratio= h->sps.sar;
4701 if(!s->avctx->sample_aspect_ratio.den)
4702 s->avctx->sample_aspect_ratio.den = 1;
4704 if(h->sps.timing_info_present_flag){
4705 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* old x264 builds wrote a wrong time_scale; compensate */
4706 if(h->x264_build > 0 && h->x264_build < 44)
4707 s->avctx->time_base.den *= 2;
4708 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4709 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* first slice of the picture: start a new frame */
4713 if(h->slice_num == 0){
4714 if(frame_start(h) < 0)
4718 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4719 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4722 h->mb_aff_frame = 0;
4723 if(h->sps.frame_mbs_only_flag){
4724 s->picture_structure= PICT_FRAME;
4726 if(get_bits1(&s->gb)) { //field_pic_flag
4727 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4728 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4730 s->picture_structure= PICT_FRAME;
4731 h->mb_aff_frame = h->sps.mb_aff;
/* in MBAFF a "row" of first_mb_in_slice covers a macroblock pair */
4735 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4736 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4737 if(s->mb_y >= s->mb_height){
4741 if(s->picture_structure==PICT_FRAME){
4742 h->curr_pic_num= h->frame_num;
4743 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4745 h->curr_pic_num= 2*h->frame_num;
4746 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4749 if(h->nal_unit_type == NAL_IDR_SLICE){
4750 get_ue_golomb(&s->gb); /* idr_pic_id */
4753 if(h->sps.poc_type==0){
4754 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4756 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4757 h->delta_poc_bottom= get_se_golomb(&s->gb);
4761 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4762 h->delta_poc[0]= get_se_golomb(&s->gb);
4764 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4765 h->delta_poc[1]= get_se_golomb(&s->gb);
4770 if(h->pps.redundant_pic_cnt_present){
4771 h->redundant_pic_count= get_ue_golomb(&s->gb);
4774 //set defaults, might be overriden a few line later
4775 h->ref_count[0]= h->pps.ref_count[0];
4776 h->ref_count[1]= h->pps.ref_count[1];
4778 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4779 if(h->slice_type == B_TYPE){
4780 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4781 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4782 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4784 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4786 if(num_ref_idx_active_override_flag){
4787 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4788 if(h->slice_type==B_TYPE)
4789 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4791 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4792 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4798 if(!default_ref_list_done){
4799 fill_default_ref_list(h);
4802 if(decode_ref_pic_list_reordering(h) < 0)
/* explicit weighting for P/SP (weighted_pred) or B (bipred idc 1);
 * implicit weighting for B with bipred idc 2 */
4805 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4806 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4807 pred_weight_table(h);
4808 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4809 implicit_weight_table(h);
4813 if(s->current_picture.reference)
4814 decode_ref_pic_marking(h);
4817 fill_mbaff_ref_list(h);
4819 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4820 h->cabac_init_idc = get_ue_golomb(&s->gb);
4822 h->last_qscale_diff = 0;
4823 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4824 if(s->qscale<0 || s->qscale>51){
4825 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4828 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4829 //FIXME qscale / qp ... stuff
4830 if(h->slice_type == SP_TYPE){
4831 get_bits1(&s->gb); /* sp_for_switch_flag */
4833 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4834 get_se_golomb(&s->gb); /* slice_qs_delta */
4837 h->deblocking_filter = 1;
4838 h->slice_alpha_c0_offset = 0;
4839 h->slice_beta_offset = 0;
4840 if( h->pps.deblocking_filter_parameters_present ) {
4841 h->deblocking_filter= get_ue_golomb(&s->gb);
/* bitstream: 0=on 1=off; internal flag uses the opposite meaning */
4842 if(h->deblocking_filter < 2)
4843 h->deblocking_filter^= 1; // 1<->0
4845 if( h->deblocking_filter ) {
4846 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4847 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* honor the user's skip_loop_filter discard level */
4850 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4851 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4852 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4853 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4854 h->deblocking_filter= 0;
/* FMO slice groups are not supported; the "?" bit count was never filled in
 * (this line is presumably inside a disabled block in the full source) */
4857 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4858 slice_group_change_cycle= get_bits(&s->gb, ?);
4863 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4864 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4866 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4867 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4869 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4871 av_get_pict_type_char(h->slice_type),
4872 pps_id, h->frame_num,
4873 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4874 h->ref_count[0], h->ref_count[1],
4876 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4878 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* CODEC_FLAG2_FAST on non-reference frames: cheaper 2-tap qpel filters */
4882 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4883 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4884 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4886 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4887 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
4896 static inline int get_level_prefix(GetBitContext *gb){
/* Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 in the bitstream, consumed including that 1 bit. */
4900 OPEN_READER(re, gb);
4901 UPDATE_CACHE(re, gb);
4902 buf=GET_CACHE(re, gb);
/* position of the leading 1 bit in the 32-bit cache */
4904 log= 32 - av_log2(buf);
4906 print_bin(buf>>(32-log), log);
4907 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4910 LAST_SKIP_BITS(re, gb, log);
4911 CLOSE_READER(re, gb);
4916 static inline int get_dct8x8_allowed(H264Context *h){
/* Checks whether the 8x8 transform may be used for the current macroblock:
 * disallowed when any sub-macroblock partition is smaller than 8x8, or when
 * a direct sub-partition is used without direct_8x8_inference.
 * NOTE(review): the loop header and return are in lines missing from dump. */
4919 if(!IS_SUB_8X8(h->sub_mb_type[i])
4920 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4927 * decodes a residual block.
4928 * @param n block index
4929 * @param scantable scantable
4930 * @param max_coeff number of coefficients in the block
4931 * @return <0 if an error occured
4933 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
/* CAVLC residual decoding for one block: reads coeff_token (count +
 * trailing ones), the level values, total_zeros and run_before fields, and
 * scatters the dequantized levels into `block` via `scantable`. `qmul` is
 * NULL-like only for the DC path (levels stored raw at 5034).
 * NOTE(review): dump is missing interior lines (level[] declaration, several
 * else/brace lines); comments cover visible code only. */
4934 MpegEncContext * const s = &h->s;
/* selects which coeff_token VLC table to use based on predicted nnz */
4935 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4937 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4939 //FIXME put trailing_onex into the context
4941 if(n == CHROMA_DC_BLOCK_INDEX){
4942 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4943 total_coeff= coeff_token>>2;
4945 if(n == LUMA_DC_BLOCK_INDEX){
4946 total_coeff= pred_non_zero_count(h, 0);
4947 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4948 total_coeff= coeff_token>>2;
4950 total_coeff= pred_non_zero_count(h, n);
4951 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4952 total_coeff= coeff_token>>2;
4953 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4957 //FIXME set last_non_zero?
/* low 2 bits of coeff_token = number of trailing +/-1 coefficients */
4962 trailing_ones= coeff_token&3;
4963 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4964 assert(total_coeff<=16);
/* trailing ones: one sign bit each */
4966 for(i=0; i<trailing_ones; i++){
4967 level[i]= 1 - 2*get_bits1(gb);
4971 int level_code, mask;
4972 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4973 int prefix= get_level_prefix(gb);
4975 //first coefficient has suffix_length equal to 0 or 1
4976 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4978 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4980 level_code= (prefix<<suffix_length); //part
4981 }else if(prefix==14){
4983 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4985 level_code= prefix + get_bits(gb, 4); //part
4986 }else if(prefix==15){
4987 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4988 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4990 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* with <3 trailing ones the first level skips the +/-1 values */
4994 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level: even->positive, odd->negative */
4999 mask= -(level_code&1);
5000 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5003 //remaining coefficients have suffix_length > 0
5004 for(;i<total_coeff;i++) {
5005 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5006 prefix = get_level_prefix(gb);
5008 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5009 }else if(prefix==15){
5010 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5012 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5015 mask= -(level_code&1);
5016 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adapt suffix_length as levels grow (increment is in a missing line) */
5017 if(level_code > suffix_limit[suffix_length])
/* block full: no total_zeros field is coded */
5022 if(total_coeff == max_coeff)
5025 if(n == CHROMA_DC_BLOCK_INDEX)
5026 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5028 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place coefficients from the highest scan position downwards;
 * first branch (raw levels) is the qmul==NULL/DC path */
5031 coeff_num = zeros_left + total_coeff - 1;
5032 j = scantable[coeff_num];
5034 block[j] = level[0];
5035 for(i=1;i<total_coeff;i++) {
5038 else if(zeros_left < 7){
5039 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5041 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5043 zeros_left -= run_before;
5044 coeff_num -= 1 + run_before;
5045 j= scantable[ coeff_num ];
/* dequantizing path: scale each level by qmul with rounding */
5050 block[j] = (level[0] * qmul[j] + 32)>>6;
5051 for(i=1;i<total_coeff;i++) {
5054 else if(zeros_left < 7){
5055 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5057 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5059 zeros_left -= run_before;
5060 coeff_num -= 1 + run_before;
5061 j= scantable[ coeff_num ];
5063 block[j]= (level[i] * qmul[j] + 32)>>6;
/* more runs consumed than zeros available => corrupt bitstream */
5068 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
5075 static void predict_field_decoding_flag(H264Context *h){
/* For a skipped MBAFF pair with no explicit mb_field_decoding_flag: predict
 * frame/field mode from the left neighbour if it belongs to this slice,
 * else from the top neighbour, else default to frame (0). */
5076 MpegEncContext * const s = &h->s;
5077 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5078 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5079 ? s->current_picture.mb_type[mb_xy-1]
5080 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5081 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5083 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5087 * decodes a P_SKIP or B_SKIP macroblock
5089 static void decode_mb_skip(H264Context *h){
/* Reconstructs a skipped macroblock: B_SKIP uses direct-mode motion
 * prediction, P_SKIP uses the pskip motion predictor with ref index 0;
 * then motion is written back and per-MB tables updated.
 * NOTE(review): dump is missing interior lines (mb_type init, else branch,
 * closing braces); comments cover visible code only. */
5090 MpegEncContext * const s = &h->s;
5091 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* a skipped MB has no coded coefficients */
5094 memset(h->non_zero_count[mb_xy], 0, 16);
5095 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5098 mb_type|= MB_TYPE_INTERLACED;
5100 if( h->slice_type == B_TYPE )
5102 // just for fill_caches. pred_direct_motion will set the real mb_type
5103 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5105 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5106 pred_direct_motion(h, &mb_type);
5107 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path (per missing else): 16x16 list-0 prediction */
5112 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5114 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5115 pred_pskip_motion(h, &mx, &my);
5116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5117 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5120 write_back_motion(h, mb_type);
5121 s->current_picture.mb_type[mb_xy]= mb_type;
5122 s->current_picture.qscale_table[mb_xy]= s->qscale;
5123 h->slice_table[ mb_xy ]= h->slice_num;
5124 h->prev_mb_skipped= 1;
5128 * decodes a macroblock
5129 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5131 static int decode_mb_cavlc(H264Context *h){
5132 MpegEncContext * const s = &h->s;
5133 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5134 int mb_type, partition_count, cbp;
5135 int dct8x8_allowed= h->pps.transform_8x8_mode;
5137 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5139 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5140 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5142 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5143 if(s->mb_skip_run==-1)
5144 s->mb_skip_run= get_ue_golomb(&s->gb);
5146 if (s->mb_skip_run--) {
5147 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5148 if(s->mb_skip_run==0)
5149 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5151 predict_field_decoding_flag(h);
5158 if( (s->mb_y&1) == 0 )
5159 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5161 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5163 h->prev_mb_skipped= 0;
5165 mb_type= get_ue_golomb(&s->gb);
5166 if(h->slice_type == B_TYPE){
5168 partition_count= b_mb_type_info[mb_type].partition_count;
5169 mb_type= b_mb_type_info[mb_type].type;
5172 goto decode_intra_mb;
5174 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5176 partition_count= p_mb_type_info[mb_type].partition_count;
5177 mb_type= p_mb_type_info[mb_type].type;
5180 goto decode_intra_mb;
5183 assert(h->slice_type == I_TYPE);
5186 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5190 cbp= i_mb_type_info[mb_type].cbp;
5191 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5192 mb_type= i_mb_type_info[mb_type].type;
5196 mb_type |= MB_TYPE_INTERLACED;
5198 h->slice_table[ mb_xy ]= h->slice_num;
5200 if(IS_INTRA_PCM(mb_type)){
5203 // we assume these blocks are very rare so we dont optimize it
5204 align_get_bits(&s->gb);
5206 // The pixels are stored in the same order as levels in h->mb array.
5207 for(y=0; y<16; y++){
5208 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5209 for(x=0; x<16; x++){
5210 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5211 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5215 const int index= 256 + 4*(y&3) + 32*(y>>2);
5217 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5218 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5222 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5224 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5225 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5229 // In deblocking, the quantizer is 0
5230 s->current_picture.qscale_table[mb_xy]= 0;
5231 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5232 // All coeffs are present
5233 memset(h->non_zero_count[mb_xy], 16, 16);
5235 s->current_picture.mb_type[mb_xy]= mb_type;
5240 h->ref_count[0] <<= 1;
5241 h->ref_count[1] <<= 1;
5244 fill_caches(h, mb_type, 0);
5247 if(IS_INTRA(mb_type)){
5248 // init_top_left_availability(h);
5249 if(IS_INTRA4x4(mb_type)){
5252 if(dct8x8_allowed && get_bits1(&s->gb)){
5253 mb_type |= MB_TYPE_8x8DCT;
5257 // fill_intra4x4_pred_table(h);
5258 for(i=0; i<16; i+=di){
5259 int mode= pred_intra_mode(h, i);
5261 if(!get_bits1(&s->gb)){
5262 const int rem_mode= get_bits(&s->gb, 3);
5263 mode = rem_mode + (rem_mode >= mode);
5267 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5269 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5271 write_back_intra_pred_mode(h);
5272 if( check_intra4x4_pred_mode(h) < 0)
5275 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5276 if(h->intra16x16_pred_mode < 0)
5279 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5281 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5282 if(h->chroma_pred_mode < 0)
5284 }else if(partition_count==4){
5285 int i, j, sub_partition_count[4], list, ref[2][4];
5287 if(h->slice_type == B_TYPE){
5289 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5290 if(h->sub_mb_type[i] >=13){
5291 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5294 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5295 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5297 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5298 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5299 pred_direct_motion(h, &mb_type);
5300 h->ref_cache[0][scan8[4]] =
5301 h->ref_cache[1][scan8[4]] =
5302 h->ref_cache[0][scan8[12]] =
5303 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5306 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5308 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5309 if(h->sub_mb_type[i] >=4){
5310 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5313 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5314 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5318 for(list=0; list<2; list++){
5319 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5320 if(ref_count == 0) continue;
5322 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5323 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5324 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5333 dct8x8_allowed = get_dct8x8_allowed(h);
5335 for(list=0; list<2; list++){
5336 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5337 if(ref_count == 0) continue;
5340 if(IS_DIRECT(h->sub_mb_type[i])) {
5341 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5344 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5345 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5347 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5348 const int sub_mb_type= h->sub_mb_type[i];
5349 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5350 for(j=0; j<sub_partition_count[i]; j++){
5352 const int index= 4*i + block_width*j;
5353 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5354 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5355 mx += get_se_golomb(&s->gb);
5356 my += get_se_golomb(&s->gb);
5357 tprintf("final mv:%d %d\n", mx, my);
5359 if(IS_SUB_8X8(sub_mb_type)){
5360 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5361 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5362 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5363 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5364 }else if(IS_SUB_8X4(sub_mb_type)){
5365 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5366 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5367 }else if(IS_SUB_4X8(sub_mb_type)){
5368 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5369 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5371 assert(IS_SUB_4X4(sub_mb_type));
5372 mv_cache[ 0 ][0]= mx;
5373 mv_cache[ 0 ][1]= my;
5377 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5383 }else if(IS_DIRECT(mb_type)){
5384 pred_direct_motion(h, &mb_type);
5385 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5387 int list, mx, my, i;
5388 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5389 if(IS_16X16(mb_type)){
5390 for(list=0; list<2; list++){
5391 if(h->ref_count[list]>0){
5392 if(IS_DIR(mb_type, 0, list)){
5393 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5394 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5396 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5399 for(list=0; list<2; list++){
5400 if(IS_DIR(mb_type, 0, list)){
5401 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5402 mx += get_se_golomb(&s->gb);
5403 my += get_se_golomb(&s->gb);
5404 tprintf("final mv:%d %d\n", mx, my);
5406 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5408 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5411 else if(IS_16X8(mb_type)){
5412 for(list=0; list<2; list++){
5413 if(h->ref_count[list]>0){
5415 if(IS_DIR(mb_type, i, list)){
5416 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5417 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5419 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5423 for(list=0; list<2; list++){
5425 if(IS_DIR(mb_type, i, list)){
5426 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5427 mx += get_se_golomb(&s->gb);
5428 my += get_se_golomb(&s->gb);
5429 tprintf("final mv:%d %d\n", mx, my);
5431 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5433 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5437 assert(IS_8X16(mb_type));
5438 for(list=0; list<2; list++){
5439 if(h->ref_count[list]>0){
5441 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5442 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5443 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5445 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5449 for(list=0; list<2; list++){
5451 if(IS_DIR(mb_type, i, list)){
5452 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5453 mx += get_se_golomb(&s->gb);
5454 my += get_se_golomb(&s->gb);
5455 tprintf("final mv:%d %d\n", mx, my);
5457 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5459 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5465 if(IS_INTER(mb_type))
5466 write_back_motion(h, mb_type);
5468 if(!IS_INTRA16x16(mb_type)){
5469 cbp= get_ue_golomb(&s->gb);
5471 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5475 if(IS_INTRA4x4(mb_type))
5476 cbp= golomb_to_intra4x4_cbp[cbp];
5478 cbp= golomb_to_inter_cbp[cbp];
5482 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5483 if(get_bits1(&s->gb))
5484 mb_type |= MB_TYPE_8x8DCT;
5486 s->current_picture.mb_type[mb_xy]= mb_type;
5488 if(cbp || IS_INTRA16x16(mb_type)){
5489 int i8x8, i4x4, chroma_idx;
5490 int chroma_qp, dquant;
5491 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5492 const uint8_t *scan, *scan8x8, *dc_scan;
5494 // fill_non_zero_count_cache(h);
5496 if(IS_INTERLACED(mb_type)){
5497 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5498 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5499 dc_scan= luma_dc_field_scan;
5501 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5502 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5503 dc_scan= luma_dc_zigzag_scan;
5506 dquant= get_se_golomb(&s->gb);
5508 if( dquant > 25 || dquant < -26 ){
5509 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5513 s->qscale += dquant;
5514 if(((unsigned)s->qscale) > 51){
5515 if(s->qscale<0) s->qscale+= 52;
5516 else s->qscale-= 52;
5519 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5520 if(IS_INTRA16x16(mb_type)){
5521 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5522 return -1; //FIXME continue if partitioned and other return -1 too
5525 assert((cbp&15) == 0 || (cbp&15) == 15);
5528 for(i8x8=0; i8x8<4; i8x8++){
5529 for(i4x4=0; i4x4<4; i4x4++){
5530 const int index= i4x4 + 4*i8x8;
5531 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5537 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5540 for(i8x8=0; i8x8<4; i8x8++){
5541 if(cbp & (1<<i8x8)){
5542 if(IS_8x8DCT(mb_type)){
5543 DCTELEM *buf = &h->mb[64*i8x8];
5545 for(i4x4=0; i4x4<4; i4x4++){
5546 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5547 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5550 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5551 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5553 for(i4x4=0; i4x4<4; i4x4++){
5554 const int index= i4x4 + 4*i8x8;
5556 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5562 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5563 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5569 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5570 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5576 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5577 for(i4x4=0; i4x4<4; i4x4++){
5578 const int index= 16 + 4*chroma_idx + i4x4;
5579 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5585 uint8_t * const nnz= &h->non_zero_count_cache[0];
5586 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5587 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5590 uint8_t * const nnz= &h->non_zero_count_cache[0];
5591 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5592 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5593 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5595 s->current_picture.qscale_table[mb_xy]= s->qscale;
5596 write_back_non_zero_count(h);
5599 h->ref_count[0] >>= 1;
5600 h->ref_count[1] >>= 1;
5606 static int decode_cabac_field_decoding_flag(H264Context *h) {
5607 MpegEncContext * const s = &h->s;
5608 const int mb_x = s->mb_x;
5609 const int mb_y = s->mb_y & ~1;
5610 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5611 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5613 unsigned int ctx = 0;
5615 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5618 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5622 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5625 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5626 uint8_t *state= &h->cabac_state[ctx_base];
5630 MpegEncContext * const s = &h->s;
5631 const int mba_xy = h->left_mb_xy[0];
5632 const int mbb_xy = h->top_mb_xy;
5634 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5636 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5638 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5639 return 0; /* I4x4 */
5642 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5643 return 0; /* I4x4 */
5646 if( get_cabac_terminate( &h->cabac ) )
5647 return 25; /* PCM */
5649 mb_type = 1; /* I16x16 */
5650 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5651 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5652 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5653 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5654 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5658 static int decode_cabac_mb_type( H264Context *h ) {
5659 MpegEncContext * const s = &h->s;
5661 if( h->slice_type == I_TYPE ) {
5662 return decode_cabac_intra_mb_type(h, 3, 1);
5663 } else if( h->slice_type == P_TYPE ) {
5664 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5666 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5667 /* P_L0_D16x16, P_8x8 */
5668 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5670 /* P_L0_D8x16, P_L0_D16x8 */
5671 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5674 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5676 } else if( h->slice_type == B_TYPE ) {
5677 const int mba_xy = h->left_mb_xy[0];
5678 const int mbb_xy = h->top_mb_xy;
5682 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5684 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5687 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5688 return 0; /* B_Direct_16x16 */
5690 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5691 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5694 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5695 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5696 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5697 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5699 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5700 else if( bits == 13 ) {
5701 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5702 } else if( bits == 14 )
5703 return 11; /* B_L1_L0_8x16 */
5704 else if( bits == 15 )
5705 return 22; /* B_8x8 */
5707 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5708 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5710 /* TODO SI/SP frames? */
5715 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5716 MpegEncContext * const s = &h->s;
5720 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5721 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5724 && h->slice_table[mba_xy] == h->slice_num
5725 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5726 mba_xy += s->mb_stride;
5728 mbb_xy = mb_xy - s->mb_stride;
5730 && h->slice_table[mbb_xy] == h->slice_num
5731 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5732 mbb_xy -= s->mb_stride;
5734 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5736 int mb_xy = mb_x + mb_y*s->mb_stride;
5738 mbb_xy = mb_xy - s->mb_stride;
5741 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5743 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5746 if( h->slice_type == B_TYPE )
5748 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5751 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5754 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5757 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5758 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5759 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5761 if( mode >= pred_mode )
5767 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5768 const int mba_xy = h->left_mb_xy[0];
5769 const int mbb_xy = h->top_mb_xy;
5773 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5774 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5777 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5780 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5783 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5785 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5791 static const uint8_t block_idx_x[16] = {
5792 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5794 static const uint8_t block_idx_y[16] = {
5795 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5797 static const uint8_t block_idx_xy[4][4] = {
5804 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5809 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5811 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5814 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5819 x = block_idx_x[4*i8x8];
5820 y = block_idx_y[4*i8x8];
5824 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5825 cbp_a = h->left_cbp;
5826 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5832 /* No need to test for skip as we put 0 for skip block */
5833 /* No need to test for IPCM as we put 1 for IPCM block */
5835 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5836 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5841 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5842 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5846 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5852 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5856 cbp_a = (h->left_cbp>>4)&0x03;
5857 cbp_b = (h-> top_cbp>>4)&0x03;
5860 if( cbp_a > 0 ) ctx++;
5861 if( cbp_b > 0 ) ctx += 2;
5862 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5866 if( cbp_a == 2 ) ctx++;
5867 if( cbp_b == 2 ) ctx += 2;
5868 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5870 static int decode_cabac_mb_dqp( H264Context *h) {
5871 MpegEncContext * const s = &h->s;
5877 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5879 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5881 if( h->last_qscale_diff != 0 )
5884 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5890 if(val > 102) //prevent infinite loop
5897 return -(val + 1)/2;
5899 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5900 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5902 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5904 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5908 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5910 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5911 return 0; /* B_Direct_8x8 */
5912 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5913 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5915 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5916 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5917 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5920 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5921 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5925 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5926 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5929 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5930 int refa = h->ref_cache[list][scan8[n] - 1];
5931 int refb = h->ref_cache[list][scan8[n] - 8];
5935 if( h->slice_type == B_TYPE) {
5936 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5938 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5947 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5957 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5958 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5959 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5960 int ctxbase = (l == 0) ? 40 : 47;
5965 else if( amvd > 32 )
5970 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5975 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5983 while( get_cabac_bypass( &h->cabac ) ) {
5988 if( get_cabac_bypass( &h->cabac ) )
5992 return get_cabac_bypass_sign( &h->cabac, -mvd );
5995 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6000 nza = h->left_cbp&0x100;
6001 nzb = h-> top_cbp&0x100;
6002 } else if( cat == 1 || cat == 2 ) {
6003 nza = h->non_zero_count_cache[scan8[idx] - 1];
6004 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6005 } else if( cat == 3 ) {
6006 nza = (h->left_cbp>>(6+idx))&0x01;
6007 nzb = (h-> top_cbp>>(6+idx))&0x01;
6010 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6011 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6020 return ctx + 4 * cat;
6023 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
6024 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6025 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6026 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6027 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6030 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6031 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6032 static const int significant_coeff_flag_offset[2][6] = {
6033 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6034 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6036 static const int last_coeff_flag_offset[2][6] = {
6037 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6038 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6040 static const int coeff_abs_level_m1_offset[6] = {
6041 227+0, 227+10, 227+20, 227+30, 227+39, 426
6043 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6044 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6045 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6046 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6047 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6048 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6049 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6050 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6051 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6057 int coeff_count = 0;
6060 int abslevelgt1 = 0;
6062 uint8_t *significant_coeff_ctx_base;
6063 uint8_t *last_coeff_ctx_base;
6064 uint8_t *abs_level_m1_ctx_base;
6067 #define CABAC_ON_STACK
6069 #ifdef CABAC_ON_STACK
6072 cc.range = h->cabac.range;
6073 cc.low = h->cabac.low;
6074 cc.bytestream= h->cabac.bytestream;
6076 #define CC &h->cabac
6080 /* cat: 0-> DC 16x16 n = 0
6081 * 1-> AC 16x16 n = luma4x4idx
6082 * 2-> Luma4x4 n = luma4x4idx
6083 * 3-> DC Chroma n = iCbCr
6084 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6085 * 5-> Luma8x8 n = 4 * luma8x8idx
6088 /* read coded block flag */
6090 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6091 if( cat == 1 || cat == 2 )
6092 h->non_zero_count_cache[scan8[n]] = 0;
6094 h->non_zero_count_cache[scan8[16+n]] = 0;
6095 #ifdef CABAC_ON_STACK
6096 h->cabac.range = cc.range ;
6097 h->cabac.low = cc.low ;
6098 h->cabac.bytestream= cc.bytestream;
6104 significant_coeff_ctx_base = h->cabac_state
6105 + significant_coeff_flag_offset[MB_FIELD][cat];
6106 last_coeff_ctx_base = h->cabac_state
6107 + last_coeff_flag_offset[MB_FIELD][cat];
6108 abs_level_m1_ctx_base = h->cabac_state
6109 + coeff_abs_level_m1_offset[cat];
6112 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6113 for(last= 0; last < coefs; last++) { \
6114 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6115 if( get_cabac( CC, sig_ctx )) { \
6116 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6117 index[coeff_count++] = last; \
6118 if( get_cabac( CC, last_ctx ) ) { \
6124 if( last == max_coeff -1 ) {\
6125 index[coeff_count++] = last;\
6127 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6128 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
6129 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6131 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6133 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6135 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6138 assert(coeff_count > 0);
6141 h->cbp_table[mb_xy] |= 0x100;
6142 else if( cat == 1 || cat == 2 )
6143 h->non_zero_count_cache[scan8[n]] = coeff_count;
6145 h->cbp_table[mb_xy] |= 0x40 << n;
6147 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6150 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
6153 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6154 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6155 int j= scantable[index[coeff_count]];
6157 if( get_cabac( CC, ctx ) == 0 ) {
6159 block[j] = get_cabac_bypass_sign( CC, -1);
6161 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6167 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6168 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
6172 if( coeff_abs >= 15 ) {
6174 while( get_cabac_bypass( CC ) ) {
6180 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6186 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6187 else block[j] = coeff_abs;
6189 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6190 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6196 #ifdef CABAC_ON_STACK
6197 h->cabac.range = cc.range ;
6198 h->cabac.low = cc.low ;
6199 h->cabac.bytestream= cc.bytestream;
6204 static void inline compute_mb_neighbors(H264Context *h)
6206 MpegEncContext * const s = &h->s;
6207 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6208 h->top_mb_xy = mb_xy - s->mb_stride;
6209 h->left_mb_xy[0] = mb_xy - 1;
6211 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6212 const int top_pair_xy = pair_xy - s->mb_stride;
6213 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6214 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6215 const int curr_mb_frame_flag = !MB_FIELD;
6216 const int bottom = (s->mb_y & 1);
6218 ? !curr_mb_frame_flag // bottom macroblock
6219 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6221 h->top_mb_xy -= s->mb_stride;
6223 if (left_mb_frame_flag != curr_mb_frame_flag) {
6224 h->left_mb_xy[0] = pair_xy - 1;
6231 * decodes a macroblock
6232 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6234 static int decode_mb_cabac(H264Context *h) {
6235 MpegEncContext * const s = &h->s;
6236 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6237 int mb_type, partition_count, cbp = 0;
6238 int dct8x8_allowed= h->pps.transform_8x8_mode;
6240 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6242 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6243 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6245 /* a skipped mb needs the aff flag from the following mb */
6246 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6247 predict_field_decoding_flag(h);
6248 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6249 skip = h->next_mb_skipped;
6251 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6252 /* read skip flags */
6254 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6255 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6256 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6257 if(h->next_mb_skipped)
6258 predict_field_decoding_flag(h);
6260 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6265 h->cbp_table[mb_xy] = 0;
6266 h->chroma_pred_mode_table[mb_xy] = 0;
6267 h->last_qscale_diff = 0;
6274 if( (s->mb_y&1) == 0 )
6276 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6278 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6280 h->prev_mb_skipped = 0;
6282 compute_mb_neighbors(h);
6283 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6284 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6288 if( h->slice_type == B_TYPE ) {
6290 partition_count= b_mb_type_info[mb_type].partition_count;
6291 mb_type= b_mb_type_info[mb_type].type;
6294 goto decode_intra_mb;
6296 } else if( h->slice_type == P_TYPE ) {
6298 partition_count= p_mb_type_info[mb_type].partition_count;
6299 mb_type= p_mb_type_info[mb_type].type;
6302 goto decode_intra_mb;
6305 assert(h->slice_type == I_TYPE);
6307 partition_count = 0;
6308 cbp= i_mb_type_info[mb_type].cbp;
6309 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6310 mb_type= i_mb_type_info[mb_type].type;
6313 mb_type |= MB_TYPE_INTERLACED;
6315 h->slice_table[ mb_xy ]= h->slice_num;
6317 if(IS_INTRA_PCM(mb_type)) {
6321 // We assume these blocks are very rare so we dont optimize it.
6322 // FIXME The two following lines get the bitstream position in the cabac
6323 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6324 ptr= h->cabac.bytestream;
6325 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6327 // The pixels are stored in the same order as levels in h->mb array.
6328 for(y=0; y<16; y++){
6329 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6330 for(x=0; x<16; x++){
6331 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6332 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6336 const int index= 256 + 4*(y&3) + 32*(y>>2);
6338 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6339 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6343 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6345 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6346 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6350 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6352 // All blocks are present
6353 h->cbp_table[mb_xy] = 0x1ef;
6354 h->chroma_pred_mode_table[mb_xy] = 0;
6355 // In deblocking, the quantizer is 0
6356 s->current_picture.qscale_table[mb_xy]= 0;
6357 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6358 // All coeffs are present
6359 memset(h->non_zero_count[mb_xy], 16, 16);
6360 s->current_picture.mb_type[mb_xy]= mb_type;
6365 h->ref_count[0] <<= 1;
6366 h->ref_count[1] <<= 1;
6369 fill_caches(h, mb_type, 0);
6371 if( IS_INTRA( mb_type ) ) {
6373 if( IS_INTRA4x4( mb_type ) ) {
6374 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6375 mb_type |= MB_TYPE_8x8DCT;
6376 for( i = 0; i < 16; i+=4 ) {
6377 int pred = pred_intra_mode( h, i );
6378 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6379 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6382 for( i = 0; i < 16; i++ ) {
6383 int pred = pred_intra_mode( h, i );
6384 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6386 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6389 write_back_intra_pred_mode(h);
6390 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6392 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6393 if( h->intra16x16_pred_mode < 0 ) return -1;
6395 h->chroma_pred_mode_table[mb_xy] =
6396 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6398 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6399 if( h->chroma_pred_mode < 0 ) return -1;
6400 } else if( partition_count == 4 ) {
6401 int i, j, sub_partition_count[4], list, ref[2][4];
6403 if( h->slice_type == B_TYPE ) {
6404 for( i = 0; i < 4; i++ ) {
6405 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6406 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6407 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6409 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6410 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6411 pred_direct_motion(h, &mb_type);
6412 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6413 for( i = 0; i < 4; i++ )
6414 if( IS_DIRECT(h->sub_mb_type[i]) )
6415 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6419 for( i = 0; i < 4; i++ ) {
6420 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6421 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6422 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6426 for( list = 0; list < 2; list++ ) {
6427 if( h->ref_count[list] > 0 ) {
6428 for( i = 0; i < 4; i++ ) {
6429 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6430 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6431 if( h->ref_count[list] > 1 )
6432 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6438 h->ref_cache[list][ scan8[4*i]+1 ]=
6439 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6445 dct8x8_allowed = get_dct8x8_allowed(h);
6447 for(list=0; list<2; list++){
6449 if(IS_DIRECT(h->sub_mb_type[i])){
6450 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6453 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6455 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6456 const int sub_mb_type= h->sub_mb_type[i];
6457 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6458 for(j=0; j<sub_partition_count[i]; j++){
6461 const int index= 4*i + block_width*j;
6462 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6463 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6464 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6466 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6467 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6468 tprintf("final mv:%d %d\n", mx, my);
6470 if(IS_SUB_8X8(sub_mb_type)){
6471 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6472 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6473 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6474 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6476 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6477 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6478 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6479 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6480 }else if(IS_SUB_8X4(sub_mb_type)){
6481 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6482 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6484 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6485 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6486 }else if(IS_SUB_4X8(sub_mb_type)){
6487 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6488 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6490 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6491 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6493 assert(IS_SUB_4X4(sub_mb_type));
6494 mv_cache[ 0 ][0]= mx;
6495 mv_cache[ 0 ][1]= my;
6497 mvd_cache[ 0 ][0]= mx - mpx;
6498 mvd_cache[ 0 ][1]= my - mpy;
6502 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6503 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6504 p[0] = p[1] = p[8] = p[9] = 0;
6505 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6509 } else if( IS_DIRECT(mb_type) ) {
6510 pred_direct_motion(h, &mb_type);
6511 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6512 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6513 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6515 int list, mx, my, i, mpx, mpy;
6516 if(IS_16X16(mb_type)){
6517 for(list=0; list<2; list++){
6518 if(IS_DIR(mb_type, 0, list)){
6519 if(h->ref_count[list] > 0 ){
6520 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6521 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6524 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6526 for(list=0; list<2; list++){
6527 if(IS_DIR(mb_type, 0, list)){
6528 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6530 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6531 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6532 tprintf("final mv:%d %d\n", mx, my);
6534 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6535 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6537 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6540 else if(IS_16X8(mb_type)){
6541 for(list=0; list<2; list++){
6542 if(h->ref_count[list]>0){
6544 if(IS_DIR(mb_type, i, list)){
6545 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6546 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6548 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6552 for(list=0; list<2; list++){
6554 if(IS_DIR(mb_type, i, list)){
6555 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6556 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6557 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6558 tprintf("final mv:%d %d\n", mx, my);
6560 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6561 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6563 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6564 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6569 assert(IS_8X16(mb_type));
6570 for(list=0; list<2; list++){
6571 if(h->ref_count[list]>0){
6573 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6574 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6575 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6577 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6581 for(list=0; list<2; list++){
6583 if(IS_DIR(mb_type, i, list)){
6584 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6585 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6586 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6588 tprintf("final mv:%d %d\n", mx, my);
6589 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6590 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6592 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6593 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6600 if( IS_INTER( mb_type ) ) {
6601 h->chroma_pred_mode_table[mb_xy] = 0;
6602 write_back_motion( h, mb_type );
6605 if( !IS_INTRA16x16( mb_type ) ) {
6606 cbp = decode_cabac_mb_cbp_luma( h );
6607 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6610 h->cbp_table[mb_xy] = h->cbp = cbp;
6612 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6613 if( decode_cabac_mb_transform_size( h ) )
6614 mb_type |= MB_TYPE_8x8DCT;
6616 s->current_picture.mb_type[mb_xy]= mb_type;
6618 if( cbp || IS_INTRA16x16( mb_type ) ) {
6619 const uint8_t *scan, *scan8x8, *dc_scan;
6622 if(IS_INTERLACED(mb_type)){
6623 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6624 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6625 dc_scan= luma_dc_field_scan;
6627 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6628 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6629 dc_scan= luma_dc_zigzag_scan;
6632 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6633 if( dqp == INT_MIN ){
6634 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6638 if(((unsigned)s->qscale) > 51){
6639 if(s->qscale<0) s->qscale+= 52;
6640 else s->qscale-= 52;
6642 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6644 if( IS_INTRA16x16( mb_type ) ) {
6646 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6647 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6650 for( i = 0; i < 16; i++ ) {
6651 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6652 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6656 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6660 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6661 if( cbp & (1<<i8x8) ) {
6662 if( IS_8x8DCT(mb_type) ) {
6663 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6664 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6667 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6668 const int index = 4*i8x8 + i4x4;
6669 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6671 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6673 //STOP_TIMER("decode_residual")
6676 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6677 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6684 for( c = 0; c < 2; c++ ) {
6685 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6686 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6693 for( c = 0; c < 2; c++ ) {
6694 for( i = 0; i < 4; i++ ) {
6695 const int index = 16 + 4 * c + i;
6696 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6697 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6702 uint8_t * const nnz= &h->non_zero_count_cache[0];
6703 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6704 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6707 uint8_t * const nnz= &h->non_zero_count_cache[0];
6708 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6709 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6710 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6711 h->last_qscale_diff = 0;
6714 s->current_picture.qscale_table[mb_xy]= s->qscale;
6715 write_back_non_zero_count(h);
6718 h->ref_count[0] >>= 1;
6719 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall).
 * pix points at the first pixel to the right of the edge; bS[4] holds one
 * boundary strength per 4-pixel group; qp is the quantizer already averaged
 * across the two macroblocks sharing the edge. */
6726 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* the alpha/beta/tc0 tables carry a +52 bias so that negative slice offsets
 * still index into valid table entries */
6728 const int index_a = qp + h->slice_alpha_c0_offset;
6729 const int alpha = (alpha_table+52)[index_a];
6730 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* normal filtering (bS < 4) is delegated to the dsputil routine;
 * tc = -1 marks a 4-pixel group that must not be filtered */
6735 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6736 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6738 /* 16px edge length, because bS=4 is triggered by being at
6739 * the edge of an intra MB, so all 4 bS are the same */
6740 for( d = 0; d < 16; d++ ) {
6741 const int p0 = pix[-1];
6742 const int p1 = pix[-2];
6743 const int p2 = pix[-3];
6745 const int q0 = pix[0];
6746 const int q1 = pix[1];
6747 const int q2 = pix[2];
/* filter only where the step across the edge looks like a blocking
 * artifact rather than genuine image detail */
6749 if( FFABS( p0 - q0 ) < alpha &&
6750 FFABS( p1 - p0 ) < beta &&
6751 FFABS( q1 - q0 ) < beta ) {
/* strong filtering: up to three pixels on each side may be rewritten */
6753 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6754 if( FFABS( p2 - p0 ) < beta)
6756 const int p3 = pix[-4];
6758 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6759 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6760 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6763 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6765 if( FFABS( q2 - q0 ) < beta)
6767 const int q3 = pix[3];
6769 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6770 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6771 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6774 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are replaced */
6778 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6779 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6781 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge. Same alpha/beta setup as the luma
 * variant; both the normal and the intra (strong) case are delegated to
 * dsputil routines. */
6787 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6789 const int index_a = qp + h->slice_alpha_c0_offset;
6790 const int alpha = (alpha_table+52)[index_a];
6791 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma biases tc by +1 and uses 0 (not -1) as the "skip this group" mark */
6796 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6797 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4 (intra macroblock edge): strong chroma filter */
6799 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * The left neighbour pair may use the opposite field/frame coding, so this
 * edge carries 8 boundary strengths (bS[8]) and two quantizers (qp[2]),
 * selected per pixel row; everything is done scalar, one row at a time. */
6803 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6805 for( i = 0; i < 16; i++, pix += stride) {
/* map the row to one of the 8 boundary-strength entries; the mapping
 * depends on whether the current MB is field or frame coded */
6811 int bS_index = (i >> 1);
6814 bS_index |= (i & 1);
6817 if( bS[bS_index] == 0 ) {
/* pick the quantizer belonging to the neighbour this row borders */
6821 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6822 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6823 alpha = (alpha_table+52)[index_a];
6824 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering (bS 1..3): clip the correction to +-tc0 */
6826 if( bS[bS_index] < 4 ) {
6827 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6828 const int p0 = pix[-1];
6829 const int p1 = pix[-2];
6830 const int p2 = pix[-3];
6831 const int q0 = pix[0];
6832 const int q1 = pix[1];
6833 const int q2 = pix[2];
6835 if( FFABS( p0 - q0 ) < alpha &&
6836 FFABS( p1 - p0 ) < beta &&
6837 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 are only touched when the second-neighbour pixel is smooth */
6841 if( FFABS( p2 - p0 ) < beta ) {
6842 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6845 if( FFABS( q2 - q0 ) < beta ) {
6846 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6850 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6851 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6852 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6853 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filtering, same formulas as filter_mb_edgev */
6856 const int p0 = pix[-1];
6857 const int p1 = pix[-2];
6858 const int p2 = pix[-3];
6860 const int q0 = pix[0];
6861 const int q1 = pix[1];
6862 const int q2 = pix[2];
6864 if( FFABS( p0 - q0 ) < alpha &&
6865 FFABS( p1 - p0 ) < beta &&
6866 FFABS( q1 - q0 ) < beta ) {
6868 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6869 if( FFABS( p2 - p0 ) < beta)
6871 const int p3 = pix[-4];
6873 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6874 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6875 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6878 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6880 if( FFABS( q2 - q0 ) < beta)
6882 const int q3 = pix[3];
6884 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6885 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6886 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6889 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6893 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6894 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6896 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: first vertical chroma edge
 * of an MBAFF pair, 8 rows, with per-row boundary strength and quantizer
 * selection. Chroma touches only p0/q0 (plus the tc-clipped delta). */
6901 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6903 for( i = 0; i < 8; i++, pix += stride) {
6911 if( bS[bS_index] == 0 ) {
/* pick the quantizer of the neighbour MB this chroma row borders */
6915 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6916 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6917 alpha = (alpha_table+52)[index_a];
6918 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering: chroma tc is tc0 + 1 */
6920 if( bS[bS_index] < 4 ) {
6921 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6922 const int p0 = pix[-1];
6923 const int p1 = pix[-2];
6924 const int q0 = pix[0];
6925 const int q1 = pix[1];
6927 if( FFABS( p0 - q0 ) < alpha &&
6928 FFABS( p1 - p0 ) < beta &&
6929 FFABS( q1 - q0 ) < beta ) {
6930 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6932 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6933 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6934 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filtering, averaging replaces p0/q0 outright */
6937 const int p0 = pix[-1];
6938 const int p1 = pix[-2];
6939 const int q0 = pix[0];
6940 const int q1 = pix[1];
6942 if( FFABS( p0 - q0 ) < alpha &&
6943 FFABS( p1 - p0 ) < beta &&
6944 FFABS( q1 - q0 ) < beta ) {
6946 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6947 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6948 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixels wide). Mirror image of
 * filter_mb_edgev: the p/q samples are addressed with row offsets
 * (multiples of pix_next == stride) instead of column offsets. */
6954 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6956 const int index_a = qp + h->slice_alpha_c0_offset;
6957 const int alpha = (alpha_table+52)[index_a];
6958 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6959 const int pix_next = stride;
/* normal filtering via dsputil; tc = -1 disables a 4-pixel group */
6964 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6965 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6967 /* 16px edge length, see filter_mb_edgev */
6968 for( d = 0; d < 16; d++ ) {
6969 const int p0 = pix[-1*pix_next];
6970 const int p1 = pix[-2*pix_next];
6971 const int p2 = pix[-3*pix_next];
6972 const int q0 = pix[0];
6973 const int q1 = pix[1*pix_next];
6974 const int q2 = pix[2*pix_next];
6976 if( FFABS( p0 - q0 ) < alpha &&
6977 FFABS( p1 - p0 ) < beta &&
6978 FFABS( q1 - q0 ) < beta ) {
6980 const int p3 = pix[-4*pix_next];
6981 const int q3 = pix[ 3*pix_next];
/* strong (bS == 4) path: up to three rows each side are rewritten */
6983 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6984 if( FFABS( p2 - p0 ) < beta) {
6986 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6987 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6988 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6991 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6993 if( FFABS( q2 - q0 ) < beta) {
6995 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6996 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6997 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
7000 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only the two rows adjacent to the edge change */
7004 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
7005 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
7007 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge. Mirror of filter_mb_edgecv, using the
 * vertical-direction dsputil chroma filters. */
7014 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
7016 const int index_a = qp + h->slice_alpha_c0_offset;
7017 const int alpha = (alpha_table+52)[index_a];
7018 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma tc is tc0 + 1; 0 marks a group that must not be filtered */
7023 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
7024 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS == 4 (intra macroblock edge): strong chroma filter */
7026 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock (luma + both chroma planes).
 * Requires a non-MBAFF frame, non-edge MB position and the optimized
 * h264_loop_filter_strength dsputil helper; otherwise it falls back to the
 * general filter_mb(). */
7030 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7031 MpegEncContext * const s = &h->s;
7033 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* picture-border MBs and builds without the asm helper take the slow path */
7035 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7036 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7039 assert(!FRAME_MBAFF);
7041 mb_xy = mb_x + mb_y*s->mb_stride;
7042 mb_type = s->current_picture.mb_type[mb_xy];
/* average the quantizers with the left (qp0) and top (qp1) neighbours,
 * for luma and chroma separately, as required for shared edges */
7043 qp = s->current_picture.qscale_table[mb_xy];
7044 qp0 = s->current_picture.qscale_table[mb_xy-1];
7045 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7046 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7047 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7048 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7049 qp0 = (qp + qp0 + 1) >> 1;
7050 qp1 = (qp + qp1 + 1) >> 1;
7051 qpc0 = (qpc + qpc0 + 1) >> 1;
7052 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold alpha/beta become 0 and filtering is a no-op,
 * so the whole MB can be skipped */
7053 qp_thresh = 15 - h->slice_alpha_c0_offset;
7054 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7055 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs have constant boundary strengths: 4 on the outer edges,
 * 3 on the internal ones — no per-edge computation needed */
7058 if( IS_INTRA(mb_type) ) {
7059 int16_t bS4[4] = {4,4,4,4};
7060 int16_t bS3[4] = {3,3,3,3};
/* 8x8 transform: only edges 0 and 2 exist for luma */
7061 if( IS_8x8DCT(mb_type) ) {
7062 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7063 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7064 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7065 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7067 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7068 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7069 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7070 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7071 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7072 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7073 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7074 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma always has just the outer edge and the middle edge */
7076 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7077 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7078 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7079 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7080 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7081 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7082 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7083 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute all boundary strengths in one dsputil call;
 * bS[0][...] are the vertical edges, bS[1][...] the horizontal ones */
7086 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7087 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with all luma 8x8 blocks coded: bS is 2 everywhere */
7089 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7091 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge* tell the helper which edges cannot get mv-based bS
 * because the partitioning has no mv boundary there */
7093 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7094 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7095 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7096 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7098 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7099 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7100 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7101 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* edges shared with an intra neighbour are forced to bS = 4 */
7103 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7104 bSv[0][0] = 0x0004000400040004ULL;
7105 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7106 bSv[1][0] = 0x0004000400040004ULL;
/* apply one luma + (for even edges) chroma filter call per edge;
 * edge 0 uses the neighbour-averaged qp, internal edges the MB's own */
7108 #define FILTER(hv,dir,edge)\
7109 if(bSv[dir][edge]) {\
7110 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7112 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7113 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7119 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking of one macroblock: derives the boundary
 * strength for every 4-pixel edge segment from intra/nnz/mv/ref data and
 * applies the edge filters. Handles all MBAFF field/frame neighbour
 * combinations, which the fast path does not. */
7138 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7139 MpegEncContext * const s = &h->s;
7140 const int mb_xy= mb_x + mb_y*s->mb_stride;
7141 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs compare vertical mv components at half resolution */
7142 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7143 int first_vertical_edge_done = 0;
7145 /* FIXME: A given frame may occupy more than one position in
7146 * the reference list. So ref2frm should be populated with
7147 * frame numbers, not indices. */
/* maps ref_cache values (-2 = unused, -1 = PCM/none) to comparable ids */
7148 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7149 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7151 //for sufficiently low qp, filtering wouldn't do anything
7152 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7154 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7155 int qp = s->current_picture.qscale_table[mb_xy];
7157 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7158 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7164 // left mb is in picture
7165 && h->slice_table[mb_xy-1] != 255
7166 // and current and left pair do not have the same interlaced type
7167 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7168 // and left mb is in the same slice if deblocking_filter == 2
7169 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7170 /* First vertical edge is different in MBAFF frames
7171 * There are 8 different bS to compute and 2 different Qp
7173 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7174 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7178 int mb_qp, mbn0_qp, mbn1_qp;
7180 first_vertical_edge_done = 1;
/* intra on either side forces the maximum strength on the whole edge */
7182 if( IS_INTRA(mb_type) )
7183 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7185 for( i = 0; i < 8; i++ ) {
/* which of the two left MBs borders this 2-pixel group depends on
 * whether the current MB is field or frame coded */
7186 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7188 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7190 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7191 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7192 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two quantizer pairs: one per left-neighbour macroblock */
7199 mb_qp = s->current_picture.qscale_table[mb_xy];
7200 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7201 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7202 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7203 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7204 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7205 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7206 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7207 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7210 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7211 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7212 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7213 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7214 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7216 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7217 for( dir = 0; dir < 2; dir++ )
7220 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7221 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 marks "outside the picture": skip the outer edge */
7222 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7224 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7225 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7226 // how often to recheck mv-based bS when iterating between edges
7227 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7228 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7229 // how often to recheck mv-based bS when iterating along each edge
7230 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7232 if (first_vertical_edge_done) {
7234 first_vertical_edge_done = 0;
/* deblocking_filter == 2: do not filter across slice boundaries */
7237 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7240 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7241 && !IS_INTERLACED(mb_type)
7242 && IS_INTERLACED(mbm_type)
7244 // This is a special case in the norm where the filtering must
7245 // be done twice (one each of the field) even if we are in a
7246 // frame macroblock.
7248 static const int nnz_idx[4] = {4,5,6,3};
7249 unsigned int tmp_linesize = 2 * linesize;
7250 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7251 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter against each field of the interlaced pair above */
7256 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7257 if( IS_INTRA(mb_type) ||
7258 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7259 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7261 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7262 for( i = 0; i < 4; i++ ) {
7263 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7264 mbn_nnz[nnz_idx[i]] != 0 )
7270 // Do not use s->qscale as luma quantizer because it has not the same
7271 // value in IPCM macroblocks.
7272 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7273 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7274 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7275 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7276 chroma_qp = ( h->chroma_qp +
7277 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7278 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7279 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* main loop: one iteration per internal/external edge in this direction */
7286 for( edge = start; edge < edges; edge++ ) {
7287 /* mbn_xy: neighbor macroblock */
7288 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7289 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with an 8x8 transform there is no coefficient boundary on odd edges */
7293 if( (edge&1) && IS_8x8DCT(mb_type) )
7296 if( IS_INTRA(mb_type) ||
7297 IS_INTRA(mbn_type) ) {
7300 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7301 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7310 bS[0] = bS[1] = bS[2] = bS[3] = value;
7315 if( edge & mask_edge ) {
7316 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* field/frame mismatch between the MBs: constant bS = 1 */
7319 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7320 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* large partitions: one mv/ref comparison decides the whole edge */
7323 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7324 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7325 int bn_idx= b_idx - (dir ? 8:1);
7327 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7328 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7329 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7330 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7332 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4-pixel group from nnz, then mv/ref */
7338 for( i = 0; i < 4; i++ ) {
7339 int x = dir == 0 ? edge : i;
7340 int y = dir == 0 ? i : edge;
7341 int b_idx= 8 + 4 + x + 8*y;
7342 int bn_idx= b_idx - (dir ? 8:1);
7344 if( h->non_zero_count_cache[b_idx] != 0 ||
7345 h->non_zero_count_cache[bn_idx] != 0 ) {
7351 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7352 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7353 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7354 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7362 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7367 // Do not use s->qscale as luma quantizer because it has not the same
7368 // value in IPCM macroblocks.
7369 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7370 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7371 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7372 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* chroma has half the resolution: only even luma edges map to chroma */
7374 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7375 if( (edge&1) == 0 ) {
7376 int chroma_qp = ( h->chroma_qp +
7377 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7378 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7379 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7382 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7383 if( (edge&1) == 0 ) {
7384 int chroma_qp = ( h->chroma_qp +
7385 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7386 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7387 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes all macroblocks of one slice, dispatching to the CABAC or the
 * CAVLC entropy-decoding path depending on pps.cabac.
 * NOTE(review): interior lines of this function are elided from this excerpt;
 * comments below describe only the code that is visible here.
 * @return 0 on normal slice end (via the elided early returns), -1 otherwise
 */
7394 static int decode_slice(H264Context *h){
7395 MpegEncContext * const s = &h->s;
/* In partitioned streams only AC errors/ends are reported per partition. */
7396 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7400 if( h->pps.cabac ) {
/* CABAC slice data is byte-aligned in the bitstream. */
7404 align_get_bits( &s->gb );
7407 ff_init_cabac_states( &h->cabac);
/* Hand the remaining (byte-rounded) slice bytes to the CABAC decoder. */
7408 ff_init_cabac_decoder( &h->cabac,
7409 s->gb.buffer + get_bits_count(&s->gb)/8,
7410 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7411 /* calculate pre-state */
/* Initialize all 460 CABAC context states from the standard init tables,
 * scaled by the slice QP; I slices and P/B slices use different tables. */
7412 for( i= 0; i < 460; i++ ) {
7414 if( h->slice_type == I_TYPE )
7415 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7417 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte: states <=63 get MPS=0, others MPS=1. */
7420 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7422 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* --- CABAC macroblock loop (loop header elided in this excerpt) --- */
7427 int ret = decode_mb_cabac(h);
7429 //STOP_TIMER("decode_mb_cabac")
7431 if(ret>=0) hl_decode_mb(h);
/* In MBAFF mode an MB pair is decoded together: decode the bottom MB too. */
7433 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7436 if(ret>=0) ret = decode_mb_cabac(h);
7438 if(ret>=0) hl_decode_mb(h);
/* end_of_slice_flag, decoded with the terminate context. */
7441 eos = get_cabac_terminate( &h->cabac );
/* Bytestream overrun beyond 2 bytes past the end indicates corruption. */
7443 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7444 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7445 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Advance to next MB; wrap to the next row at the right edge. */
7449 if( ++s->mb_x >= s->mb_width ) {
7451 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7458 if( eos || s->mb_y >= s->mb_height ) {
7459 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7460 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* --- CAVLC macroblock loop (loop header elided in this excerpt) --- */
7467 int ret = decode_mb_cavlc(h);
7469 if(ret>=0) hl_decode_mb(h);
7471 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7473 ret = decode_mb_cavlc(h);
7475 if(ret>=0) hl_decode_mb(h);
7480 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7481 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7486 if(++s->mb_x >= s->mb_width){
7488 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7493 if(s->mb_y >= s->mb_height){
7494 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consumed bitstream => clean slice end; otherwise report error. */
7496 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7497 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7501 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mb_skip_run must be exhausted before the bit position is meaningful. */
7508 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7509 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7510 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7511 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7515 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* --- Alternative decode_mb() loop below; presumably disabled/legacy code
 * (the guarding preprocessor lines are elided from this excerpt) --- */
7524 for(;s->mb_y < s->mb_height; s->mb_y++){
7525 for(;s->mb_x < s->mb_width; s->mb_x++){
7526 int ret= decode_mb(h);
7531 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7532 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7537 if(++s->mb_x >= s->mb_width){
7539 if(++s->mb_y >= s->mb_height){
/* NOTE(review): get_bits_count(s->gb) lacks the '&' used everywhere else
 * (s->gb is a struct, not a pointer) — verify against upstream source. */
7540 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7541 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7545 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): "s->?gb" and "s->gb?." below contain mis-encoded '?'
 * characters; the condition is presumably
 * get_bits_count(&s->gb) >= s->gb.size_in_bits — confirm and repair. */
7552 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
7553 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7554 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7558 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7565 ff_draw_horiz_band(s, 16*s->mb_y, 16);
/* All successful paths return earlier (on lines elided from this excerpt). */
7568 return -1; //not reached
/**
 * Parses an SEI "user data unregistered" payload, probing it for an x264
 * version banner so encoder-specific bug workarounds can be enabled.
 * @param size payload size in bytes
 * NOTE(review): declarations and the return are on lines elided here.
 */
7571 static int decode_unregistered_user_data(H264Context *h, int size){
7572 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of payload text. */
7573 uint8_t user_data[16+256];
7579 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7580 user_data[i]= get_bits(&s->gb, 8);
/* Extract the x264 core build number from the banner after the 16-byte UUID. */
7584 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7585 if(e==1 && build>=0)
7586 h->x264_build= build;
7588 if(s->avctx->debug & FF_DEBUG_BUGS)
7589 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond what fit in user_data (loop header elided). */
7592 skip_bits(&s->gb, 8);
/**
 * Decodes an SEI NAL unit: iterates over (type, size) payload headers and
 * dispatches known payload types, skipping the rest.
 */
7597 static int decode_sei(H264Context *h){
7598 MpegEncContext * const s = &h->s;
/* Stop when fewer than 16 bits remain (minimum for one type+size pair). */
7600 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: a run of 0xFF bytes each adds 255, terminated by the last byte. */
7605 type+= show_bits(&s->gb, 8);
7606 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same 0xFF-prefixed coding as the type. */
7610 size+= show_bits(&s->gb, 8);
7611 }while(get_bits(&s->gb, 8) == 255);
/* Dispatch (switch header elided): unregistered user data is parsed ... */
7615 if(decode_unregistered_user_data(h, size) < 0)
/* ... and unknown payload types are skipped wholesale. */
7619 skip_bits(&s->gb, 8*size);
7622 //FIXME check bits here
7623 align_get_bits(&s->gb);
/**
 * Parses hrd_parameters() (Hypothetical Reference Decoder) from the VUI.
 * All fields are read to keep the bitstream position correct; none are
 * stored — the values are currently unused by this decoder.
 */
7629 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7630 MpegEncContext * const s = &h->s;
7632 cpb_count = get_ue_golomb(&s->gb) + 1;
7633 get_bits(&s->gb, 4); /* bit_rate_scale */
7634 get_bits(&s->gb, 4); /* cpb_size_scale */
/* One bitrate/size/cbr triple per coded picture buffer. */
7635 for(i=0; i<cpb_count; i++){
7636 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7637 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7638 get_bits1(&s->gb); /* cbr_flag */
7640 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7641 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7642 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7643 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses vui_parameters() from an SPS: sample aspect ratio, timing info,
 * HRD parameters and bitstream restrictions. Fields the decoder uses are
 * stored into *sps; the rest are read only to advance the bit reader.
 * @return presumably 0 on success (the return lines are elided here)
 */
7646 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7647 MpegEncContext * const s = &h->s;
7648 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7649 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7651 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7653 if( aspect_ratio_info_present_flag ) {
7654 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; small idc values index a
 * table of predefined aspect ratios. */
7655 if( aspect_ratio_idc == EXTENDED_SAR ) {
7656 sps->sar.num= get_bits(&s->gb, 16);
7657 sps->sar.den= get_bits(&s->gb, 16);
7658 }else if(aspect_ratio_idc < 14){
7659 sps->sar= pixel_aspect[aspect_ratio_idc];
7661 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7668 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7670 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7671 get_bits1(&s->gb); /* overscan_appropriate_flag */
7674 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7675 get_bits(&s->gb, 3); /* video_format */
7676 get_bits1(&s->gb); /* video_full_range_flag */
7677 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7678 get_bits(&s->gb, 8); /* colour_primaries */
7679 get_bits(&s->gb, 8); /* transfer_characteristics */
7680 get_bits(&s->gb, 8); /* matrix_coefficients */
7684 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7685 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7686 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7689 sps->timing_info_present_flag = get_bits1(&s->gb);
7690 if(sps->timing_info_present_flag){
7691 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7692 sps->time_scale = get_bits_long(&s->gb, 32);
7693 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* Both NAL and VCL HRD parameter sets use the same syntax. */
7696 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7697 if(nal_hrd_parameters_present_flag)
7698 decode_hrd_parameters(h, sps);
7699 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7700 if(vcl_hrd_parameters_present_flag)
7701 decode_hrd_parameters(h, sps);
7702 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7703 get_bits1(&s->gb); /* low_delay_hrd_flag */
7704 get_bits1(&s->gb); /* pic_struct_present_flag */
/* num_reorder_frames bounds the B-frame reorder delay needed for output. */
7706 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7707 if(sps->bitstream_restriction_flag){
7708 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7709 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7710 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7711 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7712 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7713 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7714 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Reads one quantization scaling list (4x4 or 8x8) from the bitstream.
 * @param factors     output, in raster order (written via zigzag scan)
 * @param size        16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list    spec-default list, used when the stream signals it
 * @param fallback_list list used when the matrix is not present at all
 */
7720 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7721 const uint8_t *jvt_list, const uint8_t *fallback_list){
7722 MpegEncContext * const s = &h->s;
7723 int i, last = 8, next = 8;
7724 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7725 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7726 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7728 for(i=0;i<size;i++){
/* Delta-coded entries; & 0xff keeps values in byte range. */
7730 next = (last + get_se_golomb(&s->gb)) & 0xff;
7731 if(!i && !next){ /* matrix not written, we use the preset one */
7732 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the previous value" for the rest of the list. */
7735 last = factors[scan[i]] = next ? next : last;
/**
 * Reads the full set of scaling matrices from an SPS or PPS.
 * Fallback order follows the spec: each list falls back either to the
 * previously decoded list of the same class, to the SPS lists (for a PPS),
 * or to the flat/default lists.
 * @param is_sps non-zero when called for an SPS (pps may then be NULL)
 */
7739 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7740 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7741 MpegEncContext * const s = &h->s;
/* A PPS may inherit from the SPS only if the SPS actually carried matrices. */
7742 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7743 const uint8_t *fallback[4] = {
7744 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7745 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7746 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7747 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7749 if(get_bits1(&s->gb)){
7750 sps->scaling_matrix_present |= is_sps;
7751 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7752 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7753 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7754 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7755 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7756 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transforms. */
7757 if(is_sps || pps->transform_8x8_mode){
7758 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7759 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* No matrices in this PPS: copy everything from the SPS. */
7761 } else if(fallback_sps) {
7762 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7763 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Parses a seq_parameter_set_rbsp and stores it in h->sps_buffer[sps_id].
 * NOTE(review): the bounds check of sps_id against MAX_SPS_COUNT and several
 * error-return paths are presumably on lines elided from this excerpt —
 * verify before relying on the indexing below.
 * @return presumably 0 on success, negative on error (returns elided)
 */
7767 static inline int decode_seq_parameter_set(H264Context *h){
7768 MpegEncContext * const s = &h->s;
7769 int profile_idc, level_idc;
7773 profile_idc= get_bits(&s->gb, 8);
7774 get_bits1(&s->gb); //constraint_set0_flag
7775 get_bits1(&s->gb); //constraint_set1_flag
7776 get_bits1(&s->gb); //constraint_set2_flag
7777 get_bits1(&s->gb); //constraint_set3_flag
7778 get_bits(&s->gb, 4); // reserved
7779 level_idc= get_bits(&s->gb, 8);
7780 sps_id= get_ue_golomb(&s->gb);
7782 sps= &h->sps_buffer[ sps_id ];
7783 sps->profile_idc= profile_idc;
7784 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, lossless bypass and
 * optional scaling matrices to the SPS. */
7786 if(sps->profile_idc >= 100){ //high profile
7787 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7788 get_bits1(&s->gb); //residual_color_transform_flag
7789 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7790 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7791 sps->transform_bypass = get_bits1(&s->gb);
7792 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7794 sps->scaling_matrix_present = 0;
7796 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7797 sps->poc_type= get_ue_golomb(&s->gb);
/* POC type selects how picture order counts are derived per frame. */
7799 if(sps->poc_type == 0){ //FIXME #define
7800 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7801 } else if(sps->poc_type == 1){//FIXME #define
7802 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7803 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7804 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7805 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7807 for(i=0; i<sps->poc_cycle_length; i++)
7808 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7810 if(sps->poc_type > 2){
7811 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7815 sps->ref_frame_count= get_ue_golomb(&s->gb);
/* Internal picture pool limit, not a spec limit. */
7816 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7817 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7819 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7820 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7821 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Reject dimensions that would overflow 16*mb arithmetic or the global
 * size checks. */
7822 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7823 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7826 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7827 if(!sps->frame_mbs_only_flag)
7828 sps->mb_aff= get_bits1(&s->gb);
7832 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7834 #ifndef ALLOW_INTERLACE
7836 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7838 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7839 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7841 sps->crop= get_bits1(&s->gb);
/* frame_cropping rectangle, in chroma-sample units per the spec. */
7843 sps->crop_left = get_ue_golomb(&s->gb);
7844 sps->crop_right = get_ue_golomb(&s->gb);
7845 sps->crop_top = get_ue_golomb(&s->gb);
7846 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Left/top cropping shifts the picture origin, which this decoder only
 * approximates. */
7847 if(sps->crop_left || sps->crop_top){
7848 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7854 sps->crop_bottom= 0;
7857 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7858 if( sps->vui_parameters_present_flag )
7859 decode_vui_parameters(h, sps);
7861 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7862 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7863 sps_id, sps->profile_idc, sps->level_idc,
7865 sps->ref_frame_count,
7866 sps->mb_width, sps->mb_height,
7867 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7868 sps->direct_8x8_inference_flag ? "8B8" : "",
7869 sps->crop_left, sps->crop_right,
7870 sps->crop_top, sps->crop_bottom,
7871 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a pic_parameter_set_rbsp into h->pps_buffer[pps_id].
 * NOTE(review): the bounds check of pps_id against MAX_PPS_COUNT is
 * presumably on a line elided from this excerpt — verify before relying
 * on the indexing below.
 * @param bit_length RBSP length in bits; used to detect the optional
 *                   trailing high-profile fields
 * @return presumably 0 on success, negative on error (returns elided)
 */
7877 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7878 MpegEncContext * const s = &h->s;
7879 int pps_id= get_ue_golomb(&s->gb);
7880 PPS *pps= &h->pps_buffer[pps_id];
7882 pps->sps_id= get_ue_golomb(&s->gb);
7883 pps->cabac= get_bits1(&s->gb);
7884 pps->pic_order_present= get_bits1(&s->gb);
7885 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (multiple slice groups) is parsed for position but not supported. */
7886 if(pps->slice_group_count > 1 ){
7887 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7888 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7889 switch(pps->mb_slice_group_map_type){
/* The tables below are verbatim spec syntax kept as documentation for the
 * unimplemented FMO map types. */
7892 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7893 | run_length[ i ] |1 |ue(v) |
7898 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7900 | top_left_mb[ i ] |1 |ue(v) |
7901 | bottom_right_mb[ i ] |1 |ue(v) |
7909 | slice_group_change_direction_flag |1 |u(1) |
7910 | slice_group_change_rate_minus1 |1 |ue(v) |
7915 | slice_group_id_cnt_minus1 |1 |ue(v) |
7916 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7918 | slice_group_id[ i ] |1 |u(v) |
7923 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7924 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7925 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7926 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7930 pps->weighted_pred= get_bits1(&s->gb);
7931 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* QP fields are coded as offsets from 26. */
7932 pps->init_qp= get_se_golomb(&s->gb) + 26;
7933 pps->init_qs= get_se_golomb(&s->gb) + 26;
7934 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7935 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7936 pps->constrained_intra_pred= get_bits1(&s->gb);
7937 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7939 pps->transform_8x8_mode= 0;
7940 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Default to flat (16) scaling matrices until/unless the stream overrides. */
7941 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7942 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* High-profile PPS extension present only if bits remain in the RBSP. */
7944 if(get_bits_count(&s->gb) < bit_length){
7945 pps->transform_8x8_mode= get_bits1(&s->gb);
7946 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7947 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7950 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7951 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7952 pps_id, pps->sps_id,
7953 pps->cabac ? "CABAC" : "CAVLC",
7954 pps->slice_group_count,
7955 pps->ref_count[0], pps->ref_count[1],
7956 pps->weighted_pred ? "weighted" : "",
7957 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7958 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7959 pps->constrained_intra_pred ? "CONSTR" : "",
7960 pps->redundant_pic_cnt_present ? "REDU" : "",
7961 pps->transform_8x8_mode ? "8x8DCT" : ""
/**
7969 * finds the end of the current frame in the bitstream.
7970 * @return the position of the first byte of the next frame, or -1
 * Scans for NAL start codes; a new slice NAL (types 1/2/5) whose
 * first_mb_in_slice is 0 marks a frame boundary.
 */
7972 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7975 ParseContext *pc = &(h->s.parse_context);
7976 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7977 // mb_addr= pc->mb_addr - 1;
/* 'state' is a sliding 32-bit window; & 0x1F isolates the NAL type after
 * a 00 00 01 start code. */
7979 for(i=0; i<=buf_size; i++){
/* NAL types 1 (non-IDR slice), 2 (partition A) or 5 (IDR slice). */
7980 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7981 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7982 if(pc->frame_start_found){
7983 // If there isn't one more byte in the buffer
7984 // the test on first_mb_in_slice cannot be done yet
7985 // do it at next call.
7986 if (i >= buf_size) break;
/* High bit set => ue(v) first_mb_in_slice is 0 => new frame starts here. */
7987 if (buf[i] & 0x80) {
7988 // first_mb_in_slice is 0, probably the first nal of a new
7990 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7992 pc->frame_start_found= 0;
7996 pc->frame_start_found = 1;
/* SPS (7), PPS (8) or AU delimiter (9) also terminate a pending frame. */
7998 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7999 if(pc->frame_start_found){
8001 pc->frame_start_found= 0;
8006 state= (state<<8) | buf[i];
/* Frame end not in this buffer; caller must feed more data. */
8010 return END_NOT_FOUND;
8013 #ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: accumulates input until a whole frame is
 * available, then returns it through poutbuf.
 * NOTE(review): the PARSER_FLAG/next handling lines are elided here.
 */
8014 static int h264_parse(AVCodecParserContext *s,
8015 AVCodecContext *avctx,
8016 uint8_t **poutbuf, int *poutbuf_size,
8017 const uint8_t *buf, int buf_size)
8019 H264Context *h = s->priv_data;
8020 ParseContext *pc = &h->s.parse_context;
8023 next= find_frame_end(h, buf, buf_size);
/* Frame incomplete: stash the partial data and output nothing yet. */
8025 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8031 *poutbuf = (uint8_t *)buf;
8032 *poutbuf_size = buf_size;
/**
 * Finds the split point between extradata (SPS/PPS headers) and the first
 * frame data, for use as the parser's split callback.
 * @return byte offset of the first non-header NAL, or 0 (elided) if none
 */
8036 static int h264_split(AVCodecContext *avctx,
8037 const uint8_t *buf, int buf_size)
/* -1 == 0xFFFFFFFF: no start code seen yet. */
8040 uint32_t state = -1;
8043 for(i=0; i<=buf_size; i++){
/* Track that an SPS (NAL type 7) has been seen (body elided). */
8044 if((state&0xFFFFFF1F) == 0x107)
8046 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* First NAL that is not SPS/PPS/AUD ends the header region. */
8048 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* Back up over zero bytes preceding the start code. */
8050 while(i>4 && buf[i-5]==0) i--;
8055 state= (state<<8) | buf[i];
8059 #endif /* CONFIG_H264_PARSER */
/**
 * Splits the input buffer into NAL units (Annex-B start codes or AVC
 * length-prefixed format) and dispatches each to the appropriate decoder
 * (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * @return number of bytes consumed, or negative (elided) on error
 */
8061 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8062 MpegEncContext * const s = &h->s;
8063 AVCodecContext * const avctx= s->avctx;
/* Debug hexdump of the buffer head (guard lines elided in this excerpt). */
8067 for(i=0; i<50; i++){
8068 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8072 s->current_picture_ptr= NULL;
/* --- per-NAL loop (header elided) --- */
8081 if(buf_index >= buf_size) break;
/* AVC format: read the big-endian NAL length prefix. */
8083 for(i = 0; i < h->nal_length_size; i++)
8084 nalsize = (nalsize << 8) | buf[buf_index++];
8090 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8095 // start code prefix search
8096 for(; buf_index + 3 < buf_size; buf_index++){
8097 // this should allways succeed in the first iteration
8098 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8102 if(buf_index+3 >= buf_size) break;
/* Unescape the RBSP (removes emulation-prevention bytes). */
8107 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
/* Strip trailing zero bytes before locating the rbsp_stop_one_bit. */
8108 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8110 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8112 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8113 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8116 if (h->is_avc && (nalsize != consumed))
8117 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8119 buf_index += consumed;
/* hurry_up / skip_frame: drop non-reference NALs entirely. */
8121 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8122 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8125 switch(h->nal_unit_type){
/* IDR slice: flush reference state first (case label elided). */
8127 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8129 init_get_bits(&s->gb, ptr, bit_length);
8131 h->inter_gb_ptr= &s->gb;
8132 s->data_partitioning = 0;
8134 if(decode_slice_header(h) < 0){
8135 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8138 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* Decode the slice only if none of the skip/discard policies apply. */
8139 if(h->redundant_pic_count==0 && s->hurry_up < 5
8140 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8141 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8142 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8143 && avctx->skip_frame < AVDISCARD_ALL)
/* DPA: slice header + partition A; enables data partitioning mode. */
8147 init_get_bits(&s->gb, ptr, bit_length);
8149 h->inter_gb_ptr= NULL;
8150 s->data_partitioning = 1;
8152 if(decode_slice_header(h) < 0){
8153 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* DPB: intra-coefficient partition. */
8157 init_get_bits(&h->intra_gb, ptr, bit_length);
8158 h->intra_gb_ptr= &h->intra_gb;
/* DPC: inter-coefficient partition; having it completes the slice. */
8161 init_get_bits(&h->inter_gb, ptr, bit_length);
8162 h->inter_gb_ptr= &h->inter_gb;
8164 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8166 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8167 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8168 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8169 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI NAL (case label elided). */
8173 init_get_bits(&s->gb, ptr, bit_length);
8177 init_get_bits(&s->gb, ptr, bit_length);
8178 decode_seq_parameter_set(h);
8180 if(s->flags& CODEC_FLAG_LOW_DELAY)
/* has_b_frames tracks the needed output reorder delay. */
8183 if(avctx->has_b_frames < 2)
8184 avctx->has_b_frames= !s->low_delay;
8187 init_get_bits(&s->gb, ptr, bit_length);
8189 decode_picture_parameter_set(h, bit_length);
8193 case NAL_END_SEQUENCE:
8194 case NAL_END_STREAM:
8195 case NAL_FILLER_DATA:
8197 case NAL_AUXILIARY_SLICE:
8200 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8204 if(!s->current_picture_ptr) return buf_index; //no frame
/* Finalize the decoded picture and roll the POC/frame_num history. */
8206 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8207 s->current_picture_ptr->pict_type= s->pict_type;
8209 h->prev_frame_num_offset= h->frame_num_offset;
8210 h->prev_frame_num= h->frame_num;
8211 if(s->current_picture_ptr->reference){
8212 h->prev_poc_msb= h->poc_msb;
8213 h->prev_poc_lsb= h->poc_lsb;
/* Apply memory-management control operations for reference pictures. */
8215 if(s->current_picture_ptr->reference)
8216 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/**
8226 * returns the number of bytes consumed for building the current frame
 * Adjusts for data buffered by the truncated-input parse context and
 * clamps the result into a sane range.
 */
8228 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8229 if(s->flags&CODEC_FLAG_TRUNCATED){
/* Subtract bytes that came from a previous call's leftover buffer. */
8230 pos -= s->parse_context.last_index;
8231 if(pos<0) pos=0; // FIXME remove (unneeded?)
8235 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8236 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: handles truncated-input reassembly,
 * one-time avcC extradata parsing, NAL decoding, and B-frame reordering
 * of the output picture.
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned
 * @return bytes consumed, or negative (elided) on error
 */
8242 static int decode_frame(AVCodecContext *avctx,
8243 void *data, int *data_size,
8244 uint8_t *buf, int buf_size)
8246 H264Context *h = avctx->priv_data;
8247 MpegEncContext *s = &h->s;
8248 AVFrame *pict = data;
8251 s->flags= avctx->flags;
8252 s->flags2= avctx->flags2;
8254 /* no supplementary picture */
8255 if (buf_size == 0) {
/* CODEC_FLAG_TRUNCATED: input may end mid-frame; buffer until complete. */
8259 if(s->flags&CODEC_FLAG_TRUNCATED){
8260 int next= find_frame_end(h, buf, buf_size);
8262 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8264 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* First call with avcC extradata (MP4/MOV style): parse SPS/PPS once. */
8267 if(h->is_avc && !h->got_avcC) {
8268 int i, cnt, nalsize;
8269 unsigned char *p = avctx->extradata;
8270 if(avctx->extradata_size < 7) {
8271 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8275 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8278 /* sps and pps in the avcC always have length coded with 2 bytes,
8279 so put a fake nal_length_size = 2 while parsing them */
8280 h->nal_length_size = 2;
8281 // Decode sps from avcC
8282 cnt = *(p+5) & 0x1f; // Number of sps
8284 for (i = 0; i < cnt; i++) {
8285 nalsize = BE_16(p) + 2;
8286 if(decode_nal_units(h, p, nalsize) < 0) {
8287 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8292 // Decode pps from avcC
8293 cnt = *(p++); // Number of pps
8294 for (i = 0; i < cnt; i++) {
8295 nalsize = BE_16(p) + 2;
8296 if(decode_nal_units(h, p, nalsize) != nalsize) {
8297 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8302 // Now store right nal length size, that will be use to parse all other nals
8303 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8304 // Do not reparse avcC
/* Annex-B extradata path: decode it once before the first picture. */
8308 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8309 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8313 buf_index=decode_nal_units(h, buf, buf_size);
8317 //FIXME do something with unavailable reference frames
8319 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8320 if(!s->current_picture_ptr){
8321 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8326 Picture *out = s->current_picture_ptr;
8327 #if 0 //decode order
8328 *data_size = sizeof(AVFrame);
8330 /* Sort B-frames into display order */
8331 Picture *cur = s->current_picture_ptr;
8332 Picture *prev = h->delayed_output_pic;
8333 int i, pics, cross_idr, out_of_order, out_idx;
/* VUI bitstream restriction gives the minimal reorder delay directly. */
8335 if(h->sps.bitstream_restriction_flag
8336 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8337 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the delayed-output queue. */
8342 while(h->delayed_pic[pics]) pics++;
8343 h->delayed_pic[pics++] = cur;
8344 if(cur->reference == 0)
/* POC comparison is only valid within one IDR period (cross_idr check). */
8348 for(i=0; h->delayed_pic[i]; i++)
8349 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the queued picture with the smallest POC for output. */
8352 out = h->delayed_pic[0];
8354 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8355 if(h->delayed_pic[i]->poc < out->poc){
8356 out = h->delayed_pic[i];
8360 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8361 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8363 else if(prev && pics <= s->avctx->has_b_frames)
/* Heuristically grow the assumed reorder delay when output order breaks. */
8365 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8367 ((!cross_idr && prev && out->poc > prev->poc + 2)
8368 || cur->pict_type == B_TYPE)))
8371 s->avctx->has_b_frames++;
8374 else if(out_of_order)
/* Remove the emitted picture from the queue. */
8377 if(out_of_order || pics > s->avctx->has_b_frames){
8378 for(i=out_idx; h->delayed_pic[i]; i++)
8379 h->delayed_pic[i] = h->delayed_pic[i+1];
8385 *data_size = sizeof(AVFrame);
/* Release the previously output picture's lingering reference. */
8386 if(prev && prev != out && prev->reference == 1)
8387 prev->reference = 0;
8388 h->delayed_output_pic = out;
8392 *pict= *(AVFrame*)out;
8394 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8397 assert(pict->data[0] || !*data_size);
8398 ff_print_debug_info(s, pict);
8399 //printf("out %d\n", (int)pict->data[0]);
8402 /* Return the Picture timestamp as the frame number */
8403 /* we substract 1 because it is added on utils.c */
8404 avctx->frame_number = s->picture_number - 1;
8406 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with the availability of the current macroblock's
 * neighbours (same slice and inside the picture): indices 0..2 are the
 * top-left/top/top-right row, 3 is the left neighbour.
 */
8409 static inline void fill_mb_avail(H264Context *h){
8410 MpegEncContext * const s = &h->s;
8411 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Top-row neighbours (guarded by an elided s->mb_y check in this excerpt). */
8414 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8415 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8416 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8422 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8423 h->mb_avail[4]= 1; //FIXME move out
8424 h->mb_avail[5]= 0; //FIXME move out
/* --------------------------------------------------------------------
 * Self-test code: round-trips Exp-Golomb coding, the 4x4 (I)DCT, the
 * quantizer and the NAL escape/unescape layer with random data.
 * NOTE(review): the enclosing function definition (presumably main()
 * under a test-only build guard) and many interior lines are elided
 * from this excerpt — verify against the full source.
 * ------------------------------------------------------------------ */
8430 #define SIZE (COUNT*40)
8436 // int int_temp[10000];
8438 AVCodecContext avctx;
8440 dsputil_init(&dsp, &avctx);
/* --- Exp-Golomb: write COUNT unsigned codes, read them back. --- */
8442 init_put_bits(&pb, temp, SIZE);
8443 printf("testing unsigned exp golomb\n");
8444 for(i=0; i<COUNT; i++){
8446 set_ue_golomb(&pb, i);
8447 STOP_TIMER("set_ue_golomb");
8449 flush_put_bits(&pb);
8451 init_get_bits(&gb, temp, 8*SIZE);
8452 for(i=0; i<COUNT; i++){
8455 s= show_bits(&gb, 24);
8458 j= get_ue_golomb(&gb);
8460 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8463 STOP_TIMER("get_ue_golomb");
/* --- Same round-trip for signed codes, centered around zero. --- */
8467 init_put_bits(&pb, temp, SIZE);
8468 printf("testing signed exp golomb\n");
8469 for(i=0; i<COUNT; i++){
8471 set_se_golomb(&pb, i - COUNT/2);
8472 STOP_TIMER("set_se_golomb");
8474 flush_put_bits(&pb);
8476 init_get_bits(&gb, temp, 8*SIZE);
8477 for(i=0; i<COUNT; i++){
8480 s= show_bits(&gb, 24);
8483 j= get_se_golomb(&gb);
8484 if(j != i - COUNT/2){
8485 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8488 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip error measurement on random blocks. --- */
8491 printf("testing 4x4 (I)DCT\n");
8494 uint8_t src[16], ref[16];
8495 uint64_t error= 0, max_error=0;
8497 for(i=0; i<COUNT; i++){
8499 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8500 for(j=0; j<16; j++){
8501 ref[j]= random()%255;
8502 src[j]= random()%255;
8505 h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients to mimic the dequantization rounding. */
8508 for(j=0; j<16; j++){
8509 // printf("%d ", block[j]);
8510 block[j]= block[j]*4;
8511 if(j&1) block[j]= (block[j]*4 + 2)/5;
8512 if(j&4) block[j]= (block[j]*4 + 2)/5;
8516 s->dsp.h264_idct_add(ref, block, 4);
8517 /* for(j=0; j<16; j++){
8518 printf("%d ", ref[j]);
8522 for(j=0; j<16; j++){
8523 int diff= FFABS(src[j] - ref[j]);
8526 max_error= FFMAX(max_error, diff);
8529 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8531 printf("testing quantizer\n");
8532 for(qp=0; qp<52; qp++){
8534 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape random bitstreams and check the round-trip. --- */
8538 printf("Testing NAL layer\n");
8540 uint8_t bitstream[COUNT];
8541 uint8_t nal[COUNT*2];
8543 memset(&h, 0, sizeof(H264Context));
8545 for(i=0; i<COUNT; i++){
/* Non-zero fill, then sprinkle zeros to force emulation-prevention bytes. */
8553 for(j=0; j<COUNT; j++){
8554 bitstream[j]= (random() % 255) + 1;
8557 for(j=0; j<zeros; j++){
8558 int pos= random() % COUNT;
8559 while(bitstream[pos] == 0){
8568 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8570 printf("encoding failed\n");
8574 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8578 if(out_length != COUNT){
8579 printf("incorrect length %d %d\n", out_length, COUNT);
8583 if(consumed != nal_length){
8584 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8588 if(memcmp(bitstream, out, COUNT)){
8589 printf("missmatch\n");
8594 printf("Testing RBSP\n");
/**
 * AVCodec close callback: releases the decoder's private allocations.
 * @return presumably 0 (return line elided from this excerpt)
 */
8602 static int decode_end(AVCodecContext *avctx)
8604 H264Context *h = avctx->priv_data;
8605 MpegEncContext *s = &h->s;
8607 av_freep(&h->rbsp_buffer);
8608 free_tables(h); //FIXME cleanup init stuff perhaps
8611 // memset(h, 0, sizeof(H264Context));
/* Codec registration entry for the H.264 decoder; most initializer fields
 * (name, type, id, callbacks) are on lines elided from this excerpt. */
8617 AVCodec h264_decoder = {
8621 sizeof(H264Context),
/* DR1: direct rendering; TRUNCATED: accepts partial frames; DELAY: output
 * may lag input due to B-frame reordering. */
8626 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8630 #ifdef CONFIG_H264_PARSER
8631 AVCodecParser h264_parser = {
8633 sizeof(H264Context),