2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
/* Poison identifiers inherited from MpegEncContext that must not be used
 * directly by the H.264 code (the information lives in mb_type instead). */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type
/* block indices of the luma and chroma DC coefficient blocks */
#define LUMA_DC_BLOCK_INDEX 25
#define CHROMA_DC_BLOCK_INDEX 26
/* number of bits fetched per step when reading the CAVLC VLC tables */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6
/* capacities of the parameter-set buffers and the MMCO operation buffer */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256
#define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
/* Shorthand accessors for the MBAFF / field-decoding state on the context. */
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
/* NOTE(review): this definition belongs to the (not visible here) #else
 * branch, where interlaced support is compiled out and no mb is ever
 * interlaced — confirm against the full file. */
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
/* Fields mirror the syntax elements of the H.264 SPS NAL unit; the trailing
 * doxygen comments give the exact names used in the specification. */
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
/* frame cropping rectangle */
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
/* VUI (Video Usability Information) */
102 int vui_parameters_present_flag;
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
/* custom quantisation scaling matrices (4x4 and 8x8); see also PPS */
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16];
113 uint8_t scaling_matrix8[2][64];
117 * Picture parameter set
/* Fields mirror the syntax elements of the H.264 PPS NAL unit. */
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
/* scaling matrices; fall back to the SPS ones when not present here */
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
/* NOTE(review): the enumerator list and closing brace are not visible in
 * this excerpt. */
153 * Memory management control operation.
164 typedef struct H264Context{
/* Per-instance H.264 codec state; h->s (declaration not visible in this
 * excerpt) is the embedded generic MpegEncContext. */
168 uint8_t *rbsp_buffer;
169 unsigned int rbsp_buffer_size;
/* AVC (avcC/mp4-style) bitstream parsing state */
172 * Used to parse AVC variant of h264
174 int is_avc; ///< this flag is != 0 if codec is avc1
175 int got_avcC; ///< flag used to parse avcC data only once
176 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
/* intra prediction state and prediction function pointers */
184 int chroma_pred_mode;
185 int intra16x16_pred_mode;
190 int8_t intra4x4_pred_mode_cache[5*8];
191 int8_t (*intra4x4_pred_mode)[8];
192 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
193 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
194 void (*pred8x8 [4+3])(uint8_t *src, int stride);
195 void (*pred16x16[4+3])(uint8_t *src, int stride);
/* bitmasks of neighbouring samples usable for intra prediction */
196 unsigned int topleft_samples_available;
197 unsigned int top_samples_available;
198 unsigned int topright_samples_available;
199 unsigned int left_samples_available;
200 uint8_t (*top_borders[2])[16+2*8];
201 uint8_t left_border[2*(17+2*9)];
204 * non zero coeff count cache.
205 * is 64 if not available.
207 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
208 uint8_t (*non_zero_count)[16];
211 * Motion vector cache.
213 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
214 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
/* sentinel values stored in ref_cache */
215 #define LIST_NOT_USED -1 //FIXME rename?
216 #define PART_NOT_AVAILABLE -2
219 * is 1 if the specific list MV&references are set to 0,0,-2.
221 int mv_cache_clean[2];
224 * number of neighbors (top and/or left) that used 8x8 dct
226 int neighbor_transform_size;
229 * block_offset[ 0..23] for frame macroblocks
230 * block_offset[24..47] for field macroblocks
232 int block_offset[2*(16+8)];
234 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
236 int b_stride; //FIXME use s->b4_stride
239 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
248 int unknown_svq3_flag;
249 int next_slice_index;
/* parameter sets, indexed by their id, plus the currently active ones */
251 SPS sps_buffer[MAX_SPS_COUNT];
252 SPS sps; ///< current sps
254 PPS pps_buffer[MAX_PPS_COUNT];
258 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
/* per-qp dequantisation tables derived from the scaling matrices */
260 uint32_t dequant4_buffer[6][52][16];
261 uint32_t dequant8_buffer[2][52][64];
262 uint32_t (*dequant4_coeff[6])[16];
263 uint32_t (*dequant8_coeff[2])[64];
264 int dequant_coeff_pps; ///< reinit tables when pps changes
267 uint8_t *slice_table_base;
268 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
270 int slice_type_fixed;
272 //interlacing specific flags
274 int mb_field_decoding_flag;
275 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
277 unsigned int sub_mb_type[4];
/* picture order count (POC) derivation state */
282 int delta_poc_bottom;
285 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
286 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
287 int frame_num_offset; ///< for POC type 2
288 int prev_frame_num_offset; ///< for POC type 2
289 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
292 * frame_num for frames or 2*frame_num for field pics.
297 * max_frame_num or 2*max_frame_num for field pics.
301 //Weighted pred stuff
303 int use_weight_chroma;
304 int luma_log2_weight_denom;
305 int chroma_log2_weight_denom;
306 int luma_weight[2][48];
307 int luma_offset[2][48];
308 int chroma_weight[2][48][2];
309 int chroma_offset[2][48][2];
310 int implicit_weight[48][48];
/* deblocking filter parameters from the slice header */
313 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
314 int slice_alpha_c0_offset;
315 int slice_beta_offset;
317 int redundant_pic_count;
/* B-frame direct mode scaling tables (frame and mbaff-field variants) */
319 int direct_spatial_mv_pred;
320 int dist_scale_factor[16];
321 int dist_scale_factor_field[32];
322 int map_col_to_list0[2][16];
323 int map_col_to_list0_field[2][32];
326 * num_ref_idx_l0/1_active_minus1 + 1
328 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
/* reference picture lists and the delayed-output (reorder) buffer */
329 Picture *short_ref[32];
330 Picture *long_ref[32];
331 Picture default_ref_list[2][32];
332 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
333 Picture *delayed_pic[18]; //FIXME size?
334 Picture *delayed_output_pic;
337 * memory management control operations buffer.
339 MMCO mmco[MAX_MMCO_COUNT];
342 int long_ref_count; ///< number of actual long term references
343 int short_ref_count; ///< number of actual short term references
/* bitstream readers; presumably separate intra/inter pointers support
 * data partitioning — TODO confirm against the full file */
346 GetBitContext intra_gb;
347 GetBitContext inter_gb;
348 GetBitContext *intra_gb_ptr;
349 GetBitContext *inter_gb_ptr;
351 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
352 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb
/* CABAC decoding state */
358 uint8_t cabac_state[460];
361 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
366 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
367 uint8_t *chroma_pred_mode_table;
368 int last_qscale_diff;
369 int16_t (*mvd_table[2])[2];
370 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
371 uint8_t *direct_table;
372 uint8_t direct_cache[5*8];
/* coefficient scan orders; the *_q0 pointers presumably select the
 * transform-bypass variants — TODO confirm */
374 uint8_t zigzag_scan[16];
375 uint8_t zigzag_scan8x8[64];
376 uint8_t zigzag_scan8x8_cavlc[64];
377 uint8_t field_scan[16];
378 uint8_t field_scan8x8[64];
379 uint8_t field_scan8x8_cavlc[64];
380 const uint8_t *zigzag_scan_q0;
381 const uint8_t *zigzag_scan8x8_q0;
382 const uint8_t *zigzag_scan8x8_cavlc_q0;
383 const uint8_t *field_scan_q0;
384 const uint8_t *field_scan8x8_q0;
385 const uint8_t *field_scan8x8_cavlc_q0;
390 static VLC coeff_token_vlc[4];
391 static VLC chroma_dc_coeff_token_vlc;
393 static VLC total_zeros_vlc[15];
394 static VLC chroma_dc_total_zeros_vlc[3];
396 static VLC run_vlc[6];
399 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
400 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
401 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
402 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
404 static av_always_inline uint32_t pack16to32(int a, int b){
405 #ifdef WORDS_BIGENDIAN
406 return (b&0xFFFF) + (a<<16);
408 return (a&0xFFFF) + (b<<16);
/* ff_rem6[q] == q % 6 for the H.264 qscale range 0..51; a table lookup
 * avoids a division in the dequantisation code. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* ff_div6[q] == q / 6 for the H.264 qscale range 0..51; companion table
 * to ff_rem6 above. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
423 * @param h height of the rectangle, should be a constant
424 * @param w width of the rectangle, should be a constant
425 * @param size the size of val (1 or 4), should be a constant
/* Fills a w x h rectangle of 1- or 4-byte cells with val, using the widest
 * stores the width allows. NOTE(review): the if/else skeleton selecting the
 * w==2/4/8/16 cases is not visible in this excerpt. */
427 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
428 uint8_t *p= (uint8_t*)vp;
429 assert(size==1 || size==4);
435 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
436 assert((stride&(w-1))==0);
/* narrow case: one 16-bit store per row */
438 const uint16_t v= size==4 ? val : val*0x0101;
439 *(uint16_t*)(p + 0*stride)= v;
441 *(uint16_t*)(p + 1*stride)= v;
443 *(uint16_t*)(p + 2*stride)=
444 *(uint16_t*)(p + 3*stride)= v;
/* one 32-bit store per row */
446 const uint32_t v= size==4 ? val : val*0x01010101;
447 *(uint32_t*)(p + 0*stride)= v;
449 *(uint32_t*)(p + 1*stride)= v;
451 *(uint32_t*)(p + 2*stride)=
452 *(uint32_t*)(p + 3*stride)= v;
454 //gcc can't optimize 64bit math on x86_32
455 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
456 const uint64_t v= val*0x0100000001ULL;
457 *(uint64_t*)(p + 0*stride)= v;
459 *(uint64_t*)(p + 1*stride)= v;
461 *(uint64_t*)(p + 2*stride)=
462 *(uint64_t*)(p + 3*stride)= v;
/* two 64-bit stores per row */
464 const uint64_t v= val*0x0100000001ULL;
465 *(uint64_t*)(p + 0+0*stride)=
466 *(uint64_t*)(p + 8+0*stride)=
467 *(uint64_t*)(p + 0+1*stride)=
468 *(uint64_t*)(p + 8+1*stride)= v;
470 *(uint64_t*)(p + 0+2*stride)=
471 *(uint64_t*)(p + 8+2*stride)=
472 *(uint64_t*)(p + 0+3*stride)=
473 *(uint64_t*)(p + 8+3*stride)= v;
/* 32-bit fallback paths for hosts without fast 64-bit stores */
475 *(uint32_t*)(p + 0+0*stride)=
476 *(uint32_t*)(p + 4+0*stride)= val;
478 *(uint32_t*)(p + 0+1*stride)=
479 *(uint32_t*)(p + 4+1*stride)= val;
481 *(uint32_t*)(p + 0+2*stride)=
482 *(uint32_t*)(p + 4+2*stride)=
483 *(uint32_t*)(p + 0+3*stride)=
484 *(uint32_t*)(p + 4+3*stride)= val;
486 *(uint32_t*)(p + 0+0*stride)=
487 *(uint32_t*)(p + 4+0*stride)=
488 *(uint32_t*)(p + 8+0*stride)=
489 *(uint32_t*)(p +12+0*stride)=
490 *(uint32_t*)(p + 0+1*stride)=
491 *(uint32_t*)(p + 4+1*stride)=
492 *(uint32_t*)(p + 8+1*stride)=
493 *(uint32_t*)(p +12+1*stride)= val;
495 *(uint32_t*)(p + 0+2*stride)=
496 *(uint32_t*)(p + 4+2*stride)=
497 *(uint32_t*)(p + 8+2*stride)=
498 *(uint32_t*)(p +12+2*stride)=
499 *(uint32_t*)(p + 0+3*stride)=
500 *(uint32_t*)(p + 4+3*stride)=
501 *(uint32_t*)(p + 8+3*stride)=
502 *(uint32_t*)(p +12+3*stride)= val;
509 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
/* Gathers neighbouring-macroblock state (intra modes, non-zero counts, mvs,
 * reference indices, mvd, direct flags) into the per-mb caches; for_deblock
 * selects the reduced variant used by the loop filter. NOTE(review): many
 * lines of this function are missing from this excerpt, so conditions and
 * braces below do not all pair up. */
510 MpegEncContext * const s = &h->s;
511 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
512 int topleft_xy, top_xy, topright_xy, left_xy[2];
513 int topleft_type, top_type, topright_type, left_type[2];
517 //FIXME deblocking could skip the intra and nnz parts.
518 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
521 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive) neighbour addresses */
523 top_xy = mb_xy - s->mb_stride;
524 topleft_xy = top_xy - 1;
525 topright_xy= top_xy + 1;
526 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbour derivation works on vertically paired macroblocks */
536 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
537 const int top_pair_xy = pair_xy - s->mb_stride;
538 const int topleft_pair_xy = top_pair_xy - 1;
539 const int topright_pair_xy = top_pair_xy + 1;
540 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
541 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
542 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
543 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
544 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
545 const int bottom = (s->mb_y & 1);
546 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
548 ? !curr_mb_frame_flag // bottom macroblock
549 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
551 top_xy -= s->mb_stride;
554 ? !curr_mb_frame_flag // bottom macroblock
555 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
557 topleft_xy -= s->mb_stride;
560 ? !curr_mb_frame_flag // bottom macroblock
561 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
563 topright_xy -= s->mb_stride;
565 if (left_mb_frame_flag != curr_mb_frame_flag) {
566 left_xy[1] = left_xy[0] = pair_xy - 1;
567 if (curr_mb_frame_flag) {
588 left_xy[1] += s->mb_stride;
601 h->top_mb_xy = top_xy;
602 h->left_mb_xy[0] = left_xy[0];
603 h->left_mb_xy[1] = left_xy[1];
/* for_deblock path: out-of-picture neighbours (slice_table >= 255) map to
 * type 0 */
607 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
608 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
609 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
611 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
613 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
615 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
616 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
617 if(USES_LIST(mb_type,list)){
618 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
619 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
620 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
621 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
627 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
628 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
630 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
631 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
633 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
634 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* normal path: neighbour types, 0 when outside the current slice */
639 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
640 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
641 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
642 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
643 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra: compute sample-availability bitmasks for prediction */
646 if(IS_INTRA(mb_type)){
647 h->topleft_samples_available=
648 h->top_samples_available=
649 h->left_samples_available= 0xFFFF;
650 h->topright_samples_available= 0xEEEA;
652 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
653 h->topleft_samples_available= 0xB3FF;
654 h->top_samples_available= 0x33FF;
655 h->topright_samples_available= 0x26EA;
658 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
659 h->topleft_samples_available&= 0xDF5F;
660 h->left_samples_available&= 0x5F5F;
664 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
665 h->topleft_samples_available&= 0x7FFF;
667 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
668 h->topright_samples_available&= 0xFBFF;
/* cache the neighbouring 4x4 intra prediction modes */
670 if(IS_INTRA4x4(mb_type)){
671 if(IS_INTRA4x4(top_type)){
672 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
673 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
674 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
675 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
678 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
683 h->intra4x4_pred_mode_cache[4+8*0]=
684 h->intra4x4_pred_mode_cache[5+8*0]=
685 h->intra4x4_pred_mode_cache[6+8*0]=
686 h->intra4x4_pred_mode_cache[7+8*0]= pred;
689 if(IS_INTRA4x4(left_type[i])){
690 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
691 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
694 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
699 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
700 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* cache the non-zero coefficient counts of the top/left neighbours */
715 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
717 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
718 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
719 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
720 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
722 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
723 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
725 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
726 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
729 h->non_zero_count_cache[4+8*0]=
730 h->non_zero_count_cache[5+8*0]=
731 h->non_zero_count_cache[6+8*0]=
732 h->non_zero_count_cache[7+8*0]=
734 h->non_zero_count_cache[1+8*0]=
735 h->non_zero_count_cache[2+8*0]=
737 h->non_zero_count_cache[1+8*3]=
738 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
742 for (i=0; i<2; i++) {
744 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
745 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
746 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
747 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
749 h->non_zero_count_cache[3+8*1 + 2*8*i]=
750 h->non_zero_count_cache[3+8*2 + 2*8*i]=
751 h->non_zero_count_cache[0+8*1 + 8*i]=
752 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* coded-block-pattern of the neighbours (CABAC context derivation) */
759 h->top_cbp = h->cbp_table[top_xy];
760 } else if(IS_INTRA(mb_type)) {
767 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
768 } else if(IS_INTRA(mb_type)) {
774 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
777 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter: load neighbour motion vectors and reference indices */
782 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
784 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
785 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
786 /*if(!h->mv_cache_clean[list]){
787 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
788 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
789 h->mv_cache_clean[list]= 1;
793 h->mv_cache_clean[list]= 0;
795 if(USES_LIST(top_type, list)){
796 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
797 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
798 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
799 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
800 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
802 h->ref_cache[list][scan8[0] + 0 - 1*8]=
803 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
804 h->ref_cache[list][scan8[0] + 2 - 1*8]=
805 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
807 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
808 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
809 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
811 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
814 //FIXME unify cleanup or sth
815 if(USES_LIST(left_type[0], list)){
816 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
817 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
818 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
819 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
820 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
821 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
823 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
824 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
825 h->ref_cache[list][scan8[0] - 1 + 0*8]=
826 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
829 if(USES_LIST(left_type[1], list)){
830 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
831 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
832 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
833 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
834 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
835 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
837 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
838 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
839 h->ref_cache[list][scan8[0] - 1 + 2*8]=
840 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
841 assert((!left_type[0]) == (!left_type[1]));
844 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
847 if(USES_LIST(topleft_type, list)){
848 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
849 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
850 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
851 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
853 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
854 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
857 if(USES_LIST(topright_type, list)){
858 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
859 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
860 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
861 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
863 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
864 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
867 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* mark the extra cache cells (outside the 4x4 mb area) unavailable */
870 h->ref_cache[list][scan8[5 ]+1] =
871 h->ref_cache[list][scan8[7 ]+1] =
872 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
873 h->ref_cache[list][scan8[4 ]] =
874 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
875 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
876 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
877 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
878 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
879 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
882 /* XXX beurk, Load mvd */
883 if(USES_LIST(top_type, list)){
884 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
885 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
886 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
890 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
891 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
895 if(USES_LIST(left_type[0], list)){
896 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
897 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
898 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
900 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
901 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
903 if(USES_LIST(left_type[1], list)){
904 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
905 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
906 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
908 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
909 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
911 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
912 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
913 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
914 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
915 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the direct-mode flags of the neighbours */
917 if(h->slice_type == B_TYPE){
918 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
920 if(IS_DIRECT(top_type)){
921 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
922 }else if(IS_8X8(top_type)){
923 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
924 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
925 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
927 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
930 if(IS_DIRECT(left_type[0]))
931 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
932 else if(IS_8X8(left_type[0]))
933 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
935 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
937 if(IS_DIRECT(left_type[1]))
938 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
939 else if(IS_8X8(left_type[1]))
940 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
942 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: remap cached neighbour mv/ref between frame and field units */
948 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
949 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
950 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
954 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
955 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
957 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame neighbour seen from a field mb: double refs, halve vertical mvs */
959 #define MAP_F2F(idx, mb_type)\
960 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
961 h->ref_cache[list][idx] <<= 1;\
962 h->mv_cache[list][idx][1] /= 2;\
963 h->mvd_cache[list][idx][1] /= 2;\
/* field neighbour seen from a frame mb: inverse mapping */
968 #define MAP_F2F(idx, mb_type)\
969 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
970 h->ref_cache[list][idx] >>= 1;\
971 h->mv_cache[list][idx][1] <<= 1;\
972 h->mvd_cache[list][idx][1] <<= 1;\
/* count of top/left neighbours that used the 8x8 transform */
982 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
985 static inline void write_back_intra_pred_mode(H264Context *h){
/* Copies the current mb's 4x4 intra prediction modes from the cache back
 * into the frame-wide intra4x4_pred_mode array (bottom row + right column,
 * which is all later macroblocks can reference). */
986 MpegEncContext * const s = &h->s;
987 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
989 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
990 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
991 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
992 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
993 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
994 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
995 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
999 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1001 static inline int check_intra4x4_pred_mode(H264Context *h){
1002 MpegEncContext * const s = &h->s;
/* Per-mode remap tables: index is the requested 4x4 mode; the value is the
 * substitute when the top/left samples are missing, -1 flags an error. */
1003 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1004 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* top row of 4x4 blocks needs samples from the mb above */
1007 if(!(h->top_samples_available&0x8000)){
1009 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1011 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1014 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* left column of 4x4 blocks needs samples from the mb to the left */
1019 if(!(h->left_samples_available&0x8000)){
1021 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1023 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1026 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1032 } //FIXME cleanup like next
1035 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode, but for the 16x16/chroma (8x8)
 * prediction mode passed in as `mode`. */
1037 static inline int check_intra_pred_mode(H264Context *h, int mode){
1038 MpegEncContext * const s = &h->s;
/* substitutes for missing top/left samples; -1 flags an illegal request */
1039 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1040 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1042 if(mode < 0 || mode > 6) {
1043 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1047 if(!(h->top_samples_available&0x8000)){
1050 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1055 if(!(h->left_samples_available&0x8000)){
1058 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1067 * gets the predicted intra4x4 prediction mode.
/* Predictor = min(left, top) of the cached neighbour modes; if either
 * neighbour is unavailable (negative), fall back to DC_PRED.
 * @param n 4x4 block index inside the macroblock */
1069 static inline int pred_intra_mode(H264Context *h, int n){
1070 const int index8= scan8[n];
1071 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1072 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1073 const int min= FFMIN(left, top);
1075 tprintf("mode:%d %d min:%d\n", left ,top, min);
1077 if(min<0) return DC_PRED;
/* Stores the non-zero-coefficient counts neighbours will need back into the
 * per-MB array: luma right column + bottom row ([0..6]), then the two chroma
 * planes ([7..12]), then a 16-bit luma nnz bitmask for the deblocking filter. */
1081 static inline void write_back_non_zero_count(H264Context *h){
1082 MpegEncContext * const s = &h->s;
1083 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1085 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1086 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1087 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1088 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1089 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1090 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1091 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma U plane */
1093 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1094 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1095 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* chroma V plane */
1097 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1098 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1099 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1102 // store all luma nnzs, for deblocking
/* one bit per 4x4 luma block: set iff that block has any nonzero coeff */
1105 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1106 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1111 * gets the predicted number of non zero coefficients.
1112 * @param n block index
/* Predictor is derived from the left and top neighbour counts; values of 64
 * mark "unavailable" neighbours, handled by the i<64 averaging below.
 * NOTE(review): the line combining left+top into i is missing from this excerpt. */
1114 static inline int pred_non_zero_count(H264Context *h, int n){
1115 const int index8= scan8[n];
1116 const int left= h->non_zero_count_cache[index8 - 1];
1117 const int top = h->non_zero_count_cache[index8 - 8];
/* average (rounded up) when both neighbours are available */
1120 if(i<64) i= (i+1)>>1;
1122 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the diagonal (top-right, or top-left as
 * fallback) neighbour and points *C at its motion vector.  The long MBAFF
 * branch rescales vectors between field and frame coordinates, since no
 * single cache layout satisfies both.  NOTE(review): several interior lines
 * (declarations, else-branches) are missing from this excerpt. */
1127 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1128 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1130 /* there is no consistent mapping of mvs to neighboring locations that will
1131 * make mbaff happy, so we can't move all this logic to fill_caches */
1133 MpegEncContext *s = &h->s;
1134 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scan8[0]-2 is a scratch slot used to return a synthesized MV */
1136 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1137 *C = h->mv_cache[list][scan8[0]-2];
1140 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1141 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1142 if(IS_INTERLACED(mb_types[topright_xy])){
/* Fetch the colocated MV and rescale: MV_OP scales the y component,
 * REF_OP scales the reference index between field/frame units. */
1143 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1144 const int x4 = X4, y4 = Y4;\
1145 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1146 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1147 return LIST_NOT_USED;\
1148 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1149 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1150 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1151 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1153 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1156 if(topright_ref == PART_NOT_AVAILABLE
1157 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1158 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1160 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1161 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1164 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1165 && i >= scan8[0]+8){
1166 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1167 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF path: use top-right if available, else top-left */
1173 if(topright_ref != PART_NOT_AVAILABLE){
1174 *C= h->mv_cache[list][ i - 8 + part_width ];
1175 return topright_ref;
1177 tprintf("topright MV not available\n");
1179 *C= h->mv_cache[list][ i - 8 - 1 ];
1180 return h->ref_cache[list][ i - 8 - 1 ];
1185 * gets the predicted MV.
1186 * @param n the block index
1187 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1188 * @param mx the x component of the predicted motion vector
1189 * @param my the y component of the predicted motion vector
/* Median MV prediction: A=left, B=top, C=diagonal neighbour.  If exactly one
 * neighbour shares the target ref, that neighbour's MV is used directly;
 * otherwise the component-wise median of A, B, C.
 * NOTE(review): some single-match branches are missing from this excerpt. */
1191 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1192 const int index8= scan8[n];
1193 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1194 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1195 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1196 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1198 int diagonal_ref, match_count;
1200 assert(part_width==1 || part_width==2 || part_width==4);
1210 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1211 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1212 tprintf("pred_motion match_count=%d\n", match_count);
1213 if(match_count > 1){ //most common
1214 *mx= mid_pred(A[0], B[0], C[0]);
1215 *my= mid_pred(A[1], B[1], C[1]);
1216 }else if(match_count==1){
1220 }else if(top_ref==ref){
/* no neighbour matches: if only the left is available use A, else median */
1228 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1232 *mx= mid_pred(A[0], B[0], C[0]);
1233 *my= mid_pred(A[1], B[1], C[1]);
1237 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1241 * gets the directionally predicted 16x8 MV.
1242 * @param n the block index
1243 * @param mx the x component of the predicted motion vector
1244 * @param my the y component of the predicted motion vector
/* 16x8 shortcut: top partition prefers the top neighbour's MV, bottom
 * partition prefers the left neighbour's; otherwise fall back to the
 * generic median predictor.  NOTE(review): some branch bodies are missing. */
1246 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1248 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1249 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1251 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1259 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1260 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1262 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1264 if(left_ref == ref){
/* fall back to median prediction */
1272 pred_motion(h, n, 4, list, ref, mx, my);
1276 * gets the directionally predicted 8x16 MV.
1277 * @param n the block index
1278 * @param mx the x component of the predicted motion vector
1279 * @param my the y component of the predicted motion vector
/* 8x16 shortcut: left partition prefers the left neighbour's MV, right
 * partition prefers the diagonal neighbour's; otherwise median prediction.
 * NOTE(review): some branch bodies are missing from this excerpt. */
1281 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1283 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1284 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1286 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1288 if(left_ref == ref){
1297 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1299 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1301 if(diagonal_ref == ref){
/* fall back to median prediction */
1309 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P-skip macroblocks: zero MV when either neighbour is
 * unavailable or is a zero-MV ref-0 block, otherwise the normal 16x16
 * median prediction with ref 0. */
1312 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1313 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1314 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1316 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1318 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1319 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1320 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1326 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factors (H.264 8.4.1.2.3):
 * for each list-0 ref, scale = clip((tb*tx + 32) >> 6, -1024, 1023) where
 * tb/td are clipped POC distances and tx approximates 16384/td.
 * Also mirrors the table into the field variant (two entries per frame ref). */
1331 static inline void direct_dist_scale_factor(H264Context * const h){
1332 const int poc = h->s.current_picture_ptr->poc;
1333 const int poc1 = h->ref_list[1][0].poc;
1335 for(i=0; i<h->ref_count[0]; i++){
1336 int poc0 = h->ref_list[0][i].poc;
1337 int td = clip(poc1 - poc0, -128, 127);
1338 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs: unit scale (256 == 1.0 in 8.8 fixed point) */
1339 h->dist_scale_factor[i] = 256;
1341 int tb = clip(poc - poc0, -128, 127);
1342 int tx = (16384 + (FFABS(td) >> 1)) / td;
1343 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1347 for(i=0; i<h->ref_count[0]; i++){
1348 h->dist_scale_factor_field[2*i] =
1349 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's ref counts/POCs (needed by future B frames),
 * then builds map_col_to_list0: for each reference of the colocated list-1
 * picture, the index of the list-0 reference with the same POC (0 if absent).
 * The *_field variant doubles each entry for field decoding. */
1353 static inline void direct_ref_list_init(H264Context * const h){
1354 MpegEncContext * const s = &h->s;
1355 Picture * const ref1 = &h->ref_list[1][0];
1356 Picture * const cur = s->current_picture_ptr;
1358 if(cur->pict_type == I_TYPE)
1359 cur->ref_count[0] = 0;
1360 if(cur->pict_type != B_TYPE)
1361 cur->ref_count[1] = 0;
1362 for(list=0; list<2; list++){
1363 cur->ref_count[list] = h->ref_count[list];
1364 for(j=0; j<h->ref_count[list]; j++)
1365 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the col->list0 map is only needed for temporal direct B prediction */
1367 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1369 for(list=0; list<2; list++){
1370 for(i=0; i<ref1->ref_count[list]; i++){
1371 const int poc = ref1->ref_poc[list][i];
1372 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1373 for(j=0; j<h->ref_count[list]; j++)
1374 if(h->ref_list[list][j].poc == poc){
1375 h->map_col_to_list0[list][i] = j;
1381 for(list=0; list<2; list++){
1382 for(i=0; i<ref1->ref_count[list]; i++){
1383 j = h->map_col_to_list0[list][i];
1384 h->map_col_to_list0_field[list][2*i] = 2*j;
1385 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Fills mv_cache/ref_cache for a B-slice direct-predicted macroblock
 * (H.264 8.4.1.2) and patches *mb_type to the partitioning actually used.
 * Two modes: spatial (refs/MVs from spatial neighbours, zeroed where the
 * colocated block is static) and temporal (colocated list-1 MVs scaled by
 * dist_scale_factor).  l1mv*/ /*l1ref* point into the colocated picture.
 * NOTE(review): many interior lines (else branches, loop headers,
 * declarations) are missing from this excerpt. */
1391 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1392 MpegEncContext * const s = &h->s;
1393 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1394 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1395 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1396 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1397 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1398 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1399 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1400 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1401 const int is_b8x8 = IS_8X8(*mb_type);
1402 unsigned int sub_mb_type;
1405 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose partition size from the colocated MB type */
1406 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1407 /* FIXME save sub mb types from previous frames (or derive from MVs)
1408 * so we know exactly what block size to use */
1409 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1410 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1411 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1412 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1413 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1415 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1416 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1419 *mb_type |= MB_TYPE_DIRECT2;
1421 *mb_type |= MB_TYPE_INTERLACED;
1423 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1425 if(h->direct_spatial_mv_pred){
1430 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1432 /* ref = min(neighbors) */
1433 for(list=0; list<2; list++){
1434 int refa = h->ref_cache[list][scan8[0] - 1];
1435 int refb = h->ref_cache[list][scan8[0] - 8];
1436 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1438 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1440 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1442 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour refs at all: direct MVs are zero with ref 0 */
1448 if(ref[0] < 0 && ref[1] < 0){
1449 ref[0] = ref[1] = 0;
1450 mv[0][0] = mv[0][1] =
1451 mv[1][0] = mv[1][1] = 0;
1453 for(list=0; list<2; list++){
1455 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1457 mv[list][0] = mv[list][1] = 0;
/* a missing ref in one list disables that list's prediction flags */
1462 *mb_type &= ~MB_TYPE_P0L1;
1463 sub_mb_type &= ~MB_TYPE_P0L1;
1464 }else if(ref[0] < 0){
1465 *mb_type &= ~MB_TYPE_P0L0;
1466 sub_mb_type &= ~MB_TYPE_P0L0;
1469 if(IS_16X16(*mb_type)){
1470 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1471 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* zero the MVs when the colocated block is (nearly) static */
1472 if(!IS_INTRA(mb_type_col)
1473 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1474 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1475 && (h->x264_build>33 || !h->x264_build)))){
1477 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1479 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1481 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1483 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1486 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* per-8x8 spatial direct (skips non-direct sub-blocks of B_8x8) */
1489 for(i8=0; i8<4; i8++){
1490 const int x8 = i8&1;
1491 const int y8 = i8>>1;
1493 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1495 h->sub_mb_type[i8] = sub_mb_type;
1497 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1498 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1499 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1500 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1503 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1504 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1505 && (h->x264_build>33 || !h->x264_build)))){
1506 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1507 if(IS_SUB_8X8(sub_mb_type)){
1508 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1509 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1511 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1513 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1516 for(i4=0; i4<4; i4++){
1517 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1518 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1520 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1522 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1528 }else{ /* direct temporal mv pred */
1529 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1530 const int *dist_scale_factor = h->dist_scale_factor;
1533 if(IS_INTERLACED(*mb_type)){
1534 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1535 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1536 dist_scale_factor = h->dist_scale_factor_field;
/* field/frame mismatch with the colocated MB: rescale coordinates */
1538 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1539 /* FIXME assumes direct_8x8_inference == 1 */
1540 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1541 int mb_types_col[2];
1544 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1545 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1546 | (*mb_type & MB_TYPE_INTERLACED);
1547 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1549 if(IS_INTERLACED(*mb_type)){
1550 /* frame to field scaling */
1551 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1552 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1554 l1ref0 -= 2*h->b8_stride;
1555 l1ref1 -= 2*h->b8_stride;
1556 l1mv0 -= 4*h->b_stride;
1557 l1mv1 -= 4*h->b_stride;
1561 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1562 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1564 *mb_type |= MB_TYPE_16x8;
1566 *mb_type |= MB_TYPE_8x8;
1568 /* field to frame scaling */
1569 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1570 * but in MBAFF, top and bottom POC are equal */
1571 int dy = (s->mb_y&1) ? 1 : 2;
1573 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1574 l1ref0 += dy*h->b8_stride;
1575 l1ref1 += dy*h->b8_stride;
1576 l1mv0 += 2*dy*h->b_stride;
1577 l1mv1 += 2*dy*h->b_stride;
1580 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1582 *mb_type |= MB_TYPE_16x16;
1584 *mb_type |= MB_TYPE_8x8;
1587 for(i8=0; i8<4; i8++){
1588 const int x8 = i8&1;
1589 const int y8 = i8>>1;
1591 const int16_t (*l1mv)[2]= l1mv0;
1593 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1595 h->sub_mb_type[i8] = sub_mb_type;
1597 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1598 if(IS_INTRA(mb_types_col[y8])){
1599 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1600 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1601 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1607 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1609 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1612 scale = dist_scale_factor[ref0];
1613 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1616 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1617 int my_col = (mv_col[1]<<y_shift)/2;
1618 int mx = (scale * mv_col[0] + 128) >> 8;
1619 int my = (scale * my_col + 128) >> 8;
1620 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1621 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1628 /* one-to-one mv scaling */
1630 if(IS_16X16(*mb_type)){
1631 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1632 if(IS_INTRA(mb_type_col)){
1633 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1634 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1635 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1637 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1638 : map_col_to_list0[1][l1ref1[0]];
1639 const int scale = dist_scale_factor[ref0];
1640 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* mv_l1 = mv_l0 - mv_col (H.264 temporal direct derivation) */
1642 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1643 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1644 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1645 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1646 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1649 for(i8=0; i8<4; i8++){
1650 const int x8 = i8&1;
1651 const int y8 = i8>>1;
1653 const int16_t (*l1mv)[2]= l1mv0;
1655 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1657 h->sub_mb_type[i8] = sub_mb_type;
1658 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1659 if(IS_INTRA(mb_type_col)){
1660 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1661 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1662 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 ref0 = l1ref0[x8 + y8*h->b8_stride];
1668 ref0 = map_col_to_list0[0][ref0];
1670 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1673 scale = dist_scale_factor[ref0];
1675 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1676 if(IS_SUB_8X8(sub_mb_type)){
1677 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1678 int mx = (scale * mv_col[0] + 128) >> 8;
1679 int my = (scale * mv_col[1] + 128) >> 8;
1680 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1681 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1683 for(i4=0; i4<4; i4++){
1684 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1685 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1686 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1687 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1688 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1689 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Writes the per-MB motion caches (MVs, ref indices, CABAC mvd residuals,
 * B-direct flags) back into the frame-wide tables of current_picture.
 * NOTE(review): loop headers and some branches are missing from this excerpt. */
1696 static inline void write_back_motion(H264Context *h, int mb_type){
1697 MpegEncContext * const s = &h->s;
1698 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1699 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list0 refs must still be written so the deblocker sees LIST_NOT_USED */
1702 if(!USES_LIST(mb_type, 0))
1703 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1705 for(list=0; list<2; list++){
1707 if(!USES_LIST(mb_type, list))
/* copy a 4-MV row (two 64-bit stores) per iteration */
1711 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1712 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1714 if( h->pps.cabac ) {
1715 if(IS_SKIP(mb_type))
1716 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1719 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1720 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one ref index per 8x8 block */
1725 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1726 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1727 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1728 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1729 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1733 if(h->slice_type == B_TYPE && h->pps.cabac){
1734 if(IS_8X8(mb_type)){
1735 uint8_t *direct_table = &h->direct_table[b8_xy];
1736 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1737 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1738 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1744 * Decodes a network abstraction layer unit.
1745 * @param consumed is the number of bytes used as input
1746 * @param length is the length of the array
1747 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1748 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header, then strips H.264 emulation-prevention bytes
 * (00 00 03 -> 00 00); returns src+1 unchanged if no escape is present.
 * NOTE(review): loop bodies and returns are missing from this excerpt. */
1750 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1754 // src[0]&0x80; //forbidden bit
1755 h->nal_ref_idc= src[0]>>5;
1756 h->nal_unit_type= src[0]&0x1F;
1760 for(i=0; i<length; i++)
1761 printf("%2X ", src[i]);
/* scan for the first 00 00 {00,01,02,03} sequence */
1763 for(i=0; i+1<length; i+=2){
1764 if(src[i]) continue;
1765 if(i>0 && src[i-1]==0) i--;
1766 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1768 /* startcode, so we must be past the end */
1775 if(i>=length-1){ //no escaped 0
1776 *dst_length= length;
1777 *consumed= length+1; //+1 for the header
1781 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1782 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1812 * identifies the exact end of the bitstream
1813 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit in the final byte.
 * NOTE(review): the function body is almost entirely missing from this excerpt. */
1815 static int decode_rbsp_trailing(uint8_t *src){
1819 tprintf("rbsp trailing %X\n", v);
1829 * idct tranforms the 16 dc values and dequantize them.
1830 * @param qp quantization parameter
/* Separable 4x4 Hadamard inverse transform over the 16 luma DC coefficients,
 * stored at stride-16 positions inside the coefficient block, followed by
 * dequantization by qmul with 8-bit rounding.
 * NOTE(review): the temp[] stores of the first pass are missing here. */
1832 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1835 int temp[16]; //FIXME check if this is a good idea
1836 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1837 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1839 //memset(block, 64, 2*256);
/* horizontal butterfly pass */
1842 const int offset= y_offset[i];
1843 const int z0= block[offset+stride*0] + block[offset+stride*4];
1844 const int z1= block[offset+stride*0] - block[offset+stride*4];
1845 const int z2= block[offset+stride*1] - block[offset+stride*5];
1846 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass + dequant */
1855 const int offset= x_offset[i];
1856 const int z0= temp[4*0+i] + temp[4*2+i];
1857 const int z1= temp[4*0+i] - temp[4*2+i];
1858 const int z2= temp[4*1+i] - temp[4*3+i];
1859 const int z3= temp[4*1+i] + temp[4*3+i];
1861 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1862 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1863 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1864 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1870 * dct tranforms the 16 dc values.
1871 * @param qp quantization parameter ??? FIXME
/* Encoder-side forward Hadamard over the 16 luma DC values (same layout as
 * the idct variant above); results are halved (>>1) in the second pass.
 * NOTE(review): the temp[] stores of the first pass are missing here. */
1873 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1874 // const int qmul= dequant_coeff[qp][0];
1876 int temp[16]; //FIXME check if this is a good idea
1877 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1878 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterfly pass */
1881 const int offset= y_offset[i];
1882 const int z0= block[offset+stride*0] + block[offset+stride*4];
1883 const int z1= block[offset+stride*0] - block[offset+stride*4];
1884 const int z2= block[offset+stride*1] - block[offset+stride*5];
1885 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterfly pass */
1894 const int offset= x_offset[i];
1895 const int z0= temp[4*0+i] + temp[4*2+i];
1896 const int z1= temp[4*0+i] - temp[4*2+i];
1897 const int z2= temp[4*1+i] - temp[4*3+i];
1898 const int z3= temp[4*1+i] + temp[4*3+i];
1900 block[stride*0 +offset]= (z0 + z3)>>1;
1901 block[stride*2 +offset]= (z1 + z2)>>1;
1902 block[stride*8 +offset]= (z1 - z2)>>1;
1903 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 Hadamard inverse transform + dequantization of the chroma DC
 * coefficients (stored at stride-32/xStride-16 positions).
 * NOTE(review): the variable declarations and the a/e intermediate
 * computations are missing from this excerpt. */
1911 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1912 const int stride= 16*2;
1913 const int xStride= 16;
1916 a= block[stride*0 + xStride*0];
1917 b= block[stride*0 + xStride*1];
1918 c= block[stride*1 + xStride*0];
1919 d= block[stride*1 + xStride*1];
/* butterfly outputs scaled by qmul, >>7 */
1926 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1927 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1928 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1929 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Encoder-side forward 2x2 Hadamard over the chroma DC values; no scaling.
 * NOTE(review): declarations and the a/e intermediates are missing here. */
1933 static void chroma_dc_dct_c(DCTELEM *block){
1934 const int stride= 16*2;
1935 const int xStride= 16;
1938 a= block[stride*0 + xStride*0];
1939 b= block[stride*0 + xStride*1];
1940 c= block[stride*1 + xStride*0];
1941 d= block[stride*1 + xStride*1];
1948 block[stride*0 + xStride*0]= (a+c);
1949 block[stride*0 + xStride*1]= (e+b);
1950 block[stride*1 + xStride*0]= (a-c);
1951 block[stride*1 + xStride*1]= (e-b);
1956 * gets the chroma qp.
/* Maps a luma qscale (plus the PPS chroma offset, clipped to 0..51) through
 * the chroma_qp[] lookup table. */
1958 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1960 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
1963 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1964 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Encoder-side quantizer: dead-zone quantization of a coefficient block in
 * scantable order, with a separately-scaled DC path when seperate_dc is set
 * (luma DC uses QUANT_SHIFT-2, chroma DC QUANT_SHIFT+1).  Returns the index
 * of the last non-zero coefficient.  NOTE(review): several interior lines
 * (loop headers, negative-level branches) are missing from this excerpt. */
1965 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1967 const int * const quant_table= quant_coeff[qscale];
/* dead zone: intra gets the larger 1/3 bias, inter 1/6 */
1968 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1969 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1970 const unsigned int threshold2= (threshold1<<1);
1976 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1977 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1978 const unsigned int dc_threshold2= (dc_threshold1<<1);
1980 int level= block[0]*quant_coeff[qscale+18][0];
/* unsigned trick tests |level| > threshold in one compare */
1981 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1983 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1986 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1989 // last_non_zero = i;
1994 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1995 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1996 const unsigned int dc_threshold2= (dc_threshold1<<1);
1998 int level= block[0]*quant_table[0];
1999 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2001 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2004 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2007 // last_non_zero = i;
/* AC coefficients in scan order */
2020 const int j= scantable[i];
2021 int level= block[j]*quant_table[j];
2023 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2024 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2025 if(((unsigned)(level+threshold1))>threshold2){
2027 level= (bias + level)>>QUANT_SHIFT;
2030 level= (bias - level)>>QUANT_SHIFT;
2039 return last_non_zero;
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    /* Vertical intra prediction: replicate the 4 pixels directly above
     * the block into each of its 4 rows (one 32-bit store per row). */
    const uint32_t above= ((uint32_t*)(src-stride))[0];
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= above;
}
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* Horizontal intra prediction: fill each row with the pixel to the
     * left of that row (0x01010101 broadcasts one byte across a word). */
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= src[-1+y*stride]*0x01010101;
}
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC intra prediction: rounded average of the 4 top and 4 left
     * neighbours, broadcast over the whole 4x4 block. */
    int i, sum= 4; /* +4 gives round-to-nearest for the >>3 below */
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[i-stride] + src[-1+i*stride];

    fill= (uint32_t)(sum>>3) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* Left-DC prediction: rounded average of the 4 left neighbours only
     * (used when the row above is unavailable). */
    const int sum= src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride];
    const uint32_t fill= (uint32_t)((sum+2)>>2) * 0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* Top-DC prediction: rounded average of the 4 pixels above only
     * (used when the column to the left is unavailable). */
    const int sum= src[-stride] + src[1-stride] + src[2-stride] + src[3-stride];
    const uint32_t fill= (uint32_t)((sum+2)>>2) * 0x01010101U;
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* Fixed-DC prediction: no neighbours available at all, so fill the
     * block with mid-grey (128). */
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= 128U*0x01010101U;
}
/* Loads the 4 top-right neighbour pixels into t4..t7. */
2093 #define LOAD_TOP_RIGHT_EDGE\
2094 const int t4= topright[0];\
2095 const int t5= topright[1];\
2096 const int t6= topright[2];\
2097 const int t7= topright[3];\
/* Loads the 4 left neighbour pixels into l0..l3. */
2099 #define LOAD_LEFT_EDGE\
2100 const int l0= src[-1+0*stride];\
2101 const int l1= src[-1+1*stride];\
2102 const int l2= src[-1+2*stride];\
2103 const int l3= src[-1+3*stride];\
/* Loads the 4 top neighbour pixels into t0..t3. */
2105 #define LOAD_TOP_EDGE\
2106 const int t0= src[ 0-1*stride];\
2107 const int t1= src[ 1-1*stride];\
2108 const int t2= src[ 2-1*stride];\
2109 const int t3= src[ 3-1*stride];\
/* Diagonal down-right intra prediction: 3-tap (1,2,1)/4 filtered diagonals
 * from the left column, top-left corner and top row.
 * NOTE(review): the LOAD_*_EDGE invocations and several of the 16 output
 * assignments are missing from this excerpt. */
2111 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2112 const int lt= src[-1-1*stride];
2116 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2118 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2121 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2125 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2128 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2130 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2131 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2134 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2139 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2141 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2144 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2148 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2151 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2153 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2154 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
2157 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2158 const int lt= src[-1-1*stride];
2161 const __attribute__((unused)) int unu= l3;
2164 src[1+2*stride]=(lt + t0 + 1)>>1;
2166 src[2+2*stride]=(t0 + t1 + 1)>>1;
2168 src[3+2*stride]=(t1 + t2 + 1)>>1;
2169 src[3+0*stride]=(t2 + t3 + 1)>>1;
2171 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2173 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2175 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2176 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2177 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2178 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2181 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2184 const __attribute__((unused)) int unu= t7;
2186 src[0+0*stride]=(t0 + t1 + 1)>>1;
2188 src[0+2*stride]=(t1 + t2 + 1)>>1;
2190 src[1+2*stride]=(t2 + t3 + 1)>>1;
2192 src[2+2*stride]=(t3 + t4+ 1)>>1;
2193 src[3+2*stride]=(t4 + t5+ 1)>>1;
2194 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2196 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2198 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2200 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2201 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
2204 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2207 src[0+0*stride]=(l0 + l1 + 1)>>1;
2208 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2210 src[0+1*stride]=(l1 + l2 + 1)>>1;
2212 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2214 src[0+2*stride]=(l2 + l3 + 1)>>1;
2216 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
2225 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2226 const int lt= src[-1-1*stride];
2229 const __attribute__((unused)) int unu= t3;
2232 src[2+1*stride]=(lt + l0 + 1)>>1;
2234 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2235 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2236 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2238 src[2+2*stride]=(l0 + l1 + 1)>>1;
2240 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2242 src[2+3*stride]=(l1 + l2+ 1)>>1;
2244 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2245 src[0+3*stride]=(l2 + l3 + 1)>>1;
2246 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: copy the 16 pixels of the row above
 * into all 16 rows, moved as four 32-bit words per row. */
2249 void ff_pred16x16_vertical_c(uint8_t *src, int stride){
2251 const uint32_t a= ((uint32_t*)(src-stride))[0];
2252 const uint32_t b= ((uint32_t*)(src-stride))[1];
2253 const uint32_t c= ((uint32_t*)(src-stride))[2];
2254 const uint32_t d= ((uint32_t*)(src-stride))[3];
2256 for(i=0; i<16; i++){
2257 ((uint32_t*)(src+i*stride))[0]= a;
2258 ((uint32_t*)(src+i*stride))[1]= b;
2259 ((uint32_t*)(src+i*stride))[2]= c;
2260 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal intra prediction: replicate each row's left-neighbour
 * pixel across the whole row (byte replicated via *0x01010101). */
2264 void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
2267 for(i=0; i<16; i++){
2268 ((uint32_t*)(src+i*stride))[0]=
2269 ((uint32_t*)(src+i*stride))[1]=
2270 ((uint32_t*)(src+i*stride))[2]=
2271 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC intra prediction: average of the 16 left + 16 top neighbours,
 * rounded ((dc+16)>>5), replicated into every pixel.  The loop header
 * summing the top row is among the lines missing from this fragment. */
2275 void ff_pred16x16_dc_c(uint8_t *src, int stride){
2279 dc+= src[-1+i*stride];
2286 dc= 0x01010101*((dc + 16)>>5);
2288 for(i=0; i<16; i++){
2289 ((uint32_t*)(src+i*stride))[0]=
2290 ((uint32_t*)(src+i*stride))[1]=
2291 ((uint32_t*)(src+i*stride))[2]=
2292 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the LEFT edge only ((dc+8)>>4 over 16 pixels),
 * used when the top row is unavailable. */
2296 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2300 dc+= src[-1+i*stride];
2303 dc= 0x01010101*((dc + 8)>>4);
2305 for(i=0; i<16; i++){
2306 ((uint32_t*)(src+i*stride))[0]=
2307 ((uint32_t*)(src+i*stride))[1]=
2308 ((uint32_t*)(src+i*stride))[2]=
2309 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the TOP edge only; the top-row summation loop
 * is missing from this fragment. */
2313 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2319 dc= 0x01010101*((dc + 8)>>4);
2321 for(i=0; i<16; i++){
2322 ((uint32_t*)(src+i*stride))[0]=
2323 ((uint32_t*)(src+i*stride))[1]=
2324 ((uint32_t*)(src+i*stride))[2]=
2325 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours: fill with mid-grey 128. */
2329 void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
2332 for(i=0; i<16; i++){
2333 ((uint32_t*)(src+i*stride))[0]=
2334 ((uint32_t*)(src+i*stride))[1]=
2335 ((uint32_t*)(src+i*stride))[2]=
2336 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane (gradient) intra prediction shared between H.264 and SVQ3.
 * Fits a linear plane a + b*x + c*y through the top and left neighbour
 * pixels: H and V accumulate weighted differences across the top row and
 * left column, then are scaled -- the svq3 branch swaps H/V and uses
 * SVQ3's divisions ("required for 100% accuracy"), the H.264 branch uses
 * the spec's (5*H+32)>>6 form.  Results are clipped through the
 * ff_cropTbl clamping table (cm). */
2340 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2343 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2344 const uint8_t * const src0 = src+7-stride;
2345 const uint8_t *src1 = src+8*stride-1;
2346 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2347 int H = src0[1] - src0[-1];
2348 int V = src1[0] - src2[ 0];
2349 for(k=2; k<=8; ++k) {
2350 src1 += stride; src2 -= stride;
2351 H += k*(src0[k] - src0[-k]);
2352 V += k*(src1[0] - src2[ 0]);
2355 H = ( 5*(H/4) ) / 16;
2356 V = ( 5*(V/4) ) / 16;
2358 /* required for 100% accuracy */
2359 i = H; H = V; V = i;
2361 H = ( 5*H+32 ) >> 6;
2362 V = ( 5*V+32 ) >> 6;
2365 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2366 for(j=16; j>0; --j) {
2369 for(i=-16; i<0; i+=4) {
2370 src[16+i] = cm[ (b ) >> 5 ];
2371 src[17+i] = cm[ (b+ H) >> 5 ];
2372 src[18+i] = cm[ (b+2*H) >> 5 ];
2373 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Public H.264 16x16 plane prediction: the compat helper with svq3=0. */
2380 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2381 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical intra prediction: copy the row above into all 8
 * rows, two 32-bit words per row. */
2384 void ff_pred8x8_vertical_c(uint8_t *src, int stride){
2386 const uint32_t a= ((uint32_t*)(src-stride))[0];
2387 const uint32_t b= ((uint32_t*)(src-stride))[1];
2390 ((uint32_t*)(src+i*stride))[0]= a;
2391 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 horizontal intra prediction: replicate each row's left neighbour. */
2395 void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
2399 ((uint32_t*)(src+i*stride))[0]=
2400 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 DC prediction with no neighbours: fill with mid-grey 128. */
2404 void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
2408 ((uint32_t*)(src+i*stride))[0]=
2409 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 DC prediction from the LEFT edge only.  Per the chroma DC rules the
 * block is split into top and bottom 4-row halves: dc0 averages left
 * pixels 0..3 (top half), dc2 averages left pixels 4..7 (bottom half). */
2413 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2419 dc0+= src[-1+i*stride];
2420 dc2+= src[-1+(i+4)*stride];
2422 dc0= 0x01010101*((dc0 + 2)>>2);
2423 dc2= 0x01010101*((dc2 + 2)>>2);
2426 ((uint32_t*)(src+i*stride))[0]=
2427 ((uint32_t*)(src+i*stride))[1]= dc0;
2430 ((uint32_t*)(src+i*stride))[0]=
2431 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 DC prediction from the TOP edge only: dc0 averages top pixels 0..3
 * (left half), dc1 averages top pixels 4..7 (right half); both 4-row
 * halves get the same left/right pair. */
2435 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2441 dc0+= src[i-stride];
2442 dc1+= src[4+i-stride];
2444 dc0= 0x01010101*((dc0 + 2)>>2);
2445 dc1= 0x01010101*((dc1 + 2)>>2);
2448 ((uint32_t*)(src+i*stride))[0]= dc0;
2449 ((uint32_t*)(src+i*stride))[1]= dc1;
2452 ((uint32_t*)(src+i*stride))[0]= dc0;
2453 ((uint32_t*)(src+i*stride))[1]= dc1;
/* Full 8x8 chroma DC prediction: four 4x4 quadrants per the spec --
 * top-left from left+top (dc0), top-right from top only (dc1),
 * bottom-left from left only (dc2), bottom-right from both (dc3). */
2458 void ff_pred8x8_dc_c(uint8_t *src, int stride){
2460 int dc0, dc1, dc2, dc3;
2464 dc0+= src[-1+i*stride] + src[i-stride];
2465 dc1+= src[4+i-stride];
2466 dc2+= src[-1+(i+4)*stride];
2468 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2469 dc0= 0x01010101*((dc0 + 4)>>3);
2470 dc1= 0x01010101*((dc1 + 2)>>2);
2471 dc2= 0x01010101*((dc2 + 2)>>2);
2474 ((uint32_t*)(src+i*stride))[0]= dc0;
2475 ((uint32_t*)(src+i*stride))[1]= dc1;
2478 ((uint32_t*)(src+i*stride))[0]= dc2;
2479 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 (chroma) plane intra prediction: same linear-plane fit as the 16x16
 * version but over 8 pixels -- H/V gradients from the top row and left
 * column, scaled by (17*x+16)>>5 per the spec, then each output pixel is
 * a clipped a + x*H + y*V sample (via the ff_cropTbl clamp table). */
2483 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2486 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2487 const uint8_t * const src0 = src+3-stride;
2488 const uint8_t *src1 = src+4*stride-1;
2489 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2490 int H = src0[1] - src0[-1];
2491 int V = src1[0] - src2[ 0];
2492 for(k=2; k<=4; ++k) {
2493 src1 += stride; src2 -= stride;
2494 H += k*(src0[k] - src0[-k]);
2495 V += k*(src1[0] - src2[ 0]);
2497 H = ( 17*H+16 ) >> 5;
2498 V = ( 17*V+16 ) >> 5;
2500 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2501 for(j=8; j>0; --j) {
2504 src[0] = cm[ (b ) >> 5 ];
2505 src[1] = cm[ (b+ H) >> 5 ];
2506 src[2] = cm[ (b+2*H) >> 5 ];
2507 src[3] = cm[ (b+3*H) >> 5 ];
2508 src[4] = cm[ (b+4*H) >> 5 ];
2509 src[5] = cm[ (b+5*H) >> 5 ];
2510 src[6] = cm[ (b+6*H) >> 5 ];
2511 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l) predictors.  Unlike the 4x4
 * case, H.264 8x8 intra prediction low-pass filters the reference samples
 * first: each LOAD macro declares (1,2,1)/4-filtered edge pixels --
 * l0..l7 (left), t0..t7 (top), t8..t15 (top-right, duplicated from
 * SRC(7,-1) when the top-right block is unavailable) and lt (top-left
 * corner).  PREDICT_8x8_DC fills all 8 rows with a replicated DC word.
 * NOTE(review): the `#define PL(y)`, `#define PT(x)` and `#define PTR(x)`
 * opener lines are missing from this fragment (only their bodies remain);
 * consult the full source. */
2516 #define SRC(x,y) src[(x)+(y)*stride]
2518 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2519 #define PREDICT_8x8_LOAD_LEFT \
2520 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2521 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2522 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2523 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2526 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2527 #define PREDICT_8x8_LOAD_TOP \
2528 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2529 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2530 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2531 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2532 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2535 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2536 #define PREDICT_8x8_LOAD_TOPRIGHT \
2537 int t8, t9, t10, t11, t12, t13, t14, t15; \
2538 if(has_topright) { \
2539 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2540 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2541 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2543 #define PREDICT_8x8_LOAD_TOPLEFT \
2544 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2546 #define PREDICT_8x8_DC(v) \
2548 for( y = 0; y < 8; y++ ) { \
2549 ((uint32_t*)src)[0] = \
2550 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with mid-grey 0x80. */
2554 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2556 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the (filtered) LEFT edge only. */
2558 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2560 PREDICT_8x8_LOAD_LEFT;
2561 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the (filtered) TOP edge only. */
2564 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2566 PREDICT_8x8_LOAD_TOP;
2567 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction averaging all 16 filtered left + top samples. */
2570 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2572 PREDICT_8x8_LOAD_LEFT;
2573 PREDICT_8x8_LOAD_TOP;
2574 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2575 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with the filtered left
 * sample l<y>, replicated byte-wise via the local ROW macro. */
2578 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2580 PREDICT_8x8_LOAD_LEFT;
2581 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2582 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2583 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: the filtered top row (written into row 0
 * by lines missing from this fragment) is copied to rows 1..7 as one
 * 64-bit store per row. */
2586 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2589 PREDICT_8x8_LOAD_TOP;
2598 for( y = 1; y < 8; y++ )
2599 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: every anti-diagonal of the
 * block gets one (1,2,1)/4-filtered value from consecutive top /
 * top-right samples t0..t15; the bottom-right corner uses the
 * (t14 + 3*t15) edge form. */
2601 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2603 PREDICT_8x8_LOAD_TOP;
2604 PREDICT_8x8_LOAD_TOPRIGHT;
2605 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2606 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2607 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2608 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2609 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2610 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2611 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2612 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2613 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2614 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2615 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2616 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2617 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2618 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2619 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: each down-right diagonal gets
 * one filtered value, sweeping from the bottom of the left edge (l7)
 * through the top-left corner (lt) to the end of the top edge (t7). */
2621 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2623 PREDICT_8x8_LOAD_TOP;
2624 PREDICT_8x8_LOAD_LEFT;
2625 PREDICT_8x8_LOAD_TOPLEFT;
2626 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2627 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2628 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2629 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2630 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2631 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2632 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2633 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2634 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2635 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2636 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2637 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2638 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2639 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2640 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternating half-pel averages
 * ((a+b+1)>>1) and quarter-pel 3-tap values along near-vertical
 * diagonals, fed by the left edge, top-left corner and top edge. */
2643 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2645 PREDICT_8x8_LOAD_TOP;
2646 PREDICT_8x8_LOAD_LEFT;
2647 PREDICT_8x8_LOAD_TOPLEFT;
2648 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2649 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2650 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2651 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2652 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2653 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2654 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2655 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2656 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2657 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2658 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2659 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2660 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2661 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2662 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2663 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2664 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2665 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2666 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2667 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2668 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2669 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: near-horizontal diagonals filled
 * with alternating half-pel averages and 3-tap values sweeping from the
 * bottom of the left edge up through lt into the top edge. */
2671 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2673 PREDICT_8x8_LOAD_TOP;
2674 PREDICT_8x8_LOAD_LEFT;
2675 PREDICT_8x8_LOAD_TOPLEFT;
2676 SRC(0,7)= (l6 + l7 + 1) >> 1;
2677 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2678 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2679 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2680 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2681 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2682 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2683 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2684 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2685 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2686 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2687 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2688 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2689 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2690 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2691 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2692 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2693 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2694 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2695 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2696 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2697 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: alternating half-pel and 3-tap
 * filtered values along near-vertical diagonals using the top and
 * top-right edges t0..t12. */
2699 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2701 PREDICT_8x8_LOAD_TOP;
2702 PREDICT_8x8_LOAD_TOPRIGHT;
2703 SRC(0,0)= (t0 + t1 + 1) >> 1;
2704 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2705 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2706 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2707 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2708 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2709 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2710 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2711 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2712 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2713 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2714 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2715 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2716 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2717 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2718 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2719 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2720 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2721 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2722 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2723 SRC(7,6)= (t10 + t11 + 1) >> 1;
2724 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates upward along the
 * filtered left edge l0..l7; positions past the edge are saturated to l7
 * (the final multi-assignment).  Note (l6 + 3*l7) is the spec's edge
 * form of the 3-tap filter. */
2726 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2728 PREDICT_8x8_LOAD_LEFT;
2729 SRC(0,0)= (l0 + l1 + 1) >> 1;
2730 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2731 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2732 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2733 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2734 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2735 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2736 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2737 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2738 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2739 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2740 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2741 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2742 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2743 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2744 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2745 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2746 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* The PREDICT_8x8 helper macros are local to this predictor section --
 * retire them so they cannot leak into the rest of the file. */
2748 #undef PREDICT_8x8_LOAD_LEFT
2749 #undef PREDICT_8x8_LOAD_TOP
2750 #undef PREDICT_8x8_LOAD_TOPLEFT
2751 #undef PREDICT_8x8_LOAD_TOPRIGHT
2752 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in ONE direction (list 0 or 1):
 * computes the quarter-pel luma source position from the cached MV,
 * falls back to ff_emulated_edge_mc when the (interpolation-extended)
 * source region leaves the picture, runs the qpel luma op and the
 * eighth-pel chroma op.  `square`/`delta` let one qpix_op call cover a
 * non-square partition as two square halves.  Skips chroma entirely for
 * CODEC_FLAG_GRAY.  NOTE(review): the field-parity chroma offset
 * (my += 2*((s->mb_y&1) - (ref parity))) only applies in the MBAFF path
 * guarded by lines missing from this fragment -- confirm against the
 * full source. */
2758 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2759 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2760 int src_x_offset, int src_y_offset,
2761 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2762 MpegEncContext * const s = &h->s;
2763 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2764 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2765 const int luma_xy= (mx&3) + ((my&3)<<2);
2766 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2767 uint8_t * src_cb, * src_cr;
2768 int extra_width= h->emu_edge_width;
2769 int extra_height= h->emu_edge_height;
2771 const int full_mx= mx>>2;
2772 const int full_my= my>>2;
2773 const int pic_width = 16*s->mb_width;
2774 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2776 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
2779 if(mx&7) extra_width -= 3;
2780 if(my&7) extra_height -= 3;
2782 if( full_mx < 0-extra_width
2783 || full_my < 0-extra_height
2784 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2785 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2786 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2787 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2791 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2793 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2796 if(s->flags&CODEC_FLAG_GRAY) return;
2799 // chroma offset when predicting from a field of opposite parity
2800 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2801 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2803 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2804 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2807 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2808 src_cb= s->edge_emu_buffer;
2810 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2813 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2814 src_cr= s->edge_emu_buffer;
2816 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted (standard) prediction for one partition: runs mc_dir_part
 * for list 0 with the `put` ops, then -- when bidirectional -- switches
 * to the `avg` ops so the list-1 pass averages into the same destination.
 * Offsets are in 4-pel (luma 8-pel) partition units, converted to pixel
 * addresses for the destination planes and to macroblock-absolute source
 * coordinates. */
2819 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2820 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2821 int x_offset, int y_offset,
2822 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2823 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2824 int list0, int list1){
2825 MpegEncContext * const s = &h->s;
2826 qpel_mc_func *qpix_op= qpix_put;
2827 h264_chroma_mc_func chroma_op= chroma_put;
2829 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2830 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2831 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2832 x_offset += 8*s->mb_x;
2833 y_offset += 8*(s->mb_y >> MB_MBAFF);
2836 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2837 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2839 qpix_op, chroma_op);
2842 chroma_op= chroma_avg;
2846 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2847 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2848 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2849 qpix_op, chroma_op);
/* Weighted prediction for one partition.  Bi-directional case: predict
 * list 0 into the destination and list 1 into the obmc_scratchpad
 * temporaries, then blend with the biweight ops -- either implicit
 * weights (use_weight==2: weight1 = 64 - weight0, fixed denom 5, zero
 * offset) or explicit per-ref luma/chroma weights and offsets.
 * Uni-directional case: predict the single list then apply the one-input
 * weight ops in place (chroma only when use_weight_chroma is set). */
2853 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2854 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2855 int x_offset, int y_offset,
2856 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2857 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2858 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2859 int list0, int list1){
2860 MpegEncContext * const s = &h->s;
2862 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2863 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2864 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2865 x_offset += 8*s->mb_x;
2866 y_offset += 8*(s->mb_y >> MB_MBAFF);
2869 /* don't optimize for luma-only case, since B-frames usually
2870 * use implicit weights => chroma too. */
2871 uint8_t *tmp_cb = s->obmc_scratchpad;
2872 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2873 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2874 int refn0 = h->ref_cache[0][ scan8[n] ];
2875 int refn1 = h->ref_cache[1][ scan8[n] ];
2877 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2878 dest_y, dest_cb, dest_cr,
2879 x_offset, y_offset, qpix_put, chroma_put);
2880 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2881 tmp_y, tmp_cb, tmp_cr,
2882 x_offset, y_offset, qpix_put, chroma_put);
2884 if(h->use_weight == 2){
2885 int weight0 = h->implicit_weight[refn0][refn1];
2886 int weight1 = 64 - weight0;
2887 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2888 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2889 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2891 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2892 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2893 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2894 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2895 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2896 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2897 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2898 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2899 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2902 int list = list1 ? 1 : 0;
2903 int refn = h->ref_cache[list][ scan8[n] ];
2904 Picture *ref= &h->ref_list[list][refn];
2905 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2906 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2907 qpix_put, chroma_put);
2909 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2910 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2911 if(h->use_weight_chroma){
2912 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2913 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2914 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2915 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch for one partition: use the weighted path when explicit
 * weighting is on (use_weight==1) or when implicit bi-prediction needs a
 * non-trivial blend (use_weight==2, both lists, implicit weight != the
 * 32/32 even split); otherwise the cheaper standard put/avg path. */
2920 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2921 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2922 int x_offset, int y_offset,
2923 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2924 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2925 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2926 int list0, int list1){
2927 if((h->use_weight==2 && list0 && list1
2928 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2929 || h->use_weight==1)
2930 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2931 x_offset, y_offset, qpix_put, chroma_put,
2932 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2934 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2935 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Issue cache prefetches for the reference pixels this macroblock's first
 * MV points at, staggered by (mb_x & 3)/(mb_x & 7) rows so neighbouring
 * MBs touch different cache lines ("4 macroblocks ahead", 64-byte lines).
 * Purely a performance hint; the guard for an invalid refn is on a line
 * missing from this fragment. */
2938 static inline void prefetch_motion(H264Context *h, int list){
2939 /* fetch pixels for estimated mv 4 macroblocks ahead
2940 * optimized for 64byte cache lines */
2941 MpegEncContext * const s = &h->s;
2942 const int refn = h->ref_cache[list][scan8[0]];
2944 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2945 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2946 uint8_t **src= h->ref_list[list][refn].data;
2947 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2948 s->dsp.prefetch(src[0]+off, s->linesize, 4);
2949 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2950 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter-prediction driver for one macroblock: decodes the partition
 * layout from mb_type (16x16 / 16x8 / 8x16 / 8x8, and per-8x8
 * sub-partitions down to 4x4) and issues one mc_part call per partition
 * with the matching qpel/chroma function-table entry and weight-table
 * slot.  `n` indexes into the scan8 MV cache; x/y offsets are in 4-pel
 * units.  Prefetches list 0 refs before and list 1 refs after the work. */
2954 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2955 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2956 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2957 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2958 MpegEncContext * const s = &h->s;
2959 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2960 const int mb_type= s->current_picture.mb_type[mb_xy];
2962 assert(IS_INTER(mb_type));
2964 prefetch_motion(h, 0);
2966 if(IS_16X16(mb_type)){
2967 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2968 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2969 &weight_op[0], &weight_avg[0],
2970 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2971 }else if(IS_16X8(mb_type)){
2972 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2973 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2974 &weight_op[1], &weight_avg[1],
2975 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2976 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2977 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2978 &weight_op[1], &weight_avg[1],
2979 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2980 }else if(IS_8X16(mb_type)){
2981 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2982 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2983 &weight_op[2], &weight_avg[2],
2984 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2985 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2986 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2987 &weight_op[2], &weight_avg[2],
2988 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2992 assert(IS_8X8(mb_type));
2995 const int sub_mb_type= h->sub_mb_type[i];
2997 int x_offset= (i&1)<<2;
2998 int y_offset= (i&2)<<1;
3000 if(IS_SUB_8X8(sub_mb_type)){
3001 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3002 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3003 &weight_op[3], &weight_avg[3],
3004 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3005 }else if(IS_SUB_8X4(sub_mb_type)){
3006 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3007 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3008 &weight_op[4], &weight_avg[4],
3009 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3010 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3011 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3012 &weight_op[4], &weight_avg[4],
3013 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3014 }else if(IS_SUB_4X8(sub_mb_type)){
3015 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3016 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3017 &weight_op[5], &weight_avg[5],
3018 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3019 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3020 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3021 &weight_op[5], &weight_avg[5],
3022 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3025 assert(IS_SUB_4X4(sub_mb_type));
3027 int sub_x_offset= x_offset + 2*(j&1);
3028 int sub_y_offset= y_offset + (j&2);
3029 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3030 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3031 &weight_op[6], &weight_avg[6],
3032 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3038 prefetch_motion(h, 1);
/* One-time construction of the CAVLC decoding tables: coeff_token (chroma
 * DC + 4 luma contexts), total_zeros (chroma DC + 15 luma contexts) and
 * run_before (runs 1..6 plus the shared run7 table reusing run_len[6]).
 * Guarded by a `static int done` flag (set on a line missing from this
 * fragment).  NOTE(review): that guard is not thread-safe; fine only if
 * callers serialize first use -- confirm against the full source. */
3041 static void decode_init_vlc(){
3042 static int done = 0;
3048 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3049 &chroma_dc_coeff_token_len [0], 1, 1,
3050 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3053 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3054 &coeff_token_len [i][0], 1, 1,
3055 &coeff_token_bits[i][0], 1, 1, 1);
3059 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3060 &chroma_dc_total_zeros_len [i][0], 1, 1,
3061 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3063 for(i=0; i<15; i++){
3064 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3065 &total_zeros_len [i][0], 1, 1,
3066 &total_zeros_bits[i][0], 1, 1, 1);
3070 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3071 &run_len [i][0], 1, 1,
3072 &run_bits[i][0], 1, 1, 1);
3074 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3075 &run_len [6][0], 1, 1,
3076 &run_bits[6][0], 1, 1, 1);
3081 * Sets the intra prediction function pointers.
/* Fills the H264Context dispatch tables with the C reference predictors
 * defined above: pred4x4[] (9 directional modes + 3 DC fallbacks),
 * pred8x8l[] (8x8 luma, same mode layout), pred8x8[] (chroma) and
 * pred16x16[] (luma DC/V/H/plane + DC fallbacks).  Platform-specific
 * optimized versions may overwrite these entries elsewhere. */
3083 static void init_pred_ptrs(H264Context *h){
3084 //    MpegEncContext * const s = &h->s;
3086 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3087 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3088 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3089 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3090 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3091 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3092 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3093 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3094 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3095 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3096 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3097 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3099 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3100 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3101 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3102 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3103 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3104 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3105 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3106 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3107 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3108 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3109 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3110 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3112 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
3113 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
3114 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
3115 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
3116 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3117 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3118 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
3120 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
3121 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
3122 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
3123 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
3124 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3125 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3126 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/**
 * Frees all per-stream tables allocated by alloc_tables().
 * av_freep() also NULLs the pointers, so this is safe to call on a
 * partially-allocated context (e.g. from an alloc failure path).
 */
3129 static void free_tables(H264Context *h){
3130 av_freep(&h->intra4x4_pred_mode);
3131 av_freep(&h->chroma_pred_mode_table);
3132 av_freep(&h->cbp_table);
3133 av_freep(&h->mvd_table[0]);
3134 av_freep(&h->mvd_table[1]);
3135 av_freep(&h->direct_table);
3136 av_freep(&h->non_zero_count);
3137 av_freep(&h->slice_table_base);
3138 av_freep(&h->top_borders[1]);
3139 av_freep(&h->top_borders[0]);
// slice_table is only an offset alias into slice_table_base -- never freed itself
3140 h->slice_table= NULL;
3142 av_freep(&h->mb2b_xy);
3143 av_freep(&h->mb2b8_xy);
3145 av_freep(&h->s.obmc_scratchpad);
/**
 * Precomputes the 8x8 dequantization tables for all 52 QP values,
 * scaled by the PPS 8x8 scaling matrices.
 * When both scaling matrices are identical, list 1 shares list 0's
 * buffer instead of recomputing it.
 */
3148 static void init_dequant8_coeff_table(H264Context *h){
// non-C IDCT implementations expect the coefficients transposed
3150 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3151 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3152 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3154 for(i=0; i<2; i++ ){
// identical intra/inter matrices -> alias the first table
3155 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3156 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3160 for(q=0; q<52; q++){
// qp = 6*shift + idx; the base coeff repeats every 6 QP steps, doubled per period
3161 int shift = ff_div6[q];
3162 int idx = ff_rem6[q];
3164 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3165 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3166 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precomputes the 4x4 dequantization tables for all 52 QP values,
 * scaled by the six PPS 4x4 scaling matrices (Y/Cb/Cr, intra/inter).
 * Matrices that compare equal to an earlier one share that buffer.
 */
3171 static void init_dequant4_coeff_table(H264Context *h){
// non-C IDCT implementations expect the coefficients transposed
3173 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3174 for(i=0; i<6; i++ ){
3175 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// dedupe: reuse buffer j if matrix i is identical to an earlier matrix j
3177 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3178 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3185 for(q=0; q<52; q++){
// qp = 6*shift + idx; +2 accounts for the 4x4 norm factor
3186 int shift = ff_div6[q] + 2;
3187 int idx = ff_rem6[q];
3189 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3190 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3191 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Builds all dequant tables needed by the current PPS/SPS: always the
 * 4x4 tables, the 8x8 tables only when 8x8 transform is enabled, and a
 * flat identity scale (1<<6) at QP 0 for lossless transform bypass.
 */
3196 static void init_dequant_tables(H264Context *h){
3198 init_dequant4_coeff_table(h);
3199 if(h->pps.transform_8x8_mode)
3200 init_dequant8_coeff_table(h);
3201 if(h->sps.transform_bypass){
// bypass: QP==0 blocks skip the transform, so use a neutral scale
3204 h->dequant4_coeff[i][0][x] = 1<<6;
3205 if(h->pps.transform_8x8_mode)
3208 h->dequant8_coeff[i][0][x] = 1<<6;
3215 * needs width/height
3217 static int alloc_tables(H264Context *h){
3218 MpegEncContext * const s = &h->s;
// one extra MB row as guard space for edge macroblocks
3219 const int big_mb_num= s->mb_stride * (s->mb_height+1);
// NOTE(review): CHECKED_ALLOCZ presumably jumps to a fail label that
// calls free_tables() on failure -- macro body not visible here, confirm.
3222 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3224 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3225 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
// top_borders: 16 luma + 8+8 chroma bytes per MB column, one row per field
3226 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3227 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3228 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// mvd/direct tables are only needed by the CABAC entropy coder
3230 if( h->pps.cabac ) {
3231 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3232 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3233 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3234 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// 0xFF marks "no slice"; slice_table is offset so neighbours of the
// first row/column land in valid (marked) guard memory
3237 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3238 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// macroblock address -> motion-vector (b) / 8x8 (b8) index lookup tables
3240 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3241 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3242 for(y=0; y<s->mb_height; y++){
3243 for(x=0; x<s->mb_width; x++){
3244 const int mb_xy= x + y*s->mb_stride;
3245 const int b_xy = 4*x + 4*y*h->b_stride;
3246 const int b8_xy= 2*x + 2*y*h->b8_stride;
3248 h->mb2b_xy [mb_xy]= b_xy;
3249 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start(), where linesize is finally known
3253 s->obmc_scratchpad = NULL;
3255 if(!h->dequant4_coeff[0])
3256 init_dequant_tables(h);
/**
 * Initialization shared by the decoder (and encoder) init paths:
 * copies dimensions/codec id from the AVCodecContext into the embedded
 * MpegEncContext and resets the scaling matrices to the flat default (16).
 */
3264 static void common_init(H264Context *h){
3265 MpegEncContext * const s = &h->s;
3267 s->width = s->avctx->width;
3268 s->height = s->avctx->height;
3269 s->codec_id= s->avctx->codec->id;
// -1 = no PPS has had dequant tables computed for it yet
3273 h->dequant_coeff_pps= -1;
3274 s->unrestricted_mv=1;
3275 s->decode=1; //FIXME
// flat default scaling matrices (all 16 == unity scale in H.264 terms)
3277 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3278 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback: sets up the MpegEncContext defaults and the
 * H.264-specific output format, then inspects extradata.
 */
3281 static int decode_init(AVCodecContext *avctx){
3282 H264Context *h= avctx->priv_data;
3283 MpegEncContext * const s = &h->s;
3285 MPV_decode_defaults(s);
3290 s->out_format = FMT_H264;
3291 s->workaround_bugs= avctx->workaround_bugs;
3294 // s->decode_mb= ff_h263_decode_mb;
3296 avctx->pix_fmt= PIX_FMT_YUV420P;
// extradata starting with byte 1 indicates avcC-style (MP4) extradata
// rather than raw Annex-B NAL units
3300 if(avctx->extradata_size > 0 && avctx->extradata &&
3301 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the MPV frame and error resilience, then
 * precomputes block_offset[] (pixel offsets of each 4x4 block inside
 * the macroblock; entries 24+ are the field/MBAFF variants with doubled
 * line stride) and lazily allocates the bipred scratch buffer.
 */
3311 static int frame_start(H264Context *h){
3312 MpegEncContext * const s = &h->s;
3315 if(MPV_frame_start(s, s->avctx) < 0)
3317 ff_er_frame_start(s);
3319 assert(s->linesize && s->uvlinesize);
// luma: scan8 maps block index -> cache position; &7 gives the x cell,
// >>3 the y cell, each cell being 4 pixels wide/high
3321 for(i=0; i<16; i++){
3322 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3323 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma: Cb (16..19) and Cr (20..23) share the same offsets
3326 h->block_offset[16+i]=
3327 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3328 h->block_offset[24+16+i]=
3329 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3332 /* can't be in alloc_tables because linesize isn't known there.
3333 * FIXME: redo bipred weight to not require extra buffer? */
3334 if(!s->obmc_scratchpad)
3335 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3337 /* some macroblocks will be accessed before they're available */
3339 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3341 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the bottom row and right column of the current macroblock into
 * top_borders[0]/left_border so the deblocking filter can still read the
 * unfiltered neighbour samples after this MB has been filtered.
 */
3345 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3346 MpegEncContext * const s = &h->s;
3350 src_cb -= uvlinesize;
3351 src_cr -= uvlinesize;
3353 // There are two lines saved, the line above the top macroblock of a pair,
3354 // and the line above the bottom macroblock
// left_border[0] is the top-left corner sample, taken from the saved top row
3355 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3356 for(i=1; i<17; i++){
3357 h->left_border[i]= src_y[15+i* linesize];
// save the MB's last luma row (two 8-byte copies)
3360 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3361 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma borders only matter when chroma planes are decoded
3363 if(!(s->flags&CODEC_FLAG_GRAY)){
3364 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3365 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3367 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3368 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3370 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3371 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border samples
 * with the picture's top/left neighbour pixels around the current MB, so
 * intra prediction sees unfiltered neighbours while deblocking is active.
 * Entries swapped with a hard-coded xchg of 1 only need to be copied in,
 * never restored.
 */
3375 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3376 MpegEncContext * const s = &h->s;
3379 int deblock_left = (s->mb_x > 0);
3380 int deblock_top = (s->mb_y > 0);
// step back one row and one column to address the neighbour samples
3382 src_y -= linesize + 1;
3383 src_cb -= uvlinesize + 1;
3384 src_cr -= uvlinesize + 1;
3386 #define XCHG(a,b,t,xchg)\
// left luma column (skip row 0 when there is no top neighbour)
3393 for(i = !deblock_top; i<17; i++){
3394 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top luma row; the second half and the top-right block are copy-only
3399 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3400 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3401 if(s->mb_x+1 < s->mb_width){
3402 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3406 if(!(s->flags&CODEC_FLAG_GRAY)){
3408 for(i = !deblock_top; i<9; i++){
3409 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3410 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3415 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock *pair* (32 luma rows), filling both top_borders[0] and
 * top_borders[1] (one per field/row of the pair below).
 */
3420 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3421 MpegEncContext * const s = &h->s;
3424 src_y -= 2 * linesize;
3425 src_cb -= 2 * uvlinesize;
3426 src_cr -= 2 * uvlinesize;
3428 // There are two lines saved, the line above the top macroblock of a pair,
3429 // and the line above the bottom macroblock
3430 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3431 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// right column of the full 32-row pair
3432 for(i=2; i<34; i++){
3433 h->left_border[i]= src_y[15+i* linesize];
// last two luma rows of the pair, one per border set
3436 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3437 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3438 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3439 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3441 if(!(s->flags&CODEC_FLAG_GRAY)){
3442 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3443 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3444 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3445 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3446 for(i=2; i<18; i++){
3447 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3448 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3450 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3451 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3452 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3453 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps/restores the saved borders
 * for a macroblock pair (two rows in each top_borders set, 34-entry
 * left column).  deblock_top requires mb_y > 1 because the pair spans
 * two MB rows.
 */
3457 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3458 MpegEncContext * const s = &h->s;
3461 int deblock_left = (s->mb_x > 0);
3462 int deblock_top = (s->mb_y > 1);
3464 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// step back two rows (pair) and one column to the neighbour samples
3466 src_y -= 2 * linesize + 1;
3467 src_cb -= 2 * uvlinesize + 1;
3468 src_cr -= 2 * uvlinesize + 1;
3470 #define XCHG(a,b,t,xchg)\
// left luma column; skip the first two rows when no top pair exists
3477 for(i = (!deblock_top)<<1; i<34; i++){
3478 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// top rows for both border sets; upper halves/top-right are copy-only
3483 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3484 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3485 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3486 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3487 if(s->mb_x+1 < s->mb_width){
3488 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3489 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3493 if(!(s->flags&CODEC_FLAG_GRAY)){
3495 for(i = (!deblock_top) << 1; i<18; i++){
3496 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3497 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3501 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3502 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3503 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3504 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: computes destination pointers,
 * runs intra prediction or inter motion compensation, adds the residual
 * (IDCT or bypass), and drives the deblocking filter, including the
 * MBAFF pair handling.  Also contains the SVQ3 residual path.
 */
3509 static void hl_decode_mb(H264Context *h){
3510 MpegEncContext * const s = &h->s;
3511 const int mb_x= s->mb_x;
3512 const int mb_y= s->mb_y;
3513 const int mb_xy= mb_x + mb_y*s->mb_stride;
3514 const int mb_type= s->current_picture.mb_type[mb_xy];
3515 uint8_t *dest_y, *dest_cb, *dest_cr;
3516 int linesize, uvlinesize /*dct_offset*/;
3518 int *block_offset = &h->block_offset[0];
// in MBAFF, "bottom" selects the lower MB of the vertical pair
3519 const unsigned int bottom = mb_y & 1;
// lossless bypass is only active at QP 0 with the SPS flag set
3520 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3521 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3522 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3527 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3528 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3529 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3531 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3532 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the stride and use the field block offsets
3535 linesize = h->mb_linesize = s->linesize * 2;
3536 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3537 block_offset = &h->block_offset[24];
3538 if(mb_y&1){ //FIXME move out of this func?
3539 dest_y -= s->linesize*15;
3540 dest_cb-= s->uvlinesize*7;
3541 dest_cr-= s->uvlinesize*7;
// rewrite the ref cache so field references distinguish top/bottom parity
3545 for(list=0; list<2; list++){
3546 if(!USES_LIST(mb_type, list))
3548 if(IS_16X16(mb_type)){
3549 int8_t *ref = &h->ref_cache[list][scan8[0]];
3550 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3552 for(i=0; i<16; i+=4){
3553 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3554 int ref = h->ref_cache[list][scan8[i]];
3556 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
// frame macroblock: normal strides
3562 linesize = h->mb_linesize = s->linesize;
3563 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3564 // dct_offset = s->linesize * 16;
// select the residual-add functions for this MB's transform mode
3567 if(transform_bypass){
3569 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3570 }else if(IS_8x8DCT(mb_type)){
3571 idct_dc_add = s->dsp.h264_idct8_dc_add;
3572 idct_add = s->dsp.h264_idct8_add;
3574 idct_dc_add = s->dsp.h264_idct_dc_add;
3575 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: swap in the unfiltered pair borders before prediction
3578 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3579 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3580 int mbt_y = mb_y&~1;
3581 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3582 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3583 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3584 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3587 if (IS_INTRA_PCM(mb_type)) {
3590 // The pixels are stored in h->mb array in the same order as levels,
3591 // copy them in output in the correct order.
3592 for(i=0; i<16; i++) {
3593 for (y=0; y<4; y++) {
3594 for (x=0; x<4; x++) {
3595 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3599 for(i=16; i<16+4; i++) {
3600 for (y=0; y<4; y++) {
3601 for (x=0; x<4; x++) {
3602 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3606 for(i=20; i<20+4; i++) {
3607 for (y=0; y<4; y++) {
3608 for (x=0; x<4; x++) {
3609 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3614 if(IS_INTRA(mb_type)){
3615 if(h->deblocking_filter && !FRAME_MBAFF)
3616 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3618 if(!(s->flags&CODEC_FLAG_GRAY)){
3619 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3620 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3623 if(IS_INTRA4x4(mb_type)){
// intra 8x8: predict and add residual per 8x8 block
3625 if(IS_8x8DCT(mb_type)){
3626 for(i=0; i<16; i+=4){
3627 uint8_t * const ptr= dest_y + block_offset[i];
3628 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3629 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3630 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3631 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// nnz==1 with only the DC coeff set uses the cheap DC-only add
3633 if(nnz == 1 && h->mb[i*16])
3634 idct_dc_add(ptr, h->mb + i*16, linesize);
3636 idct_add(ptr, h->mb + i*16, linesize);
// intra 4x4: predict and add residual per 4x4 block
3640 for(i=0; i<16; i++){
3641 uint8_t * const ptr= dest_y + block_offset[i];
3643 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes read top-right samples; synthesize them by replicating
// the last available top sample when the real ones are missing
3646 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3647 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3648 assert(mb_y || linesize <= block_offset[i]);
3649 if(!topright_avail){
3650 tr= ptr[3 - linesize]*0x01010101;
3651 topright= (uint8_t*) &tr;
3653 topright= ptr + 4 - linesize;
3657 h->pred4x4[ dir ](ptr, topright, linesize);
3658 nnz = h->non_zero_count_cache[ scan8[i] ];
3660 if(s->codec_id == CODEC_ID_H264){
3661 if(nnz == 1 && h->mb[i*16])
3662 idct_dc_add(ptr, h->mb + i*16, linesize);
3664 idct_add(ptr, h->mb + i*16, linesize);
3666 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: one full-MB prediction plus the DC Hadamard transform
3671 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3672 if(s->codec_id == CODEC_ID_H264){
3673 if(!transform_bypass)
3674 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3676 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3678 if(h->deblocking_filter && !FRAME_MBAFF)
3679 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3680 }else if(s->codec_id == CODEC_ID_H264){
// inter macroblock: full motion compensation
3681 hl_motion(h, dest_y, dest_cb, dest_cr,
3682 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3683 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3684 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except intra4x4 (already added above)
3688 if(!IS_INTRA4x4(mb_type)){
3689 if(s->codec_id == CODEC_ID_H264){
3690 if(IS_INTRA16x16(mb_type)){
3691 for(i=0; i<16; i++){
3692 if(h->non_zero_count_cache[ scan8[i] ])
3693 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3694 else if(h->mb[i*16])
3695 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3698 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3699 for(i=0; i<16; i+=di){
3700 int nnz = h->non_zero_count_cache[ scan8[i] ];
3702 if(nnz==1 && h->mb[i*16])
3703 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3705 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3710 for(i=0; i<16; i++){
3711 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3712 uint8_t * const ptr= dest_y + block_offset[i];
3713 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: dequant the 2x2 chroma DC first, then add per block
3719 if(!(s->flags&CODEC_FLAG_GRAY)){
3720 uint8_t *dest[2] = {dest_cb, dest_cr};
3721 if(transform_bypass){
3722 idct_add = idct_dc_add = s->dsp.add_pixels4;
3724 idct_add = s->dsp.h264_idct_add;
3725 idct_dc_add = s->dsp.h264_idct_dc_add;
3726 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3727 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3729 if(s->codec_id == CODEC_ID_H264){
3730 for(i=16; i<16+8; i++){
3731 if(h->non_zero_count_cache[ scan8[i] ])
3732 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3733 else if(h->mb[i*16])
3734 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3737 for(i=16; i<16+8; i++){
3738 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3739 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3740 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3746 if(h->deblocking_filter) {
// MBAFF: filter the whole pair once the bottom MB is decoded
3748 //FIXME try deblocking one mb at a time?
3749 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3750 const int mb_y = s->mb_y - 1;
3751 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3752 const int mb_xy= mb_x + mb_y*s->mb_stride;
3753 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3754 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3755 if (!bottom) return;
3756 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3757 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3758 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3760 if(IS_INTRA(mb_type_top | mb_type_bottom))
3761 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3763 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3767 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3768 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3769 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3770 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3773 tprintf("call mbaff filter_mb\n");
3774 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3775 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3776 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
// progressive: backup borders and run the fast per-MB filter
3778 tprintf("call filter_mb\n");
3779 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3780 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3781 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3787 * fills the default_ref_list.
3789 static int fill_default_ref_list(H264Context *h){
3790 MpegEncContext * const s = &h->s;
3792 int smallest_poc_greater_than_current = -1;
3793 Picture sorted_short_ref[32];
// B slices: short-term refs must be ordered by POC relative to the
// current picture, so sort them first (selection sort by ascending POC)
3795 if(h->slice_type==B_TYPE){
3799 /* sort frame according to poc in B slice */
3800 for(out_i=0; out_i<h->short_ref_count; out_i++){
3802 int best_poc=INT_MAX;
3804 for(i=0; i<h->short_ref_count; i++){
3805 const int poc= h->short_ref[i]->poc;
3806 if(poc > limit && poc < best_poc){
3812 assert(best_i != INT_MIN);
3815 sorted_short_ref[out_i]= *h->short_ref[best_i];
3816 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where POCs cross the current picture: L0 walks backwards
// from here, L1 forwards
3817 if (-1 == smallest_poc_greater_than_current) {
3818 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3819 smallest_poc_greater_than_current = out_i;
3825 if(s->picture_structure == PICT_FRAME){
3826 if(h->slice_type==B_TYPE){
3828 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3830 // find the largest poc
3831 for(list=0; list<2; list++){
3834 int step= list ? -1 : 1;
// walk outward from the POC crossing point; when one side runs out,
// continue on the other side (the j reset below)
3836 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3837 while(j<0 || j>= h->short_ref_count){
3838 if(j != -99 && step == (list ? -1 : 1))
3841 j= smallest_poc_greater_than_current + (step>>1);
3843 if(sorted_short_ref[j].reference != 3) continue;
3844 h->default_ref_list[list][index ]= sorted_short_ref[j];
3845 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// long-term refs are appended after all short-term refs, by index
3848 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3849 if(h->long_ref[i] == NULL) continue;
3850 if(h->long_ref[i]->reference != 3) continue;
3852 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3853 h->default_ref_list[ list ][index++].pic_id= i;;
3856 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3857 // swap the two first elements of L1 when
3858 // L0 and L1 are identical
3859 Picture temp= h->default_ref_list[1][0];
3860 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3861 h->default_ref_list[1][1] = temp;
3864 if(index < h->ref_count[ list ])
3865 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P slices: short-term refs in stored (frame_num) order, then long-term
3869 for(i=0; i<h->short_ref_count; i++){
3870 if(h->short_ref[i]->reference != 3) continue; //FIXME reference-field handling
3871 h->default_ref_list[0][index ]= *h->short_ref[i];
3872 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3874 for(i = 0; i < 16; i++){
3875 if(h->long_ref[i] == NULL) continue;
3876 if(h->long_ref[i]->reference != 3) continue;
3877 h->default_ref_list[0][index ]= *h->long_ref[i];
3878 h->default_ref_list[0][index++].pic_id= i;;
3880 if(index < h->ref_count[0])
3881 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3884 if(h->slice_type==B_TYPE){
3886 //FIXME second field balh
3890 for (i=0; i<h->ref_count[0]; i++) {
3891 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3893 if(h->slice_type==B_TYPE){
3894 for (i=0; i<h->ref_count[1]; i++) {
3895 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3902 static void print_short_term(H264Context *h);
3903 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts from default_ref_list, then for each reordering command moves
 * the named short-term (by frame_num delta) or long-term (by index)
 * picture to the front of the remaining list.
 * @return 0 on success, negative on bitstream errors
 */
3905 static int decode_ref_pic_list_reordering(H264Context *h){
3906 MpegEncContext * const s = &h->s;
3909 print_short_term(h);
3911 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3913 for(list=0; list<2; list++){
3914 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
3916 if(get_bits1(&s->gb)){
// "pred" tracks picNumLXPred per the spec's modification process
3917 int pred= h->curr_pic_num;
3919 for(index=0; ; index++){
3920 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3921 unsigned int pic_id;
3923 Picture *ref = NULL;
// idc 3 terminates the reordering loop
3925 if(reordering_of_pic_nums_idc==3)
3928 if(index >= h->ref_count[list]){
3929 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3933 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term, identified by abs_diff_pic_num from pred
3934 if(reordering_of_pic_nums_idc<2){
3935 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3937 if(abs_diff_pic_num >= h->max_pic_num){
3938 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3942 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3943 else pred+= abs_diff_pic_num;
// wrap modulo max_pic_num (a power of two)
3944 pred &= h->max_pic_num - 1;
3946 for(i= h->short_ref_count-1; i>=0; i--){
3947 ref = h->short_ref[i];
3948 assert(ref->reference == 3);
3949 assert(!ref->long_ref);
3950 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3954 ref->pic_id= ref->frame_num;
// idc 2: long-term, identified directly by index
3956 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3958 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3961 ref = h->long_ref[pic_id];
3963 ref->pic_id= pic_id;
3964 assert(ref->reference == 3);
3965 assert(ref->long_ref);
3973 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3974 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift the list down to "index" and insert the selected picture there
3976 for(i=index; i+1<h->ref_count[list]; i++){
3977 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3980 for(; i > index; i--){
3981 h->ref_list[list][i]= h->ref_list[list][i-1];
3983 h->ref_list[list][index]= *ref;
3986 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3992 if(h->slice_type!=B_TYPE) break;
// replace any remaining holes with the current picture (error concealment)
3994 for(list=0; list<2; list++){
3995 for(index= 0; index < h->ref_count[list]; index++){
3996 if(!h->ref_list[list][index].data[0])
3997 h->ref_list[list][index]= s->current_picture;
3999 if(h->slice_type!=B_TYPE) break;
// temporal direct mode needs the distance scale factors
4002 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4003 direct_dist_scale_factor(h);
4004 direct_ref_list_init(h);
/**
 * Derives per-field reference entries (slots 16+2*i / 16+2*i+1) from the
 * frame references for MBAFF decoding: field pictures alias the frame
 * data with doubled linesize, the bottom field offset by one line.
 * Weights/offsets are duplicated for both fields.
 */
4008 static void fill_mbaff_ref_list(H264Context *h){
4010 for(list=0; list<2; list++){
4011 for(i=0; i<h->ref_count[list]; i++){
4012 Picture *frame = &h->ref_list[list][i];
4013 Picture *field = &h->ref_list[list][16+2*i];
4016 field[0].linesize[j] <<= 1;
4017 field[1] = field[0];
// bottom field starts one (original) line below the top field
4019 field[1].data[j] += frame->linesize[j];
4021 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4022 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4024 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4025 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// implicit weights are duplicated along both ref-list axes
4029 for(j=0; j<h->ref_count[1]; j++){
4030 for(i=0; i<h->ref_count[0]; i++)
4031 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4032 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4033 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header (explicit weighted
 * prediction): per-reference luma/chroma weights and offsets; refs
 * without a flag fall back to the default weight (1 << log2_denom) and
 * zero offset.  Sets h->use_weight/use_weight_chroma accordingly.
 */
4037 static int pred_weight_table(H264Context *h){
4038 MpegEncContext * const s = &h->s;
4040 int luma_def, chroma_def;
4043 h->use_weight_chroma= 0;
4044 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4045 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4046 luma_def = 1<<h->luma_log2_weight_denom;
4047 chroma_def = 1<<h->chroma_log2_weight_denom;
4049 for(list=0; list<2; list++){
4050 for(i=0; i<h->ref_count[list]; i++){
4051 int luma_weight_flag, chroma_weight_flag;
4053 luma_weight_flag= get_bits1(&s->gb);
4054 if(luma_weight_flag){
4055 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4056 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// only non-default weights actually enable the weighting code path
4057 if( h->luma_weight[list][i] != luma_def
4058 || h->luma_offset[list][i] != 0)
4061 h->luma_weight[list][i]= luma_def;
4062 h->luma_offset[list][i]= 0;
4065 chroma_weight_flag= get_bits1(&s->gb);
4066 if(chroma_weight_flag){
4069 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4070 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4071 if( h->chroma_weight[list][i][j] != chroma_def
4072 || h->chroma_offset[list][i][j] != 0)
4073 h->use_weight_chroma= 1;
4078 h->chroma_weight[list][i][j]= chroma_def;
4079 h->chroma_offset[list][i][j]= 0;
// list 1 is only present for B slices
4083 if(h->slice_type != B_TYPE) break;
4085 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes the implicit weighted-prediction table for B slices: weights
 * derived from the POC distances between the current picture and each
 * (ref0, ref1) pair, clamped to 32/32 outside the valid range.  The
 * single-ref symmetric case disables weighting entirely.
 */
4089 static void implicit_weight_table(H264Context *h){
4090 MpegEncContext * const s = &h->s;
4092 int cur_poc = s->current_picture_ptr->poc;
// one ref each side, equidistant -> implicit weights are all 32/32,
// so skip weighting completely
4094 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4095 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4097 h->use_weight_chroma= 0;
// 2 == "implicit" mode for the weighting code paths
4102 h->use_weight_chroma= 2;
4103 h->luma_log2_weight_denom= 5;
4104 h->chroma_log2_weight_denom= 5;
4106 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4107 int poc0 = h->ref_list[0][ref0].poc;
4108 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4109 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor follow the spec's implicit derivation
4110 int td = clip(poc1 - poc0, -128, 127);
4112 int tb = clip(cur_poc - poc0, -128, 127);
4113 int tx = (16384 + (FFABS(td) >> 1)) / td;
4114 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range factors fall back to the neutral 32/32 split
4115 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4116 h->implicit_weight[ref0][ref1] = 32;
4118 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4120 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the decoder's reference on a picture: checks whether it is still
 * queued as the delayed output picture or in the delayed-picture list before
 * the (elided) code clears its reference state.
 */
4125 static inline void unreference_pic(H264Context *h, Picture *pic){
4128 if(pic == h->delayed_output_pic)
4131 for(i = 0; h->delayed_pic[i]; i++)
4132 if(pic == h->delayed_pic[i]){
4140 * instantaneous decoder refresh.
/**
 * Handles an IDR: releases every long-term and short-term reference picture,
 * resetting the DPB reference state as required before an IDR slice.
 */
4142 static void idr(H264Context *h){
/* release all 16 possible long-term reference slots */
4145 for(i=0; i<16; i++){
4146 if (h->long_ref[i] != NULL) {
4147 unreference_pic(h, h->long_ref[i]);
4148 h->long_ref[i]= NULL;
4151 h->long_ref_count=0;
/* release all short-term references */
4153 for(i=0; i<h->short_ref_count; i++){
4154 unreference_pic(h, h->short_ref[i]);
4155 h->short_ref[i]= NULL;
4157 h->short_ref_count=0;
4160 /* forget old pics after a seek */
/**
 * AVCodec flush callback: clears the delayed-picture queue, the delayed
 * output picture and the current picture's reference flag so stale frames
 * are not emitted after a seek.
 */
4161 static void flush_dpb(AVCodecContext *avctx){
4162 H264Context *h= avctx->priv_data;
4164 for(i=0; i<16; i++) {
4165 if(h->delayed_pic[i])
4166 h->delayed_pic[i]->reference= 0;
4167 h->delayed_pic[i]= NULL;
4169 if(h->delayed_output_pic)
4170 h->delayed_output_pic->reference= 0;
4171 h->delayed_output_pic= NULL;
4173 if(h->s.current_picture_ptr)
4174 h->s.current_picture_ptr->reference= 0;
4179 * @return the removed picture or NULL if an error occurs
/**
 * Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array. Linear search over the (at most 16)
 * short-term entries.
 */
4181 static Picture * remove_short(H264Context *h, int frame_num){
4182 MpegEncContext * const s = &h->s;
4185 if(s->avctx->debug&FF_DEBUG_MMCO)
4186 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4188 for(i=0; i<h->short_ref_count; i++){
4189 Picture *pic= h->short_ref[i];
4190 if(s->avctx->debug&FF_DEBUG_MMCO)
4191 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4192 if(pic->frame_num == frame_num){
4193 h->short_ref[i]= NULL;
/* close the gap left by the removed entry */
4194 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4195 h->short_ref_count--;
4204 * @return the removed picture or NULL if an error occurs
/**
 * Clears long-term reference slot i and returns the picture that occupied it
 * (NULL when the slot was empty); the long_ref_count is only decremented for
 * a non-empty slot.
 */
4206 static Picture * remove_long(H264Context *h, int i){
4209 pic= h->long_ref[i];
4210 h->long_ref[i]= NULL;
4211 if(pic) h->long_ref_count--;
4217 * print short term list
/**
 * Debug helper: dumps the short-term reference list (index, frame_num, poc,
 * data pointer) when FF_DEBUG_MMCO is enabled; no-op otherwise.
 */
4219 static void print_short_term(H264Context *h) {
4221 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4222 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4223 for(i=0; i<h->short_ref_count; i++){
4224 Picture *pic= h->short_ref[i];
4225 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4231 * print long term list
/**
 * Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO is
 * enabled; no-op otherwise.
 */
4233 static void print_long_term(H264Context *h) {
4235 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4236 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4237 for(i = 0; i < 16; i++){
4238 Picture *pic= h->long_ref[i];
4240 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4247 * Executes the reference picture marking (memory management control operations).
/**
 * Applies the decoded MMCO list (spec 8.2.5.4) to the DPB reference state,
 * then — when the current picture was not made a long-term reference — adds
 * it to the front of the short-term list (sliding-window behaviour).
 */
4249 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4250 MpegEncContext * const s = &h->s;
4252 int current_is_long=0;
4255 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4256 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4258 for(i=0; i<mmco_count; i++){
4259 if(s->avctx->debug&FF_DEBUG_MMCO)
4260 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4262 switch(mmco[i].opcode){
/* mark a short-term picture as unused for reference */
4263 case MMCO_SHORT2UNUSED:
4264 pic= remove_short(h, mmco[i].short_frame_num);
4266 unreference_pic(h, pic);
4267 else if(s->avctx->debug&FF_DEBUG_MMCO)
4268 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
/* move a short-term picture into a long-term slot (evicting any occupant) */
4270 case MMCO_SHORT2LONG:
4271 pic= remove_long(h, mmco[i].long_index);
4272 if(pic) unreference_pic(h, pic);
4274 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4275 if (h->long_ref[ mmco[i].long_index ]){
4276 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4277 h->long_ref_count++;
/* mark a long-term picture as unused for reference */
4280 case MMCO_LONG2UNUSED:
4281 pic= remove_long(h, mmco[i].long_index);
4283 unreference_pic(h, pic);
4284 else if(s->avctx->debug&FF_DEBUG_MMCO)
4285 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (MMCO_LONG, case label elided) assign the CURRENT picture to a long slot */
4288 pic= remove_long(h, mmco[i].long_index);
4289 if(pic) unreference_pic(h, pic);
4291 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4292 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4293 h->long_ref_count++;
4297 case MMCO_SET_MAX_LONG:
4298 assert(mmco[i].long_index <= 16);
4299 // just remove the long term which index is greater than new max
4300 for(j = mmco[i].long_index; j<16; j++){
4301 pic = remove_long(h, j);
4302 if (pic) unreference_pic(h, pic);
/* (MMCO_RESET, case label elided) drop every reference, short and long */
4306 while(h->short_ref_count){
4307 pic= remove_short(h, h->short_ref[0]->frame_num);
4308 if(pic) unreference_pic(h, pic);
4310 for(j = 0; j < 16; j++) {
4311 pic= remove_long(h, j);
4312 if(pic) unreference_pic(h, pic);
/* current picture stays a short-term ref: insert it at short_ref[0] */
4319 if(!current_is_long){
/* a duplicate frame_num in the short list indicates corrupt state */
4320 pic= remove_short(h, s->current_picture_ptr->frame_num);
4322 unreference_pic(h, pic);
4323 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4326 if(h->short_ref_count)
4327 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4329 h->short_ref[0]= s->current_picture_ptr;
4330 h->short_ref[0]->long_ref=0;
4331 h->short_ref_count++;
4334 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header (spec 7.3.3.3) into
 * h->mmco[]. IDR slices get the simplified no_output/long_term_reference
 * handling; otherwise either an explicit adaptive MMCO list is read, or a
 * sliding-window SHORT2UNUSED op is synthesized when the DPB is full.
 */
4339 static int decode_ref_pic_marking(H264Context *h){
4340 MpegEncContext * const s = &h->s;
4343 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4344 s->broken_link= get_bits1(&s->gb) -1;
/* long_term_reference_flag: -1 means "keep current as short-term" */
4345 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4346 if(h->mmco[0].long_index == -1)
4349 h->mmco[0].opcode= MMCO_LONG;
4353 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4354 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4355 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4357 h->mmco[i].opcode= opcode;
4358 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute frame_num, modulo wrap */
4359 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4360 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4361 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4365 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4366 unsigned int long_index= get_ue_golomb(&s->gb);
4367 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4368 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4371 h->mmco[i].long_index= long_index;
4374 if(opcode > (unsigned)MMCO_LONG){
4375 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4378 if(opcode == MMCO_END)
/* sliding window: evict the oldest short-term ref once the DPB is full */
4383 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4385 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4386 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4387 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count of the current picture (spec 8.2.1) for
 * all three poc_type modes and stores the field/frame POCs on the current
 * Picture. Also maintains frame_num_offset across frame_num wraparound.
 */
4397 static int init_poc(H264Context *h){
4398 MpegEncContext * const s = &h->s;
4399 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4402 if(h->nal_unit_type == NAL_IDR_SLICE){
4403 h->frame_num_offset= 0;
/* frame_num wrapped → advance the offset by one full period */
4405 if(h->frame_num < h->prev_frame_num)
4406 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4408 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: POC = msb (tracked across lsb wraps) + transmitted lsb */
4411 if(h->sps.poc_type==0){
4412 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4414 if(h->nal_unit_type == NAL_IDR_SLICE){
4419 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4420 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4421 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4422 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4424 h->poc_msb = h->prev_poc_msb;
4425 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4427 field_poc[1] = h->poc_msb + h->poc_lsb;
4428 if(s->picture_structure == PICT_FRAME)
4429 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num via the SPS ref-frame offsets */
4430 }else if(h->sps.poc_type==1){
4431 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4434 if(h->sps.poc_cycle_length != 0)
4435 abs_frame_num = h->frame_num_offset + h->frame_num;
4439 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4442 expected_delta_per_poc_cycle = 0;
4443 for(i=0; i < h->sps.poc_cycle_length; i++)
4444 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4446 if(abs_frame_num > 0){
4447 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4448 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4450 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4451 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4452 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4456 if(h->nal_ref_idc == 0)
4457 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4459 field_poc[0] = expectedpoc + h->delta_poc[0];
4460 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4462 if(s->picture_structure == PICT_FRAME)
4463 field_poc[1] += h->delta_poc[1];
/* poc_type 2 (else branch): POC follows decoding order directly */
4466 if(h->nal_unit_type == NAL_IDR_SLICE){
4469 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4470 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4476 if(s->picture_structure != PICT_BOTTOM_FIELD)
4477 s->current_picture_ptr->field_poc[0]= field_poc[0];
4478 if(s->picture_structure != PICT_TOP_FIELD)
4479 s->current_picture_ptr->field_poc[1]= field_poc[1];
4480 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4481 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4487 * decodes a slice header.
4488 * this will also call MPV_common_init() and frame_start() as needed
/**
 * Parses the slice header (spec 7.3.3): slice type, PPS/SPS selection,
 * geometry (re)initialization, frame_num, picture structure, POC fields,
 * reference counts and list reordering, weight tables, ref pic marking,
 * cabac_init_idc, QP and deblocking parameters. Bitstream reads must stay
 * in exactly this order.
 */
4490 static int decode_slice_header(H264Context *h){
4491 MpegEncContext * const s = &h->s;
4492 unsigned int first_mb_in_slice;
4493 unsigned int pps_id;
4494 int num_ref_idx_active_override_flag;
4495 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4496 unsigned int slice_type, tmp;
4497 int default_ref_list_done = 0;
4499 s->current_picture.reference= h->nal_ref_idc != 0;
4500 s->dropable= h->nal_ref_idc == 0;
4502 first_mb_in_slice= get_ue_golomb(&s->gb);
4504 slice_type= get_ue_golomb(&s->gb);
4506 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed for the whole picture" (value - 5) */
4511 h->slice_type_fixed=1;
4513 h->slice_type_fixed=0;
4515 slice_type= slice_type_map[ slice_type ];
4516 if (slice_type == I_TYPE
4517 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4518 default_ref_list_done = 1;
4520 h->slice_type= slice_type;
4522 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4524 pps_id= get_ue_golomb(&s->gb);
4525 if(pps_id>=MAX_PPS_COUNT){
4526 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4529 h->pps= h->pps_buffer[pps_id];
/* an all-zero PPS entry (slice_group_count==0) means it was never parsed */
4530 if(h->pps.slice_group_count == 0){
4531 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4535 h->sps= h->sps_buffer[ h->pps.sps_id ];
4536 if(h->sps.log2_max_frame_num == 0){
4537 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
/* rebuild dequant tables only when the active PPS actually changed */
4541 if(h->dequant_coeff_pps != pps_id){
4542 h->dequant_coeff_pps = pps_id;
4543 init_dequant_tables(h);
4546 s->mb_width= h->sps.mb_width;
4547 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4549 h->b_stride= s->mb_width*4;
4550 h->b8_stride= s->mb_width*2;
4552 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4553 if(h->sps.frame_mbs_only_flag)
4554 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4556 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4558 if (s->context_initialized
4559 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4563 if (!s->context_initialized) {
4564 if (MPV_common_init(s) < 0)
/* scan tables: identity layout for the C IDCT, permuted for asm IDCTs */
4567 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4568 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4569 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4572 for(i=0; i<16; i++){
4573 #define T(x) (x>>2) | ((x<<2) & 0xF)
4574 h->zigzag_scan[i] = T(zigzag_scan[i]);
4575 h-> field_scan[i] = T( field_scan[i]);
4579 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4580 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4581 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4582 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4583 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4586 for(i=0; i<64; i++){
4587 #define T(x) (x>>3) | ((x&7)<<3)
4588 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4589 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4590 h->field_scan8x8[i] = T(field_scan8x8[i]);
4591 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* q0 (lossless / transform-bypass) scans never get the IDCT permutation */
4595 if(h->sps.transform_bypass){ //FIXME same ugly
4596 h->zigzag_scan_q0 = zigzag_scan;
4597 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4598 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4599 h->field_scan_q0 = field_scan;
4600 h->field_scan8x8_q0 = field_scan8x8;
4601 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4603 h->zigzag_scan_q0 = h->zigzag_scan;
4604 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4605 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4606 h->field_scan_q0 = h->field_scan;
4607 h->field_scan8x8_q0 = h->field_scan8x8;
4608 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4613 s->avctx->width = s->width;
4614 s->avctx->height = s->height;
4615 s->avctx->sample_aspect_ratio= h->sps.sar;
4616 if(!s->avctx->sample_aspect_ratio.den)
4617 s->avctx->sample_aspect_ratio.den = 1;
4619 if(h->sps.timing_info_present_flag){
4620 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around wrong timing info written by old x264 builds (< 44) */
4621 if(h->x264_build > 0 && h->x264_build < 44)
4622 s->avctx->time_base.den *= 2;
4623 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4624 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* first slice of the picture: start a new frame */
4628 if(h->slice_num == 0){
4629 if(frame_start(h) < 0)
4633 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4634 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4637 h->mb_aff_frame = 0;
4638 if(h->sps.frame_mbs_only_flag){
4639 s->picture_structure= PICT_FRAME;
4641 if(get_bits1(&s->gb)) { //field_pic_flag
4642 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4643 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4645 s->picture_structure= PICT_FRAME;
4646 h->mb_aff_frame = h->sps.mb_aff;
4649 assert(s->mb_num == s->mb_width * s->mb_height);
/* in MBAFF mode first_mb_in_slice addresses MB pairs, hence the shift */
4650 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4651 first_mb_in_slice >= s->mb_num){
4652 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4655 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4656 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4657 assert(s->mb_y < s->mb_height);
4659 if(s->picture_structure==PICT_FRAME){
4660 h->curr_pic_num= h->frame_num;
4661 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4663 h->curr_pic_num= 2*h->frame_num;
4664 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4667 if(h->nal_unit_type == NAL_IDR_SLICE){
4668 get_ue_golomb(&s->gb); /* idr_pic_id */
4671 if(h->sps.poc_type==0){
4672 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4674 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4675 h->delta_poc_bottom= get_se_golomb(&s->gb);
4679 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4680 h->delta_poc[0]= get_se_golomb(&s->gb);
4682 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4683 h->delta_poc[1]= get_se_golomb(&s->gb);
4688 if(h->pps.redundant_pic_cnt_present){
4689 h->redundant_pic_count= get_ue_golomb(&s->gb);
4692 //set defaults, might be overridden a few line later
4693 h->ref_count[0]= h->pps.ref_count[0];
4694 h->ref_count[1]= h->pps.ref_count[1];
4696 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4697 if(h->slice_type == B_TYPE){
4698 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4699 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4700 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4702 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4704 if(num_ref_idx_active_override_flag){
4705 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4706 if(h->slice_type==B_TYPE)
4707 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* sanity-limit corrupt ref counts to avoid out-of-bounds list accesses */
4709 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4710 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4711 h->ref_count[0]= h->ref_count[1]= 1;
4717 if(!default_ref_list_done){
4718 fill_default_ref_list(h);
4721 if(decode_ref_pic_list_reordering(h) < 0)
4724 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4725 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4726 pred_weight_table(h);
4727 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4728 implicit_weight_table(h);
4732 if(s->current_picture.reference)
4733 decode_ref_pic_marking(h);
4736 fill_mbaff_ref_list(h);
4738 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4739 tmp = get_ue_golomb(&s->gb);
4741 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4744 h->cabac_init_idc= tmp;
4747 h->last_qscale_diff = 0;
4748 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4750 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4754 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4755 //FIXME qscale / qp ... stuff
4756 if(h->slice_type == SP_TYPE){
4757 get_bits1(&s->gb); /* sp_for_switch_flag */
4759 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4760 get_se_golomb(&s->gb); /* slice_qs_delta */
4763 h->deblocking_filter = 1;
4764 h->slice_alpha_c0_offset = 0;
4765 h->slice_beta_offset = 0;
4766 if( h->pps.deblocking_filter_parameters_present ) {
4767 tmp= get_ue_golomb(&s->gb);
4769 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4772 h->deblocking_filter= tmp;
/* bitstream idc: 0=on 1=off; internal flag uses the opposite sense */
4773 if(h->deblocking_filter < 2)
4774 h->deblocking_filter^= 1; // 1<->0
4776 if( h->deblocking_filter ) {
4777 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4778 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* honor the user's skip_loop_filter discard level */
4781 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4782 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4783 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4784 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4785 h->deblocking_filter= 0;
/* NOTE(review): slice groups (FMO) unsupported — this read is presumably
 * inside an #if 0 in the elided lines (the '?' would not compile). */
4788 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4789 slice_group_change_cycle= get_bits(&s->gb, ?);
4794 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4795 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4797 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4798 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4800 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4802 av_get_pict_type_char(h->slice_type),
4803 pps_id, h->frame_num,
4804 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4805 h->ref_count[0], h->ref_count[1],
4807 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4809 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* CODEC_FLAG2_FAST: cheaper 2-tap qpel for non-reference pictures */
4813 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4814 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4815 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4817 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4818 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/**
 * Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit using the raw bit-reader cache (log = position of the leading 1,
 * so log-1 zeros were skipped; log bits total are consumed).
 */
4827 static inline int get_level_prefix(GetBitContext *gb){
4831 OPEN_READER(re, gb);
4832 UPDATE_CACHE(re, gb);
4833 buf=GET_CACHE(re, gb);
4835 log= 32 - av_log2(buf);
4837 print_bin(buf>>(32-log), log);
4838 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4841 LAST_SKIP_BITS(re, gb, log);
4842 CLOSE_READER(re, gb);
/**
 * Returns whether the 8x8 transform may be used for the current macroblock:
 * disallowed if any sub-partition is smaller than 8x8, or is DIRECT-predicted
 * without direct_8x8_inference_flag. (Loop header/return are in elided lines.)
 */
4847 static inline int get_dct8x8_allowed(H264Context *h){
4850 if(!IS_SUB_8X8(h->sub_mb_type[i])
4851 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4858 * decodes a residual block.
4859 * @param n block index
4860 * @param scantable scantable
4861 * @param max_coeff number of coefficients in the block
4862 * @return <0 if an error occurred
/**
 * CAVLC residual decoding (spec 9.2): coeff_token → trailing ones → level
 * suffixes → total_zeros → run_before, writing dequantized (or raw, when
 * qmul is bypassed via total_coeff==max_coeff path) levels into block[] at
 * scantable positions.
 */
4864 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4865 MpegEncContext * const s = &h->s;
4866 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4868 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4870 //FIXME put trailing_onex into the context
/* coeff_token VLC table choice depends on the predicted nonzero count */
4872 if(n == CHROMA_DC_BLOCK_INDEX){
4873 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4874 total_coeff= coeff_token>>2;
4876 if(n == LUMA_DC_BLOCK_INDEX){
4877 total_coeff= pred_non_zero_count(h, 0);
4878 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4879 total_coeff= coeff_token>>2;
4881 total_coeff= pred_non_zero_count(h, n);
4882 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4883 total_coeff= coeff_token>>2;
4884 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4888 //FIXME set last_non_zero?
4893 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff<0)\n", s->mb_x, s->mb_y);
/* low 2 bits of coeff_token encode the trailing ±1 count */
4897 trailing_ones= coeff_token&3;
4898 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4899 assert(total_coeff<=16);
/* trailing ones: only a sign bit each (1 → -1, 0 → +1) */
4901 for(i=0; i<trailing_ones; i++){
4902 level[i]= 1 - 2*get_bits1(gb);
4906 int level_code, mask;
4907 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4908 int prefix= get_level_prefix(gb);
4910 //first coefficient has suffix_length equal to 0 or 1
4911 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4913 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4915 level_code= (prefix<<suffix_length); //part
4916 }else if(prefix==14){
4918 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4920 level_code= prefix + get_bits(gb, 4); //part
4921 }else if(prefix==15){
4922 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4923 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4925 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* fewer than 3 trailing ones → levels |1| are impossible, shift the range */
4929 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level: even → +, odd → - */
4934 mask= -(level_code&1);
4935 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4938 //remaining coefficients have suffix_length > 0
4939 for(;i<total_coeff;i++) {
4940 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4941 prefix = get_level_prefix(gb);
4943 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4944 }else if(prefix==15){
4945 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4947 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4950 mask= -(level_code&1);
4951 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adapt suffix_length upward as level magnitudes grow */
4952 if(level_code > suffix_limit[suffix_length])
/* a full block leaves no room for zeros: skip the total_zeros VLC */
4957 if(total_coeff == max_coeff)
4960 if(n == CHROMA_DC_BLOCK_INDEX)
4961 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4963 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place levels back-to-front: coeff_num is the highest scan position */
4966 coeff_num = zeros_left + total_coeff - 1;
4967 j = scantable[coeff_num];
/* qmul-bypass path (elided condition): store raw levels */
4969 block[j] = level[0];
4970 for(i=1;i<total_coeff;i++) {
4973 else if(zeros_left < 7){
4974 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4976 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4978 zeros_left -= run_before;
4979 coeff_num -= 1 + run_before;
4980 j= scantable[ coeff_num ];
/* dequantizing path: scale by qmul[j] with rounding (+32 >> 6) */
4985 block[j] = (level[0] * qmul[j] + 32)>>6;
4986 for(i=1;i<total_coeff;i++) {
4989 else if(zeros_left < 7){
4990 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4992 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4994 zeros_left -= run_before;
4995 coeff_num -= 1 + run_before;
4996 j= scantable[ coeff_num ];
4998 block[j]= (level[i] * qmul[j] + 32)>>6;
5003 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * For MBAFF skip runs: predicts the field decoding flag of the current MB
 * pair from the left neighbour if it belongs to this slice, else the top
 * neighbour, else 0 (frame mode).
 */
5010 static void predict_field_decoding_flag(H264Context *h){
5011 MpegEncContext * const s = &h->s;
5012 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5013 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5014 ? s->current_picture.mb_type[mb_xy-1]
5015 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5016 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5018 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5022 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Reconstructs a skipped macroblock: clears residual state, derives motion
 * (direct prediction for B_SKIP, P-skip MV prediction for P_SKIP) and writes
 * back mb_type / qscale / motion for the deblocker and neighbours.
 */
5024 static void decode_mb_skip(H264Context *h){
5025 MpegEncContext * const s = &h->s;
5026 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* a skipped MB has no coefficients anywhere */
5029 memset(h->non_zero_count[mb_xy], 0, 16);
5030 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5033 mb_type|= MB_TYPE_INTERLACED;
5035 if( h->slice_type == B_TYPE )
5037 // just for fill_caches. pred_direct_motion will set the real mb_type
5038 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5040 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5041 pred_direct_motion(h, &mb_type);
5042 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: one 16x16 list-0 partition with the predicted skip MV */
5047 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5049 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5050 pred_pskip_motion(h, &mx, &my);
5051 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5052 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5055 write_back_motion(h, mb_type);
5056 s->current_picture.mb_type[mb_xy]= mb_type;
5057 s->current_picture.qscale_table[mb_xy]= s->qscale;
5058 h->slice_table[ mb_xy ]= h->slice_num;
5059 h->prev_mb_skipped= 1;
5063 * decodes a macroblock
5064 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5066 static int decode_mb_cavlc(H264Context *h){
5067 MpegEncContext * const s = &h->s;
5068 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5069 int partition_count;
5070 unsigned int mb_type, cbp;
5071 int dct8x8_allowed= h->pps.transform_8x8_mode;
5073 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5075 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5076 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5078 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5079 if(s->mb_skip_run==-1)
5080 s->mb_skip_run= get_ue_golomb(&s->gb);
5082 if (s->mb_skip_run--) {
5083 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5084 if(s->mb_skip_run==0)
5085 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5087 predict_field_decoding_flag(h);
5094 if( (s->mb_y&1) == 0 )
5095 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5097 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5099 h->prev_mb_skipped= 0;
5101 mb_type= get_ue_golomb(&s->gb);
5102 if(h->slice_type == B_TYPE){
5104 partition_count= b_mb_type_info[mb_type].partition_count;
5105 mb_type= b_mb_type_info[mb_type].type;
5108 goto decode_intra_mb;
5110 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5112 partition_count= p_mb_type_info[mb_type].partition_count;
5113 mb_type= p_mb_type_info[mb_type].type;
5116 goto decode_intra_mb;
5119 assert(h->slice_type == I_TYPE);
5122 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5126 cbp= i_mb_type_info[mb_type].cbp;
5127 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5128 mb_type= i_mb_type_info[mb_type].type;
5132 mb_type |= MB_TYPE_INTERLACED;
5134 h->slice_table[ mb_xy ]= h->slice_num;
5136 if(IS_INTRA_PCM(mb_type)){
5139 // we assume these blocks are very rare so we dont optimize it
5140 align_get_bits(&s->gb);
5142 // The pixels are stored in the same order as levels in h->mb array.
5143 for(y=0; y<16; y++){
5144 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5145 for(x=0; x<16; x++){
5146 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5147 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5151 const int index= 256 + 4*(y&3) + 32*(y>>2);
5153 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5154 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5158 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5160 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5161 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5165 // In deblocking, the quantizer is 0
5166 s->current_picture.qscale_table[mb_xy]= 0;
5167 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5168 // All coeffs are present
5169 memset(h->non_zero_count[mb_xy], 16, 16);
5171 s->current_picture.mb_type[mb_xy]= mb_type;
5176 h->ref_count[0] <<= 1;
5177 h->ref_count[1] <<= 1;
5180 fill_caches(h, mb_type, 0);
5183 if(IS_INTRA(mb_type)){
5185 // init_top_left_availability(h);
5186 if(IS_INTRA4x4(mb_type)){
5189 if(dct8x8_allowed && get_bits1(&s->gb)){
5190 mb_type |= MB_TYPE_8x8DCT;
5194 // fill_intra4x4_pred_table(h);
5195 for(i=0; i<16; i+=di){
5196 int mode= pred_intra_mode(h, i);
5198 if(!get_bits1(&s->gb)){
5199 const int rem_mode= get_bits(&s->gb, 3);
5200 mode = rem_mode + (rem_mode >= mode);
5204 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5206 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5208 write_back_intra_pred_mode(h);
5209 if( check_intra4x4_pred_mode(h) < 0)
5212 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5213 if(h->intra16x16_pred_mode < 0)
5217 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
5220 h->chroma_pred_mode= pred_mode;
5221 }else if(partition_count==4){
5222 int i, j, sub_partition_count[4], list, ref[2][4];
5224 if(h->slice_type == B_TYPE){
5226 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5227 if(h->sub_mb_type[i] >=13){
5228 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5231 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5232 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5234 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5235 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5236 pred_direct_motion(h, &mb_type);
5237 h->ref_cache[0][scan8[4]] =
5238 h->ref_cache[1][scan8[4]] =
5239 h->ref_cache[0][scan8[12]] =
5240 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5243 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5245 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5246 if(h->sub_mb_type[i] >=4){
5247 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5250 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5251 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5255 for(list=0; list<2; list++){
5256 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5257 if(ref_count == 0) continue;
5259 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5260 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5261 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5263 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
5275 dct8x8_allowed = get_dct8x8_allowed(h);
5277 for(list=0; list<2; list++){
5278 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5279 if(ref_count == 0) continue;
5282 if(IS_DIRECT(h->sub_mb_type[i])) {
5283 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5286 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5287 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5289 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5290 const int sub_mb_type= h->sub_mb_type[i];
5291 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5292 for(j=0; j<sub_partition_count[i]; j++){
5294 const int index= 4*i + block_width*j;
5295 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5296 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5297 mx += get_se_golomb(&s->gb);
5298 my += get_se_golomb(&s->gb);
5299 tprintf("final mv:%d %d\n", mx, my);
5301 if(IS_SUB_8X8(sub_mb_type)){
5302 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5303 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5304 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5305 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5306 }else if(IS_SUB_8X4(sub_mb_type)){
5307 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5308 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5309 }else if(IS_SUB_4X8(sub_mb_type)){
5310 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5311 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5313 assert(IS_SUB_4X4(sub_mb_type));
5314 mv_cache[ 0 ][0]= mx;
5315 mv_cache[ 0 ][1]= my;
5319 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5325 }else if(IS_DIRECT(mb_type)){
5326 pred_direct_motion(h, &mb_type);
5327 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5329 int list, mx, my, i;
5330 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5331 if(IS_16X16(mb_type)){
5332 for(list=0; list<2; list++){
5333 if(h->ref_count[list]>0){
5334 if(IS_DIR(mb_type, 0, list)){
5335 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5336 if(val >= h->ref_count[list]){
5337 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5340 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5342 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5345 for(list=0; list<2; list++){
5346 if(IS_DIR(mb_type, 0, list)){
5347 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5348 mx += get_se_golomb(&s->gb);
5349 my += get_se_golomb(&s->gb);
5350 tprintf("final mv:%d %d\n", mx, my);
5352 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5354 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5357 else if(IS_16X8(mb_type)){
5358 for(list=0; list<2; list++){
5359 if(h->ref_count[list]>0){
5361 if(IS_DIR(mb_type, i, list)){
5362 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5363 if(val >= h->ref_count[list]){
5364 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5367 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5369 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5373 for(list=0; list<2; list++){
5375 if(IS_DIR(mb_type, i, list)){
5376 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5377 mx += get_se_golomb(&s->gb);
5378 my += get_se_golomb(&s->gb);
5379 tprintf("final mv:%d %d\n", mx, my);
5381 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5383 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5387 assert(IS_8X16(mb_type));
5388 for(list=0; list<2; list++){
5389 if(h->ref_count[list]>0){
5391 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5392 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5393 if(val >= h->ref_count[list]){
5394 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5397 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5399 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5403 for(list=0; list<2; list++){
5405 if(IS_DIR(mb_type, i, list)){
5406 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5407 mx += get_se_golomb(&s->gb);
5408 my += get_se_golomb(&s->gb);
5409 tprintf("final mv:%d %d\n", mx, my);
5411 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5413 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5419 if(IS_INTER(mb_type))
5420 write_back_motion(h, mb_type);
5422 if(!IS_INTRA16x16(mb_type)){
5423 cbp= get_ue_golomb(&s->gb);
5425 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5429 if(IS_INTRA4x4(mb_type))
5430 cbp= golomb_to_intra4x4_cbp[cbp];
5432 cbp= golomb_to_inter_cbp[cbp];
5436 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5437 if(get_bits1(&s->gb))
5438 mb_type |= MB_TYPE_8x8DCT;
5440 s->current_picture.mb_type[mb_xy]= mb_type;
5442 if(cbp || IS_INTRA16x16(mb_type)){
5443 int i8x8, i4x4, chroma_idx;
5444 int chroma_qp, dquant;
5445 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5446 const uint8_t *scan, *scan8x8, *dc_scan;
5448 // fill_non_zero_count_cache(h);
5450 if(IS_INTERLACED(mb_type)){
5451 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5452 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5453 dc_scan= luma_dc_field_scan;
5455 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5456 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5457 dc_scan= luma_dc_zigzag_scan;
5460 dquant= get_se_golomb(&s->gb);
5462 if( dquant > 25 || dquant < -26 ){
5463 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5467 s->qscale += dquant;
5468 if(((unsigned)s->qscale) > 51){
5469 if(s->qscale<0) s->qscale+= 52;
5470 else s->qscale-= 52;
5473 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5474 if(IS_INTRA16x16(mb_type)){
5475 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5476 return -1; //FIXME continue if partitioned and other return -1 too
5479 assert((cbp&15) == 0 || (cbp&15) == 15);
5482 for(i8x8=0; i8x8<4; i8x8++){
5483 for(i4x4=0; i4x4<4; i4x4++){
5484 const int index= i4x4 + 4*i8x8;
5485 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5491 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5494 for(i8x8=0; i8x8<4; i8x8++){
5495 if(cbp & (1<<i8x8)){
5496 if(IS_8x8DCT(mb_type)){
5497 DCTELEM *buf = &h->mb[64*i8x8];
5499 for(i4x4=0; i4x4<4; i4x4++){
5500 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5501 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5504 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5505 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5507 for(i4x4=0; i4x4<4; i4x4++){
5508 const int index= i4x4 + 4*i8x8;
5510 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5516 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5517 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5523 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5524 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5530 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5531 for(i4x4=0; i4x4<4; i4x4++){
5532 const int index= 16 + 4*chroma_idx + i4x4;
5533 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5539 uint8_t * const nnz= &h->non_zero_count_cache[0];
5540 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5541 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5544 uint8_t * const nnz= &h->non_zero_count_cache[0];
5545 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5546 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5547 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5549 s->current_picture.qscale_table[mb_xy]= s->qscale;
5550 write_back_non_zero_count(h);
5553 h->ref_count[0] >>= 1;
5554 h->ref_count[1] >>= 1;
/* Decode the CABAC mb_field_decoding_flag for an MBAFF macroblock pair.
 * The context (cabac_state[70..72]) is conditioned on whether the left and
 * the above macroblock pairs (in the same slice) are field-coded.
 * NOTE(review): this excerpt elides interior lines (the if-bodies, presumably
 * "ctx += 1;", and closing braces) -- confirm against the full file. */
5560 static int decode_cabac_field_decoding_flag(H264Context *h) {
5561 MpegEncContext * const s = &h->s;
5562 const int mb_x = s->mb_x;
/* top MB of the current pair: clear the bottom-of-pair bit */
5563 const int mb_y = s->mb_y & ~1;
/* left neighbouring pair and the pair two rows above (pair above in MBAFF) */
5564 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5565 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5567 unsigned int ctx = 0;
5569 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5572 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5576 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode an intra macroblock type with CABAC.
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding an I_16x16 subtype
 * (cbp_luma, cbp_chroma and the 16x16 prediction mode are folded into the
 * returned index). ctx_base selects the context set; intra_slice chooses
 * between the I-slice and P/B-slice context layouts.
 * NOTE(review): declarations of "mb_type"/"ctx" and the "ctx++" if-bodies are
 * elided in this excerpt -- confirm against the full file. */
5579 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5580 uint8_t *state= &h->cabac_state[ctx_base];
5584 MpegEncContext * const s = &h->s;
5585 const int mba_xy = h->left_mb_xy[0];
5586 const int mbb_xy = h->top_mb_xy;
/* context increment: neighbours in the same slice that are NOT I4x4 */
5588 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5590 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5592 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5593 return 0; /* I4x4 */
5596 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5597 return 0; /* I4x4 */
/* end-of-slice-style terminate bin distinguishes I_PCM */
5600 if( get_cabac_terminate( &h->cabac ) )
5601 return 25; /* PCM */
5603 mb_type = 1; /* I16x16 */
5604 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5605 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5606 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
/* two final bins select the intra16x16 prediction mode */
5607 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5608 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode mb_type with CABAC for I, P and B slices.
 * I: delegates to decode_cabac_intra_mb_type(ctx_base=3).
 * P: small binarization tree over cabac_state[14..17]; intra types are
 *    offset by +5 via decode_cabac_intra_mb_type(ctx_base=17).
 * B: context from left/top non-direct neighbours, then a bit-tree over
 *    cabac_state[27+...]; intra types are offset by +23 (ctx_base=32).
 * NOTE(review): several lines ("ctx" declaration/increments, "bits" handling,
 * closing braces, SI/SP fallthrough return) are elided in this excerpt. */
5612 static int decode_cabac_mb_type( H264Context *h ) {
5613 MpegEncContext * const s = &h->s;
5615 if( h->slice_type == I_TYPE ) {
5616 return decode_cabac_intra_mb_type(h, 3, 1);
5617 } else if( h->slice_type == P_TYPE ) {
5618 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5620 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5621 /* P_L0_D16x16, P_8x8 */
5622 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5624 /* P_L0_D8x16, P_L0_D16x8 */
5625 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5628 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5630 } else if( h->slice_type == B_TYPE ) {
5631 const int mba_xy = h->left_mb_xy[0];
5632 const int mbb_xy = h->top_mb_xy;
/* context increment for each non-direct in-slice neighbour */
5636 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5638 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5641 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5642 return 0; /* B_Direct_16x16 */
5644 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5645 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix tree for the remaining B types */
5648 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5649 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5650 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5651 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5653 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5654 else if( bits == 13 ) {
5655 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5656 } else if( bits == 14 )
5657 return 11; /* B_L1_L0_8x16 */
5658 else if( bits == 15 )
5659 return 22; /* B_8x8 */
5661 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5662 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5664 /* TODO SI/SP frames? */
/* Decode the CABAC mb_skip_flag for the macroblock at (mb_x, mb_y).
 * The context increment counts left/top in-slice neighbours that are not
 * skipped; B slices use a different context base (handled around line 5700).
 * In MBAFF mode the neighbour addresses must account for the field/frame
 * coding of the neighbouring pairs.
 * NOTE(review): declarations of mba_xy/mbb_xy/ctx and several if/else lines
 * are elided in this excerpt -- confirm against the full file. */
5669 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5670 MpegEncContext * const s = &h->s;
5674 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5675 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* left neighbour: step to the bottom MB of the pair when field/frame match */
5678 && h->slice_table[mba_xy] == h->slice_num
5679 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5680 mba_xy += s->mb_stride;
5682 mbb_xy = mb_xy - s->mb_stride;
5684 && h->slice_table[mbb_xy] == h->slice_num
5685 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5686 mbb_xy -= s->mb_stride;
5688 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5690 int mb_xy = mb_x + mb_y*s->mb_stride;
5692 mbb_xy = mb_xy - s->mb_stride;
/* ctx += 1 per in-slice neighbour that was not skipped */
5695 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5697 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5700 if( h->slice_type == B_TYPE )
5702 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decode one intra4x4 prediction mode: a prev_intra4x4_pred_mode_flag bin
 * (state 68), then -- when the prediction is not used -- a 3-bit
 * rem_intra4x4_pred_mode (state 69), remapped around the predicted mode.
 * NOTE(review): the early "return pred_mode;", the "mode" declaration and the
 * final return are elided in this excerpt -- confirm against the full file. */
5705 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5708 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5711 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5712 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5713 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes remain reachable */
5715 if( mode >= pred_mode )
/* Decode intra_chroma_pred_mode (0..3) with CABAC: first bin conditioned on
 * whether the left/top neighbours use a non-DC chroma mode (states 64..66),
 * then up to two more bins with state 64+3.
 * NOTE(review): "ctx" declaration/increments and the returns (0/1/2/3) are
 * elided in this excerpt -- confirm against the full file. */
5721 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5722 const int mba_xy = h->left_mb_xy[0];
5723 const int mbb_xy = h->top_mb_xy;
5727 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5728 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5731 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5734 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5737 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5739 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* 4x4-block geometry lookup tables used by the cbp_luma context derivation:
 * block_idx_x/block_idx_y map a luma4x4 index (raster-in-8x8 scan order) to
 * its x/y position in the 4x4 grid; block_idx_xy is the inverse mapping.
 * NOTE(review): the rows of block_idx_xy are elided in this excerpt. */
5745 static const uint8_t block_idx_x[16] = {
5746 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5748 static const uint8_t block_idx_y[16] = {
5749 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5751 static const uint8_t block_idx_xy[4][4] = {
/* Decode the 4-bit luma coded_block_pattern with CABAC: one bin per 8x8
 * block (states 73..76), each conditioned on whether the 8x8 neighbours to
 * the left and above are coded (taken from left_cbp/top_cbp or from the
 * current MB's partially-decoded cbp via block_idx_xy).
 * NOTE(review): declarations (cbp, cbp_a, cbp_b, ctx, x, y) and several
 * branch/brace lines are elided in this excerpt. */
5758 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5763 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5765 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5768 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* position of the first 4x4 block of this 8x8 block */
5773 x = block_idx_x[4*i8x8];
5774 y = block_idx_y[4*i8x8];
5778 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5779 cbp_a = h->left_cbp;
5780 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5786 /* No need to test for skip as we put 0 for skip block */
5787 /* No need to test for IPCM as we put 1 for IPCM block */
5789 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5790 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5795 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5796 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5800 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC) with
 * two bins over states 77... , each conditioned on the neighbours' chroma cbp
 * (bits 4..5 of left_cbp/top_cbp).
 * NOTE(review): the "return 0;" after the first bin and some braces are
 * elided in this excerpt. */
5806 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5810 cbp_a = (h->left_cbp>>4)&0x03;
5811 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: any chroma coefficients at all? */
5814 if( cbp_a > 0 ) ctx++;
5815 if( cbp_b > 0 ) ctx += 2;
5816 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: AC coefficients too? (context from neighbours' cbp == 2) */
5820 if( cbp_a == 2 ) ctx++;
5821 if( cbp_b == 2 ) ctx += 2;
5822 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta with CABAC: unary bin string over states 60..,
 * first bin conditioned on whether the previous MB had a nonzero delta,
 * then mapped to a signed value ((val+1)/2 with alternating sign).
 * The val > 102 guard bounds the unary loop on corrupt streams.
 * NOTE(review): mbn_xy is computed but declarations, the loop-interior
 * ctx/val updates and the positive-value return are elided here. */
5824 static int decode_cabac_mb_dqp( H264Context *h) {
5825 MpegEncContext * const s = &h->s;
5831 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5833 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5835 if( h->last_qscale_diff != 0 )
5838 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5844 if(val > 102) //prevent infinite loop
/* odd val -> negative delta */
5851 return -(val + 1)/2;
/* Decode sub_mb_type for one 8x8 block of a P macroblock (binarization tree
 * over states 21..23; index into p_sub_mb_type_info).
 * NOTE(review): the return statements of the tree leaves are elided in this
 * excerpt -- confirm against the full file. */
5853 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5854 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5856 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5858 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode sub_mb_type for one 8x8 block of a B macroblock (binarization tree
 * over states 36..39; index into b_sub_mb_type_info).
 * NOTE(review): the "type" declaration/initialization and the final return
 * are elided in this excerpt. */
5862 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5864 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5865 return 0; /* B_Direct_8x8 */
5866 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5867 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5869 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5870 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5871 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining leaves built from two suffix bins */
5874 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5875 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; context (states 399..401) is the number of
 * neighbouring MBs already using the 8x8 transform (h->neighbor_transform_size). */
5879 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5880 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for list/partition n with CABAC: unary bin string over
 * states 54.., contexts derived from the left/top cached ref indices (in B
 * slices direct-predicted neighbours count as ref 0). Caps the value at 32
 * to avoid reading unbounded unary strings from corrupt data.
 * NOTE(review): ctx/ref declarations, the ctx increments after the refa/refb
 * tests, the loop-interior updates and the final return are elided here. */
5883 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5884 int refa = h->ref_cache[list][scan8[n] - 1];
5885 int refb = h->ref_cache[list][scan8[n] - 8];
5889 if( h->slice_type == B_TYPE) {
/* direct-predicted neighbours do not raise the context */
5890 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5892 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5901 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5907 if(ref >= 32 /*h->ref_list[list]*/){
5908 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5909 return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion vector difference component (l: 0 = x, 1 = y) with
 * CABAC: context from the sum of neighbouring |mvd| values (amvd), a
 * truncated-unary prefix (up to 9 bins), an exp-Golomb-style bypass suffix
 * for large magnitudes, and a bypass sign bin.
 * NOTE(review): ctx/mvd/k declarations, the amvd <= 2 branch, loop-interior
 * updates and the overflow clamp are elided in this excerpt. */
5915 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5916 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5917 abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* separate context sets for the x and y components */
5918 int ctxbase = (l == 0) ? 40 : 47;
5923 else if( amvd > 32 )
5928 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5933 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix for mvd >= 9 */
5941 while( get_cabac_bypass( &h->cabac ) ) {
5945 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5950 if( get_cabac_bypass( &h->cabac ) )
5954 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Derive the coded_block_flag context for residual block category "cat"
 * (see the cat table in decode_cabac_residual) and block index idx.
 * nza/nzb are the "neighbour has nonzero coefficients" flags taken from the
 * non_zero_count cache or, for DC blocks, from packed bits in left_cbp/top_cbp.
 * Returns ctx + 4*cat, i.e. 4 contexts per category.
 * NOTE(review): the cat == 0 branch head, ctx declaration and the
 * nza/nzb -> ctx combination lines are elided in this excerpt. */
5957 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC: flag stored as bit 8 of the neighbour cbp */
5962 nza = h->left_cbp&0x100;
5963 nzb = h-> top_cbp&0x100;
5964 } else if( cat == 1 || cat == 2 ) {
5965 nza = h->non_zero_count_cache[scan8[idx] - 1];
5966 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5967 } else if( cat == 3 ) {
/* chroma DC: flags stored as bits 6..7 of the neighbour cbp */
5968 nza = (h->left_cbp>>(6+idx))&0x01;
5969 nzb = (h-> top_cbp>>(6+idx))&0x01;
5972 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5973 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5982 return ctx + 4 * cat;
/* Context offsets for the last_significant_coeff_flag of 8x8 blocks, indexed
 * by scan position (63 entries; marked "used" because it may be referenced
 * only from inline asm in some build configurations). */
5985 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
5986 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5987 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5988 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5989 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block with CABAC: coded_block_flag, the significance
 * map (significant_coeff_flag / last_significant_coeff_flag), then the
 * coefficient levels (coeff_abs_level_minus1 + sign) in reverse scan order,
 * dequantizing with qmul unless qmul is NULL (DC blocks). Updates the
 * non_zero_count cache and cbp_table as a side effect. The CABAC state is
 * optionally copied to a stack-local struct (CABAC_ON_STACK) so the hot loop
 * works on registers/stack instead of through h->cabac.
 * NOTE(review): many interior lines (declarations of index[]/last/coeff_abs,
 * else/brace lines, the #else branches) are elided in this excerpt. */
5992 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5993 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context base tables, second row = field (MBAFF) variants */
5994 static const int significant_coeff_flag_offset[2][6] = {
5995 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5996 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5998 static const int last_coeff_flag_offset[2][6] = {
5999 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6000 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6002 static const int coeff_abs_level_m1_offset[6] = {
6003 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position significance contexts for 8x8 blocks (frame, field) */
6005 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6006 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6007 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6008 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6009 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6010 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6011 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6012 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6013 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6019 int coeff_count = 0;
6022 int abslevelgt1 = 0;
6024 uint8_t *significant_coeff_ctx_base;
6025 uint8_t *last_coeff_ctx_base;
6026 uint8_t *abs_level_m1_ctx_base;
6029 #define CABAC_ON_STACK
6031 #ifdef CABAC_ON_STACK
6034 cc.range = h->cabac.range;
6035 cc.low = h->cabac.low;
6036 cc.bytestream= h->cabac.bytestream;
6038 #define CC &h->cabac
6042 /* cat: 0-> DC 16x16 n = 0
6043 * 1-> AC 16x16 n = luma4x4idx
6044 * 2-> Luma4x4 n = luma4x4idx
6045 * 3-> DC Chroma n = iCbCr
6046 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6047 * 5-> Luma8x8 n = 4 * luma8x8idx
6050 /* read coded block flag */
6052 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: zero the nnz cache entry and bail out early */
6053 if( cat == 1 || cat == 2 )
6054 h->non_zero_count_cache[scan8[n]] = 0;
6056 h->non_zero_count_cache[scan8[16+n]] = 0;
6057 #ifdef CABAC_ON_STACK
6058 h->cabac.range = cc.range ;
6059 h->cabac.low = cc.low ;
6060 h->cabac.bytestream= cc.bytestream;
6066 significant_coeff_ctx_base = h->cabac_state
6067 + significant_coeff_flag_offset[MB_FIELD][cat];
6068 last_coeff_ctx_base = h->cabac_state
6069 + last_coeff_flag_offset[MB_FIELD][cat];
6070 abs_level_m1_ctx_base = h->cabac_state
6071 + coeff_abs_level_m1_offset[cat];
/* significance-map scan: records the scan positions of nonzero coeffs */
6074 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6075 for(last= 0; last < coefs; last++) { \
6076 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6077 if( get_cabac( CC, sig_ctx )) { \
6078 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6079 index[coeff_count++] = last; \
6080 if( get_cabac( CC, last_ctx ) ) { \
6086 if( last == max_coeff -1 ) {\
6087 index[coeff_count++] = last;\
6089 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6090 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
6091 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6093 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6095 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6097 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6100 assert(coeff_count > 0);
/* record the nonzero-coefficient count per category */
6103 h->cbp_table[mb_xy] |= 0x100;
6104 else if( cat == 1 || cat == 2 )
6105 h->non_zero_count_cache[scan8[n]] = coeff_count;
6107 h->cbp_table[mb_xy] |= 0x40 << n;
6109 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6112 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels are decoded in reverse scan order */
6115 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6116 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6117 int j= scantable[index[coeff_count]];
6119 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: only the sign remains; qmul == NULL for DC blocks */
6121 block[j] = get_cabac_bypass_sign( CC, -1);
/* NOTE(review): stray double ';' at end of the next line (harmless) */
6123 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6129 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6130 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* magnitudes >= 15 continue with a bypass-coded exp-Golomb suffix */
6134 if( coeff_abs >= 15 ) {
6136 while( get_cabac_bypass( CC ) ) {
6142 coeff_abs += coeff_abs + get_cabac_bypass( CC );
6148 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6149 else block[j] = coeff_abs;
6151 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6152 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the (possibly stack-local) CABAC state back */
6158 #ifdef CABAC_ON_STACK
6159 h->cabac.range = cc.range ;
6160 h->cabac.low = cc.low ;
6161 h->cabac.bytestream= cc.bytestream;
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * In the plain (non-MBAFF) case they are simply mb_xy - stride and mb_xy - 1;
 * in MBAFF the addresses are adjusted for field/frame mismatches between the
 * current pair and the neighbouring pairs.
 * NOTE(review): the MBAFF branch head, the top_mb_xy condition head and
 * closing braces are elided in this excerpt -- confirm against the full file. */
6166 static void inline compute_mb_neighbors(H264Context *h)
6168 MpegEncContext * const s = &h->s;
6169 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6170 h->top_mb_xy = mb_xy - s->mb_stride;
6171 h->left_mb_xy[0] = mb_xy - 1;
6173 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6174 const int top_pair_xy = pair_xy - s->mb_stride;
6175 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6176 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6177 const int curr_mb_frame_flag = !MB_FIELD;
6178 const int bottom = (s->mb_y & 1);
6180 ? !curr_mb_frame_flag // bottom macroblock
6181 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6183 h->top_mb_xy -= s->mb_stride;
6185 if (left_mb_frame_flag != curr_mb_frame_flag) {
6186 h->left_mb_xy[0] = pair_xy - 1;
6193 * decodes a macroblock
6194 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6196 static int decode_mb_cabac(H264Context *h) {
6197 MpegEncContext * const s = &h->s;
6198 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6199 int mb_type, partition_count, cbp = 0;
6200 int dct8x8_allowed= h->pps.transform_8x8_mode;
6202 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6204 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6205 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6207 /* a skipped mb needs the aff flag from the following mb */
6208 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6209 predict_field_decoding_flag(h);
6210 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6211 skip = h->next_mb_skipped;
6213 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6214 /* read skip flags */
6216 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6217 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6218 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6219 if(h->next_mb_skipped)
6220 predict_field_decoding_flag(h);
6222 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6227 h->cbp_table[mb_xy] = 0;
6228 h->chroma_pred_mode_table[mb_xy] = 0;
6229 h->last_qscale_diff = 0;
6236 if( (s->mb_y&1) == 0 )
6238 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6240 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6242 h->prev_mb_skipped = 0;
6244 compute_mb_neighbors(h);
6245 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6246 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6250 if( h->slice_type == B_TYPE ) {
6252 partition_count= b_mb_type_info[mb_type].partition_count;
6253 mb_type= b_mb_type_info[mb_type].type;
6256 goto decode_intra_mb;
6258 } else if( h->slice_type == P_TYPE ) {
6260 partition_count= p_mb_type_info[mb_type].partition_count;
6261 mb_type= p_mb_type_info[mb_type].type;
6264 goto decode_intra_mb;
6267 assert(h->slice_type == I_TYPE);
6269 partition_count = 0;
6270 cbp= i_mb_type_info[mb_type].cbp;
6271 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6272 mb_type= i_mb_type_info[mb_type].type;
6275 mb_type |= MB_TYPE_INTERLACED;
6277 h->slice_table[ mb_xy ]= h->slice_num;
6279 if(IS_INTRA_PCM(mb_type)) {
6283 // We assume these blocks are very rare so we dont optimize it.
6284 // FIXME The two following lines get the bitstream position in the cabac
6285 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6286 ptr= h->cabac.bytestream;
6287 if(h->cabac.low&0x1) ptr--;
6289 if(h->cabac.low&0x1FF) ptr--;
6292 // The pixels are stored in the same order as levels in h->mb array.
6293 for(y=0; y<16; y++){
6294 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6295 for(x=0; x<16; x++){
6296 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6297 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6301 const int index= 256 + 4*(y&3) + 32*(y>>2);
6303 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6304 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6308 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6310 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6311 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6315 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6317 // All blocks are present
6318 h->cbp_table[mb_xy] = 0x1ef;
6319 h->chroma_pred_mode_table[mb_xy] = 0;
6320 // In deblocking, the quantizer is 0
6321 s->current_picture.qscale_table[mb_xy]= 0;
6322 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6323 // All coeffs are present
6324 memset(h->non_zero_count[mb_xy], 16, 16);
6325 s->current_picture.mb_type[mb_xy]= mb_type;
6330 h->ref_count[0] <<= 1;
6331 h->ref_count[1] <<= 1;
6334 fill_caches(h, mb_type, 0);
6336 if( IS_INTRA( mb_type ) ) {
6338 if( IS_INTRA4x4( mb_type ) ) {
6339 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6340 mb_type |= MB_TYPE_8x8DCT;
6341 for( i = 0; i < 16; i+=4 ) {
6342 int pred = pred_intra_mode( h, i );
6343 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6344 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6347 for( i = 0; i < 16; i++ ) {
6348 int pred = pred_intra_mode( h, i );
6349 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6351 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6354 write_back_intra_pred_mode(h);
6355 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6357 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6358 if( h->intra16x16_pred_mode < 0 ) return -1;
6360 h->chroma_pred_mode_table[mb_xy] =
6361 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6363 pred_mode= check_intra_pred_mode( h, pred_mode );
6364 if( pred_mode < 0 ) return -1;
6365 h->chroma_pred_mode= pred_mode;
6366 } else if( partition_count == 4 ) {
6367 int i, j, sub_partition_count[4], list, ref[2][4];
6369 if( h->slice_type == B_TYPE ) {
6370 for( i = 0; i < 4; i++ ) {
6371 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6372 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6373 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6375 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6376 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6377 pred_direct_motion(h, &mb_type);
6378 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6379 for( i = 0; i < 4; i++ )
6380 if( IS_DIRECT(h->sub_mb_type[i]) )
6381 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6385 for( i = 0; i < 4; i++ ) {
6386 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6387 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6388 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6392 for( list = 0; list < 2; list++ ) {
6393 if( h->ref_count[list] > 0 ) {
6394 for( i = 0; i < 4; i++ ) {
6395 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6396 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6397 if( h->ref_count[list] > 1 )
6398 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6404 h->ref_cache[list][ scan8[4*i]+1 ]=
6405 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6411 dct8x8_allowed = get_dct8x8_allowed(h);
6413 for(list=0; list<2; list++){
6415 if(IS_DIRECT(h->sub_mb_type[i])){
6416 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6419 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6421 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6422 const int sub_mb_type= h->sub_mb_type[i];
6423 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6424 for(j=0; j<sub_partition_count[i]; j++){
6427 const int index= 4*i + block_width*j;
6428 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6429 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6430 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6432 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6433 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6434 tprintf("final mv:%d %d\n", mx, my);
6436 if(IS_SUB_8X8(sub_mb_type)){
6437 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6438 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6439 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6440 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6442 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6443 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6444 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6445 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6446 }else if(IS_SUB_8X4(sub_mb_type)){
6447 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6448 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6450 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6451 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6452 }else if(IS_SUB_4X8(sub_mb_type)){
6453 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6454 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6456 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6457 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6459 assert(IS_SUB_4X4(sub_mb_type));
6460 mv_cache[ 0 ][0]= mx;
6461 mv_cache[ 0 ][1]= my;
6463 mvd_cache[ 0 ][0]= mx - mpx;
6464 mvd_cache[ 0 ][1]= my - mpy;
6468 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6469 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6470 p[0] = p[1] = p[8] = p[9] = 0;
6471 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6475 } else if( IS_DIRECT(mb_type) ) {
6476 pred_direct_motion(h, &mb_type);
6477 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6478 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6479 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6481 int list, mx, my, i, mpx, mpy;
6482 if(IS_16X16(mb_type)){
6483 for(list=0; list<2; list++){
6484 if(IS_DIR(mb_type, 0, list)){
6485 if(h->ref_count[list] > 0 ){
6486 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6487 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6490 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6492 for(list=0; list<2; list++){
6493 if(IS_DIR(mb_type, 0, list)){
6494 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6496 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6497 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6498 tprintf("final mv:%d %d\n", mx, my);
6500 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6501 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6503 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6506 else if(IS_16X8(mb_type)){
6507 for(list=0; list<2; list++){
6508 if(h->ref_count[list]>0){
6510 if(IS_DIR(mb_type, i, list)){
6511 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6512 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6514 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6518 for(list=0; list<2; list++){
6520 if(IS_DIR(mb_type, i, list)){
6521 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6522 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6523 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6524 tprintf("final mv:%d %d\n", mx, my);
6526 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6527 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6529 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6530 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6535 assert(IS_8X16(mb_type));
6536 for(list=0; list<2; list++){
6537 if(h->ref_count[list]>0){
6539 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6540 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6541 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6543 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6547 for(list=0; list<2; list++){
6549 if(IS_DIR(mb_type, i, list)){
6550 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6551 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6552 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6554 tprintf("final mv:%d %d\n", mx, my);
6555 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6556 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6558 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6559 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6566 if( IS_INTER( mb_type ) ) {
6567 h->chroma_pred_mode_table[mb_xy] = 0;
6568 write_back_motion( h, mb_type );
6571 if( !IS_INTRA16x16( mb_type ) ) {
6572 cbp = decode_cabac_mb_cbp_luma( h );
6573 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6576 h->cbp_table[mb_xy] = h->cbp = cbp;
6578 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6579 if( decode_cabac_mb_transform_size( h ) )
6580 mb_type |= MB_TYPE_8x8DCT;
6582 s->current_picture.mb_type[mb_xy]= mb_type;
6584 if( cbp || IS_INTRA16x16( mb_type ) ) {
6585 const uint8_t *scan, *scan8x8, *dc_scan;
6588 if(IS_INTERLACED(mb_type)){
6589 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6590 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6591 dc_scan= luma_dc_field_scan;
6593 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6594 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6595 dc_scan= luma_dc_zigzag_scan;
6598 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6599 if( dqp == INT_MIN ){
6600 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6604 if(((unsigned)s->qscale) > 51){
6605 if(s->qscale<0) s->qscale+= 52;
6606 else s->qscale-= 52;
6608 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6610 if( IS_INTRA16x16( mb_type ) ) {
6612 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6613 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6616 for( i = 0; i < 16; i++ ) {
6617 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6618 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6622 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6626 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6627 if( cbp & (1<<i8x8) ) {
6628 if( IS_8x8DCT(mb_type) ) {
6629 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6630 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6633 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6634 const int index = 4*i8x8 + i4x4;
6635 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6637 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6639 //STOP_TIMER("decode_residual")
6642 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6643 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6650 for( c = 0; c < 2; c++ ) {
6651 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6652 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6659 for( c = 0; c < 2; c++ ) {
6660 for( i = 0; i < 4; i++ ) {
6661 const int index = 16 + 4 * c + i;
6662 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6663 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6668 uint8_t * const nnz= &h->non_zero_count_cache[0];
6669 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6670 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6673 uint8_t * const nnz= &h->non_zero_count_cache[0];
6674 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6675 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6676 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6677 h->last_qscale_diff = 0;
6680 s->current_picture.qscale_table[mb_xy]= s->qscale;
6681 write_back_non_zero_count(h);
6684 h->ref_count[0] >>= 1;
6685 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixel rows tall).
 * pix points at the first pixel to the RIGHT of the edge; bS[i] is the
 * boundary strength of the i-th group of 4 rows; qp is the quantizer used
 * to index the threshold tables.  alpha/beta/tc0 tables are +52-biased so
 * a negative qp+slice_offset index still lands inside the table.
 * NOTE(review): interior lines (loop headers, braces, the bS<4 vs bS==4
 * branch) are elided in this chunk; comments cover visible code only. */
6692 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6694 const int index_a = qp + h->slice_alpha_c0_offset;
6695 const int alpha = (alpha_table+52)[index_a];
6696 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* normal (bS < 4) path: per-group clipping value; tc = -1 marks a group
 * that must not be filtered, as expected by the dsp callback */
6701 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6702 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6704 /* 16px edge length, because bS=4 is triggered by being at
6705 * the edge of an intra MB, so all 4 bS are the same */
6706 for( d = 0; d < 16; d++ ) {
6707 const int p0 = pix[-1];
6708 const int p1 = pix[-2];
6709 const int p2 = pix[-3];
6711 const int q0 = pix[0];
6712 const int q1 = pix[1];
6713 const int q2 = pix[2];
/* filter only if the edge looks like a blocking artifact, not real detail */
6715 if( FFABS( p0 - q0 ) < alpha &&
6716 FFABS( p1 - p0 ) < beta &&
6717 FFABS( q1 - q0 ) < beta ) {
/* strong (bS==4) filtering: full 3-tap smoothing of each side when the
 * step across the edge is small enough */
6719 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6720 if( FFABS( p2 - p0 ) < beta)
6722 const int p3 = pix[-4];
/* p-side: rewrite p0..p2 with the strong filter */
6724 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6725 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6726 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p-side fallback: only p0 is modified */
6729 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6731 if( FFABS( q2 - q0 ) < beta)
6733 const int q3 = pix[3];
/* q-side: rewrite q0..q2 with the strong filter */
6735 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6736 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6737 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q-side fallback: only q0 is modified */
6740 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant when the p0-q0 step is too large for the strong filter */
6744 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6745 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6747 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge.
 * Chroma uses the dsp callbacks exclusively: tc0+1 (0 meaning "skip the
 * group") for the normal filter, and a dedicated _intra variant that takes
 * no tc array — presumably selected by a bS==4 test elided in this chunk. */
6753 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6755 const int index_a = qp + h->slice_alpha_c0_offset;
6756 const int alpha = (alpha_table+52)[index_a];
6757 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma convention: tc entries are tc0+1, with 0 = do not filter */
6762 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6763 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6765 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the left vertical luma edge of an MBAFF macroblock pair,
 * one pixel row at a time (16 rows).  bS[8] holds 8 boundary strengths
 * and qp[2] the two averaged quantizers, because the left neighbors of a
 * pair can be two different macroblocks; MB_FIELD decides how row index i
 * maps onto the qp pair.
 * NOTE(review): interior lines (braces, some branches) are elided in this
 * chunk; comments cover visible code only. */
6769 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6771 for( i = 0; i < 16; i++, pix += stride) {
6777 int bS_index = (i >> 1);
6780 bS_index |= (i & 1);
/* strength 0: leave this row untouched */
6783 if( bS[bS_index] == 0 ) {
/* pick the qp of the neighbor this row borders; mapping differs between
 * field (top/bottom halves) and frame (interleaved rows) decoding */
6787 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6788 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6789 alpha = (alpha_table+52)[index_a];
6790 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering (bS 1..3), tc0-clipped */
6792 if( bS[bS_index] < 4 ) {
6793 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6794 const int p0 = pix[-1];
6795 const int p1 = pix[-2];
6796 const int p2 = pix[-3];
6797 const int q0 = pix[0];
6798 const int q1 = pix[1];
6799 const int q2 = pix[2];
6801 if( FFABS( p0 - q0 ) < alpha &&
6802 FFABS( p1 - p0 ) < beta &&
6803 FFABS( q1 - q0 ) < beta ) {
/* optionally adjust p1/q1 when the second sample also looks smooth */
6807 if( FFABS( p2 - p0 ) < beta ) {
6808 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6811 if( FFABS( q2 - q0 ) < beta ) {
6812 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* symmetric tc-clipped delta applied to p0/q0 */
6816 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6817 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6818 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6819 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filtering (bS == 4), same scheme as filter_mb_edgev but per row */
6822 const int p0 = pix[-1];
6823 const int p1 = pix[-2];
6824 const int p2 = pix[-3];
6826 const int q0 = pix[0];
6827 const int q1 = pix[1];
6828 const int q2 = pix[2];
6830 if( FFABS( p0 - q0 ) < alpha &&
6831 FFABS( p1 - p0 ) < beta &&
6832 FFABS( q1 - q0 ) < beta ) {
6834 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6835 if( FFABS( p2 - p0 ) < beta)
6837 const int p3 = pix[-4];
6839 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6840 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6841 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6844 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6846 if( FFABS( q2 - q0 ) < beta)
6848 const int q3 = pix[3];
6850 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6851 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6852 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6855 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6859 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6860 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6862 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the left vertical chroma edge of an MBAFF macroblock pair,
 * one pixel row at a time (8 chroma rows).  Like the luma variant,
 * bS[8] and qp[2] account for the two possible left neighbors; MB_FIELD
 * selects the row-to-qp mapping.
 * NOTE(review): interior lines are elided; comments cover visible code. */
6867 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6869 for( i = 0; i < 8; i++, pix += stride) {
6877 if( bS[bS_index] == 0 ) {
6881 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6882 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6883 alpha = (alpha_table+52)[index_a];
6884 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal chroma filtering: clip range is tc0+1, only p0/q0 change */
6886 if( bS[bS_index] < 4 ) {
6887 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6888 const int p0 = pix[-1];
6889 const int p1 = pix[-2];
6890 const int q0 = pix[0];
6891 const int q1 = pix[1];
6893 if( FFABS( p0 - q0 ) < alpha &&
6894 FFABS( p1 - p0 ) < beta &&
6895 FFABS( q1 - q0 ) < beta ) {
6896 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6898 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6899 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6900 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS == 4) chroma filtering: fixed 2-tap smoothing of p0/q0 */
6903 const int p0 = pix[-1];
6904 const int p1 = pix[-2];
6905 const int q0 = pix[0];
6906 const int q1 = pix[1];
6908 if( FFABS( p0 - q0 ) < alpha &&
6909 FFABS( p1 - p0 ) < beta &&
6910 FFABS( q1 - q0 ) < beta ) {
6912 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6913 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6914 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixel columns wide).
 * pix points at the first row BELOW the edge; samples above are addressed
 * via negative multiples of pix_next (== stride).  Mirrors
 * filter_mb_edgev with rows and columns swapped.
 * NOTE(review): interior lines are elided; comments cover visible code. */
6920 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6922 const int index_a = qp + h->slice_alpha_c0_offset;
6923 const int alpha = (alpha_table+52)[index_a];
6924 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6925 const int pix_next = stride;
/* normal path: tc = -1 marks "do not filter" for the dsp callback */
6930 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6931 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6933 /* 16px edge length, see filter_mb_edgev */
6934 for( d = 0; d < 16; d++ ) {
6935 const int p0 = pix[-1*pix_next];
6936 const int p1 = pix[-2*pix_next];
6937 const int p2 = pix[-3*pix_next];
6938 const int q0 = pix[0];
6939 const int q1 = pix[1*pix_next];
6940 const int q2 = pix[2*pix_next];
6942 if( FFABS( p0 - q0 ) < alpha &&
6943 FFABS( p1 - p0 ) < beta &&
6944 FFABS( q1 - q0 ) < beta ) {
6946 const int p3 = pix[-4*pix_next];
6947 const int q3 = pix[ 3*pix_next];
/* strong (bS==4) filtering when the cross-edge step is small enough */
6949 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6950 if( FFABS( p2 - p0 ) < beta) {
6952 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6953 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6954 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6957 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6959 if( FFABS( q2 - q0 ) < beta) {
6961 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6962 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6963 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6966 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant when the strong filter does not apply */
6970 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6971 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6973 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.  Same tc0+1 convention as
 * filter_mb_edgecv; the _intra dsp variant is presumably selected by a
 * bS==4 test elided in this chunk. */
6980 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6982 const int index_a = qp + h->slice_alpha_c0_offset;
6983 const int alpha = (alpha_table+52)[index_a];
6984 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma convention: tc entries are tc0+1, with 0 = do not filter */
6989 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6990 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6992 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast per-macroblock deblocking path.
 * Falls back to the generic filter_mb() on picture borders or when the
 * platform provides no h264_loop_filter_strength dsp routine.  Otherwise:
 * skips the whole MB when all (averaged) quantizers are at or below a
 * conservative threshold, uses fixed bS (4 on MB edges, 3 inside) for
 * intra MBs, and lets the dsp routine compute bS for inter MBs.
 * Not valid for MBAFF frames (asserted).
 * NOTE(review): the tail of this function (FILTER macro invocations and
 * closing braces) is elided in this chunk; comments cover visible code. */
6996 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6997 MpegEncContext * const s = &h->s;
6999 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* border MBs and platforms without the strength dsp use the slow path */
7001 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
7002 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7005 assert(!FRAME_MBAFF);
7007 mb_xy = mb_x + mb_y*s->mb_stride;
7008 mb_type = s->current_picture.mb_type[mb_xy];
/* quantizers of this MB and of the left/top neighbors */
7009 qp = s->current_picture.qscale_table[mb_xy];
7010 qp0 = s->current_picture.qscale_table[mb_xy-1];
7011 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7012 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7013 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7014 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
/* edges are filtered with the average of the two MBs' quantizers */
7015 qp0 = (qp + qp0 + 1) >> 1;
7016 qp1 = (qp + qp1 + 1) >> 1;
7017 qpc0 = (qpc + qpc0 + 1) >> 1;
7018 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this qp the filter tables would do nothing: skip the MB entirely */
7019 qp_thresh = 15 - h->slice_alpha_c0_offset;
7020 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7021 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: bS is fixed (4 at MB boundaries, 3 for internal edges),
 * with 8x8 transform only every other edge exists */
7024 if( IS_INTRA(mb_type) ) {
7025 int16_t bS4[4] = {4,4,4,4};
7026 int16_t bS3[4] = {3,3,3,3};
7027 if( IS_8x8DCT(mb_type) ) {
7028 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7029 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7030 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7031 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7033 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7034 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7035 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7036 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7037 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7038 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7039 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7040 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only the MB edge and the middle edge exist (2x subsampling) */
7042 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7043 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7044 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7045 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7046 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7047 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7048 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7049 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge; bSv aliases bS as 4x 16-bit packed words */
7052 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7053 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with all luma 8x8 blocks coded: every active edge gets bS=2 */
7055 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7057 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often mv-based bS must be rechecked across/along edges,
 * derived from the partition shape of this MB (and the left neighbor) */
7059 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7060 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7061 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7062 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7064 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7065 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7066 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7067 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* MB edges next to an intra neighbor always get the maximum strength 4 */
7069 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7070 bSv[0][0] = 0x0004000400040004ULL;
7071 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7072 bSv[1][0] = 0x0004000400040004ULL;
/* apply edge filters; dir 0 = vertical edges, dir 1 = horizontal edges */
7074 #define FILTER(hv,dir,edge)\
7075 if(bSv[dir][edge]) {\
7076 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7078 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7079 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7085 } else if( IS_8x8DCT(mb_type) ) {
/* Generic (slow-path) deblocking of one macroblock: derives the boundary
 * strength bS for every vertical and horizontal edge (per H.264 8.7) and
 * calls the edge filters.  Handles MBAFF specifics: a left edge shared by
 * two neighbor MBs of a pair (8 strengths, 2 quantizers) and the case
 * where a frame MB above an interlaced pair must be filtered twice.
 * NOTE(review): many interior lines are elided in this chunk; comments
 * describe only the visible statements. */
7104 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7105 MpegEncContext * const s = &h->s;
7106 const int mb_xy= mb_x + mb_y*s->mb_stride;
7107 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical mv threshold (2 instead of 4) */
7108 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7109 int first_vertical_edge_done = 0;
7111 /* FIXME: A given frame may occupy more than one position in
7112 * the reference list. So ref2frm should be populated with
7113 * frame numbers, not indices. */
/* maps ref_cache values (which may be -2/-1 for unused/PCM) to frames;
 * +2 bias is applied at lookup time */
7114 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7115 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7117 //for sufficiently low qp, filtering wouldn't do anything
7118 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7120 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7121 int qp = s->current_picture.qscale_table[mb_xy];
7123 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7124 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: left pair has the other interlacing type, so the
 * first vertical edge needs 8 strengths and 2 quantizers */
7130 // left mb is in picture
7131 && h->slice_table[mb_xy-1] != 255
7132 // and current and left pair do not have the same interlaced type
7133 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7134 // and left mb is in the same slice if deblocking_filter == 2
7135 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7136 /* First vertical edge is different in MBAFF frames
7137 * There are 8 different bS to compute and 2 different Qp
7139 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7140 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7144 int mb_qp, mbn0_qp, mbn1_qp;
7146 first_vertical_edge_done = 1;
/* intra current MB forces maximum strength on the whole edge */
7148 if( IS_INTRA(mb_type) )
7149 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7151 for( i = 0; i < 8; i++ ) {
/* pick which of the two left MBs borders row group i (field vs frame) */
7152 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7154 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7156 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7157 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7158 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* average the quantizers of current MB with each of the two left MBs */
7165 mb_qp = s->current_picture.qscale_table[mb_xy];
7166 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7167 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7168 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7169 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7170 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7171 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7172 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7173 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7176 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7177 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7178 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7179 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7180 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7182 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7183 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbor across edge 0 (left for dir 0, top for dir 1);
 * slice_table==255 means that neighbor is outside the picture */
7186 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7187 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7188 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* skipped 16x16 MBs only need the MB-boundary edge */
7190 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7191 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7192 // how often to recheck mv-based bS when iterating between edges
7193 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7194 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7195 // how often to recheck mv-based bS when iterating along each edge
7196 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* left edge was already handled by the MBAFF special case above */
7198 if (first_vertical_edge_done) {
7200 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
7203 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* frame MB sitting below an interlaced pair: the top edge must be
 * filtered twice, once against each field MB of the pair above */
7206 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7207 && !IS_INTERLACED(mb_type)
7208 && IS_INTERLACED(mbm_type)
7210 // This is a special case in the norm where the filtering must
7211 // be done twice (one each of the field) even if we are in a
7212 // frame macroblock.
7214 static const int nnz_idx[4] = {4,5,6,3};
7215 unsigned int tmp_linesize = 2 * linesize;
7216 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7217 int mbn_xy = mb_xy - 2 * s->mb_stride;
7222 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7223 if( IS_INTRA(mb_type) ||
7224 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7225 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7227 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7228 for( i = 0; i < 4; i++ ) {
7229 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7230 mbn_nnz[nnz_idx[i]] != 0 )
7236 // Do not use s->qscale as luma quantizer because it has not the same
7237 // value in IPCM macroblocks.
7238 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7239 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7240 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7241 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7242 chroma_qp = ( h->chroma_qp +
7243 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7244 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7245 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular edge loop: edge 0 borders the neighbor MB, edges 1..3 are internal */
7252 for( edge = start; edge < edges; edge++ ) {
7253 /* mbn_xy: neighbor macroblock */
7254 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7255 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with the 8x8 transform, odd 4x4 edges do not exist */
7259 if( (edge&1) && IS_8x8DCT(mb_type) )
/* bS derivation, in priority order: intra -> coded coefficients -> mvs */
7262 if( IS_INTRA(mb_type) ||
7263 IS_INTRA(mbn_type) ) {
7266 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7267 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7276 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* partition shape says mvs cannot differ across this edge: bS stays 0 */
7281 if( edge & mask_edge ) {
7282 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mixed field/frame MBs in MBAFF always get bS=1 */
7285 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7286 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole-edge mv check: one comparison is enough for this partition shape */
7289 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7290 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7291 int bn_idx= b_idx - (dir ? 8:1);
7293 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7294 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7295 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7296 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7298 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 check: nnz on either side -> bS 2 (elided), else mv comparison */
7304 for( i = 0; i < 4; i++ ) {
7305 int x = dir == 0 ? edge : i;
7306 int y = dir == 0 ? i : edge;
7307 int b_idx= 8 + 4 + x + 8*y;
7308 int bn_idx= b_idx - (dir ? 8:1);
7310 if( h->non_zero_count_cache[b_idx] != 0 ||
7311 h->non_zero_count_cache[bn_idx] != 0 ) {
7317 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7318 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7319 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7320 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* nothing to filter on this edge */
7328 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7333 // Do not use s->qscale as luma quantizer because it has not the same
7334 // value in IPCM macroblocks.
7335 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7336 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7337 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7338 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* vertical edges: chroma only exists on even luma edges (2x subsampling) */
7340 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7341 if( (edge&1) == 0 ) {
7342 int chroma_qp = ( h->chroma_qp +
7343 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7344 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7345 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* horizontal edges, same chroma rule */
7348 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7349 if( (edge&1) == 0 ) {
7350 int chroma_qp = ( h->chroma_qp +
7351 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7352 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7353 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode every macroblock of the current slice.
 * Dispatches to the CABAC path (h->pps.cabac) or the CAVLC path,
 * advancing s->mb_x / s->mb_y across the picture and reporting each
 * decoded (or errored) region to the error-resilience layer via
 * ff_er_add_slice().  Returns 0 at a normal slice end, -1 on error.
 * NOTE(review): this listing is an elided dump — lines are missing
 * between the numbered statements (e.g. the #if/#endif around the
 * trailing loop), so the control flow shown here is partial.
 * Fix applied: line 7518 contained garbled characters
 * ("s->?gb" / "s->gb?.size_in_bits"); restored to the form used by
 * the identical check on line 7519. */
7360 static int decode_slice(H264Context *h){
7361 MpegEncContext * const s = &h->s;
7362 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7366 if( h->pps.cabac ) {
/* CABAC: byte-align the bitstream reader, then hand the remaining
 * bytes to the CABAC decoder. */
7370 align_get_bits( &s->gb );
7373 ff_init_cabac_states( &h->cabac);
7374 ff_init_cabac_decoder( &h->cabac,
7375 s->gb.buffer + get_bits_count(&s->gb)/8,
7376 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7377 /* calculate pre-state */
7378 for( i= 0; i < 460; i++ ) {
7380 if( h->slice_type == I_TYPE )
7381 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7383 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7386 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7388 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* Per-macroblock CABAC decode loop; for MBAFF a second (bottom)
 * macroblock of the pair is decoded right after the first. */
7393 int ret = decode_mb_cabac(h);
7395 //STOP_TIMER("decode_mb_cabac")
7397 if(ret>=0) hl_decode_mb(h);
7399 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7402 if(ret>=0) ret = decode_mb_cabac(h);
7404 if(ret>=0) hl_decode_mb(h);
7407 eos = get_cabac_terminate( &h->cabac );
/* Overrun of more than 2 bytes past the end marks a broken stream. */
7409 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7410 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7411 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7415 if( ++s->mb_x >= s->mb_width ) {
7417 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7424 if( eos || s->mb_y >= s->mb_height ) {
7425 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7426 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC per-macroblock decode loop (same MBAFF pairing as above). */
7433 int ret = decode_mb_cavlc(h);
7435 if(ret>=0) hl_decode_mb(h);
7437 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7439 ret = decode_mb_cavlc(h);
7441 if(ret>=0) hl_decode_mb(h);
7446 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7447 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7452 if(++s->mb_x >= s->mb_width){
7454 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7459 if(s->mb_y >= s->mb_height){
7460 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consuming the bitstream is a clean end; anything else is
 * reported as an END region that may be partially wrong. */
7462 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7463 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7467 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7474 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7475 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7476 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7477 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7481 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below references decode_mb() and passes
 * s->gb by value to get_bits_count(); it appears to be disabled code
 * whose preprocessor guards were elided from this dump — confirm
 * against the full file. */
7490 for(;s->mb_y < s->mb_height; s->mb_y++){
7491 for(;s->mb_x < s->mb_width; s->mb_x++){
7492 int ret= decode_mb(h);
7497 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7498 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7503 if(++s->mb_x >= s->mb_width){
7505 if(++s->mb_y >= s->mb_height){
7506 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7507 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7511 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7518 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7519 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7520 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7524 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7531 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7534 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of the given size.
 * Copies up to sizeof(user_data)-1 payload bytes, then scans the text
 * after the 16-byte UUID for an x264 version banner; if found, the
 * build number is stored in h->x264_build (used elsewhere for bug
 * workarounds).  Remaining payload bytes are skipped below. */
7537 static int decode_unregistered_user_data(H264Context *h, int size){
7538 MpegEncContext * const s = &h->s;
7539 uint8_t user_data[16+256];
7545 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7546 user_data[i]= get_bits(&s->gb, 8);
/* user_data+16 skips the 16-byte UUID that precedes the text. */
7550 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7551 if(e==1 && build>=0)
7552 h->x264_build= build;
7554 if(s->avctx->debug & FF_DEBUG_BUGS)
7555 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any payload bytes beyond the local buffer. */
7558 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: iterate over SEI messages until fewer than
 * 16 bits remain.  Per the H.264 syntax, both the payload type and the
 * payload size are coded as a run of 0xFF bytes plus one final byte,
 * accumulated by the do/while loops below.  Unregistered user data is
 * handled; all other payload types are skipped. */
7563 static int decode_sei(H264Context *h){
7564 MpegEncContext * const s = &h->s;
7566 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum 255 for each 0xFF byte, then add the last byte. */
7571 type+= show_bits(&s->gb, 8);
7572 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same encoding as payloadType. */
7576 size+= show_bits(&s->gb, 8);
7577 }while(get_bits(&s->gb, 8) == 255);
7581 if(decode_unregistered_user_data(h, size) < 0)
/* Unknown payload type: skip the whole payload. */
7585 skip_bits(&s->gb, 8*size);
7588 //FIXME check bits here
7589 align_get_bits(&s->gb);
/* Parse (and discard) the hrd_parameters() syntax structure from the
 * VUI.  Nothing is stored; the fields are read only to keep the
 * bitstream position correct for the following syntax elements. */
7595 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7596 MpegEncContext * const s = &h->s;
7598 cpb_count = get_ue_golomb(&s->gb) + 1;
7599 get_bits(&s->gb, 4); /* bit_rate_scale */
7600 get_bits(&s->gb, 4); /* cpb_size_scale */
7601 for(i=0; i<cpb_count; i++){
7602 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7603 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7604 get_bits1(&s->gb); /* cbr_flag */
7606 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7607 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7608 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7609 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (Video Usability Information) appended to an SPS.
 * Stores the sample aspect ratio, timing information and the
 * bitstream-restriction num_reorder_frames into *sps; most other
 * fields are read and discarded to keep bitstream position correct. */
7612 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7613 MpegEncContext * const s = &h->s;
7614 int aspect_ratio_info_present_flag;
7615 unsigned int aspect_ratio_idc;
7616 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7618 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7620 if( aspect_ratio_info_present_flag ) {
7621 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit num/den pair; indices
 * below 14 select a predefined ratio from the pixel_aspect table. */
7622 if( aspect_ratio_idc == EXTENDED_SAR ) {
7623 sps->sar.num= get_bits(&s->gb, 16);
7624 sps->sar.den= get_bits(&s->gb, 16);
7625 }else if(aspect_ratio_idc < 14){
7626 sps->sar= pixel_aspect[aspect_ratio_idc];
7628 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7635 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7637 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7638 get_bits1(&s->gb); /* overscan_appropriate_flag */
7641 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7642 get_bits(&s->gb, 3); /* video_format */
7643 get_bits1(&s->gb); /* video_full_range_flag */
7644 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7645 get_bits(&s->gb, 8); /* colour_primaries */
7646 get_bits(&s->gb, 8); /* transfer_characteristics */
7647 get_bits(&s->gb, 8); /* matrix_coefficients */
7651 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7652 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7653 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* Timing info is kept: it defines the stream's nominal frame rate. */
7656 sps->timing_info_present_flag = get_bits1(&s->gb);
7657 if(sps->timing_info_present_flag){
7658 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7659 sps->time_scale = get_bits_long(&s->gb, 32);
7660 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7663 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7664 if(nal_hrd_parameters_present_flag)
7665 decode_hrd_parameters(h, sps);
7666 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7667 if(vcl_hrd_parameters_present_flag)
7668 decode_hrd_parameters(h, sps);
7669 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7670 get_bits1(&s->gb); /* low_delay_hrd_flag */
7671 get_bits1(&s->gb); /* pic_struct_present_flag */
7673 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7674 if(sps->bitstream_restriction_flag){
7675 unsigned int num_reorder_frames;
7676 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7677 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7678 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7679 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7680 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7681 num_reorder_frames= get_ue_golomb(&s->gb);
7682 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* 16 is the DPB upper bound; larger values indicate a broken stream. */
7684 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7685 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7689 sps->num_reorder_frames= num_reorder_frames;
/* Read one scaling list (16 or 64 entries) into factors[].
 * If the list is absent from the bitstream, fallback_list is copied.
 * If the first delta yields next==0, the spec-defined default
 * (jvt_list) is used.  Otherwise entries are delta-coded in zigzag
 * order, each new value derived from the previous one mod 256. */
7695 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7696 const uint8_t *jvt_list, const uint8_t *fallback_list){
7697 MpegEncContext * const s = &h->s;
7698 int i, last = 8, next = 8;
7699 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7700 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7701 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7703 for(i=0;i<size;i++){
7705 next = (last + get_se_golomb(&s->gb)) & 0xff;
7706 if(!i && !next){ /* matrix not written, we use the preset one */
7707 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the previous value" for this entry. */
7710 last = factors[scan[i]] = next ? next : last;
/* Read the full set of scaling matrices for an SPS (is_sps != 0) or a
 * PPS.  Fallback rules per the spec: a PPS falls back to the SPS
 * matrices when the SPS carried any, otherwise to the flat defaults;
 * within the set, each chroma list falls back to the previous list. */
7714 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7715 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7716 MpegEncContext * const s = &h->s;
7717 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7718 const uint8_t *fallback[4] = {
7719 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7720 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7721 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7722 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7724 if(get_bits1(&s->gb)){
7725 sps->scaling_matrix_present |= is_sps;
7726 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7727 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7728 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7729 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7730 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7731 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 DCT. */
7732 if(is_sps || pps->transform_8x8_mode){
7733 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7734 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7736 } else if(fallback_sps) {
7737 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7738 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parse a sequence parameter set NAL and store it in
 * h->sps_buffer[sps_id].  Validates sps_id, poc_cycle_length,
 * reference frame count and picture dimensions; rejects out-of-range
 * values with an error log.  Return value conventions (error returns)
 * are on lines elided from this dump. */
7742 static inline int decode_seq_parameter_set(H264Context *h){
7743 MpegEncContext * const s = &h->s;
7744 int profile_idc, level_idc;
7745 unsigned int sps_id, tmp, mb_width, mb_height;
7749 profile_idc= get_bits(&s->gb, 8);
7750 get_bits1(&s->gb); //constraint_set0_flag
7751 get_bits1(&s->gb); //constraint_set1_flag
7752 get_bits1(&s->gb); //constraint_set2_flag
7753 get_bits1(&s->gb); //constraint_set3_flag
7754 get_bits(&s->gb, 4); // reserved
7755 level_idc= get_bits(&s->gb, 8);
7756 sps_id= get_ue_golomb(&s->gb);
7758 if (sps_id >= MAX_SPS_COUNT){
7759 // ok it has gone out of hand, someone is sending us bad stuff.
7760 av_log(h->s.avctx, AV_LOG_ERROR, "illegal sps_id (%d)\n", sps_id);
7764 sps= &h->sps_buffer[ sps_id ];
7765 sps->profile_idc= profile_idc;
7766 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, transform bypass and
 * optional scaling matrices to the SPS syntax. */
7768 if(sps->profile_idc >= 100){ //high profile
7769 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7770 get_bits1(&s->gb); //residual_color_transform_flag
7771 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7772 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7773 sps->transform_bypass = get_bits1(&s->gb);
7774 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7776 sps->scaling_matrix_present = 0;
7778 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7779 sps->poc_type= get_ue_golomb(&s->gb);
/* Picture order count: type 0 signals an lsb size, type 1 carries an
 * explicit offset table, type 2 needs no extra fields. */
7781 if(sps->poc_type == 0){ //FIXME #define
7782 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7783 } else if(sps->poc_type == 1){//FIXME #define
7784 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7785 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7786 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7787 tmp= get_ue_golomb(&s->gb);
7789 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7790 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7793 sps->poc_cycle_length= tmp;
7795 for(i=0; i<sps->poc_cycle_length; i++)
7796 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7797 }else if(sps->poc_type != 2){
7798 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7802 tmp= get_ue_golomb(&s->gb);
7803 if(tmp > MAX_PICTURE_COUNT-2){
7804 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7806 sps->ref_frame_count= tmp;
7807 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7808 mb_width= get_ue_golomb(&s->gb) + 1;
7809 mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard against 16*mb_{width,height} overflow before the size check. */
7810 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7811 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7812 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7815 sps->mb_width = mb_width;
7816 sps->mb_height= mb_height;
7818 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7819 if(!sps->frame_mbs_only_flag)
7820 sps->mb_aff= get_bits1(&s->gb);
7824 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7826 #ifndef ALLOW_INTERLACE
7828 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7830 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7831 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7833 sps->crop= get_bits1(&s->gb);
7835 sps->crop_left = get_ue_golomb(&s->gb);
7836 sps->crop_right = get_ue_golomb(&s->gb);
7837 sps->crop_top = get_ue_golomb(&s->gb);
7838 sps->crop_bottom= get_ue_golomb(&s->gb);
7839 if(sps->crop_left || sps->crop_top){
7840 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7846 sps->crop_bottom= 0;
7849 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7850 if( sps->vui_parameters_present_flag )
7851 decode_vui_parameters(h, sps);
7853 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7854 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7855 sps_id, sps->profile_idc, sps->level_idc,
7857 sps->ref_frame_count,
7858 sps->mb_width, sps->mb_height,
7859 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7860 sps->direct_8x8_inference_flag ? "8B8" : "",
7861 sps->crop_left, sps->crop_right,
7862 sps->crop_top, sps->crop_bottom,
7863 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a picture parameter set NAL of bit_length bits and store it
 * in h->pps_buffer[pps_id].  FMO (slice_group_count > 1) is not
 * supported; the spec syntax for its map types is kept below as a
 * reference table inside a comment. */
7869 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7870 MpegEncContext * const s = &h->s;
7871 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7874 if(pps_id>=MAX_PPS_COUNT){
7875 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
7878 pps = &h->pps_buffer[pps_id];
7880 tmp= get_ue_golomb(&s->gb);
7881 if(tmp>=MAX_SPS_COUNT){
7882 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7887 pps->cabac= get_bits1(&s->gb);
7888 pps->pic_order_present= get_bits1(&s->gb);
7889 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7890 if(pps->slice_group_count > 1 ){
7891 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7892 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7893 switch(pps->mb_slice_group_map_type){
7896 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7897 | run_length[ i ] |1 |ue(v) |
7902 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7904 | top_left_mb[ i ] |1 |ue(v) |
7905 | bottom_right_mb[ i ] |1 |ue(v) |
7913 | slice_group_change_direction_flag |1 |u(1) |
7914 | slice_group_change_rate_minus1 |1 |ue(v) |
7919 | slice_group_id_cnt_minus1 |1 |ue(v) |
7920 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7922 | slice_group_id[ i ] |1 |u(v) |
7927 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7928 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Clamp invalid reference counts to a safe value instead of using
 * attacker-controlled sizes downstream. */
7929 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7930 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7931 pps->ref_count[0]= pps->ref_count[1]= 1;
7935 pps->weighted_pred= get_bits1(&s->gb);
7936 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7937 pps->init_qp= get_se_golomb(&s->gb) + 26;
7938 pps->init_qs= get_se_golomb(&s->gb) + 26;
7939 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7940 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7941 pps->constrained_intra_pred= get_bits1(&s->gb);
7942 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7944 pps->transform_8x8_mode= 0;
7945 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Default: flat (all 16) scaling matrices. */
7946 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7947 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* Optional trailing PPS extension (High profile): 8x8 transform flag,
 * scaling matrices and the second chroma QP offset. */
7949 if(get_bits_count(&s->gb) < bit_length){
7950 pps->transform_8x8_mode= get_bits1(&s->gb);
7951 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7952 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7955 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7956 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7957 pps_id, pps->sps_id,
7958 pps->cabac ? "CABAC" : "CAVLC",
7959 pps->slice_group_count,
7960 pps->ref_count[0], pps->ref_count[1],
7961 pps->weighted_pred ? "weighted" : "",
7962 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7963 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7964 pps->constrained_intra_pred ? "CONSTR" : "",
7965 pps->redundant_pic_cnt_present ? "REDU" : "",
7966 pps->transform_8x8_mode ? "8x8DCT" : ""
7974 * finds the end of the current frame in the bitstream.
7975 * @return the position of the first byte of the next frame, or -1
7977 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7980 ParseContext *pc = &(h->s.parse_context);
7981 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7982 // mb_addr= pc->mb_addr - 1;
/* Slide a 32-bit window over the buffer; masking with 0xFFFFFF1F
 * matches a 00 00 01 start code followed by a nal_unit_type while
 * ignoring nal_ref_idc.  Types 1/2/5 are slice NALs. */
7984 for(i=0; i<=buf_size; i++){
7985 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7986 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7987 if(pc->frame_start_found){
7988 // If there isn't one more byte in the buffer
7989 // the test on first_mb_in_slice cannot be done yet
7990 // do it at next call.
7991 if (i >= buf_size) break;
/* MSB set in the first payload byte means ue(v) first_mb_in_slice
 * is 0, i.e. this slice begins a new frame. */
7992 if (buf[i] & 0x80) {
7993 // first_mb_in_slice is 0, probably the first nal of a new
7995 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7997 pc->frame_start_found= 0;
8001 pc->frame_start_found = 1;
/* Types 7/8/9 (SPS/PPS/AUD) also terminate the current frame. */
8003 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
8004 if(pc->frame_start_found){
8006 pc->frame_start_found= 0;
8011 state= (state<<8) | buf[i];
8015 return END_NOT_FOUND;
8018 #ifdef CONFIG_H264_PARSER
/* AVCodecParser callback: locate the current frame's end with
 * find_frame_end() and let ff_combine_frame() accumulate partial
 * input across calls until a complete frame is available, which is
 * then returned through *poutbuf / *poutbuf_size. */
8019 static int h264_parse(AVCodecParserContext *s,
8020 AVCodecContext *avctx,
8021 uint8_t **poutbuf, int *poutbuf_size,
8022 const uint8_t *buf, int buf_size)
8024 H264Context *h = s->priv_data;
8025 ParseContext *pc = &h->s.parse_context;
8028 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame returns <0 while the frame is still incomplete. */
8030 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8036 *poutbuf = (uint8_t *)buf;
8037 *poutbuf_size = buf_size;
/* AVCodecParser split callback: scan for the position separating
 * in-band extradata (SPS/PPS) from the first slice data.  After an
 * SPS (type 7) has been seen, the first start code whose type is not
 * 7/8/9 marks the split point; the backtracking loop strips trailing
 * zero padding before the start code. */
8041 static int h264_split(AVCodecContext *avctx,
8042 const uint8_t *buf, int buf_size)
8045 uint32_t state = -1;
8048 for(i=0; i<=buf_size; i++){
8049 if((state&0xFFFFFF1F) == 0x107)
8051 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
8053 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
8055 while(i>4 && buf[i-5]==0) i--;
8060 state= (state<<8) | buf[i];
/* Iterate over all NAL units in buf and decode each one.
 * Supports both length-prefixed AVC framing (h->is_avc, using
 * h->nal_length_size bytes of size per NAL) and Annex-B start-code
 * framing.  Each NAL is unescaped with decode_nal() and dispatched on
 * nal_unit_type (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * Returns the number of consumed bytes.  Error-return lines are
 * elided from this dump. */
8066 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8067 MpegEncContext * const s = &h->s;
8068 AVCodecContext * const avctx= s->avctx;
8072 for(i=0; i<50; i++){
8073 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8077 s->current_picture_ptr= NULL;
8086 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix. */
8088 for(i = 0; i < h->nal_length_size; i++)
8089 nalsize = (nalsize << 8) | buf[buf_index++];
8090 if(nalsize <= 1 || nalsize > buf_size){
8095 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8100 // start code prefix search
8101 for(; buf_index + 3 < buf_size; buf_index++){
8102 // this should allways succeed in the first iteration
8103 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8107 if(buf_index+3 >= buf_size) break;
8112 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8113 if (ptr==NULL || dst_length <= 0){
/* Strip trailing zero bytes, then drop the rbsp_stop_one_bit. */
8116 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8118 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8120 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8121 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8124 if (h->is_avc && (nalsize != consumed))
8125 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8127 buf_index += consumed;
/* hurry_up / skip_frame: drop non-reference NALs when requested. */
8129 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8130 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8133 switch(h->nal_unit_type){
8135 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular slice: no data partitioning. */
8137 init_get_bits(&s->gb, ptr, bit_length);
8139 h->inter_gb_ptr= &s->gb;
8140 s->data_partitioning = 0;
8142 if(decode_slice_header(h) < 0){
8143 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8146 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
8147 if(h->redundant_pic_count==0 && s->hurry_up < 5
8148 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8149 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8150 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8151 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header + partitioning setup. */
8155 init_get_bits(&s->gb, ptr, bit_length);
8157 h->inter_gb_ptr= NULL;
8158 s->data_partitioning = 1;
8160 if(decode_slice_header(h) < 0){
8161 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Data partition B: intra residual data. */
8165 init_get_bits(&h->intra_gb, ptr, bit_length);
8166 h->intra_gb_ptr= &h->intra_gb;
/* Data partition C: inter residual data. */
8169 init_get_bits(&h->inter_gb, ptr, bit_length);
8170 h->inter_gb_ptr= &h->inter_gb;
8172 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8173 && s->context_initialized
8175 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8176 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8177 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8178 && avctx->skip_frame < AVDISCARD_ALL)
8182 init_get_bits(&s->gb, ptr, bit_length);
8186 init_get_bits(&s->gb, ptr, bit_length);
8187 decode_seq_parameter_set(h);
8189 if(s->flags& CODEC_FLAG_LOW_DELAY)
8192 if(avctx->has_b_frames < 2)
8193 avctx->has_b_frames= !s->low_delay;
8196 init_get_bits(&s->gb, ptr, bit_length);
8198 decode_picture_parameter_set(h, bit_length);
8202 case NAL_END_SEQUENCE:
8203 case NAL_END_STREAM:
8204 case NAL_FILLER_DATA:
8206 case NAL_AUXILIARY_SLICE:
8209 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8213 if(!s->current_picture_ptr) return buf_index; //no frame
/* End of access unit: finalize picture state and run the reference
 * picture marking process for reference pictures. */
8215 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8216 s->current_picture_ptr->pict_type= s->pict_type;
8218 h->prev_frame_num_offset= h->frame_num_offset;
8219 h->prev_frame_num= h->frame_num;
8220 if(s->current_picture_ptr->reference){
8221 h->prev_poc_msb= h->poc_msb;
8222 h->prev_poc_lsb= h->poc_lsb;
8224 if(s->current_picture_ptr->reference)
8225 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8235 * returns the number of bytes consumed for building the current frame
8237 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* With CODEC_FLAG_TRUNCATED the parse context buffered part of the
 * input, so subtract what was carried over from a previous call. */
8238 if(s->flags&CODEC_FLAG_TRUNCATED){
8239 pos -= s->parse_context.last_index;
8240 if(pos<0) pos=0; // FIXME remove (unneeded?)
8244 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8245 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback.
 * Handles truncated-input reassembly, one-time parsing of avcC
 * extradata (SPS/PPS), Annex-B extradata, then decodes the NAL units
 * of the packet and finally reorders decoded pictures into display
 * order (delayed_pic ring) before returning one frame in *data. */
8251 static int decode_frame(AVCodecContext *avctx,
8252 void *data, int *data_size,
8253 uint8_t *buf, int buf_size)
8255 H264Context *h = avctx->priv_data;
8256 MpegEncContext *s = &h->s;
8257 AVFrame *pict = data;
8260 s->flags= avctx->flags;
8261 s->flags2= avctx->flags2;
8263 /* no supplementary picture */
8264 if (buf_size == 0) {
8268 if(s->flags&CODEC_FLAG_TRUNCATED){
8269 int next= find_frame_end(h, buf, buf_size);
8271 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8273 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time avcC extradata parsing (MP4-style streams). */
8276 if(h->is_avc && !h->got_avcC) {
8277 int i, cnt, nalsize;
8278 unsigned char *p = avctx->extradata;
8279 if(avctx->extradata_size < 7) {
8280 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8284 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8287 /* sps and pps in the avcC always have length coded with 2 bytes,
8288 so put a fake nal_length_size = 2 while parsing them */
8289 h->nal_length_size = 2;
8290 // Decode sps from avcC
8291 cnt = *(p+5) & 0x1f; // Number of sps
8293 for (i = 0; i < cnt; i++) {
8294 nalsize = AV_RB16(p) + 2;
8295 if(decode_nal_units(h, p, nalsize) < 0) {
8296 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8301 // Decode pps from avcC
8302 cnt = *(p++); // Number of pps
8303 for (i = 0; i < cnt; i++) {
8304 nalsize = AV_RB16(p) + 2;
8305 if(decode_nal_units(h, p, nalsize) != nalsize) {
8306 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8311 // Now store right nal length size, that will be use to parse all other nals
8312 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8313 // Do not reparse avcC
/* Annex-B extradata: decode once before the first picture. */
8317 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8318 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8322 buf_index=decode_nal_units(h, buf, buf_size);
8326 //FIXME do something with unavailable reference frames
8328 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8329 if(!s->current_picture_ptr){
8330 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8335 Picture *out = s->current_picture_ptr;
8336 #if 0 //decode order
8337 *data_size = sizeof(AVFrame);
8339 /* Sort B-frames into display order */
8340 Picture *cur = s->current_picture_ptr;
8341 Picture *prev = h->delayed_output_pic;
8342 int i, pics, cross_idr, out_of_order, out_idx;
/* Grow the output delay to the stream's declared reorder depth. */
8344 if(h->sps.bitstream_restriction_flag
8345 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8346 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8351 while(h->delayed_pic[pics]) pics++;
8353 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
8355 h->delayed_pic[pics++] = cur;
8356 if(cur->reference == 0)
8360 for(i=0; h->delayed_pic[i]; i++)
8361 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the delayed picture with the smallest POC for output. */
8364 out = h->delayed_pic[0];
8366 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8367 if(h->delayed_pic[i]->poc < out->poc){
8368 out = h->delayed_pic[i];
8372 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8373 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8375 else if(prev && pics <= s->avctx->has_b_frames)
8377 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8379 ((!cross_idr && prev && out->poc > prev->poc + 2)
8380 || cur->pict_type == B_TYPE)))
/* Heuristic: the stream reorders more than declared; delay more. */
8383 s->avctx->has_b_frames++;
8386 else if(out_of_order)
8389 if(out_of_order || pics > s->avctx->has_b_frames){
8390 for(i=out_idx; h->delayed_pic[i]; i++)
8391 h->delayed_pic[i] = h->delayed_pic[i+1];
8397 *data_size = sizeof(AVFrame);
8398 if(prev && prev != out && prev->reference == 1)
8399 prev->reference = 0;
8400 h->delayed_output_pic = out;
8404 *pict= *(AVFrame*)out;
8406 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8409 assert(pict->data[0] || !*data_size);
8410 ff_print_debug_info(s, pict);
8411 //printf("out %d\n", (int)pict->data[0]);
8414 /* Return the Picture timestamp as the frame number */
8415 /* we substract 1 because it is added on utils.c */
8416 avctx->frame_number = s->picture_number - 1;
8418 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor is available when it exists inside the
 * picture AND belongs to the same slice (slice_table match).
 * Indices 0..2 are the top-left/top/top-right row, 3 is the left
 * neighbor; 4 and 5 are constants (see FIXMEs). */
8421 static inline void fill_mb_avail(H264Context *h){
8422 MpegEncContext * const s = &h->s;
8423 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8426 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8427 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8428 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8434 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8435 h->mb_avail[4]= 1; //FIXME move out
8436 h->mb_avail[5]= 0; //FIXME move out
8442 #define SIZE (COUNT*40)
8448 // int int_temp[10000];
8450 AVCodecContext avctx;
8452 dsputil_init(&dsp, &avctx);
8454 init_put_bits(&pb, temp, SIZE);
8455 printf("testing unsigned exp golomb\n");
8456 for(i=0; i<COUNT; i++){
8458 set_ue_golomb(&pb, i);
8459 STOP_TIMER("set_ue_golomb");
8461 flush_put_bits(&pb);
8463 init_get_bits(&gb, temp, 8*SIZE);
8464 for(i=0; i<COUNT; i++){
8467 s= show_bits(&gb, 24);
8470 j= get_ue_golomb(&gb);
8472 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8475 STOP_TIMER("get_ue_golomb");
8479 init_put_bits(&pb, temp, SIZE);
8480 printf("testing signed exp golomb\n");
8481 for(i=0; i<COUNT; i++){
8483 set_se_golomb(&pb, i - COUNT/2);
8484 STOP_TIMER("set_se_golomb");
8486 flush_put_bits(&pb);
8488 init_get_bits(&gb, temp, 8*SIZE);
8489 for(i=0; i<COUNT; i++){
8492 s= show_bits(&gb, 24);
8495 j= get_se_golomb(&gb);
8496 if(j != i - COUNT/2){
8497 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8500 STOP_TIMER("get_se_golomb");
8503 printf("testing 4x4 (I)DCT\n");
8506 uint8_t src[16], ref[16];
8507 uint64_t error= 0, max_error=0;
8509 for(i=0; i<COUNT; i++){
8511 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8512 for(j=0; j<16; j++){
8513 ref[j]= random()%255;
8514 src[j]= random()%255;
8517 h264_diff_dct_c(block, src, ref, 4);
8520 for(j=0; j<16; j++){
8521 // printf("%d ", block[j]);
8522 block[j]= block[j]*4;
8523 if(j&1) block[j]= (block[j]*4 + 2)/5;
8524 if(j&4) block[j]= (block[j]*4 + 2)/5;
8528 s->dsp.h264_idct_add(ref, block, 4);
8529 /* for(j=0; j<16; j++){
8530 printf("%d ", ref[j]);
8534 for(j=0; j<16; j++){
8535 int diff= FFABS(src[j] - ref[j]);
8538 max_error= FFMAX(max_error, diff);
8541 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8543 printf("testing quantizer\n");
8544 for(qp=0; qp<52; qp++){
8546 src1_block[i]= src2_block[i]= random()%255;
8550 printf("Testing NAL layer\n");
8552 uint8_t bitstream[COUNT];
8553 uint8_t nal[COUNT*2];
8555 memset(&h, 0, sizeof(H264Context));
8557 for(i=0; i<COUNT; i++){
8565 for(j=0; j<COUNT; j++){
8566 bitstream[j]= (random() % 255) + 1;
8569 for(j=0; j<zeros; j++){
8570 int pos= random() % COUNT;
8571 while(bitstream[pos] == 0){
8580 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8582 printf("encoding failed\n");
8586 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8590 if(out_length != COUNT){
8591 printf("incorrect length %d %d\n", out_length, COUNT);
8595 if(consumed != nal_length){
8596 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8600 if(memcmp(bitstream, out, COUNT)){
8601 printf("missmatch\n");
8606 printf("Testing RBSP\n");
/* AVCodec close callback: release the RBSP unescaping buffer and the
 * per-context decoding tables. */
8614 static int decode_end(AVCodecContext *avctx)
8616 H264Context *h = avctx->priv_data;
8617 MpegEncContext *s = &h->s;
8619 av_freep(&h->rbsp_buffer);
8620 free_tables(h); //FIXME cleanup init stuff perhaps
8623 // memset(h, 0, sizeof(H264Context));
8629 AVCodec h264_decoder = {
8633 sizeof(H264Context),
8638 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8642 #ifdef CONFIG_H264_PARSER
8643 AVCodecParser h264_parser = {
8645 sizeof(H264Context),