2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * H.264 / AVC / MPEG4 part10 codec.
26 * @author Michael Niedermayer <michaelni@gmx.at>
32 #include "mpegvideo.h"
41 #define interlaced_dct interlaced_dct_is_a_bad_name
42 #define mb_intra mb_intra_isnt_initalized_see_mb_type
44 #define LUMA_DC_BLOCK_INDEX 25
45 #define CHROMA_DC_BLOCK_INDEX 26
47 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
48 #define COEFF_TOKEN_VLC_BITS 8
49 #define TOTAL_ZEROS_VLC_BITS 9
50 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
51 #define RUN_VLC_BITS 3
52 #define RUN7_VLC_BITS 6
54 #define MAX_SPS_COUNT 32
55 #define MAX_PPS_COUNT 256
57 #define MAX_MMCO_COUNT 66
59 /* Compiling in interlaced support reduces the speed
60 * of progressive decoding by about 2%. */
61 #define ALLOW_INTERLACE
63 #ifdef ALLOW_INTERLACE
64 #define MB_MBAFF h->mb_mbaff
65 #define MB_FIELD h->mb_field_decoding_flag
66 #define FRAME_MBAFF h->mb_aff_frame
72 #define IS_INTERLACED(mb_type) 0
76 * Sequence parameter set
/* NOTE(review): elided dump — the enclosing "typedef struct SPS{" opener and
 * "}SPS;" closer are not visible in this chunk. Fields mirror the H.264 SPS
 * bitstream syntax elements; most ///< notes name the spec syntax element. */
82 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
83 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
84 int poc_type; ///< pic_order_cnt_type
85 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
86 int delta_pic_order_always_zero_flag;
87 int offset_for_non_ref_pic;
88 int offset_for_top_to_bottom_field;
89 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
90 int ref_frame_count; ///< num_ref_frames
91 int gaps_in_frame_num_allowed_flag;
92 int mb_width; ///< frame_width_in_mbs_minus1 + 1
93 int mb_height; ///< frame_height_in_mbs_minus1 + 1
94 int frame_mbs_only_flag;
95 int mb_aff; ///<mb_adaptive_frame_field_flag
96 int direct_8x8_inference_flag;
97 int crop; ///< frame_cropping_flag
98 int crop_left; ///< frame_cropping_rect_left_offset
99 int crop_right; ///< frame_cropping_rect_right_offset
100 int crop_top; ///< frame_cropping_rect_top_offset
101 int crop_bottom; ///< frame_cropping_rect_bottom_offset
102 int vui_parameters_present_flag;
/* VUI (Video Usability Information) sub-fields */
104 int timing_info_present_flag;
105 uint32_t num_units_in_tick;
107 int fixed_frame_rate_flag;
108 short offset_for_ref_frame[256]; //FIXME dyn aloc?
109 int bitstream_restriction_flag;
110 int num_reorder_frames;
/* per-SPS quantisation scaling lists: six 4x4 and two 8x8 matrices */
111 int scaling_matrix_present;
112 uint8_t scaling_matrix4[6][16];
113 uint8_t scaling_matrix8[2][64];
117 * Picture parameter set
/* NOTE(review): elided dump — the enclosing "typedef struct PPS{" opener and
 * "}PPS;" closer are not visible in this chunk. Fields mirror the H.264 PPS
 * bitstream syntax elements. */
121 int cabac; ///< entropy_coding_mode_flag
122 int pic_order_present; ///< pic_order_present_flag
123 int slice_group_count; ///< num_slice_groups_minus1 + 1
124 int mb_slice_group_map_type;
125 unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
126 int weighted_pred; ///< weighted_pred_flag
127 int weighted_bipred_idc;
128 int init_qp; ///< pic_init_qp_minus26 + 26
129 int init_qs; ///< pic_init_qs_minus26 + 26
130 int chroma_qp_index_offset;
131 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
132 int constrained_intra_pred; ///< constrained_intra_pred_flag
133 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
134 int transform_8x8_mode; ///< transform_8x8_mode_flag
/* per-PPS quantisation scaling lists, same layout as in SPS */
135 uint8_t scaling_matrix4[6][16];
136 uint8_t scaling_matrix8[2][64];
140 * Memory management control operation opcode.
142 typedef enum MMCOOpcode{
153 * Memory management control operation.
/* Main per-decoder state for the H.264/AVC codec. NOTE(review): elided dump —
 * many members and the closing "}H264Context;" are not visible in this chunk;
 * the struct presumably embeds a MpegEncContext `s` as its first member
 * (it is accessed as h->s elsewhere in this file) — confirm against full file. */
164 typedef struct H264Context{
/* raw NAL unit buffer after emulation-prevention-byte removal */
168 uint8_t *rbsp_buffer;
169 unsigned int rbsp_buffer_size;
172 * Used to parse AVC variant of h264
174 int is_avc; ///< this flag is != 0 if codec is avc1
175 int got_avcC; ///< flag used to parse avcC data only once
176 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
/* intra prediction state for the current macroblock */
184 int chroma_pred_mode;
185 int intra16x16_pred_mode;
190 int8_t intra4x4_pred_mode_cache[5*8];
191 int8_t (*intra4x4_pred_mode)[8];
/* function pointer tables for the intra prediction modes */
192 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
193 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
194 void (*pred8x8 [4+3])(uint8_t *src, int stride);
195 void (*pred16x16[4+3])(uint8_t *src, int stride);
/* bitmasks of which neighboring samples are available for intra prediction */
196 unsigned int topleft_samples_available;
197 unsigned int top_samples_available;
198 unsigned int topright_samples_available;
199 unsigned int left_samples_available;
200 uint8_t (*top_borders[2])[16+2*8];
201 uint8_t left_border[2*(17+2*9)];
204 * non zero coeff count cache.
205 * is 64 if not available.
207 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
208 uint8_t (*non_zero_count)[16];
211 * Motion vector cache.
213 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
214 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
/* sentinel values stored in ref_cache[] */
215 #define LIST_NOT_USED -1 //FIXME rename?
216 #define PART_NOT_AVAILABLE -2
219 * is 1 if the specific list MV&references are set to 0,0,-2.
221 int mv_cache_clean[2];
224 * number of neighbors (top and/or left) that used 8x8 dct
226 int neighbor_transform_size;
229 * block_offset[ 0..23] for frame macroblocks
230 * block_offset[24..47] for field macroblocks
232 int block_offset[2*(16+8)];
/* macroblock-index -> motion-vector-block-index lookup */
234 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
236 int b_stride; //FIXME use s->b4_stride
239 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
248 int unknown_svq3_flag;
249 int next_slice_index;
/* parameter-set storage: all received SPS/PPS plus the active ones */
251 SPS sps_buffer[MAX_SPS_COUNT];
252 SPS sps; ///< current sps
254 PPS pps_buffer[MAX_PPS_COUNT];
258 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
/* dequantisation tables, one set per QP (0..51) */
260 uint32_t dequant4_buffer[6][52][16];
261 uint32_t dequant8_buffer[2][52][64];
262 uint32_t (*dequant4_coeff[6])[16];
263 uint32_t (*dequant8_coeff[2])[64];
264 int dequant_coeff_pps; ///< reinit tables when pps changes
/* per-macroblock slice number, used for cross-slice availability checks */
267 uint8_t *slice_table_base;
268 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
270 int slice_type_fixed;
272 //interlacing specific flags
274 int mb_field_decoding_flag;
275 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
277 unsigned int sub_mb_type[4];
/* picture order count (POC) state */
282 int delta_poc_bottom;
285 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
286 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
287 int frame_num_offset; ///< for POC type 2
288 int prev_frame_num_offset; ///< for POC type 2
289 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
292 * frame_num for frames or 2*frame_num for field pics.
297 * max_frame_num or 2*max_frame_num for field pics.
301 //Weighted pred stuff
303 int use_weight_chroma;
304 int luma_log2_weight_denom;
305 int chroma_log2_weight_denom;
306 int luma_weight[2][48];
307 int luma_offset[2][48];
308 int chroma_weight[2][48][2];
309 int chroma_offset[2][48][2];
310 int implicit_weight[48][48];
/* deblocking filter parameters from the slice header */
313 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
314 int slice_alpha_c0_offset;
315 int slice_beta_offset;
317 int redundant_pic_count;
/* B-frame direct-mode prediction tables */
319 int direct_spatial_mv_pred;
320 int dist_scale_factor[16];
321 int dist_scale_factor_field[32];
322 int map_col_to_list0[2][16];
323 int map_col_to_list0_field[2][32];
326 * num_ref_idx_l0/1_active_minus1 + 1
328 unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
/* decoded picture buffer / reference lists */
329 Picture *short_ref[32];
330 Picture *long_ref[32];
331 Picture default_ref_list[2][32];
332 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
333 Picture *delayed_pic[18]; //FIXME size?
334 Picture *delayed_output_pic;
337 * memory management control operations buffer.
339 MMCO mmco[MAX_MMCO_COUNT];
342 int long_ref_count; ///< number of actual long term references
343 int short_ref_count; ///< number of actual short term references
/* bitstream readers for data-partitioned slices (intra vs inter partitions) */
346 GetBitContext intra_gb;
347 GetBitContext inter_gb;
348 GetBitContext *intra_gb_ptr;
349 GetBitContext *inter_gb_ptr;
/* residual coefficient blocks for the current macroblock */
351 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
352 DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb
/* CABAC entropy-coding state */
358 uint8_t cabac_state[460];
361 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
366 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
367 uint8_t *chroma_pred_mode_table;
368 int last_qscale_diff;
369 int16_t (*mvd_table[2])[2];
370 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
371 uint8_t *direct_table;
372 uint8_t direct_cache[5*8];
/* coefficient scan orders; the *_q0 variants are the QP==0 (lossless) tables */
374 uint8_t zigzag_scan[16];
375 uint8_t zigzag_scan8x8[64];
376 uint8_t zigzag_scan8x8_cavlc[64];
377 uint8_t field_scan[16];
378 uint8_t field_scan8x8[64];
379 uint8_t field_scan8x8_cavlc[64];
380 const uint8_t *zigzag_scan_q0;
381 const uint8_t *zigzag_scan8x8_q0;
382 const uint8_t *zigzag_scan8x8_cavlc_q0;
383 const uint8_t *field_scan_q0;
384 const uint8_t *field_scan8x8_q0;
385 const uint8_t *field_scan8x8_cavlc_q0;
/* File-scope VLC tables for CAVLC residual decoding (one table per context
 * class), plus forward declarations for the SVQ3 variant helpers and the
 * in-loop deblocking filter. */
390 static VLC coeff_token_vlc[4];
391 static VLC chroma_dc_coeff_token_vlc;
393 static VLC total_zeros_vlc[15];
394 static VLC chroma_dc_total_zeros_vlc[3];
396 static VLC run_vlc[6];
399 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
400 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
401 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
402 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Packs two 16-bit values into one 32-bit word, ordered so that storing the
 * result to memory yields `a` followed by `b` regardless of endianness.
 * NOTE(review): elided dump — the "#else"/"#endif" between the two returns
 * and the closing brace are not visible in this chunk. */
404 static av_always_inline uint32_t pack16to32(int a, int b){
405 #ifdef WORDS_BIGENDIAN
406 return (b&0xFFFF) + (a<<16);
408 return (a&0xFFFF) + (b<<16);
/* Lookup tables replacing qp%6 and qp/6 for QP values 0..51, used when
 * indexing the dequantisation tables. NOTE(review): elided dump — the
 * closing "};" of each array is not visible in this chunk. */
412 const uint8_t ff_rem6[52]={
413 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
416 const uint8_t ff_div6[52]={
417 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
423 * @param h height of the rectangle, should be a constant
424 * @param w width of the rectangle, should be a constant
425 * @param size the size of val (1 or 4), should be a constant
/* Fills a w x h rectangle of `size`-byte elements at vp with `val`,
 * specialised by element width (16/32/64-bit stores) for speed.
 * NOTE(review): elided dump — the `if(w==2)/else if(w==4)/...` branch
 * conditions, the `if(h==4)` guards between the paired stores, and the
 * closing braces are not visible in this chunk; only the store bodies are. */
427 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
428 uint8_t *p= (uint8_t*)vp;
429 assert(size==1 || size==4);
/* callers must pass an aligned pointer and a stride that is a multiple of w */
435 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
436 assert((stride&(w-1))==0);
/* 2-byte-wide rows: replicate a byte value into 16 bits if size==1 */
438 const uint16_t v= size==4 ? val : val*0x0101;
439 *(uint16_t*)(p + 0*stride)= v;
441 *(uint16_t*)(p + 1*stride)= v;
443 *(uint16_t*)(p + 2*stride)=
444 *(uint16_t*)(p + 3*stride)= v;
/* 4-byte-wide rows: replicate a byte value into 32 bits if size==1 */
446 const uint32_t v= size==4 ? val : val*0x01010101;
447 *(uint32_t*)(p + 0*stride)= v;
449 *(uint32_t*)(p + 1*stride)= v;
451 *(uint32_t*)(p + 2*stride)=
452 *(uint32_t*)(p + 3*stride)= v;
454 //gcc can't optimize 64bit math on x86_32
455 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
/* 8-byte-wide rows via one 64-bit store per row (64-bit targets only) */
456 const uint64_t v= val*0x0100000001ULL;
457 *(uint64_t*)(p + 0*stride)= v;
459 *(uint64_t*)(p + 1*stride)= v;
461 *(uint64_t*)(p + 2*stride)=
462 *(uint64_t*)(p + 3*stride)= v;
/* 16-byte-wide rows via two 64-bit stores per row */
464 const uint64_t v= val*0x0100000001ULL;
465 *(uint64_t*)(p + 0+0*stride)=
466 *(uint64_t*)(p + 8+0*stride)=
467 *(uint64_t*)(p + 0+1*stride)=
468 *(uint64_t*)(p + 8+1*stride)= v;
470 *(uint64_t*)(p + 0+2*stride)=
471 *(uint64_t*)(p + 8+2*stride)=
472 *(uint64_t*)(p + 0+3*stride)=
473 *(uint64_t*)(p + 8+3*stride)= v;
/* 32-bit fallback: 8-byte-wide rows via two 32-bit stores per row */
475 *(uint32_t*)(p + 0+0*stride)=
476 *(uint32_t*)(p + 4+0*stride)= val;
478 *(uint32_t*)(p + 0+1*stride)=
479 *(uint32_t*)(p + 4+1*stride)= val;
481 *(uint32_t*)(p + 0+2*stride)=
482 *(uint32_t*)(p + 4+2*stride)=
483 *(uint32_t*)(p + 0+3*stride)=
484 *(uint32_t*)(p + 4+3*stride)= val;
/* 32-bit fallback: 16-byte-wide rows via four 32-bit stores per row */
486 *(uint32_t*)(p + 0+0*stride)=
487 *(uint32_t*)(p + 4+0*stride)=
488 *(uint32_t*)(p + 8+0*stride)=
489 *(uint32_t*)(p +12+0*stride)=
490 *(uint32_t*)(p + 0+1*stride)=
491 *(uint32_t*)(p + 4+1*stride)=
492 *(uint32_t*)(p + 8+1*stride)=
493 *(uint32_t*)(p +12+1*stride)= val;
495 *(uint32_t*)(p + 0+2*stride)=
496 *(uint32_t*)(p + 4+2*stride)=
497 *(uint32_t*)(p + 8+2*stride)=
498 *(uint32_t*)(p +12+2*stride)=
499 *(uint32_t*)(p + 0+3*stride)=
500 *(uint32_t*)(p + 4+3*stride)=
501 *(uint32_t*)(p + 8+3*stride)=
502 *(uint32_t*)(p +12+3*stride)= val;
/* Loads the per-macroblock neighbour caches (intra pred modes, non-zero
 * coefficient counts, motion vectors, reference indices, mvd, direct flags)
 * from the frame-wide arrays for the macroblock at (s->mb_x, s->mb_y).
 * With for_deblock!=0 only the subset needed by the deblocking filter is
 * filled. NOTE(review): elided dump — many interior lines, the left_block[]
 * setup, several if/else framings and the closing brace are not visible in
 * this chunk; the code below must be read with those gaps in mind. */
509 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
510 MpegEncContext * const s = &h->s;
511 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
512 int topleft_xy, top_xy, topright_xy, left_xy[2];
513 int topleft_type, top_type, topright_type, left_type[2];
517 //FIXME deblocking could skip the intra and nnz parts.
518 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
521 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (non-MBAFF) neighbour indices in raster order */
523 top_xy = mb_xy - s->mb_stride;
524 topleft_xy = top_xy - 1;
525 topright_xy= top_xy + 1;
526 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are chosen pairwise, depending on whether the current
 * and neighbouring mb pairs are frame- or field-coded */
536 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
537 const int top_pair_xy = pair_xy - s->mb_stride;
538 const int topleft_pair_xy = top_pair_xy - 1;
539 const int topright_pair_xy = top_pair_xy + 1;
540 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
541 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
542 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
543 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
544 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
545 const int bottom = (s->mb_y & 1);
546 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
548 ? !curr_mb_frame_flag // bottom macroblock
549 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
551 top_xy -= s->mb_stride;
554 ? !curr_mb_frame_flag // bottom macroblock
555 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
557 topleft_xy -= s->mb_stride;
560 ? !curr_mb_frame_flag // bottom macroblock
561 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
563 topright_xy -= s->mb_stride;
565 if (left_mb_frame_flag != curr_mb_frame_flag) {
566 left_xy[1] = left_xy[0] = pair_xy - 1;
567 if (curr_mb_frame_flag) {
588 left_xy[1] += s->mb_stride;
/* remember neighbour indices for later use (e.g. by the loop filter) */
601 h->top_mb_xy = top_xy;
602 h->left_mb_xy[0] = left_xy[0];
603 h->left_mb_xy[1] = left_xy[1];
/* deblocking path: a neighbour is usable if its slice_table entry is valid */
607 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
608 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
609 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
611 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* reload the current mb's own nnz and motion data, needed because MBAFF
 * pair handling may have overwritten the caches */
613 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
615 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
616 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
617 if(USES_LIST(mb_type,list)){
618 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
619 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
620 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
621 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
627 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
628 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
630 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
631 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
633 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
634 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* decode path: a neighbour is usable only if it is in the same slice */
639 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
640 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
641 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
642 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
643 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* compute per-sample availability bitmasks for intra prediction; a
 * neighbour is unavailable if absent or, with constrained intra pred,
 * if it is not itself intra-coded */
646 if(IS_INTRA(mb_type)){
647 h->topleft_samples_available=
648 h->top_samples_available=
649 h->left_samples_available= 0xFFFF;
650 h->topright_samples_available= 0xEEEA;
652 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
653 h->topleft_samples_available= 0xB3FF;
654 h->top_samples_available= 0x33FF;
655 h->topright_samples_available= 0x26EA;
658 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
659 h->topleft_samples_available&= 0xDF5F;
660 h->left_samples_available&= 0x5F5F;
664 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
665 h->topleft_samples_available&= 0x7FFF;
667 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
668 h->topright_samples_available&= 0xFBFF;
/* load neighbours' intra4x4 prediction modes into the cache top row and
 * left column; -1 (via `pred`) marks unavailable, per the spec's
 * Intra4x4PredMode derivation */
670 if(IS_INTRA4x4(mb_type)){
671 if(IS_INTRA4x4(top_type)){
672 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
673 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
674 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
675 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
678 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
683 h->intra4x4_pred_mode_cache[4+8*0]=
684 h->intra4x4_pred_mode_cache[5+8*0]=
685 h->intra4x4_pred_mode_cache[6+8*0]=
686 h->intra4x4_pred_mode_cache[7+8*0]= pred;
689 if(IS_INTRA4x4(left_type[i])){
690 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
691 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
694 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
699 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
700 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* load neighbours' non-zero coefficient counts; 64 marks "not available" */
715 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
717 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
718 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
719 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
720 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
722 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
723 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
725 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
726 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
729 h->non_zero_count_cache[4+8*0]=
730 h->non_zero_count_cache[5+8*0]=
731 h->non_zero_count_cache[6+8*0]=
732 h->non_zero_count_cache[7+8*0]=
734 h->non_zero_count_cache[1+8*0]=
735 h->non_zero_count_cache[2+8*0]=
737 h->non_zero_count_cache[1+8*3]=
738 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
742 for (i=0; i<2; i++) {
744 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
745 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
746 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
747 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
749 h->non_zero_count_cache[3+8*1 + 2*8*i]=
750 h->non_zero_count_cache[3+8*2 + 2*8*i]=
751 h->non_zero_count_cache[0+8*1 + 8*i]=
752 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* load neighbours' coded-block-pattern for CABAC context derivation */
759 h->top_cbp = h->cbp_table[top_xy];
760 } else if(IS_INTRA(mb_type)) {
767 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
768 } else if(IS_INTRA(mb_type)) {
774 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
777 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* load neighbours' motion vectors and reference indices for MV prediction */
782 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
784 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
785 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
786 /*if(!h->mv_cache_clean[list]){
787 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
788 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
789 h->mv_cache_clean[list]= 1;
793 h->mv_cache_clean[list]= 0;
795 if(USES_LIST(top_type, list)){
796 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
797 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
798 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
799 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
800 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
802 h->ref_cache[list][scan8[0] + 0 - 1*8]=
803 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
804 h->ref_cache[list][scan8[0] + 2 - 1*8]=
805 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
807 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
808 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
809 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
811 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
814 //FIXME unify cleanup or sth
815 if(USES_LIST(left_type[0], list)){
816 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
817 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
818 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
819 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
820 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
821 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
823 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
824 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
825 h->ref_cache[list][scan8[0] - 1 + 0*8]=
826 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
829 if(USES_LIST(left_type[1], list)){
830 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
831 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
832 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
833 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
834 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
835 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
837 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
838 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
839 h->ref_cache[list][scan8[0] - 1 + 2*8]=
840 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
841 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright are only needed by temporal direct mode and deblocking */
844 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
847 if(USES_LIST(topleft_type, list)){
848 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
849 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
850 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
851 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
853 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
854 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
857 if(USES_LIST(topright_type, list)){
858 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
859 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
860 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
861 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
863 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
864 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
867 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* mark the padding cache entries unused/unavailable */
870 h->ref_cache[list][scan8[5 ]+1] =
871 h->ref_cache[list][scan8[7 ]+1] =
872 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
873 h->ref_cache[list][scan8[4 ]] =
874 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
875 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
876 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
877 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
878 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
879 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
882 /* XXX beurk, Load mvd */
883 if(USES_LIST(top_type, list)){
884 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
885 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
886 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
890 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
891 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
895 if(USES_LIST(left_type[0], list)){
896 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
897 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
898 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
900 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
901 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
903 if(USES_LIST(left_type[1], list)){
904 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
905 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
906 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
908 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
909 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
911 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
912 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
913 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
914 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
915 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: cache neighbours' direct-mode flags for CABAC */
917 if(h->slice_type == B_TYPE){
918 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
920 if(IS_DIRECT(top_type)){
921 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
922 }else if(IS_8X8(top_type)){
923 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
924 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
925 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
927 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
930 if(IS_DIRECT(left_type[0]))
931 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
932 else if(IS_8X8(left_type[0]))
933 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
935 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
937 if(IS_DIRECT(left_type[1]))
938 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
939 else if(IS_8X8(left_type[1]))
940 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
942 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling: MAP_MVS applies MAP_F2F to every cached
 * neighbour position; the two MAP_F2F definitions below convert refs/mvs
 * between frame and field units in opposite directions */
948 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
949 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
950 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
954 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
955 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
957 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* current mb is field-coded, neighbour is frame-coded: field ref = 2*frame ref,
 * field vertical mv = frame vertical mv / 2 */
959 #define MAP_F2F(idx, mb_type)\
960 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
961 h->ref_cache[list][idx] <<= 1;\
962 h->mv_cache[list][idx][1] /= 2;\
963 h->mvd_cache[list][idx][1] /= 2;\
/* current mb is frame-coded, neighbour is field-coded: inverse mapping */
968 #define MAP_F2F(idx, mb_type)\
969 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
970 h->ref_cache[list][idx] >>= 1;\
971 h->mv_cache[list][idx][1] <<= 1;\
972 h->mvd_cache[list][idx][1] <<= 1;\
/* count how many of the top/left neighbours used the 8x8 transform */
982 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Stores the current macroblock's intra4x4 prediction modes from the
 * decode-time cache back into the frame-wide intra4x4_pred_mode array,
 * so they can serve as top/left neighbours for later macroblocks.
 * NOTE(review): elided dump — the store of element [7] (visible in the
 * companion read code above as index 3 reads) and the closing brace are
 * not visible in this chunk. */
985 static inline void write_back_intra_pred_mode(H264Context *h){
986 MpegEncContext * const s = &h->s;
987 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
989 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
990 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
991 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
992 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
993 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
994 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
995 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
999 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns 0 on success; logs an error and (per the visible pattern) fails
 * when a cached mode requires an unavailable neighbour and has no valid
 * remapping (-1 entries in the tables below). NOTE(review): elided dump —
 * the loop headers, error-return lines and closing brace are not visible. */
1001 static inline int check_intra4x4_pred_mode(H264Context *h){
1002 MpegEncContext * const s = &h->s;
/* remap tables indexed by the requested mode: value -1 = mode forbidden,
 * otherwise the DC-style substitute mode to use instead */
1003 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1004 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1007 if(!(h->top_samples_available&0x8000)){
1009 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1011 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1014 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1019 if(!(h->left_samples_available&0x8000)){
1021 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1023 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1026 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1032 } //FIXME cleanup like next
1035 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode but for a single 16x16/chroma mode;
 * returns the (possibly remapped) mode or an error. NOTE(review): elided
 * dump — the remapping assignments, return statements and closing brace
 * are not visible in this chunk. */
1037 static inline int check_intra_pred_mode(H264Context *h, int mode){
1038 MpegEncContext * const s = &h->s;
/* -1 = mode forbidden when that neighbour is missing, else substitute mode */
1039 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1040 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1043 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1047 if(!(h->top_samples_available&0x8000)){
1050 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1055 if(!(h->left_samples_available&0x8000)){
1058 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1067 * gets the predicted intra4x4 prediction mode.
/* Prediction is min(left mode, top mode); if either neighbour is unavailable
 * (negative cache entry) the predictor falls back to DC_PRED.
 * NOTE(review): the final "return min;" and closing brace are elided. */
1069 static inline int pred_intra_mode(H264Context *h, int n){
1070 const int index8= scan8[n];
1071 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1072 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1073 const int min= FFMIN(left, top);
1075 tprintf("mode:%d %d min:%d\n", left ,top, min);
1077 if(min<0) return DC_PRED;
/* Copies the per-block non-zero coefficient counts of the current MB from
 * the 8-wide cache back into the frame-wide non_zero_count array (luma
 * entries 0-6, chroma entries 7-12), plus a 16-bit luma nnz bitmask at
 * offset 14 used by the deblocking filter.
 * NOTE(review): listing elided — several lines (and the closing brace) of
 * the original are missing. */
1081 static inline void write_back_non_zero_count(H264Context *h){
1082 MpegEncContext * const s = &h->s;
1083 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1085 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1086 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1087 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1088 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1089 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1090 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1091 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
1093 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1094 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1095 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1097 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1098 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1099 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1102 // store all luma nnzs, for deblocking
/* v accumulates one flag bit per 4x4 luma block (loop header elided) */
1105 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1106 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1111 * gets the predicted number of non zero coefficients.
1112 * @param n block index
/* Predicts the nnz of block n from the cached left and top neighbours;
 * the (i+1)>>1 line presumably averages left+top when both are available
 * (the computation of i and the return are elided from this listing). */
1114 static inline int pred_non_zero_count(H264Context *h, int n){
1115 const int index8= scan8[n];
1116 const int left= h->non_zero_count_cache[index8 - 1];
1117 const int top = h->non_zero_count_cache[index8 - 8];
1120 if(i<64) i= (i+1)>>1;
1122 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Selects the "C" motion-vector predictor for block i: normally the
 * top-right neighbour, falling back to the top-left neighbour when the
 * top-right is unavailable.  Under MBAFF the SET_DIAG_MV macro additionally
 * rescales the MV and ref between field and frame coordinates (y and ref
 * halved/doubled) when the neighbour MB's interlacing differs.
 * Returns the ref index of the chosen predictor and points *C at its MV.
 * NOTE(review): listing heavily elided — several conditions, braces and the
 * final lines are missing; do not assume the visible lines are contiguous. */
1127 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1128 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1130 /* there is no consistent mapping of mvs to neighboring locations that will
1131 * make mbaff happy, so we can't move all this logic to fill_caches */
1133 MpegEncContext *s = &h->s;
1134 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
1136 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1137 *C = h->mv_cache[list][scan8[0]-2];
1140 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1141 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1142 if(IS_INTERLACED(mb_types[topright_xy])){
1143 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1144 const int x4 = X4, y4 = Y4;\
1145 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1146 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1147 return LIST_NOT_USED;\
1148 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1149 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1150 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1151 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1153 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1156 if(topright_ref == PART_NOT_AVAILABLE
1157 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1158 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1160 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1161 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1164 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1165 && i >= scan8[0]+8){
1166 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1167 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF / common path: use top-right if available, else top-left */
1173 if(topright_ref != PART_NOT_AVAILABLE){
1174 *C= h->mv_cache[list][ i - 8 + part_width ];
1175 return topright_ref;
1177 tprintf("topright MV not available\n");
1179 *C= h->mv_cache[list][ i - 8 - 1 ];
1180 return h->ref_cache[list][ i - 8 - 1 ];
1185 * gets the predicted MV.
1186 * @param n the block index
1187 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1188 * @param mx the x component of the predicted motion vector
1189 * @param my the y component of the predicted motion vector
/* Standard median MV prediction: A = left, B = top, C = diagonal
 * (top-right/top-left via fetch_diagonal_mv).  If exactly one neighbour
 * shares the target ref index, that neighbour's MV is used directly;
 * otherwise the component-wise median of A, B, C.
 * NOTE(review): listing elided — the single-match branches and several
 * closing braces are missing. */
1191 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1192 const int index8= scan8[n];
1193 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1194 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1195 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1196 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1198 int diagonal_ref, match_count;
1200 assert(part_width==1 || part_width==2 || part_width==4);
1210 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1211 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1212 tprintf("pred_motion match_count=%d\n", match_count);
1213 if(match_count > 1){ //most common
1214 *mx= mid_pred(A[0], B[0], C[0]);
1215 *my= mid_pred(A[1], B[1], C[1]);
1216 }else if(match_count==1){
1220 }else if(top_ref==ref){
/* no neighbour matches: only-left special case, else median anyway */
1228 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1232 *mx= mid_pred(A[0], B[0], C[0]);
1233 *my= mid_pred(A[1], B[1], C[1]);
1237 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1241 * gets the directionally predicted 16x8 MV.
1242 * @param n the block index
1243 * @param mx the x component of the predicted motion vector
1244 * @param my the y component of the predicted motion vector
/* 16x8 shortcut: top partition prefers the top neighbour's MV, bottom
 * partition prefers the left neighbour's MV (when the ref matches);
 * otherwise falls through to the generic median predictor.
 * NOTE(review): the n==0 branch and copy of B into *mx/*my are elided. */
1246 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1248 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1249 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1251 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1259 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1260 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1262 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1264 if(left_ref == ref){
/* fallback: generic median prediction */
1272 pred_motion(h, n, 4, list, ref, mx, my);
1276 * gets the directionally predicted 8x16 MV.
1277 * @param n the block index
1278 * @param mx the x component of the predicted motion vector
1279 * @param my the y component of the predicted motion vector
/* 8x16 shortcut: left partition prefers the left neighbour's MV, right
 * partition prefers the diagonal (top-right) neighbour's MV when the ref
 * matches; otherwise the generic median predictor.
 * NOTE(review): the MV copy lines and branch structure are elided. */
1281 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1283 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1284 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1286 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1288 if(left_ref == ref){
1297 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1299 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1301 if(diagonal_ref == ref){
1309 pred_motion(h, n, 2, list, ref, mx, my);
/* P-Skip MV prediction: zero MV when the top or left neighbour is
 * unavailable or is ref 0 with a zero MV; otherwise the normal 16x16
 * median prediction with ref 0.
 * NOTE(review): the *mx = *my = 0 assignment and the closing braces are
 * elided from this listing. */
1312 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1313 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1314 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1316 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1318 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1319 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1320 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1326 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct distance scale factor per list0 ref:
 * td = POC distance between the two references, tb = distance from the
 * current picture; factor = clip((tb*tx + 32) >> 6) with tx the fixed-point
 * reciprocal of td (H.264 8.4.1.2.3).  Also mirrors the table into the
 * field variant for MBAFF.
 * NOTE(review): listing elided — loop/branch braces and the FRAME_MBAFF
 * guard around the field copy are missing. */
1331 static inline void direct_dist_scale_factor(H264Context * const h){
1332 const int poc = h->s.current_picture_ptr->poc;
1333 const int poc1 = h->ref_list[1][0].poc;
1335 for(i=0; i<h->ref_count[0]; i++){
1336 int poc0 = h->ref_list[0][i].poc;
1337 int td = clip(poc1 - poc0, -128, 127);
1338 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1339 h->dist_scale_factor[i] = 256;
1341 int tb = clip(poc - poc0, -128, 127);
1342 int tx = (16384 + (FFABS(td) >> 1)) / td;
1343 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1347 for(i=0; i<h->ref_count[0]; i++){
1348 h->dist_scale_factor_field[2*i] =
1349 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Records the current picture's ref counts/POCs and, for temporal direct
 * mode in B slices, builds map_col_to_list0: for each reference of the
 * co-located picture (ref_list[1][0]) the index of the list0 entry with the
 * same POC (0 when no match).  Also derives the doubled field-pair mapping
 * for MBAFF.
 * NOTE(review): listing elided — loop braces, the break after a POC match
 * and the FRAME_MBAFF guard are missing. */
1353 static inline void direct_ref_list_init(H264Context * const h){
1354 MpegEncContext * const s = &h->s;
1355 Picture * const ref1 = &h->ref_list[1][0];
1356 Picture * const cur = s->current_picture_ptr;
1358 if(cur->pict_type == I_TYPE)
1359 cur->ref_count[0] = 0;
1360 if(cur->pict_type != B_TYPE)
1361 cur->ref_count[1] = 0;
1362 for(list=0; list<2; list++){
1363 cur->ref_count[list] = h->ref_count[list];
1364 for(j=0; j<h->ref_count[list]; j++)
1365 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1367 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1369 for(list=0; list<2; list++){
1370 for(i=0; i<ref1->ref_count[list]; i++){
1371 const int poc = ref1->ref_poc[list][i];
1372 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1373 for(j=0; j<h->ref_count[list]; j++)
1374 if(h->ref_list[list][j].poc == poc){
1375 h->map_col_to_list0[list][i] = j;
1381 for(list=0; list<2; list++){
1382 for(i=0; i<ref1->ref_count[list]; i++){
1383 j = h->map_col_to_list0[list][i];
1384 h->map_col_to_list0_field[list][2*i] = 2*j;
1385 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Derives the motion of a B-Direct macroblock (or its direct 8x8 parts).
 * Two modes: spatial (refs/MVs from the spatial neighbours, zeroed where the
 * co-located block is effectively static) and temporal (MVs of the
 * co-located block in ref_list[1][0] scaled by dist_scale_factor).  Handles
 * MBAFF frame<->field co-located scaling.
 * NOTE(review): this listing is heavily elided — many lines (loop bodies,
 * braces, else branches) of the original function are missing; treat the
 * visible lines as non-contiguous excerpts. */
1391 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1392 MpegEncContext * const s = &h->s;
1393 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1394 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1395 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1396 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1397 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1398 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1399 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1400 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1401 const int is_b8x8 = IS_8X8(*mb_type);
1402 unsigned int sub_mb_type;
1405 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1406 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1407 /* FIXME save sub mb types from previous frames (or derive from MVs)
1408 * so we know exactly what block size to use */
1409 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1410 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1411 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1412 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1413 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1415 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1416 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1419 *mb_type |= MB_TYPE_DIRECT2;
1421 *mb_type |= MB_TYPE_INTERLACED;
1423 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1425 if(h->direct_spatial_mv_pred){
1430 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1432 /* ref = min(neighbors) */
1433 for(list=0; list<2; list++){
1434 int refa = h->ref_cache[list][scan8[0] - 1];
1435 int refb = h->ref_cache[list][scan8[0] - 8];
1436 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1438 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1440 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1442 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1448 if(ref[0] < 0 && ref[1] < 0){
1449 ref[0] = ref[1] = 0;
1450 mv[0][0] = mv[0][1] =
1451 mv[1][0] = mv[1][1] = 0;
1453 for(list=0; list<2; list++){
1455 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1457 mv[list][0] = mv[list][1] = 0;
1462 *mb_type &= ~MB_TYPE_P0L1;
1463 sub_mb_type &= ~MB_TYPE_P0L1;
1464 }else if(ref[0] < 0){
1465 *mb_type &= ~MB_TYPE_P0L0;
1466 sub_mb_type &= ~MB_TYPE_P0L0;
1469 if(IS_16X16(*mb_type)){
1470 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1471 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1472 if(!IS_INTRA(mb_type_col)
1473 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1474 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1475 && (h->x264_build>33 || !h->x264_build)))){
1477 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1479 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1481 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1483 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1485 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1486 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1489 for(i8=0; i8<4; i8++){
1490 const int x8 = i8&1;
1491 const int y8 = i8>>1;
1493 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1495 h->sub_mb_type[i8] = sub_mb_type;
1497 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1498 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1499 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1500 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero the MVs where the co-located 8x8/4x4 block is (near) static */
1503 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1504 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1505 && (h->x264_build>33 || !h->x264_build)))){
1506 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1507 if(IS_SUB_8X8(sub_mb_type)){
1508 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1509 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1511 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1513 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1516 for(i4=0; i4<4; i4++){
1517 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1518 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1520 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1522 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1528 }else{ /* direct temporal mv pred */
1529 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1530 const int *dist_scale_factor = h->dist_scale_factor;
1533 if(IS_INTERLACED(*mb_type)){
1534 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1535 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1536 dist_scale_factor = h->dist_scale_factor_field;
1538 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1539 /* FIXME assumes direct_8x8_inference == 1 */
1540 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1541 int mb_types_col[2];
1544 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1545 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1546 | (*mb_type & MB_TYPE_INTERLACED);
1547 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1549 if(IS_INTERLACED(*mb_type)){
1550 /* frame to field scaling */
1551 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1552 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1554 l1ref0 -= 2*h->b8_stride;
1555 l1ref1 -= 2*h->b8_stride;
1556 l1mv0 -= 4*h->b_stride;
1557 l1mv1 -= 4*h->b_stride;
1561 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1562 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1564 *mb_type |= MB_TYPE_16x8;
1566 *mb_type |= MB_TYPE_8x8;
1568 /* field to frame scaling */
1569 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1570 * but in MBAFF, top and bottom POC are equal */
1571 int dy = (s->mb_y&1) ? 1 : 2;
1573 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1574 l1ref0 += dy*h->b8_stride;
1575 l1ref1 += dy*h->b8_stride;
1576 l1mv0 += 2*dy*h->b_stride;
1577 l1mv1 += 2*dy*h->b_stride;
1580 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1582 *mb_type |= MB_TYPE_16x16;
1584 *mb_type |= MB_TYPE_8x8;
1587 for(i8=0; i8<4; i8++){
1588 const int x8 = i8&1;
1589 const int y8 = i8>>1;
1591 const int16_t (*l1mv)[2]= l1mv0;
1593 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1595 h->sub_mb_type[i8] = sub_mb_type;
1597 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1598 if(IS_INTRA(mb_types_col[y8])){
1599 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1600 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1601 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1605 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1607 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1609 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1612 scale = dist_scale_factor[ref0];
1613 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1616 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1617 int my_col = (mv_col[1]<<y_shift)/2;
1618 int mx = (scale * mv_col[0] + 128) >> 8;
1619 int my = (scale * my_col + 128) >> 8;
1620 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1621 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
/* ---- non-MBAFF temporal direct: straight 1:1 mv scaling ---- */
1628 /* one-to-one mv scaling */
1630 if(IS_16X16(*mb_type)){
1631 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1632 if(IS_INTRA(mb_type_col)){
1633 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1634 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1635 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1637 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1638 : map_col_to_list0[1][l1ref1[0]];
1639 const int scale = dist_scale_factor[ref0];
1640 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1642 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1643 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1644 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1645 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1646 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1649 for(i8=0; i8<4; i8++){
1650 const int x8 = i8&1;
1651 const int y8 = i8>>1;
1653 const int16_t (*l1mv)[2]= l1mv0;
1655 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1657 h->sub_mb_type[i8] = sub_mb_type;
1658 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1659 if(IS_INTRA(mb_type_col)){
1660 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1661 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1662 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1666 ref0 = l1ref0[x8 + y8*h->b8_stride];
1668 ref0 = map_col_to_list0[0][ref0];
1670 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1673 scale = dist_scale_factor[ref0];
1675 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1676 if(IS_SUB_8X8(sub_mb_type)){
1677 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1678 int mx = (scale * mv_col[0] + 128) >> 8;
1679 int my = (scale * mv_col[1] + 128) >> 8;
1680 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1681 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1683 for(i4=0; i4<4; i4++){
1684 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1685 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1686 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1687 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1688 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1689 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-MB motion data from the caches back into the frame-wide
 * arrays: MVs and (for CABAC) MV deltas row by row, ref indices per 8x8
 * block, and for CABAC B slices the per-8x8 direct flags.
 * NOTE(review): listing elided — loop headers, the list-unused MV clearing
 * and several braces are missing. */
1696 static inline void write_back_motion(H264Context *h, int mb_type){
1697 MpegEncContext * const s = &h->s;
1698 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1699 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1702 if(!USES_LIST(mb_type, 0))
1703 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1705 for(list=0; list<2; list++){
1707 if(!USES_LIST(mb_type, list))
/* copy the 4 MV rows (two 64-bit stores per row of four 32-bit MVs) */
1711 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1712 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1714 if( h->pps.cabac ) {
1715 if(IS_SKIP(mb_type))
1716 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1719 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1720 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1725 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1726 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1727 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1728 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1729 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1733 if(h->slice_type == B_TYPE && h->pps.cabac){
1734 if(IS_8X8(mb_type)){
1735 uint8_t *direct_table = &h->direct_table[b8_xy];
1736 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1737 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1738 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1744 * Decodes a network abstraction layer unit.
1745 * @param consumed is the number of bytes used as input
1746 * @param length is the length of the array
1747 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1748 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header byte, then converts EBSP to RBSP by stripping the
 * 00 00 03 emulation-prevention bytes; when no escape byte exists the input
 * is returned directly (zero-copy), otherwise the unescaped payload is
 * written into h->rbsp_buffer.
 * NOTE(review): listing elided — the escape-scan loop body, the copy loop
 * and the return statements are missing. */
1750 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1754 // src[0]&0x80; //forbidden bit
1755 h->nal_ref_idc= src[0]>>5;
1756 h->nal_unit_type= src[0]&0x1F;
1760 for(i=0; i<length; i++)
1761 printf("%2X ", src[i]);
/* scan two bytes at a time for a 00 00 0x pattern */
1763 for(i=0; i+1<length; i+=2){
1764 if(src[i]) continue;
1765 if(i>0 && src[i-1]==0) i--;
1766 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1768 /* startcode, so we must be past the end */
1775 if(i>=length-1){ //no escaped 0
1776 *dst_length= length;
1777 *consumed= length+1; //+1 for the header
1781 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1782 dst= h->rbsp_buffer;
1788 //printf("decoding esc\n");
1791 //remove escapes (very rare 1:2^22)
1792 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1793 if(src[si+2]==3){ //escape
1798 }else //next start code
1802 dst[di++]= src[si++];
1806 *consumed= si + 1;//+1 for the header
1807 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1812 * identifies the exact end of the bitstream
1813 * @return the length of the trailing, or 0 if damaged
/* NOTE(review): body elided — only the declaration and a trace line of the
 * rbsp_stop_one_bit scan survive in this listing. */
1815 static int decode_rbsp_trailing(uint8_t *src){
1819 tprintf("rbsp trailing %X\n", v);
1829 * idct tranforms the 16 dc values and dequantize them.
1830 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform over the 16 luma DC coefficients (stored
 * at stride-16 raster positions inside the 16x16 DCTELEM block), followed
 * by dequantization with qmul (rounded >>8).
 * NOTE(review): listing elided — the loop headers, the temp[] stores of the
 * first (row) pass and the closing braces are missing. */
1832 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1835 int temp[16]; //FIXME check if this is a good idea
1836 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1837 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1839 //memset(block, 64, 2*256);
/* first (horizontal) butterfly pass into temp[] */
1842 const int offset= y_offset[i];
1843 const int z0= block[offset+stride*0] + block[offset+stride*4];
1844 const int z1= block[offset+stride*0] - block[offset+stride*4];
1845 const int z2= block[offset+stride*1] - block[offset+stride*5];
1846 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second (vertical) butterfly pass + dequant back into block[] */
1855 const int offset= x_offset[i];
1856 const int z0= temp[4*0+i] + temp[4*2+i];
1857 const int z1= temp[4*0+i] - temp[4*2+i];
1858 const int z2= temp[4*1+i] - temp[4*3+i];
1859 const int z3= temp[4*1+i] + temp[4*3+i];
1861 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1862 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1863 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1864 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1870 * dct tranforms the 16 dc values.
1871 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of the function above: 4x4 Hadamard transform of the
 * 16 luma DC values (encoder side), result halved (>>1).
 * NOTE(review): listing elided — loop headers, the temp[] stores of the
 * first pass and the closing braces are missing. */
1873 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1874 // const int qmul= dequant_coeff[qp][0];
1876 int temp[16]; //FIXME check if this is a good idea
1877 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1878 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1881 const int offset= y_offset[i];
1882 const int z0= block[offset+stride*0] + block[offset+stride*4];
1883 const int z1= block[offset+stride*0] - block[offset+stride*4];
1884 const int z2= block[offset+stride*1] - block[offset+stride*5];
1885 const int z3= block[offset+stride*1] + block[offset+stride*5];
1894 const int offset= x_offset[i];
1895 const int z0= temp[4*0+i] + temp[4*2+i];
1896 const int z1= temp[4*0+i] - temp[4*2+i];
1897 const int z2= temp[4*1+i] - temp[4*3+i];
1898 const int z3= temp[4*1+i] + temp[4*3+i];
1900 block[stride*0 +offset]= (z0 + z3)>>1;
1901 block[stride*2 +offset]= (z1 + z2)>>1;
1902 block[stride*8 +offset]= (z1 - z2)>>1;
1903 block[stride*10+offset]= (z0 - z3)>>1;
1911 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1912 const int stride= 16*2;
1913 const int xStride= 16;
1916 a= block[stride*0 + xStride*0];
1917 b= block[stride*0 + xStride*1];
1918 c= block[stride*1 + xStride*0];
1919 d= block[stride*1 + xStride*1];
1926 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1927 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1928 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1929 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1933 static void chroma_dc_dct_c(DCTELEM *block){
1934 const int stride= 16*2;
1935 const int xStride= 16;
1938 a= block[stride*0 + xStride*0];
1939 b= block[stride*0 + xStride*1];
1940 c= block[stride*1 + xStride*0];
1941 d= block[stride*1 + xStride*1];
1948 block[stride*0 + xStride*0]= (a+c);
1949 block[stride*0 + xStride*1]= (e+b);
1950 block[stride*1 + xStride*0]= (a-c);
1951 block[stride*1 + xStride*1]= (e-b);
1956 * gets the chroma qp.
1958 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1960 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
1963 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1964 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Encoder quantizer: dead-zone quantization of a 4x4 block with per-qscale
 * tables; the DC coefficient optionally uses a separate (luma QUANT_SHIFT-2
 * / chroma QUANT_SHIFT+1) scale.  Returns the index of the last non-zero
 * coefficient.
 * NOTE(review): listing heavily elided — the seperate_dc branch structure,
 * the scan loop header and the zeroing of small levels are missing. */
1965 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1967 const int * const quant_table= quant_coeff[qscale];
1968 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1969 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1970 const unsigned int threshold2= (threshold1<<1);
/* separate-DC path, luma variant (coarser shift) */
1976 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1977 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1978 const unsigned int dc_threshold2= (dc_threshold1<<1);
1980 int level= block[0]*quant_coeff[qscale+18][0];
1981 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1983 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1986 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1989 // last_non_zero = i;
/* separate-DC path, chroma variant (finer shift) */
1994 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1995 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1996 const unsigned int dc_threshold2= (dc_threshold1<<1);
1998 int level= block[0]*quant_table[0];
1999 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2001 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2004 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2007 // last_non_zero = i;
/* main zig-zag scan loop over the AC coefficients */
2020 const int j= scantable[i];
2021 int level= block[j]*quant_table[j];
2023 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2024 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2025 if(((unsigned)(level+threshold1))>threshold2){
2027 level= (bias + level)>>QUANT_SHIFT;
2030 level= (bias - level)>>QUANT_SHIFT;
2039 return last_non_zero;
/**
 * 4x4 "vertical" intra prediction: replicates the 4 pixels of the row above
 * into all 4 rows of the block (one aligned 32-bit copy per row).
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    ((uint32_t*)(src+0*stride))[0]= a;
    ((uint32_t*)(src+1*stride))[0]= a;
    ((uint32_t*)(src+2*stride))[0]= a;
    ((uint32_t*)(src+3*stride))[0]= a;
}
/**
 * 4x4 "horizontal" intra prediction: fills each row with the pixel to its
 * left, replicated 4 times via the 0x01010101 multiply.
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
}
/**
 * 4x4 DC intra prediction: fills the block with the rounded average of the
 * 4 top and 4 left neighbouring pixels ((sum+4)>>3).
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 left-DC intra prediction (top neighbours unavailable): fills the
 * block with the rounded average of the 4 left pixels ((sum+2)>>2).
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 top-DC intra prediction (left neighbours unavailable): fills the
 * block with the rounded average of the 4 top pixels ((sum+2)>>2).
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 DC intra prediction with no neighbours available: fills the block
 * with the mid-grey value 128.
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing: closing brace restored, stray
 * line-number prefixes removed.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
}
/* Helpers used by the pred4x4_* intra predictors below: load the neighbour
 * edge pixels into local ints — t0-t3 the row above, t4-t7 the top-right
 * extension, l0-l3 the column to the left.
 * NOTE(review): listing elided — the blank continuation line that
 * originally terminated each macro (e.g. original line 2098) is missing,
 * so each trailing backslash here runs into the next #define. */
2093 #define LOAD_TOP_RIGHT_EDGE\
2094 const int t4= topright[0];\
2095 const int t5= topright[1];\
2096 const int t6= topright[2];\
2097 const int t7= topright[3];\
2099 #define LOAD_LEFT_EDGE\
2100 const int l0= src[-1+0*stride];\
2101 const int l1= src[-1+1*stride];\
2102 const int l2= src[-1+2*stride];\
2103 const int l3= src[-1+3*stride];\
2105 #define LOAD_TOP_EDGE\
2106 const int t0= src[ 0-1*stride];\
2107 const int t1= src[ 1-1*stride];\
2108 const int t2= src[ 2-1*stride];\
2109 const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal-down-right intra prediction (H.264 mode 4): each
 * anti-diagonal gets one 3-tap filtered value from the left column (l0-l3),
 * the top-left corner (lt) and the top row (t0-t3).
 * topright is unused; kept for the common pred4x4 function signature.
 * Reconstructed from a corrupted listing; the LOAD_TOP_EDGE/LOAD_LEFT_EDGE
 * macros are expanded inline so the function is self-contained.
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
2134 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2139 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2141 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2144 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2148 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2151 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2153 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2154 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right prediction: 2-tap (a+b+1)>>1 averages on even
 * diagonals, 3-tap (1,2,1)/4 filters on odd diagonals. */
2157 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2158 const int lt= src[-1-1*stride];
2161 const __attribute__((unused)) int unu= l3; /* l3 is loaded by the edge macro but unused in this mode */
2164 src[1+2*stride]=(lt + t0 + 1)>>1;
2166 src[2+2*stride]=(t0 + t1 + 1)>>1;
2168 src[3+2*stride]=(t1 + t2 + 1)>>1;
2169 src[3+0*stride]=(t2 + t3 + 1)>>1;
2171 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2173 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2175 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2176 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2177 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2178 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left prediction: built from the top edge plus t4..t5
 * of the top-right edge; t6/t7 are loaded but t7 is unused. */
2181 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2184 const __attribute__((unused)) int unu= t7;
2186 src[0+0*stride]=(t0 + t1 + 1)>>1;
2188 src[0+2*stride]=(t1 + t2 + 1)>>1;
2190 src[1+2*stride]=(t2 + t3 + 1)>>1;
2192 src[2+2*stride]=(t3 + t4+ 1)>>1;
2193 src[3+2*stride]=(t4 + t5+ 1)>>1;
2194 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2196 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2198 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2200 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2201 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up prediction from the left edge; rows past the last
 * interpolated pair are padded with l3 (the bottom-left sample). */
2204 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2207 src[0+0*stride]=(l0 + l1 + 1)>>1;
2208 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2210 src[0+1*stride]=(l1 + l2 + 1)>>1;
2212 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2214 src[0+2*stride]=(l2 + l3 + 1)>>1;
2216 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; /* edge is extended by repeating l3 */
/* 4x4 horizontal-down prediction: mixes left, top-left, and top samples
 * along down-sloping diagonals; t3 is loaded but unused. */
2225 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2226 const int lt= src[-1-1*stride];
2229 const __attribute__((unused)) int unu= t3;
2232 src[2+1*stride]=(lt + l0 + 1)>>1;
2234 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2235 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2236 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2238 src[2+2*stride]=(l0 + l1 + 1)>>1;
2240 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2242 src[2+3*stride]=(l1 + l2+ 1)>>1;
2244 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2245 src[0+3*stride]=(l2 + l3 + 1)>>1;
2246 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical prediction: copy the 16 top neighbour samples
 * (read as four 32-bit words) down every row. */
2249 void ff_pred16x16_vertical_c(uint8_t *src, int stride){
2251 const uint32_t a= ((uint32_t*)(src-stride))[0];
2252 const uint32_t b= ((uint32_t*)(src-stride))[1];
2253 const uint32_t c= ((uint32_t*)(src-stride))[2];
2254 const uint32_t d= ((uint32_t*)(src-stride))[3];
2256 for(i=0; i<16; i++){
2257 ((uint32_t*)(src+i*stride))[0]= a;
2258 ((uint32_t*)(src+i*stride))[1]= b;
2259 ((uint32_t*)(src+i*stride))[2]= c;
2260 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal prediction: replicate each row's left neighbour
 * across the row via a byte-splat multiply. */
2264 void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
2267 for(i=0; i<16; i++){
2268 ((uint32_t*)(src+i*stride))[0]=
2269 ((uint32_t*)(src+i*stride))[1]=
2270 ((uint32_t*)(src+i*stride))[2]=
2271 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC prediction: average of the 16 left + 16 top neighbours,
 * rounded ((dc+16)>>5), splatted over the whole macroblock. */
2275 void ff_pred16x16_dc_c(uint8_t *src, int stride){
2279 dc+= src[-1+i*stride];
2286 dc= 0x01010101*((dc + 16)>>5);
2288 for(i=0; i<16; i++){
2289 ((uint32_t*)(src+i*stride))[0]=
2290 ((uint32_t*)(src+i*stride))[1]=
2291 ((uint32_t*)(src+i*stride))[2]=
2292 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 left neighbours ((dc+8)>>4). */
2296 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2300 dc+= src[-1+i*stride];
2303 dc= 0x01010101*((dc + 8)>>4);
2305 for(i=0; i<16; i++){
2306 ((uint32_t*)(src+i*stride))[0]=
2307 ((uint32_t*)(src+i*stride))[1]=
2308 ((uint32_t*)(src+i*stride))[2]=
2309 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 top neighbours ((dc+8)>>4). */
2313 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2319 dc= 0x01010101*((dc + 8)>>4);
2321 for(i=0; i<16; i++){
2322 ((uint32_t*)(src+i*stride))[0]=
2323 ((uint32_t*)(src+i*stride))[1]=
2324 ((uint32_t*)(src+i*stride))[2]=
2325 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours available: fill with 128. */
2329 void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
2332 for(i=0; i<16; i++){
2333 ((uint32_t*)(src+i*stride))[0]=
2334 ((uint32_t*)(src+i*stride))[1]=
2335 ((uint32_t*)(src+i*stride))[2]=
2336 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane prediction shared between H.264 and SVQ3.
 * Computes horizontal (H) and vertical (V) gradients from the border
 * samples, then writes a clipped linear ramp over the macroblock.
 * svq3 != 0 selects SVQ3's alternative H/V scaling (including an H/V
 * swap required for bit-exactness with that codec). */
2340 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2343 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clip-to-[0,255] lookup table */
2344 const uint8_t * const src0 = src+7-stride;
2345 const uint8_t *src1 = src+8*stride-1;
2346 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2347 int H = src0[1] - src0[-1];
2348 int V = src1[0] - src2[ 0];
2349 for(k=2; k<=8; ++k) {
2350 src1 += stride; src2 -= stride;
2351 H += k*(src0[k] - src0[-k]); /* distance-weighted gradient sums */
2352 V += k*(src1[0] - src2[ 0]);
2355 H = ( 5*(H/4) ) / 16; /* SVQ3 branch: truncating-division scaling */
2356 V = ( 5*(V/4) ) / 16;
2358 /* required for 100% accuracy */
2359 i = H; H = V; V = i;
2361 H = ( 5*H+32 ) >> 6; /* H.264 branch: rounded shift scaling */
2362 V = ( 5*V+32 ) >> 6;
2365 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); /* plane offset at top-left */
2366 for(j=16; j>0; --j) {
2369 for(i=-16; i<0; i+=4) { /* unrolled by 4 across each row */
2370 src[16+i] = cm[ (b ) >> 5 ];
2371 src[17+i] = cm[ (b+ H) >> 5 ];
2372 src[18+i] = cm[ (b+2*H) >> 5 ];
2373 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Public 16x16 plane prediction: the H.264 (non-SVQ3) variant. */
2380 void ff_pred16x16_plane_c(uint8_t *src, int stride){
2381 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical prediction: copy the 8 top samples down all rows. */
2384 void ff_pred8x8_vertical_c(uint8_t *src, int stride){
2386 const uint32_t a= ((uint32_t*)(src-stride))[0];
2387 const uint32_t b= ((uint32_t*)(src-stride))[1];
2390 ((uint32_t*)(src+i*stride))[0]= a;
2391 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 (chroma) horizontal prediction: replicate each row's left sample. */
2395 void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
2399 ((uint32_t*)(src+i*stride))[0]=
2400 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 (chroma) DC prediction with no neighbours: fill with 128. */
2404 void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
2408 ((uint32_t*)(src+i*stride))[0]=
2409 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 (chroma) DC prediction from the left edge only: separate DC for
 * the top 4 rows (dc0) and bottom 4 rows (dc2). */
2413 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2419 dc0+= src[-1+i*stride];
2420 dc2+= src[-1+(i+4)*stride];
2422 dc0= 0x01010101*((dc0 + 2)>>2);
2423 dc2= 0x01010101*((dc2 + 2)>>2);
2426 ((uint32_t*)(src+i*stride))[0]=
2427 ((uint32_t*)(src+i*stride))[1]= dc0;
2430 ((uint32_t*)(src+i*stride))[0]=
2431 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 (chroma) DC prediction from the top edge only: separate DC for
 * the left 4 columns (dc0) and right 4 columns (dc1). */
2435 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2441 dc0+= src[i-stride];
2442 dc1+= src[4+i-stride];
2444 dc0= 0x01010101*((dc0 + 2)>>2);
2445 dc1= 0x01010101*((dc1 + 2)>>2);
2448 ((uint32_t*)(src+i*stride))[0]= dc0;
2449 ((uint32_t*)(src+i*stride))[1]= dc1;
2452 ((uint32_t*)(src+i*stride))[0]= dc0;
2453 ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 (chroma) DC prediction with both edges: per-quadrant DC values —
 * dc0 = left+top (top-left quadrant), dc1 = top-right edge, dc2 =
 * bottom-left edge, dc3 = average of dc1+dc2 for the bottom-right. */
2458 void ff_pred8x8_dc_c(uint8_t *src, int stride){
2460 int dc0, dc1, dc2, dc3;
2464 dc0+= src[-1+i*stride] + src[i-stride];
2465 dc1+= src[4+i-stride];
2466 dc2+= src[-1+(i+4)*stride];
2468 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2469 dc0= 0x01010101*((dc0 + 4)>>3);
2470 dc1= 0x01010101*((dc1 + 2)>>2);
2471 dc2= 0x01010101*((dc2 + 2)>>2);
2474 ((uint32_t*)(src+i*stride))[0]= dc0;
2475 ((uint32_t*)(src+i*stride))[1]= dc1;
2478 ((uint32_t*)(src+i*stride))[0]= dc2;
2479 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 (chroma) plane prediction: same gradient-ramp scheme as the
 * 16x16 version but with 4-sample gradients and 17/32 scaling. */
2483 void ff_pred8x8_plane_c(uint8_t *src, int stride){
2486 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clip-to-[0,255] lookup table */
2487 const uint8_t * const src0 = src+3-stride;
2488 const uint8_t *src1 = src+4*stride-1;
2489 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2490 int H = src0[1] - src0[-1];
2491 int V = src1[0] - src2[ 0];
2492 for(k=2; k<=4; ++k) {
2493 src1 += stride; src2 -= stride;
2494 H += k*(src0[k] - src0[-k]);
2495 V += k*(src1[0] - src2[ 0]);
2497 H = ( 17*H+16 ) >> 5;
2498 V = ( 17*V+16 ) >> 5;
2500 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); /* plane offset at top-left */
2501 for(j=8; j>0; --j) {
2504 src[0] = cm[ (b ) >> 5 ];
2505 src[1] = cm[ (b+ H) >> 5 ];
2506 src[2] = cm[ (b+2*H) >> 5 ];
2507 src[3] = cm[ (b+3*H) >> 5 ];
2508 src[4] = cm[ (b+4*H) >> 5 ];
2509 src[5] = cm[ (b+5*H) >> 5 ];
2510 src[6] = cm[ (b+6*H) >> 5 ];
2511 src[7] = cm[ (b+7*H) >> 5 ];
/* Pixel accessor for the 8x8 luma prediction helpers below. */
2516 #define SRC(x,y) src[(x)+(y)*stride]
/* Per-row (1,2,1)/4 filtered left sample; body of a PL(y) helper macro
 * (its #define line is not visible in this view). */
2518 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Load filtered left-edge samples l0..l7; l0 substitutes SRC(-1,0)
 * for the top-left sample when it is unavailable. */
2519 #define PREDICT_8x8_LOAD_LEFT \
2520 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2521 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2522 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2523 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* Per-column (1,2,1)/4 filtered top sample; body of a PT(x) helper. */
2526 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load filtered top-edge samples t0..t7, substituting edge samples
 * when the top-left / top-right neighbours are unavailable. */
2527 #define PREDICT_8x8_LOAD_TOP \
2528 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2529 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2530 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2531 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2532 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* Body of a PTR(x) helper for the top-right samples. */
2535 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load filtered top-right samples t8..t15, or replicate SRC(7,-1)
 * when the top-right neighbour is unavailable. */
2536 #define PREDICT_8x8_LOAD_TOPRIGHT \
2537 int t8, t9, t10, t11, t12, t13, t14, t15; \
2538 if(has_topright) { \
2539 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2540 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2541 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
2543 #define PREDICT_8x8_LOAD_TOPLEFT \
2544 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill all 8 rows of the block with the 32-bit splat value v. */
2546 #define PREDICT_8x8_DC(v) \
2548 for( y = 0; y < 8; y++ ) { \
2549 ((uint32_t*)src)[0] = \
2550 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with 128. */
2554 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2556 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left edge only. */
2558 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2560 PREDICT_8x8_LOAD_LEFT;
2561 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top edge only. */
2564 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2566 PREDICT_8x8_LOAD_TOP;
2567 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction averaging both filtered edges ((sum+8)>>4). */
2570 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2572 PREDICT_8x8_LOAD_LEFT;
2573 PREDICT_8x8_LOAD_TOP;
2574 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2575 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with filtered left
 * sample l<y> via the local ROW() helper macro. */
2578 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2580 PREDICT_8x8_LOAD_LEFT;
2581 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2582 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2583 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: write the filtered top row once, then
 * copy it (as a 64-bit word) to the remaining 7 rows. */
2586 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2589 PREDICT_8x8_LOAD_TOP;
2598 for( y = 1; y < 8; y++ )
2599 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left prediction: each anti-diagonal takes a
 * (1,2,1)/4 filtered value from the filtered top/top-right samples. */
2601 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2603 PREDICT_8x8_LOAD_TOP;
2604 PREDICT_8x8_LOAD_TOPRIGHT;
2605 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2606 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2607 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2608 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2609 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2610 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2611 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2612 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2613 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2614 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2615 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2616 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2617 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2618 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2619 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right prediction: diagonals filtered from the
 * left, top-left, and top filtered edge samples. */
2621 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2623 PREDICT_8x8_LOAD_TOP;
2624 PREDICT_8x8_LOAD_LEFT;
2625 PREDICT_8x8_LOAD_TOPLEFT;
2626 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2627 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2628 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2629 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2630 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2631 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2632 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2633 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2634 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2635 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2636 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2637 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2638 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2639 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2640 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: 2-tap averages on even diagonals,
 * (1,2,1)/4 filters on odd diagonals, from left/top-left/top samples. */
2643 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2645 PREDICT_8x8_LOAD_TOP;
2646 PREDICT_8x8_LOAD_LEFT;
2647 PREDICT_8x8_LOAD_TOPLEFT;
2648 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2649 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2650 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2651 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2652 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2653 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2654 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2655 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2656 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2657 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2658 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2659 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2660 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2661 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2662 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2663 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2664 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2665 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2666 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2667 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2668 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2669 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: mirrors vertical-right with the
 * roles of the left and top edges swapped. */
2671 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2673 PREDICT_8x8_LOAD_TOP;
2674 PREDICT_8x8_LOAD_LEFT;
2675 PREDICT_8x8_LOAD_TOPLEFT;
2676 SRC(0,7)= (l6 + l7 + 1) >> 1;
2677 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2678 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2679 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2680 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2681 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2682 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2683 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2684 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2685 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2686 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2687 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2688 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2689 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2690 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2691 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2692 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2693 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2694 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2695 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2696 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2697 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: alternating 2-tap averages and
 * (1,2,1)/4 filters over the filtered top / top-right samples. */
2699 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2701 PREDICT_8x8_LOAD_TOP;
2702 PREDICT_8x8_LOAD_TOPRIGHT;
2703 SRC(0,0)= (t0 + t1 + 1) >> 1;
2704 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2705 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2706 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2707 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2708 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2709 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2710 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2711 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2712 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2713 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2714 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2715 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2716 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2717 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2718 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2719 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2720 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2721 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2722 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2723 SRC(7,6)= (t10 + t11 + 1) >> 1;
2724 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction from the filtered left edge; the
 * lower-right region past the interpolated samples is padded with l7. */
2726 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2728 PREDICT_8x8_LOAD_LEFT;
2729 SRC(0,0)= (l0 + l1 + 1) >> 1;
2730 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2731 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2732 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2733 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2734 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2735 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2736 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2737 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2738 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2739 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2740 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2741 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2742 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2743 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2744 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2745 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2746 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2748 #undef PREDICT_8x8_LOAD_LEFT
2749 #undef PREDICT_8x8_LOAD_TOP
2750 #undef PREDICT_8x8_LOAD_TOPLEFT
2751 #undef PREDICT_8x8_LOAD_TOPRIGHT
2752 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in one direction (list 0 or 1).
 * Luma uses quarter-pel MC (qpix_op indexed by the sub-pel phase),
 * chroma uses eighth-pel MC (chroma_op). Falls back to
 * ff_emulated_edge_mc when the reference block extends past the
 * picture edge. src_x/y_offset are in 8-pel units. */
2758 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2759 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2760 int src_x_offset, int src_y_offset,
2761 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2762 MpegEncContext * const s = &h->s;
2763 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; /* MV in quarter-pel units */
2764 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2765 const int luma_xy= (mx&3) + ((my&3)<<2); /* sub-pel phase selects one of 16 qpel functions */
2766 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2767 uint8_t * src_cb, * src_cr;
2768 int extra_width= h->emu_edge_width;
2769 int extra_height= h->emu_edge_height;
2771 const int full_mx= mx>>2; /* integer-pel position */
2772 const int full_my= my>>2;
2773 const int pic_width = 16*s->mb_width;
2774 const int pic_height = 16*s->mb_height >> MB_MBAFF; /* field height when MBAFF */
2776 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
2779 if(mx&7) extra_width -= 3; /* sub-pel interpolation needs 3 extra border pixels */
2780 if(my&7) extra_height -= 3;
2782 if( full_mx < 0-extra_width
2783 || full_my < 0-extra_height
2784 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2785 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2786 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2787 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2791 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2793 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); /* second half of a non-square partition */
2796 if(s->flags&CODEC_FLAG_GRAY) return; /* luma-only decoding requested */
2799 // chroma offset when predicting from a field of opposite parity
2800 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2801 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); /* NOTE(review): 'emu' is declared on a line not visible in this view */
2803 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2804 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2807 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2808 src_cb= s->edge_emu_buffer;
2810 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2813 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2814 src_cr= s->edge_emu_buffer;
2816 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: predict from list 0
 * with the "put" functions, then (for bi-prediction) average in list 1
 * with the "avg" functions. x/y_offset are in 4-pel luma units within
 * the macroblock. */
2819 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2820 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2821 int x_offset, int y_offset,
2822 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2823 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2824 int list0, int list1){
2825 MpegEncContext * const s = &h->s;
2826 qpel_mc_func *qpix_op= qpix_put; /* first prediction writes, second averages */
2827 h264_chroma_mc_func chroma_op= chroma_put;
2829 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2830 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2831 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2832 x_offset += 8*s->mb_x; /* convert to picture-relative offsets */
2833 y_offset += 8*(s->mb_y >> MB_MBAFF);
2836 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2837 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2838 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2839 qpix_op, chroma_op);
2842 chroma_op= chroma_avg; /* switch to averaging for the list-1 pass */
2846 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2847 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2848 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2849 qpix_op, chroma_op);
/* Weighted motion compensation for one partition. Bi-directional case:
 * predict list 0 into dest and list 1 into a scratch buffer, then blend
 * with either implicit weights (use_weight == 2) or explicit per-ref
 * weights/offsets. Uni-directional case: predict then apply the single
 * list's explicit weight in place. */
2853 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2854 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2855 int x_offset, int y_offset,
2856 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2857 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2858 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2859 int list0, int list1){
2860 MpegEncContext * const s = &h->s;
2862 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2863 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2864 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2865 x_offset += 8*s->mb_x; /* convert to picture-relative offsets */
2866 y_offset += 8*(s->mb_y >> MB_MBAFF);
2869 /* don't optimize for luma-only case, since B-frames usually
2870 * use implicit weights => chroma too. */
2871 uint8_t *tmp_cb = s->obmc_scratchpad;
2872 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2873 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2874 int refn0 = h->ref_cache[0][ scan8[n] ];
2875 int refn1 = h->ref_cache[1][ scan8[n] ];
2877 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2878 dest_y, dest_cb, dest_cr,
2879 x_offset, y_offset, qpix_put, chroma_put);
2880 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2881 tmp_y, tmp_cb, tmp_cr,
2882 x_offset, y_offset, qpix_put, chroma_put);
2884 if(h->use_weight == 2){ /* implicit weighting: weights sum to 64 */
2885 int weight0 = h->implicit_weight[refn0][refn1];
2886 int weight1 = 64 - weight0;
2887 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2888 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2889 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2891 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2892 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2893 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2894 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2895 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2896 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2897 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2898 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2899 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2902 int list = list1 ? 1 : 0; /* uni-directional: pick whichever list is used */
2903 int refn = h->ref_cache[list][ scan8[n] ];
2904 Picture *ref= &h->ref_list[list][refn];
2905 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2906 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2907 qpix_put, chroma_put);
2909 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2910 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2911 if(h->use_weight_chroma){
2912 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2913 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2914 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2915 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight == 1) or for
 * implicit bi-prediction whose weight differs from the plain 32/32
 * average (which mc_part_std handles more cheaply). */
2920 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2921 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2922 int x_offset, int y_offset,
2923 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2924 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2925 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2926 int list0, int list1){
2927 if((h->use_weight==2 && list0 && list1
2928 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2929 || h->use_weight==1)
2930 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2931 x_offset, y_offset, qpix_put, chroma_put,
2932 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2934 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2935 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2938 static inline void prefetch_motion(H264Context *h, int list){
2939 /* fetch pixels for estimated mv 4 macroblocks ahead
2940 * optimized for 64byte cache lines */
2941 MpegEncContext * const s = &h->s;
2942 const int refn = h->ref_cache[list][scan8[0]];
2944 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; /* integer-pel estimate */
2945 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2946 uint8_t **src= h->ref_list[list][refn].data;
2947 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2948 s->dsp.prefetch(src[0]+off, s->linesize, 4); /* luma: 4 lines */
2949 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2950 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); /* both chroma planes via the cb->cr plane distance */
/* High-level inter-prediction of one macroblock: walk the partition
 * tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and run motion
 * compensation for each partition, picking the qpel/chroma function
 * and weight table matching the partition size. */
2954 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2955 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2956 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2957 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2958 MpegEncContext * const s = &h->s;
2959 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2960 const int mb_type= s->current_picture.mb_type[mb_xy];
2962 assert(IS_INTER(mb_type));
2964 prefetch_motion(h, 0);
2966 if(IS_16X16(mb_type)){
2967 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2968 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2969 &weight_op[0], &weight_avg[0],
2970 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2971 }else if(IS_16X8(mb_type)){ /* two 16x8 halves: delta=8 selects the lower rows */
2972 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2973 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2974 &weight_op[1], &weight_avg[1],
2975 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2976 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2977 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2978 &weight_op[1], &weight_avg[1],
2979 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2980 }else if(IS_8X16(mb_type)){ /* two 8x16 halves: delta spans 8 luma rows */
2981 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2982 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2983 &weight_op[2], &weight_avg[2],
2984 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2985 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2986 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2987 &weight_op[2], &weight_avg[2],
2988 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2992 assert(IS_8X8(mb_type));
2995 const int sub_mb_type= h->sub_mb_type[i];
2997 int x_offset= (i&1)<<2; /* 8x8 block position inside the MB, in 4-pel units */
2998 int y_offset= (i&2)<<1;
3000 if(IS_SUB_8X8(sub_mb_type)){
3001 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3002 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3003 &weight_op[3], &weight_avg[3],
3004 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3005 }else if(IS_SUB_8X4(sub_mb_type)){
3006 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3007 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3008 &weight_op[4], &weight_avg[4],
3009 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3010 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3011 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3012 &weight_op[4], &weight_avg[4],
3013 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3014 }else if(IS_SUB_4X8(sub_mb_type)){
3015 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3016 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3017 &weight_op[5], &weight_avg[5],
3018 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3019 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3020 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3021 &weight_op[5], &weight_avg[5],
3022 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3025 assert(IS_SUB_4X4(sub_mb_type));
3027 int sub_x_offset= x_offset + 2*(j&1);
3028 int sub_y_offset= y_offset + (j&2);
3029 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3030 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3031 &weight_op[6], &weight_avg[6],
3032 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3038 prefetch_motion(h, 1);
/* One-time initialization of the CAVLC tables: coeff_token, total_zeros
 * and run_before VLCs for luma and chroma DC. Guarded by a static
 * 'done' flag so repeated decoder inits are cheap. */
3041 static void decode_init_vlc(){
3042 static int done = 0;
3048 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3049 &chroma_dc_coeff_token_len [0], 1, 1,
3050 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3053 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3054 &coeff_token_len [i][0], 1, 1,
3055 &coeff_token_bits[i][0], 1, 1, 1);
3059 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3060 &chroma_dc_total_zeros_len [i][0], 1, 1,
3061 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3063 for(i=0; i<15; i++){
3064 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3065 &total_zeros_len [i][0], 1, 1,
3066 &total_zeros_bits[i][0], 1, 1, 1);
3070 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3071 &run_len [i][0], 1, 1,
3072 &run_bits[i][0], 1, 1, 1);
3074 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, /* separate table for runs >= 7 */
3075 &run_len [6][0], 1, 1,
3076 &run_bits[6][0], 1, 1, 1);
3081 * Sets the intra prediction function pointers.
3083 static void init_pred_ptrs(H264Context *h){
3084 // MpegEncContext * const s = &h->s;
/* 4x4 luma intra prediction modes */
3086 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3087 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3088 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3089 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3090 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3091 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3092 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3093 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3094 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3095 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3096 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3097 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
/* 8x8 luma intra prediction modes (High profile transform-8x8) */
3099 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3100 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3101 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3102 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3103 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3104 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3105 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3106 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3107 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3108 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3109 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3110 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
/* 8x8 chroma intra prediction modes */
3112 h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
3113 h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
3114 h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
3115 h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
3116 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3117 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3118 h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
/* 16x16 luma intra prediction modes */
3120 h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
3121 h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
3122 h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
3123 h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
3124 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3125 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3126 h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
/**
 * Frees all per-stream tables allocated by alloc_tables().
 * av_freep() NULLs the pointers, so this is safe to call on a
 * partially-allocated context (e.g. from an allocation failure path).
 */
3129 static void free_tables(H264Context *h){
3130 av_freep(&h->intra4x4_pred_mode);
3131 av_freep(&h->chroma_pred_mode_table);
3132 av_freep(&h->cbp_table);
3133 av_freep(&h->mvd_table[0]);
3134 av_freep(&h->mvd_table[1]);
3135 av_freep(&h->direct_table);
3136 av_freep(&h->non_zero_count);
3137 av_freep(&h->slice_table_base);
3138 av_freep(&h->top_borders[1]);
3139 av_freep(&h->top_borders[0]);
// slice_table is an offset view into slice_table_base — just clear the alias
3140 h->slice_table= NULL;
3142 av_freep(&h->mb2b_xy);
3143 av_freep(&h->mb2b8_xy);
3145 av_freep(&h->s.obmc_scratchpad);
/**
 * Builds the 8x8 dequantization tables (one per scaling list, all 52 QPs)
 * from the PPS scaling matrices.  If both 8x8 scaling lists are identical,
 * list 1 aliases list 0's buffer instead of being recomputed.
 * NOTE(review): the inner for(x...) loop headers are missing from this
 * excerpt; the visible statements are the loop bodies.
 */
3148 static void init_dequant8_coeff_table(H264Context *h){
// if the platform IDCT expects transposed coefficients, store them transposed
3150 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3151 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3152 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3154 for(i=0; i<2; i++ ){
3155 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3156 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3160 for(q=0; q<52; q++){
// qp = 6*shift + idx; dequant scale doubles every 6 QP steps
3161 int shift = ff_div6[q];
3162 int idx = ff_rem6[q];
3164 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3165 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3166 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Builds the 4x4 dequantization tables (six scaling lists, 52 QPs each)
 * from the PPS scaling matrices.  Identical scaling lists share one
 * buffer (the memcmp check aliases list i to an earlier equal list j).
 * NOTE(review): the for(j...)/for(x...) loop headers are missing from
 * this excerpt.
 */
3171 static void init_dequant4_coeff_table(H264Context *h){
// transposed storage when the platform IDCT is not the C reference
3173 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3174 for(i=0; i<6; i++ ){
3175 h->dequant4_coeff[i] = h->dequant4_buffer[i];
3177 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3178 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3185 for(q=0; q<52; q++){
3186 int shift = ff_div6[q] + 2;
3187 int idx = ff_rem6[q];
3189 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3190 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3191 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initializes all dequant tables: always the 4x4 ones, the 8x8 ones only
 * when the PPS enables 8x8 transforms.  For lossless (transform bypass)
 * streams the QP==0 entries are forced to the neutral scale 1<<6 so the
 * dequant step becomes an identity.
 * NOTE(review): loop headers around the bypass assignments are missing
 * from this excerpt.
 */
3196 static void init_dequant_tables(H264Context *h){
3198 init_dequant4_coeff_table(h);
3199 if(h->pps.transform_8x8_mode)
3200 init_dequant8_coeff_table(h);
3201 if(h->sps.transform_bypass){
3204 h->dequant4_coeff[i][0][x] = 1<<6;
3205 if(h->pps.transform_8x8_mode)
3208 h->dequant8_coeff[i][0][x] = 1<<6;
3215  * needs width/height
/**
 * Allocates the per-stream decoding tables; requires s->mb_width /
 * s->mb_height to be known.  CHECKED_ALLOCZ jumps to an error label on
 * failure (label/return not visible in this excerpt).  Returns 0 on
 * success.
 */
3217 static int alloc_tables(H264Context *h){
3218 MpegEncContext * const s = &h->s;
// one extra MB row so code may address neighbours above row 0 safely
3219 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3222 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
3224 CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3225 CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3226 CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3227 CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3228 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC-only side tables
3230 if( h->pps.cabac ) {
3231 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3232 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3233 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3234 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// 0xFF marks "no slice"; slice_table points past the guard row/column
3237 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3238 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
// macroblock index -> motion-vector-block / 8x8-block index maps
3240 CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3241 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3242 for(y=0; y<s->mb_height; y++){
3243 for(x=0; x<s->mb_width; x++){
3244 const int mb_xy= x + y*s->mb_stride;
3245 const int b_xy = 4*x + 4*y*h->b_stride;
3246 const int b8_xy= 2*x + 2*y*h->b8_stride;
3248 h->mb2b_xy [mb_xy]= b_xy;
3249 h->mb2b8_xy[mb_xy]= b8_xy;
// allocated lazily in frame_start() because linesize is unknown here
3253 s->obmc_scratchpad = NULL;
3255 if(!h->dequant4_coeff[0])
3256 init_dequant_tables(h);
/**
 * One-time context setup shared by decoder (and encoder) init: copies
 * dimensions/codec id from the AVCodecContext and installs flat (all-16,
 * i.e. neutral) default scaling matrices in case no PPS overrides them.
 */
3264 static void common_init(H264Context *h){
3265 MpegEncContext * const s = &h->s;
3267 s->width = s->avctx->width;
3268 s->height = s->avctx->height;
3269 s->codec_id= s->avctx->codec->id;
// -1 = dequant tables not yet built for any PPS
3273 h->dequant_coeff_pps= -1;
3274 s->unrestricted_mv=1;
3275 s->decode=1; //FIXME
3277 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3278 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback: sets up the MpegEncContext defaults, pixel
 * format and bug workarounds.  The extradata check detects AVC-style
 * ("avcC", first byte == 1) extradata as opposed to Annex-B start codes.
 * NOTE(review): the rest of the function body is not visible in this
 * excerpt.
 */
3281 static int decode_init(AVCodecContext *avctx){
3282 H264Context *h= avctx->priv_data;
3283 MpegEncContext * const s = &h->s;
3285 MPV_decode_defaults(s);
3290 s->out_format = FMT_H264;
3291 s->workaround_bugs= avctx->workaround_bugs;
3294 //    s->decode_mb= ff_h263_decode_mb;
3296 avctx->pix_fmt= PIX_FMT_YUV420P;
3300 if(avctx->extradata_size > 0 && avctx->extradata &&
3301 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts MPV/error-resilience bookkeeping, precomputes
 * the per-block pixel offsets for both frame (4*linesize) and field
 * (8*linesize, entries 24+) scan orders, and lazily allocates the bipred
 * scratchpad.  Returns 0 on success (failure return not visible here).
 */
3311 static int frame_start(H264Context *h){
3312 MpegEncContext * const s = &h->s;
3315 if(MPV_frame_start(s, s->avctx) < 0)
3317 ff_er_frame_start(s);
3319 assert(s->linesize && s->uvlinesize);
// luma offsets: entries 0..15 frame order, 24..39 field (doubled stride) order
3321 for(i=0; i<16; i++){
3322 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3323 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma offsets (cb and cr share the same table entries)
3326 h->block_offset[16+i]=
3327 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3328 h->block_offset[24+16+i]=
3329 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3332 /* can't be in alloc_tables because linesize isn't known there.
3333  * FIXME: redo bipred weight to not require extra buffer? */
3334 if(!s->obmc_scratchpad)
3335 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3337 /* some macroblocks will be accessed before they're available */
3339 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3341 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the bottom row and right column of the just-decoded macroblock
 * into h->top_borders / h->left_border so the deblocking filter of the
 * neighbouring MBs can still read the pre-filter samples.
 */
3345 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3346 MpegEncContext * const s = &h->s;
3350 src_cb -= uvlinesize;
3351 src_cr -= uvlinesize;
3353 // There are two lines saved, the line above the top macroblock of a pair,
3354 // and the line above the bottom macroblock
// left_border[0] keeps the corner sample (top-left of this MB)
3355 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3356 for(i=1; i<17; i++){
3357 h->left_border[i]= src_y[15+i* linesize];
// save the 16 luma samples of the MB's last row (two 8-byte copies)
3360 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3361 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3363 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma corners and right columns (cb at +17, cr at +17+9)
3364 h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3365 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3367 h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3368 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3370 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3371 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg!=0) or copies back the saved border samples around the
 * current macroblock.  Intra prediction must see the pre-deblocking
 * neighbour samples; calling this with xchg=1 before prediction and
 * xchg=0 after restores the filtered picture.
 */
3375 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3376 MpegEncContext * const s = &h->s;
3379 int deblock_left = (s->mb_x > 0);
3380 int deblock_top =  (s->mb_y > 0);
// step back one row/column so index 0 addresses the border samples
3382 src_y  -=   linesize + 1;
3383 src_cb -= uvlinesize + 1;
3384 src_cr -= uvlinesize + 1;
3386 #define XCHG(a,b,t,xchg)\
// left column (skip row 0 when there is no MB above)
3393 for(i = !deblock_top; i<17; i++){
3394 XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
3399 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3400 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbour samples, needed for diagonal prediction modes
3401 if(s->mb_x+1 < s->mb_width){
3402 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3406 if(!(s->flags&CODEC_FLAG_GRAY)){
3408 for(i = !deblock_top; i<9; i++){
3409 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3410 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3415 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves borders for a whole
 * macroblock *pair* (two rows into top_borders[0]/[1], 34-entry left
 * column for luma, 18 per chroma plane).
 */
3420 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3421 MpegEncContext * const s = &h->s;
3424 src_y  -= 2 *   linesize;
3425 src_cb -= 2 * uvlinesize;
3426 src_cr -= 2 * uvlinesize;
3428 // There are two lines saved, the line above the top macroblock of a pair,
3429 // and the line above the bottom macroblock
3430 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3431 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3432 for(i=2; i<34; i++){
3433 h->left_border[i]= src_y[15+i*  linesize];
// last two luma rows of the pair go into the two top_borders planes
3436 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3437 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3438 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3439 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3441 if(!(s->flags&CODEC_FLAG_GRAY)){
3442 h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3443 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3444 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3445 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3446 for(i=2; i<18; i++){
3447 h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3448 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3450 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3451 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3452 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3453 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swaps/restores the saved borders of
 * a macroblock pair so intra prediction sees pre-deblocking neighbour
 * samples.  Note deblock_top requires mb_y > 1 here (a full pair above).
 */
3457 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3458 MpegEncContext * const s = &h->s;
3461 int deblock_left = (s->mb_x > 0);
3462 int deblock_top =  (s->mb_y > 1);
3464 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3466 src_y  -= 2 *   linesize + 1;
3467 src_cb -= 2 * uvlinesize + 1;
3468 src_cr -= 2 * uvlinesize + 1;
3470 #define XCHG(a,b,t,xchg)\
// left luma column of the pair; skip the two saved top rows if no pair above
3477 for(i = (!deblock_top)<<1; i<34; i++){
3478 XCHG(h->left_border[i     ], src_y [i*   linesize], temp8, xchg);
3483 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3484 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3485 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3486 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3487 if(s->mb_x+1 < s->mb_width){
3488 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3489 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3493 if(!(s->flags&CODEC_FLAG_GRAY)){
3495 for(i = (!deblock_top) << 1; i<18; i++){
3496 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
3497 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3501 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3502 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3503 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3504 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: prediction (intra or motion
 * compensation), residual IDCT+add, and deblocking.  Handles H.264 and
 * the SVQ3 variant, progressive and MBAFF frames.
 * NOTE(review): many interior lines (loop headers, braces, else arms)
 * are missing from this excerpt; comments describe the visible code only.
 */
3509 static void hl_decode_mb(H264Context *h){
3510 MpegEncContext * const s = &h->s;
3511 const int mb_x= s->mb_x;
3512 const int mb_y= s->mb_y;
3513 const int mb_xy= mb_x + mb_y*s->mb_stride;
3514 const int mb_type= s->current_picture.mb_type[mb_xy];
3515 uint8_t  *dest_y, *dest_cb, *dest_cr;
3516 int linesize, uvlinesize /*dct_offset*/;
3518 int *block_offset = &h->block_offset[0];
3519 const unsigned int bottom = mb_y & 1;
// lossless mode: QP 0 with the SPS bypass flag means no transform at all
3520 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3521 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3522 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers into the current picture planes
3527 dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3528 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3529 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3531 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
3532 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the stride and use the field block offsets
3535 linesize   = h->mb_linesize   = s->linesize * 2;
3536 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3537 block_offset = &h->block_offset[24];
3538 if(mb_y&1){ //FIXME move out of this func?
3539 dest_y -= s->linesize*15;
3540 dest_cb-= s->uvlinesize*7;
3541 dest_cr-= s->uvlinesize*7;
// MBAFF: rewrite ref indices so field refs distinguish top/bottom parity
3545 for(list=0; list<2; list++){
3546 if(!USES_LIST(mb_type, list))
3548 if(IS_16X16(mb_type)){
3549 int8_t *ref = &h->ref_cache[list][scan8[0]];
3550 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3552 for(i=0; i<16; i+=4){
3553 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3554 int ref = h->ref_cache[list][scan8[i]];
3556 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3562 linesize   = h->mb_linesize   = s->linesize;
3563 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3564 //        dct_offset = s->linesize * 16;
// pick the IDCT pair matching the transform size (or raw add for bypass)
3567 if(transform_bypass){
3569 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3570 }else if(IS_8x8DCT(mb_type)){
3571 idct_dc_add = s->dsp.h264_idct8_dc_add;
3572 idct_add = s->dsp.h264_idct8_add;
3574 idct_dc_add = s->dsp.h264_idct_dc_add;
3575 idct_add = s->dsp.h264_idct_add;
// MBAFF intra: swap in pre-deblock borders of the pair above
3578 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3579 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3580 int mbt_y = mb_y&~1;
3581 uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3582 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3583 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3584 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3587 if (IS_INTRA_PCM(mb_type)) {
3590 // The pixels are stored in h->mb array in the same order as levels,
3591 // copy them in output in the correct order.
3592 for(i=0; i<16; i++) {
3593 for (y=0; y<4; y++) {
3594 for (x=0; x<4; x++) {
3595 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3599 for(i=16; i<16+4; i++) {
3600 for (y=0; y<4; y++) {
3601 for (x=0; x<4; x++) {
3602 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3606 for(i=20; i<20+4; i++) {
3607 for (y=0; y<4; y++) {
3608 for (x=0; x<4; x++) {
3609 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3614 if(IS_INTRA(mb_type)){
3615 if(h->deblocking_filter && !FRAME_MBAFF)
3616 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3618 if(!(s->flags&CODEC_FLAG_GRAY)){
3619 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3620 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3623 if(IS_INTRA4x4(mb_type)){
3625 if(IS_8x8DCT(mb_type)){
// 8x8 intra: predict + add residual per 8x8 block
3626 for(i=0; i<16; i+=4){
3627 uint8_t * const ptr= dest_y + block_offset[i];
3628 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3629 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3630 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3631 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// DC-only blocks use the cheaper dc_add path
3633 if(nnz == 1 && h->mb[i*16])
3634 idct_dc_add(ptr, h->mb + i*16, linesize);
3636 idct_add(ptr, h->mb + i*16, linesize);
3640 for(i=0; i<16; i++){
3641 uint8_t * const ptr= dest_y + block_offset[i];
3643 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// diagonal modes need the top-right samples; if unavailable,
// replicate the rightmost top sample (spec-mandated padding)
3646 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3647 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3648 assert(mb_y || linesize <= block_offset[i]);
3649 if(!topright_avail){
3650 tr= ptr[3 - linesize]*0x01010101;
3651 topright= (uint8_t*) &tr;
3653 topright= ptr + 4 - linesize;
3657 h->pred4x4[ dir ](ptr, topright, linesize);
3658 nnz = h->non_zero_count_cache[ scan8[i] ];
3660 if(s->codec_id == CODEC_ID_H264){
3661 if(nnz == 1 && h->mb[i*16])
3662 idct_dc_add(ptr, h->mb + i*16, linesize);
3664 idct_add(ptr, h->mb + i*16, linesize);
3666 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: whole-MB prediction plus separate luma DC transform
3671 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3672 if(s->codec_id == CODEC_ID_H264){
3673 if(!transform_bypass)
3674 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3676 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3678 if(h->deblocking_filter && !FRAME_MBAFF)
3679 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3680 }else if(s->codec_id == CODEC_ID_H264){
// inter macroblock: motion compensation
3681 hl_motion(h, dest_y, dest_cb, dest_cr,
3682 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
3683 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
3684 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// luma residual for everything except intra4x4 (already added above)
3688 if(!IS_INTRA4x4(mb_type)){
3689 if(s->codec_id == CODEC_ID_H264){
3690 if(IS_INTRA16x16(mb_type)){
3691 for(i=0; i<16; i++){
3692 if(h->non_zero_count_cache[ scan8[i] ])
3693 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3694 else if(h->mb[i*16])
3695 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3698 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3699 for(i=0; i<16; i+=di){
3700 int nnz = h->non_zero_count_cache[ scan8[i] ];
3702 if(nnz==1 && h->mb[i*16])
3703 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3705 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3710 for(i=0; i<16; i++){
3711 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3712 uint8_t * const ptr= dest_y + block_offset[i];
3713 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: dequant the 2x2 chroma DC, then per-4x4 IDCT+add
3719 if(!(s->flags&CODEC_FLAG_GRAY)){
3720 uint8_t *dest[2] = {dest_cb, dest_cr};
3721 if(transform_bypass){
3722 idct_add = idct_dc_add = s->dsp.add_pixels4;
3724 idct_add = s->dsp.h264_idct_add;
3725 idct_dc_add = s->dsp.h264_idct_dc_add;
3726 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3727 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3729 if(s->codec_id == CODEC_ID_H264){
3730 for(i=16; i<16+8; i++){
3731 if(h->non_zero_count_cache[ scan8[i] ])
3732 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3733 else if(h->mb[i*16])
3734 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3737 for(i=16; i<16+8; i++){
3738 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3739 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3740 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: MBAFF filters a whole pair once the bottom MB is done
3746 if(h->deblocking_filter) {
3748 //FIXME try deblocking one mb at a time?
3749 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3750 const int mb_y= s->mb_y - 1;
3751 uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3752 const int mb_xy= mb_x + mb_y*s->mb_stride;
3753 const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3754 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3755 if (!bottom) return;
3756 pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3757 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3758 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3760 if(IS_INTRA(mb_type_top | mb_type_bottom))
3761 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3763 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3767 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3768 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3769 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3770 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3773 tprintf("call mbaff filter_mb\n");
3774 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3775 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3776 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3778 tprintf("call filter_mb\n");
3779 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3780 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3781 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3787  * fills the default_ref_list.
/**
 * Builds the default (pre-reordering) reference picture lists.
 * P slices: short-term refs by descending frame_num, then long-term refs.
 * B slices: short-term refs sorted by POC relative to the current picture
 * (L0 past-first, L1 future-first), then long-term refs; L1's first two
 * entries are swapped when L0 and L1 would be identical.
 */
3789 static int fill_default_ref_list(H264Context *h){
3790 MpegEncContext * const s = &h->s;
3792 int smallest_poc_greater_than_current = -1;
3793 Picture sorted_short_ref[32];
3795 if(h->slice_type==B_TYPE){
3799 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest POC above the last limit
3800 for(out_i=0; out_i<h->short_ref_count; out_i++){
3802 int best_poc=INT_MAX;
3804 for(i=0; i<h->short_ref_count; i++){
3805 const int poc= h->short_ref[i]->poc;
3806 if(poc > limit && poc < best_poc){
3812 assert(best_i != INT_MIN);
3815 sorted_short_ref[out_i]= *h->short_ref[best_i];
3816 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the "future" pictures start in the sorted array
3817 if (-1 == smallest_poc_greater_than_current) {
3818 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3819 smallest_poc_greater_than_current = out_i;
3825 if(s->picture_structure == PICT_FRAME){
3826 if(h->slice_type==B_TYPE){
3828 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3830 // find the largest poc
// L0 walks outward from the split point toward the past (step +1),
// L1 toward the future (step -1); when one side runs out, jump to
// the other side of the split
3831 for(list=0; list<2; list++){
3834 int step= list ? -1 : 1;
3836 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3837 while(j<0 || j>= h->short_ref_count){
3838 if(j != -99 && step == (list ? -1 : 1))
3841 j= smallest_poc_greater_than_current + (step>>1);
3843 if(sorted_short_ref[j].reference != 3) continue;
3844 h->default_ref_list[list][index  ]= sorted_short_ref[j];
3845 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// append long-term refs in ascending long-term index order
3848 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3849 if(h->long_ref[i] == NULL) continue;
3850 if(h->long_ref[i]->reference != 3) continue;
// NOTE(review): stray ';;' below — harmless empty statement
3852 h->default_ref_list[ list ][index  ]= *h->long_ref[i];
3853 h->default_ref_list[ list ][index++].pic_id= i;;
3856 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3857 // swap the two first elements of L1 when
3858 // L0 and L1 are identical
3859 Picture temp= h->default_ref_list[1][0];
3860 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3861 h->default_ref_list[1][1] = temp;
3864 if(index < h->ref_count[ list ])
3865 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P slice (frame): short-term refs in stored order, then long-term
3869 for(i=0; i<h->short_ref_count; i++){
3870 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3871 h->default_ref_list[0][index  ]= *h->short_ref[i];
3872 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3874 for(i = 0; i < 16; i++){
3875 if(h->long_ref[i] == NULL) continue;
3876 if(h->long_ref[i]->reference != 3) continue;
// NOTE(review): stray ';;' below — harmless empty statement
3877 h->default_ref_list[0][index  ]= *h->long_ref[i];
3878 h->default_ref_list[0][index++].pic_id= i;;
3880 if(index < h->ref_count[0])
3881 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3884 if(h->slice_type==B_TYPE){
3886 //FIXME second field balh
3890 for (i=0; i<h->ref_count[0]; i++) {
3891 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3893 if(h->slice_type==B_TYPE){
3894 for (i=0; i<h->ref_count[1]; i++) {
3895 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3902 static void print_short_term(H264Context *h);
3903 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies it:
 * starts from the default lists, then for each reordering command moves
 * the referenced picture to the current index, shifting the rest down.
 * Also replaces missing entries with the current picture and initializes
 * the direct-mode tables.  Returns 0 on success, -1 on bitstream errors.
 */
3905 static int decode_ref_pic_list_reordering(H264Context *h){
3906 MpegEncContext * const s = &h->s;
3909 print_short_term(h);
3911 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3913 for(list=0; list<2; list++){
3914 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
3916 if(get_bits1(&s->gb)){
// "pred" tracks picNumLXPred for differential pic_num coding
3917 int pred= h->curr_pic_num;
3919 for(index=0; ; index++){
3920 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3921 unsigned int pic_id;
3923 Picture *ref = NULL;
// idc 3 terminates the reordering loop
3925 if(reordering_of_pic_nums_idc==3)
3928 if(index >= h->ref_count[list]){
3929 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3933 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term ref addressed by abs_diff_pic_num
3934 if(reordering_of_pic_nums_idc<2){
3935 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3937 if(abs_diff_pic_num >= h->max_pic_num){
3938 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3942 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3943 else                                pred+= abs_diff_pic_num;
3944 pred &= h->max_pic_num - 1;
3946 for(i= h->short_ref_count-1; i>=0; i--){
3947 ref = h->short_ref[i];
3948 assert(ref->reference == 3);
3949 assert(!ref->long_ref);
3950 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3954 ref->pic_id= ref->frame_num;
// idc 2: long-term ref addressed by long_term_pic_idx
3956 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3958 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3961 ref = h->long_ref[pic_id];
3963 ref->pic_id= pic_id;
3964 assert(ref->reference == 3);
3965 assert(ref->long_ref);
3973 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3974 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// move the found picture to "index": shift the tail right by one
3976 for(i=index; i+1<h->ref_count[list]; i++){
3977 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3980 for(; i > index; i--){
3981 h->ref_list[list][i]= h->ref_list[list][i-1];
3983 h->ref_list[list][index]= *ref;
3986 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3992 if(h->slice_type!=B_TYPE) break;
// fill gaps (missing refs) with the current picture to avoid NULL derefs
3994 for(list=0; list<2; list++){
3995 for(index= 0; index < h->ref_count[list]; index++){
3996 if(!h->ref_list[list][index].data[0])
3997 h->ref_list[list][index]= s->current_picture;
3999 if(h->slice_type!=B_TYPE) break;
4002 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4003 direct_dist_scale_factor(h);
4004 direct_ref_list_init(h);
/**
 * For MBAFF decoding, derives field reference entries from each frame
 * reference: entries [16+2*i] / [16+2*i+1] are the top/bottom field views
 * (doubled linesize, bottom field offset by one line) and inherit the
 * frame's weighted-prediction parameters.
 * NOTE(review): interior loop headers and the field[0] setup lines are
 * missing from this excerpt.
 */
4008 static void fill_mbaff_ref_list(H264Context *h){
4010 for(list=0; list<2; list++){
4011 for(i=0; i<h->ref_count[list]; i++){
4012 Picture *frame = &h->ref_list[list][i];
4013 Picture *field = &h->ref_list[list][16+2*i];
4016 field[0].linesize[j] <<= 1;
4017 field[1] = field[0];
// bottom field starts one (frame) line below the top field
4019 field[1].data[j] += frame->linesize[j];
4021 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4022 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4024 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4025 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// duplicate implicit weights for the derived field entries (both axes)
4029 for(j=0; j<h->ref_count[1]; j++){
4030 for(i=0; i<h->ref_count[0]; i++)
4031 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4032 memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
4033 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, with defaults (weight = 1 << denom,
 * offset = 0) for entries whose flag is absent.  Sets h->use_weight /
 * h->use_weight_chroma when any entry differs from the default.
 * NOTE(review): some interior lines (loop headers, use_weight init) are
 * missing from this excerpt.
 */
4037 static int pred_weight_table(H264Context *h){
4038 MpegEncContext * const s = &h->s;
4040 int luma_def, chroma_def;
4043 h->use_weight_chroma= 0;
4044 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4045 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4046 luma_def = 1<<h->luma_log2_weight_denom;
4047 chroma_def = 1<<h->chroma_log2_weight_denom;
4049 for(list=0; list<2; list++){
4050 for(i=0; i<h->ref_count[list]; i++){
4051 int luma_weight_flag, chroma_weight_flag;
4053 luma_weight_flag= get_bits1(&s->gb);
4054 if(luma_weight_flag){
4055 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4056 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// any non-default pair enables weighted prediction
4057 if(   h->luma_weight[list][i] != luma_def
4058 || h->luma_offset[list][i] != 0)
4061 h->luma_weight[list][i]= luma_def;
4062 h->luma_offset[list][i]= 0;
4065 chroma_weight_flag= get_bits1(&s->gb);
4066 if(chroma_weight_flag){
4069 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4070 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4071 if(   h->chroma_weight[list][i][j] != chroma_def
4072 || h->chroma_offset[list][i][j] != 0)
4073 h->use_weight_chroma= 1;
4078 h->chroma_weight[list][i][j]= chroma_def;
4079 h->chroma_offset[list][i][j]= 0;
// list 1 only exists for B slices
4083 if(h->slice_type != B_TYPE) break;
4085 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derives the implicit bi-prediction weight table for B slices from POC
 * distances (per the temporal-distance formula).  Weights outside the
 * spec range, or equal POCs, fall back to the neutral 32/32 split.
 * NOTE(review): some interior lines (early-return body, td==0 branch)
 * are missing from this excerpt.
 */
4089 static void implicit_weight_table(H264Context *h){
4090 MpegEncContext * const s = &h->s;
4092 int cur_poc = s->current_picture_ptr->poc;
// one ref each side and current POC exactly midway: weighting is a no-op
4094 if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
4095 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4097 h->use_weight_chroma= 0;
4102 h->use_weight_chroma= 2;
4103 h->luma_log2_weight_denom= 5;
4104 h->chroma_log2_weight_denom= 5;
4106 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4107 int poc0 = h->ref_list[0][ref0].poc;
4108 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4109 int poc1 = h->ref_list[1][ref1].poc;
4110 int td = clip(poc1 - poc0, -128, 127);
4112 int tb = clip(cur_poc - poc0, -128, 127);
4113 int tx = (16384 + (FFABS(td) >> 1)) / td;
4114 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4115 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4116 h->implicit_weight[ref0][ref1] = 32;
4118 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4120 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the decoder's reference to @p pic, also detaching it from the
 * delayed-output bookkeeping so it is not displayed after removal.
 * NOTE(review): the body is truncated in this extract (lines missing after
 * the delayed_pic scan) — the actual unreference action is not visible here.
 */
4125 static inline void unreference_pic(H264Context *h, Picture *pic){
4128 if(pic == h->delayed_output_pic)
4131 for(i = 0; h->delayed_pic[i]; i++)
4132 if(pic == h->delayed_pic[i]){
4140 * instantaneous decoder refresh.
/*
 * Implements IDR semantics: every long-term and short-term reference
 * picture is unreferenced and both reference lists are emptied, so
 * decoding can restart cleanly from the IDR picture.
 * NOTE(review): extract has gaps; some braces are on missing lines.
 */
4142 static void idr(H264Context *h){
/* release all 16 possible long-term reference slots */
4145 for(i=0; i<16; i++){
4146 if (h->long_ref[i] != NULL) {
4147 unreference_pic(h, h->long_ref[i]);
4148 h->long_ref[i]= NULL;
4151 h->long_ref_count=0;
/* release all short-term references */
4153 for(i=0; i<h->short_ref_count; i++){
4154 unreference_pic(h, h->short_ref[i]);
4155 h->short_ref[i]= NULL;
4157 h->short_ref_count=0;
4160 /* forget old pics after a seek */
/*
 * AVCodecContext.flush() callback: clears the reference flag on every
 * picture still held for delayed output (and on the picture currently
 * being decoded) so the DPB state does not leak across a seek.
 * NOTE(review): extract has gaps; some closing braces are missing here.
 */
4161 static void flush_dpb(AVCodecContext *avctx){
4162 H264Context *h= avctx->priv_data;
4164 for(i=0; i<16; i++) {
4165 if(h->delayed_pic[i])
4166 h->delayed_pic[i]->reference= 0;
4167 h->delayed_pic[i]= NULL;
4169 if(h->delayed_output_pic)
4170 h->delayed_output_pic->reference= 0;
4171 h->delayed_output_pic= NULL;
4173 if(h->s.current_picture_ptr)
4174 h->s.current_picture_ptr->reference= 0;
4179 * @return the removed picture or NULL if an error occurs
/*
 * Removes the short-term reference picture with the given frame_num from
 * h->short_ref[], compacting the array. The caller owns unreferencing.
 * NOTE(review): the return statements are on lines missing from this extract.
 */
4181 static Picture * remove_short(H264Context *h, int frame_num){
4182 MpegEncContext * const s = &h->s;
4185 if(s->avctx->debug&FF_DEBUG_MMCO)
4186 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4188 for(i=0; i<h->short_ref_count; i++){
4189 Picture *pic= h->short_ref[i];
4190 if(s->avctx->debug&FF_DEBUG_MMCO)
4191 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4192 if(pic->frame_num == frame_num){
/* clear the slot, then shift the remaining entries down to keep the list dense */
4193 h->short_ref[i]= NULL;
4194 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4195 h->short_ref_count--;
4204 * @return the removed picture or NULL if an error occurs
/*
 * Removes (and returns) the long-term reference stored at index @p i;
 * the count is only decremented when the slot was actually occupied.
 * NOTE(review): declaration of `pic` and the return are on missing lines.
 */
4206 static Picture * remove_long(H264Context *h, int i){
4209 pic= h->long_ref[i];
4210 h->long_ref[i]= NULL;
4211 if(pic) h->long_ref_count--;
4217 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO is set. */
4219 static void print_short_term(H264Context *h) {
4221 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4222 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4223 for(i=0; i<h->short_ref_count; i++){
4224 Picture *pic= h->short_ref[i];
4225 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4231 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is set.
 * NOTE(review): the NULL-slot guard between lines 4238 and 4240 is missing
 * from this extract. */
4233 static void print_long_term(H264Context *h) {
4235 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4236 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4237 for(i = 0; i < 16; i++){
4238 Picture *pic= h->long_ref[i];
4240 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4247 * Executes the reference picture marking (memory management control operations).
/*
 * Applies the MMCO list parsed from the slice header to the DPB:
 * moving pictures between the short-term and long-term lists,
 * unreferencing them, resetting the buffer, and finally inserting the
 * current picture as the newest short-term reference (unless it was
 * marked long-term by an MMCO_LONG operation).
 * NOTE(review): extract has gaps; break statements, some case labels and
 * closing braces are on missing lines.
 */
4249 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4250 MpegEncContext * const s = &h->s;
4252 int current_is_long=0;
4255 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4256 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4258 for(i=0; i<mmco_count; i++){
4259 if(s->avctx->debug&FF_DEBUG_MMCO)
4260 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4262 switch(mmco[i].opcode){
4263 case MMCO_SHORT2UNUSED:
4264 pic= remove_short(h, mmco[i].short_frame_num);
4266 unreference_pic(h, pic);
4267 else if(s->avctx->debug&FF_DEBUG_MMCO)
4268 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4270 case MMCO_SHORT2LONG:
/* evict whatever occupied the target long-term slot first */
4271 pic= remove_long(h, mmco[i].long_index);
4272 if(pic) unreference_pic(h, pic);
4274 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4275 if (h->long_ref[ mmco[i].long_index ]){
4276 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4277 h->long_ref_count++;
4280 case MMCO_LONG2UNUSED:
4281 pic= remove_long(h, mmco[i].long_index);
4283 unreference_pic(h, pic);
4284 else if(s->avctx->debug&FF_DEBUG_MMCO)
4285 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (case MMCO_LONG, label on a missing line): mark the current picture long-term */
4288 pic= remove_long(h, mmco[i].long_index);
4289 if(pic) unreference_pic(h, pic);
4291 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4292 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4293 h->long_ref_count++;
4297 case MMCO_SET_MAX_LONG:
4298 assert(mmco[i].long_index <= 16);
4299 // just remove the long term which index is greater than new max
4300 for(j = mmco[i].long_index; j<16; j++){
4301 pic = remove_long(h, j);
4302 if (pic) unreference_pic(h, pic);
/* (case MMCO_RESET, label on a missing line): empty both reference lists */
4306 while(h->short_ref_count){
4307 pic= remove_short(h, h->short_ref[0]->frame_num);
4308 if(pic) unreference_pic(h, pic);
4310 for(j = 0; j < 16; j++) {
4311 pic= remove_long(h, j);
4312 if(pic) unreference_pic(h, pic);
/* sliding-window: current picture becomes the newest short-term reference */
4319 if(!current_is_long){
/* a duplicate frame_num already in the list indicates a broken stream */
4320 pic= remove_short(h, s->current_picture_ptr->frame_num);
4322 unreference_pic(h, pic);
4323 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4326 if(h->short_ref_count)
4327 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4329 h->short_ref[0]= s->current_picture_ptr;
4330 h->short_ref[0]->long_ref=0;
4331 h->short_ref_count++;
4334 print_short_term(h);
/*
 * Parses dec_ref_pic_marking() from the slice header and fills h->mmco[].
 * For IDR slices it reads no_output_of_prior_pics/long_term_reference
 * flags; otherwise it reads the adaptive MMCO list, or synthesizes a
 * sliding-window MMCO_SHORT2UNUSED when the DPB is already full.
 * NOTE(review): extract has gaps; `return -1` paths and brace lines are
 * missing. Also note the stray double semicolon on line 4355.
 */
4339 static int decode_ref_pic_marking(H264Context *h){
4340 MpegEncContext * const s = &h->s;
4343 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* get_bits1() - 1 maps the flag {0,1} onto {-1,0} */
4344 s->broken_link= get_bits1(&s->gb) -1;
4345 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4346 if(h->mmco[0].long_index == -1)
4349 h->mmco[0].opcode= MMCO_LONG;
4353 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4354 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4355 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4357 h->mmco[i].opcode= opcode;
4358 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums is relative to the current frame_num, mod max_frame_num */
4359 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4360 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4361 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4365 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4366 unsigned int long_index= get_ue_golomb(&s->gb);
4367 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
4368 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4371 h->mmco[i].long_index= long_index;
4374 if(opcode > (unsigned)MMCO_LONG){
4375 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4378 if(opcode == MMCO_END)
/* implicit (sliding-window) marking mode below */
4383 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4385 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
/* DPB full: drop the oldest short-term reference to make room */
4386 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4387 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/*
 * Computes the picture order count (POC) of the current picture for all
 * three poc_type modes of the SPS (8.2.1 in the spec) and stores the
 * per-field and frame POCs in the current Picture.
 * NOTE(review): extract has gaps; declarations (field_poc, poc), some
 * else-branches and the final return are on missing lines.
 */
4397 static int init_poc(H264Context *h){
4398 MpegEncContext * const s = &h->s;
4399 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* frame_num_offset accumulates every time frame_num wraps around */
4402 if(h->nal_unit_type == NAL_IDR_SLICE){
4403 h->frame_num_offset= 0;
4405 if(h->frame_num < h->prev_frame_num)
4406 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4408 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: POC msb derived from the transmitted lsb and the previous picture */
4411 if(h->sps.poc_type==0){
4412 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4414 if(h->nal_unit_type == NAL_IDR_SLICE){
4419 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4420 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4421 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4422 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4424 h->poc_msb = h->prev_poc_msb;
4425 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4427 field_poc[1] = h->poc_msb + h->poc_lsb;
4428 if(s->picture_structure == PICT_FRAME)
4429 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num and the SPS offset-for-ref-frame cycle */
4430 }else if(h->sps.poc_type==1){
4431 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4434 if(h->sps.poc_cycle_length != 0)
4435 abs_frame_num = h->frame_num_offset + h->frame_num;
4439 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4442 expected_delta_per_poc_cycle = 0;
4443 for(i=0; i < h->sps.poc_cycle_length; i++)
4444 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4446 if(abs_frame_num > 0){
4447 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4448 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4450 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4451 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4452 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4456 if(h->nal_ref_idc == 0)
4457 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4459 field_poc[0] = expectedpoc + h->delta_poc[0];
4460 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4462 if(s->picture_structure == PICT_FRAME)
4463 field_poc[1] += h->delta_poc[1];
/* poc_type 2 (else branch on a missing line): POC follows decoding order */
4466 if(h->nal_unit_type == NAL_IDR_SLICE){
/* non-reference pictures get odd POC values just below their successor */
4469 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4470 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4476 if(s->picture_structure != PICT_BOTTOM_FIELD)
4477 s->current_picture_ptr->field_poc[0]= field_poc[0];
4478 if(s->picture_structure != PICT_TOP_FIELD)
4479 s->current_picture_ptr->field_poc[1]= field_poc[1];
4480 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4481 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4487 * decodes a slice header.
4488 * this will also call MPV_common_init() and frame_start() as needed
/*
 * Parses the full slice header: slice type, PPS/SPS selection, picture
 * geometry (including context (re)initialization on the first slice),
 * frame_num/POC fields, reference counts and list reordering, weighted
 * prediction tables, ref pic marking, CABAC init, QP and deblocking
 * parameters. Returns 0 on success, negative on error (error paths are
 * largely on lines missing from this extract).
 * NOTE(review): this extract has gaps (discontinuous in-text numbering);
 * many `return -1` branches, braces and declarations are missing.
 */
4490 static int decode_slice_header(H264Context *h){
4491 MpegEncContext * const s = &h->s;
4492 unsigned int first_mb_in_slice;
4493 unsigned int pps_id;
4494 int num_ref_idx_active_override_flag;
4495 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4496 unsigned int slice_type, tmp;
4497 int default_ref_list_done = 0;
4499 s->current_picture.reference= h->nal_ref_idc != 0;
4500 s->dropable= h->nal_ref_idc == 0;
4502 first_mb_in_slice= get_ue_golomb(&s->gb);
4504 slice_type= get_ue_golomb(&s->gb);
4506 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type values 5..9 mean "all slices in this picture have this type" */
4511 h->slice_type_fixed=1;
4513 h->slice_type_fixed=0;
4515 slice_type= slice_type_map[ slice_type ];
4516 if (slice_type == I_TYPE
4517 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4518 default_ref_list_done = 1;
4520 h->slice_type= slice_type;
4522 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4524 pps_id= get_ue_golomb(&s->gb);
4525 if(pps_id>=MAX_PPS_COUNT){
4526 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4529 h->pps= h->pps_buffer[pps_id];
/* slice_group_count==0 means the PPS slot was never filled */
4530 if(h->pps.slice_group_count == 0){
4531 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4535 h->sps= h->sps_buffer[ h->pps.sps_id ];
4536 if(h->sps.log2_max_frame_num == 0){
4537 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
/* dequant tables depend on the PPS scaling lists; rebuild on PPS change */
4541 if(h->dequant_coeff_pps != pps_id){
4542 h->dequant_coeff_pps = pps_id;
4543 init_dequant_tables(h);
4546 s->mb_width= h->sps.mb_width;
4547 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4549 h->b_stride= s->mb_width*4;
4550 h->b8_stride= s->mb_width*2;
4552 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4553 if(h->sps.frame_mbs_only_flag)
4554 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4556 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
/* mid-stream resolution change is not supported */
4558 if (s->context_initialized
4559 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4563 if (!s->context_initialized) {
4564 if (MPV_common_init(s) < 0)
/* scan tables must be permuted to match the IDCT's output ordering */
4567 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4568 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4569 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4572 for(i=0; i<16; i++){
4573 #define T(x) (x>>2) | ((x<<2) & 0xF)
4574 h->zigzag_scan[i] = T(zigzag_scan[i]);
4575 h-> field_scan[i] = T( field_scan[i]);
4579 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4580 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4581 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4582 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4583 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4586 for(i=0; i<64; i++){
4587 #define T(x) (x>>3) | ((x&7)<<3)
4588 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4589 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4590 h->field_scan8x8[i] = T(field_scan8x8[i]);
4591 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* lossless (transform-bypass) blocks at qp 0 must use unpermuted scans */
4595 if(h->sps.transform_bypass){ //FIXME same ugly
4596 h->zigzag_scan_q0 = zigzag_scan;
4597 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4598 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4599 h->field_scan_q0 = field_scan;
4600 h->field_scan8x8_q0 = field_scan8x8;
4601 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4603 h->zigzag_scan_q0 = h->zigzag_scan;
4604 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4605 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4606 h->field_scan_q0 = h->field_scan;
4607 h->field_scan8x8_q0 = h->field_scan8x8;
4608 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4613 s->avctx->width = s->width;
4614 s->avctx->height = s->height;
4615 s->avctx->sample_aspect_ratio= h->sps.sar;
4616 if(!s->avctx->sample_aspect_ratio.den)
4617 s->avctx->sample_aspect_ratio.den = 1;
4619 if(h->sps.timing_info_present_flag){
4620 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around a timing bug in old x264 encoder builds */
4621 if(h->x264_build > 0 && h->x264_build < 44)
4622 s->avctx->time_base.den *= 2;
4623 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4624 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4628 if(h->slice_num == 0){
4629 if(frame_start(h) < 0)
4633 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4634 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4637 h->mb_aff_frame = 0;
4638 if(h->sps.frame_mbs_only_flag){
4639 s->picture_structure= PICT_FRAME;
4641 if(get_bits1(&s->gb)) { //field_pic_flag
4642 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4643 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4645 s->picture_structure= PICT_FRAME;
4646 h->mb_aff_frame = h->sps.mb_aff;
4649 assert(s->mb_num == s->mb_width * s->mb_height);
/* in MBAFF the address counts macroblock pairs, hence the shift */
4650 if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
4651 first_mb_in_slice >= s->mb_num){
4652 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4655 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4656 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4657 assert(s->mb_y < s->mb_height);
4659 if(s->picture_structure==PICT_FRAME){
4660 h->curr_pic_num= h->frame_num;
4661 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4663 h->curr_pic_num= 2*h->frame_num;
4664 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4667 if(h->nal_unit_type == NAL_IDR_SLICE){
4668 get_ue_golomb(&s->gb); /* idr_pic_id */
4671 if(h->sps.poc_type==0){
4672 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4674 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4675 h->delta_poc_bottom= get_se_golomb(&s->gb);
4679 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4680 h->delta_poc[0]= get_se_golomb(&s->gb);
4682 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4683 h->delta_poc[1]= get_se_golomb(&s->gb);
4688 if(h->pps.redundant_pic_cnt_present){
4689 h->redundant_pic_count= get_ue_golomb(&s->gb);
4692 //set defaults, might be overriden a few line later
4693 h->ref_count[0]= h->pps.ref_count[0];
4694 h->ref_count[1]= h->pps.ref_count[1];
4696 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4697 if(h->slice_type == B_TYPE){
4698 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4699 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4700 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4702 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4704 if(num_ref_idx_active_override_flag){
4705 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4706 if(h->slice_type==B_TYPE)
4707 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4709 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4710 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4711 h->ref_count[0]= h->ref_count[1]= 1;
4717 if(!default_ref_list_done){
4718 fill_default_ref_list(h);
4721 if(decode_ref_pic_list_reordering(h) < 0)
4724 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4725 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4726 pred_weight_table(h);
4727 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4728 implicit_weight_table(h);
4732 if(s->current_picture.reference)
4733 decode_ref_pic_marking(h);
4736 fill_mbaff_ref_list(h);
4738 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4739 tmp = get_ue_golomb(&s->gb);
4741 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4744 h->cabac_init_idc= tmp;
4747 h->last_qscale_diff = 0;
4748 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4750 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4754 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4755 //FIXME qscale / qp ... stuff
4756 if(h->slice_type == SP_TYPE){
4757 get_bits1(&s->gb); /* sp_for_switch_flag */
4759 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4760 get_se_golomb(&s->gb); /* slice_qs_delta */
4763 h->deblocking_filter = 1;
4764 h->slice_alpha_c0_offset = 0;
4765 h->slice_beta_offset = 0;
4766 if( h->pps.deblocking_filter_parameters_present ) {
4767 tmp= get_ue_golomb(&s->gb);
4769 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc 0 means "filter on", 1 means "off" -> invert for internal flag */
4772 h->deblocking_filter= tmp;
4773 if(h->deblocking_filter < 2)
4774 h->deblocking_filter^= 1; // 1<->0
4776 if( h->deblocking_filter ) {
4777 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4778 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4781 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4782 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4783 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4784 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4785 h->deblocking_filter= 0;
/* NOTE(review): the `?` below is a literal placeholder (the bit count for
 * slice_group_change_cycle was never filled in) — presumably this line is
 * inside a disabled #if block in the full file; confirm before enabling. */
4788 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4789 slice_group_change_cycle= get_bits(&s->gb, ?);
4794 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4795 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4797 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4798 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4800 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4802 av_get_pict_type_char(h->slice_type),
4803 pps_id, h->frame_num,
4804 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4805 h->ref_count[0], h->ref_count[1],
4807 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4809 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* CODEC_FLAG2_FAST: cheaper 2-tap qpel for non-reference frames */
4813 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
4814 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4815 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4817 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4818 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/*
 * Reads the CAVLC level_prefix: the number of leading zero bits before the
 * first 1 in the cached bitstream window (computed via av_log2 instead of
 * bit-by-bit reading). Skips the consumed bits and returns the prefix
 * length (return statement is on a line missing from this extract).
 */
4831 OPEN_READER(re, gb);
4832 UPDATE_CACHE(re, gb);
4833 buf=GET_CACHE(re, gb);
/* log = position of the terminating 1-bit, counted from the MSB side */
4835 log= 32 - av_log2(buf);
4837 print_bin(buf>>(32-log), log);
4838 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4841 LAST_SKIP_BITS(re, gb, log);
4842 CLOSE_READER(re, gb);
/*
 * Returns whether an 8x8 transform may be used for the current macroblock:
 * disallowed when any sub-partition is smaller than 8x8, or when a direct
 * sub-partition exists without direct_8x8_inference_flag.
 * NOTE(review): loop header and return statements are on missing lines.
 */
4847 static inline int get_dct8x8_allowed(H264Context *h){
4850 if(!IS_SUB_8X8(h->sub_mb_type[i])
4851 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4858 * decodes a residual block.
4859 * @param n block index
4860 * @param scantable scantable
4861 * @param max_coeff number of coefficients in the block
4862 * @return <0 if an error occured
/*
 * CAVLC residual decoding (spec 9.2): coeff_token -> trailing ones ->
 * level prefixes/suffixes -> total_zeros -> run_before, then writes the
 * levels into `block` through `scantable`, optionally dequantizing with
 * `qmul`. NOTE(review): this extract has gaps; early-return paths, some
 * else-branches and the level[] declaration are on missing lines.
 */
4864 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4865 MpegEncContext * const s = &h->s;
4866 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4868 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4870 //FIXME put trailing_onex into the context
/* coeff_token VLC choice depends on the predicted non-zero count of neighbours */
4872 if(n == CHROMA_DC_BLOCK_INDEX){
4873 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4874 total_coeff= coeff_token>>2;
4876 if(n == LUMA_DC_BLOCK_INDEX){
4877 total_coeff= pred_non_zero_count(h, 0);
4878 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4879 total_coeff= coeff_token>>2;
4881 total_coeff= pred_non_zero_count(h, n);
4882 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4883 total_coeff= coeff_token>>2;
4884 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4888 //FIXME set last_non_zero?
4892 if(total_coeff > (unsigned)max_coeff) {
4893 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low two bits of coeff_token carry the trailing-ones count (0..3) */
4897 trailing_ones= coeff_token&3;
4898 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4899 assert(total_coeff<=16);
/* trailing ones are +-1, coded with a single sign bit each */
4901 for(i=0; i<trailing_ones; i++){
4902 level[i]= 1 - 2*get_bits1(gb);
4906 int level_code, mask;
4907 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4908 int prefix= get_level_prefix(gb);
4910 //first coefficient has suffix_length equal to 0 or 1
4911 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4913 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4915 level_code= (prefix<<suffix_length); //part
4916 }else if(prefix==14){
4918 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4920 level_code= prefix + get_bits(gb, 4); //part
4921 }else if(prefix==15){
4922 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4923 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4925 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* with <3 trailing ones, |level|==1 is impossible for the first coeff */
4929 if(trailing_ones < 3) level_code += 2;
/* map the unsigned code to a signed level: even -> positive, odd -> negative */
4934 mask= -(level_code&1);
4935 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4938 //remaining coefficients have suffix_length > 0
4939 for(;i<total_coeff;i++) {
4940 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4941 prefix = get_level_prefix(gb);
4943 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4944 }else if(prefix==15){
4945 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4947 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4950 mask= -(level_code&1);
4951 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adaptively grow the suffix length as levels get larger */
4952 if(level_code > suffix_limit[suffix_length])
/* total_zeros only coded when some zeros can actually occur */
4957 if(total_coeff == max_coeff)
4960 if(n == CHROMA_DC_BLOCK_INDEX)
4961 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4963 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place levels from the highest-frequency coefficient downwards */
4966 coeff_num = zeros_left + total_coeff - 1;
4967 j = scantable[coeff_num];
/* (qmul == NULL path, branch condition on a missing line): raw levels */
4969 block[j] = level[0];
4970 for(i=1;i<total_coeff;i++) {
4973 else if(zeros_left < 7){
4974 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4976 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4978 zeros_left -= run_before;
4979 coeff_num -= 1 + run_before;
4980 j= scantable[ coeff_num ];
/* (qmul != NULL path): dequantize while storing, rounding with +32 >> 6 */
4985 block[j] = (level[0] * qmul[j] + 32)>>6;
4986 for(i=1;i<total_coeff;i++) {
4989 else if(zeros_left < 7){
4990 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4992 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4994 zeros_left -= run_before;
4995 coeff_num -= 1 + run_before;
4996 j= scantable[ coeff_num ];
4998 block[j]= (level[i] * qmul[j] + 32)>>6;
/* negative zeros_left means the run_before values overran the block */
5003 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/*
 * For a skipped MBAFF pair: predicts mb_field_decoding_flag from the left
 * neighbour if it is in the same slice, else from the top neighbour, else
 * defaults to frame (0), per the spec's inference rule.
 */
5010 static void predict_field_decoding_flag(H264Context *h){
5011 MpegEncContext * const s = &h->s;
5012 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5013 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5014 ? s->current_picture.mb_type[mb_xy-1]
5015 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5016 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* : 0 (the frame-default fallback) is on a line missing from this extract */
5018 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5022 * decodes a P_SKIP or B_SKIP macroblock
/*
 * Reconstructs a skipped macroblock: clears the coefficient caches, builds
 * a synthetic mb_type, derives the skip motion (direct prediction for
 * B_SKIP, pskip median prediction for P_SKIP), and writes back motion and
 * per-MB tables. NOTE(review): extract has gaps; mb_type initialization,
 * some braces and the B/P else-split are on missing lines.
 */
5024 static void decode_mb_skip(H264Context *h){
5025 MpegEncContext * const s = &h->s;
5026 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* a skipped MB has no residual anywhere */
5029 memset(h->non_zero_count[mb_xy], 0, 16);
5030 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5033 mb_type|= MB_TYPE_INTERLACED;
5035 if( h->slice_type == B_TYPE )
5037 // just for fill_caches. pred_direct_motion will set the real mb_type
5038 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5040 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5041 pred_direct_motion(h, &mb_type);
5042 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: 16x16 list-0 prediction with the pskip motion predictor */
5047 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5049 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5050 pred_pskip_motion(h, &mx, &my);
5051 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5052 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5055 write_back_motion(h, mb_type);
5056 s->current_picture.mb_type[mb_xy]= mb_type;
5057 s->current_picture.qscale_table[mb_xy]= s->qscale;
5058 h->slice_table[ mb_xy ]= h->slice_num;
5059 h->prev_mb_skipped= 1;
5063 * decodes a macroblock
5064 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5066 static int decode_mb_cavlc(H264Context *h){
5067 MpegEncContext * const s = &h->s;
5068 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5069 int partition_count;
5070 unsigned int mb_type, cbp;
5071 int dct8x8_allowed= h->pps.transform_8x8_mode;
5073 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
5075 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5076 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5078 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5079 if(s->mb_skip_run==-1)
5080 s->mb_skip_run= get_ue_golomb(&s->gb);
5082 if (s->mb_skip_run--) {
5083 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5084 if(s->mb_skip_run==0)
5085 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5087 predict_field_decoding_flag(h);
5094 if( (s->mb_y&1) == 0 )
5095 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5097 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5099 h->prev_mb_skipped= 0;
5101 mb_type= get_ue_golomb(&s->gb);
5102 if(h->slice_type == B_TYPE){
5104 partition_count= b_mb_type_info[mb_type].partition_count;
5105 mb_type= b_mb_type_info[mb_type].type;
5108 goto decode_intra_mb;
5110 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5112 partition_count= p_mb_type_info[mb_type].partition_count;
5113 mb_type= p_mb_type_info[mb_type].type;
5116 goto decode_intra_mb;
5119 assert(h->slice_type == I_TYPE);
5122 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5126 cbp= i_mb_type_info[mb_type].cbp;
5127 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5128 mb_type= i_mb_type_info[mb_type].type;
5132 mb_type |= MB_TYPE_INTERLACED;
5134 h->slice_table[ mb_xy ]= h->slice_num;
5136 if(IS_INTRA_PCM(mb_type)){
5139 // we assume these blocks are very rare so we dont optimize it
5140 align_get_bits(&s->gb);
5142 // The pixels are stored in the same order as levels in h->mb array.
5143 for(y=0; y<16; y++){
5144 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5145 for(x=0; x<16; x++){
5146 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5147 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5151 const int index= 256 + 4*(y&3) + 32*(y>>2);
5153 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5154 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5158 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5160 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5161 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5165 // In deblocking, the quantizer is 0
5166 s->current_picture.qscale_table[mb_xy]= 0;
5167 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5168 // All coeffs are present
5169 memset(h->non_zero_count[mb_xy], 16, 16);
5171 s->current_picture.mb_type[mb_xy]= mb_type;
5176 h->ref_count[0] <<= 1;
5177 h->ref_count[1] <<= 1;
5180 fill_caches(h, mb_type, 0);
5183 if(IS_INTRA(mb_type)){
5185 // init_top_left_availability(h);
5186 if(IS_INTRA4x4(mb_type)){
5189 if(dct8x8_allowed && get_bits1(&s->gb)){
5190 mb_type |= MB_TYPE_8x8DCT;
5194 // fill_intra4x4_pred_table(h);
5195 for(i=0; i<16; i+=di){
5196 int mode= pred_intra_mode(h, i);
5198 if(!get_bits1(&s->gb)){
5199 const int rem_mode= get_bits(&s->gb, 3);
5200 mode = rem_mode + (rem_mode >= mode);
5204 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5206 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5208 write_back_intra_pred_mode(h);
5209 if( check_intra4x4_pred_mode(h) < 0)
5212 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5213 if(h->intra16x16_pred_mode < 0)
5217 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
5220 h->chroma_pred_mode= pred_mode;
5221 }else if(partition_count==4){
5222 int i, j, sub_partition_count[4], list, ref[2][4];
5224 if(h->slice_type == B_TYPE){
5226 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5227 if(h->sub_mb_type[i] >=13){
5228 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5231 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5232 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5234 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5235 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5236 pred_direct_motion(h, &mb_type);
5237 h->ref_cache[0][scan8[4]] =
5238 h->ref_cache[1][scan8[4]] =
5239 h->ref_cache[0][scan8[12]] =
5240 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5243 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5245 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5246 if(h->sub_mb_type[i] >=4){
5247 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5250 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5251 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5255 for(list=0; list<2; list++){
5256 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5257 if(ref_count == 0) continue;
5259 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5260 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5261 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5263 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
5275 dct8x8_allowed = get_dct8x8_allowed(h);
5277 for(list=0; list<2; list++){
5278 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5279 if(ref_count == 0) continue;
5282 if(IS_DIRECT(h->sub_mb_type[i])) {
5283 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5286 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5287 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5289 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5290 const int sub_mb_type= h->sub_mb_type[i];
5291 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5292 for(j=0; j<sub_partition_count[i]; j++){
5294 const int index= 4*i + block_width*j;
5295 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5296 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5297 mx += get_se_golomb(&s->gb);
5298 my += get_se_golomb(&s->gb);
5299 tprintf("final mv:%d %d\n", mx, my);
5301 if(IS_SUB_8X8(sub_mb_type)){
5303 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5305 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5306 }else if(IS_SUB_8X4(sub_mb_type)){
5307 mv_cache[ 1 ][0]= mx;
5308 mv_cache[ 1 ][1]= my;
5309 }else if(IS_SUB_4X8(sub_mb_type)){
5310 mv_cache[ 8 ][0]= mx;
5311 mv_cache[ 8 ][1]= my;
5313 mv_cache[ 0 ][0]= mx;
5314 mv_cache[ 0 ][1]= my;
5317 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5323 }else if(IS_DIRECT(mb_type)){
5324 pred_direct_motion(h, &mb_type);
5325 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5327 int list, mx, my, i;
5328 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5329 if(IS_16X16(mb_type)){
5330 for(list=0; list<2; list++){
5331 if(h->ref_count[list]>0){
5332 if(IS_DIR(mb_type, 0, list)){
5333 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5334 if(val >= h->ref_count[list]){
5335 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5338 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5340 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5343 for(list=0; list<2; list++){
5344 if(IS_DIR(mb_type, 0, list)){
5345 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5346 mx += get_se_golomb(&s->gb);
5347 my += get_se_golomb(&s->gb);
5348 tprintf("final mv:%d %d\n", mx, my);
5350 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5352 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5355 else if(IS_16X8(mb_type)){
5356 for(list=0; list<2; list++){
5357 if(h->ref_count[list]>0){
5359 if(IS_DIR(mb_type, i, list)){
5360 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5361 if(val >= h->ref_count[list]){
5362 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5365 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5367 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5371 for(list=0; list<2; list++){
5373 if(IS_DIR(mb_type, i, list)){
5374 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5375 mx += get_se_golomb(&s->gb);
5376 my += get_se_golomb(&s->gb);
5377 tprintf("final mv:%d %d\n", mx, my);
5379 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5381 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5385 assert(IS_8X16(mb_type));
5386 for(list=0; list<2; list++){
5387 if(h->ref_count[list]>0){
5389 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5390 unsigned int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5391 if(val >= h->ref_count[list]){
5392 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
5395 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5397 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5401 for(list=0; list<2; list++){
5403 if(IS_DIR(mb_type, i, list)){
5404 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5405 mx += get_se_golomb(&s->gb);
5406 my += get_se_golomb(&s->gb);
5407 tprintf("final mv:%d %d\n", mx, my);
5409 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5411 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5417 if(IS_INTER(mb_type))
5418 write_back_motion(h, mb_type);
5420 if(!IS_INTRA16x16(mb_type)){
5421 cbp= get_ue_golomb(&s->gb);
5423 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
5427 if(IS_INTRA4x4(mb_type))
5428 cbp= golomb_to_intra4x4_cbp[cbp];
5430 cbp= golomb_to_inter_cbp[cbp];
5434 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5435 if(get_bits1(&s->gb))
5436 mb_type |= MB_TYPE_8x8DCT;
5438 s->current_picture.mb_type[mb_xy]= mb_type;
5440 if(cbp || IS_INTRA16x16(mb_type)){
5441 int i8x8, i4x4, chroma_idx;
5442 int chroma_qp, dquant;
5443 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5444 const uint8_t *scan, *scan8x8, *dc_scan;
5446 // fill_non_zero_count_cache(h);
5448 if(IS_INTERLACED(mb_type)){
5449 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5450 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5451 dc_scan= luma_dc_field_scan;
5453 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5454 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5455 dc_scan= luma_dc_zigzag_scan;
5458 dquant= get_se_golomb(&s->gb);
5460 if( dquant > 25 || dquant < -26 ){
5461 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5465 s->qscale += dquant;
5466 if(((unsigned)s->qscale) > 51){
5467 if(s->qscale<0) s->qscale+= 52;
5468 else s->qscale-= 52;
5471 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5472 if(IS_INTRA16x16(mb_type)){
5473 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5474 return -1; //FIXME continue if partitioned and other return -1 too
5477 assert((cbp&15) == 0 || (cbp&15) == 15);
5480 for(i8x8=0; i8x8<4; i8x8++){
5481 for(i4x4=0; i4x4<4; i4x4++){
5482 const int index= i4x4 + 4*i8x8;
5483 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5489 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5492 for(i8x8=0; i8x8<4; i8x8++){
5493 if(cbp & (1<<i8x8)){
5494 if(IS_8x8DCT(mb_type)){
5495 DCTELEM *buf = &h->mb[64*i8x8];
5497 for(i4x4=0; i4x4<4; i4x4++){
5498 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5499 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5502 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5503 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5505 for(i4x4=0; i4x4<4; i4x4++){
5506 const int index= i4x4 + 4*i8x8;
5508 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5514 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5515 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5521 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5522 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5528 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5529 for(i4x4=0; i4x4<4; i4x4++){
5530 const int index= 16 + 4*chroma_idx + i4x4;
5531 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5537 uint8_t * const nnz= &h->non_zero_count_cache[0];
5538 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5539 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5542 uint8_t * const nnz= &h->non_zero_count_cache[0];
5543 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5544 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5545 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5547 s->current_picture.qscale_table[mb_xy]= s->qscale;
5548 write_back_non_zero_count(h);
5551 h->ref_count[0] >>= 1;
5552 h->ref_count[1] >>= 1;
/**
 * Decodes the CABAC mb_field_decoding_flag for the current MBAFF MB pair.
 * ctx (0..2) counts how many of the left (A) and top (B) neighbouring
 * pairs, when in the same slice, are already field (interlaced) coded;
 * the flag is then decoded with cabac_state[70 + ctx].
 * NOTE(review): mb_y is rounded down to the pair top (& ~1), so mba/mbb
 * address neighbouring MB *pairs*, not individual MBs.
 */
5558 static int decode_cabac_field_decoding_flag(H264Context *h) {
5559     MpegEncContext * const s = &h->s;
5560     const int mb_x = s->mb_x;
5561     const int mb_y = s->mb_y & ~1;
     /* left neighbour pair and the pair two rows up (top of the pair above) */
5562     const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5563     const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5565     unsigned int ctx = 0;
     /* each same-slice interlaced neighbour bumps the context index */
5567     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5570     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5574     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes a CABAC intra mb_type starting at cabac_state[ctx_base].
 * @param ctx_base  base index into cabac_state for this slice type
 * @param intra_slice nonzero in I slices: neighbour-based context is used
 *                  for the first bin and the state offsets shift by one
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 encoding the
 *         I_16x16 variant (pred mode + cbp bits folded into the value).
 */
5577 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5578     uint8_t *state= &h->cabac_state[ctx_base];
5582     MpegEncContext * const s = &h->s;
5583     const int mba_xy = h->left_mb_xy[0];
5584     const int mbb_xy = h->top_mb_xy;
     /* ctx grows for each same-slice neighbour that is not intra 4x4 */
5586     if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5588     if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5590     if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5591     return 0;   /* I4x4 */
5594     if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5595     return 0;   /* I4x4 */
     /* terminate bin distinguishes PCM from I16x16 */
5598     if( get_cabac_terminate( &h->cabac ) )
5599     return 25;  /* PCM */
5601     mb_type = 1; /* I16x16 */
5602     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5603     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5604     mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
     /* last two bins select the 16x16 intra prediction mode (0..3) */
5605     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5606     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes the CABAC mb_type for the current macroblock, dispatching on
 * slice type (I / P / B). The returned value is an index into the
 * i_/p_/b_mb_type_info tables used by the caller.
 * For intra MBs inside P/B slices this defers to
 * decode_cabac_intra_mb_type() with the slice-specific ctx_base.
 */
5610 static int decode_cabac_mb_type( H264Context *h ) {
5611     MpegEncContext * const s = &h->s;
5613     if( h->slice_type == I_TYPE ) {
5614         return decode_cabac_intra_mb_type(h, 3, 1);
5615     } else if( h->slice_type == P_TYPE ) {
5616         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5618             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5619                 /* P_L0_D16x16, P_8x8 */
5620                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5622                 /* P_L0_D8x16, P_L0_D16x8 */
5623                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
             /* intra MB in a P slice: intra types start at offset 5 */
5626             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5628     } else if( h->slice_type == B_TYPE ) {
5629         const int mba_xy = h->left_mb_xy[0];
5630         const int mbb_xy = h->top_mb_xy;
         /* ctx counts same-slice neighbours that are not B_Direct */
5634         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5636         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5639         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5640             return 0; /* B_Direct_16x16 */
5642         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5643             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
         /* 4-bit suffix selects among the remaining bi-predictive types */
5646         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5647         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5648         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5649         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5651             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5652         else if( bits == 13 ) {
             /* intra MB in a B slice: intra types start at offset 23 */
5653             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5654         } else if( bits == 14 )
5655             return 11; /* B_L1_L0_8x16 */
5656         else if( bits == 15 )
5657             return 22; /* B_8x8 */
         /* one extra bin extends the suffix for the remaining range */
5659         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5660         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5662     /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag for the MB at (mb_x, mb_y).
 * ctx counts same-slice A/B neighbours that are themselves not skipped;
 * B slices use a different context group (offset +13 via the 11+ctx base
 * adjusted below — see the slice_type check).
 * In MBAFF frames the neighbour addresses are corrected so that a frame
 * MB pairs with the matching half of a field-coded neighbour pair.
 */
5667 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5668     MpegEncContext * const s = &h->s;
5672     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5673         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
         /* pick the bottom MB of the left pair when field/frame coding differs */
5676             && h->slice_table[mba_xy] == h->slice_num
5677             && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5678             mba_xy += s->mb_stride;
5680             mbb_xy = mb_xy - s->mb_stride;
5682                 && h->slice_table[mbb_xy] == h->slice_num
5683                 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5684                 mbb_xy -= s->mb_stride;
5686             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5688         int mb_xy = mb_x + mb_y*s->mb_stride;
5690         mbb_xy = mb_xy - s->mb_stride;
     /* each same-slice non-skipped neighbour bumps the context */
5693     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5695     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5698     if( h->slice_type == B_TYPE )
5700     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one CABAC intra 4x4 prediction mode.
 * First bin (state 68) signals "use the predicted mode"; otherwise a
 * 3-bit remaining-mode value is read (state 69) and bumped past the
 * predicted mode so all 9 modes stay reachable.
 * @param pred_mode the most-probable mode predicted from neighbours
 */
5703 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5706     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
     /* rem_intra4x4_pred_mode: three bins, LSB first */
5709     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5710     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5711     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
     /* skip over the predicted mode so the 8 remaining codes cover 0..8 */
5713     if( mode >= pred_mode )
/**
 * Decodes the CABAC intra chroma prediction mode (0..3).
 * ctx counts same-slice neighbours with a nonzero chroma mode; the value
 * is then read as a truncated unary code with states 64+ctx / 64+3.
 */
5719 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5720     const int mba_xy = h->left_mb_xy[0];
5721     const int mbb_xy = h->top_mb_xy;
5725     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5726     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5729     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5732     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
     /* truncated unary suffix, max value 3 */
5735     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5737     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4 luma block index (raster within 8x8
 * sub-blocks) to its x/y block coordinates, and the inverse (x,y) -> index
 * mapping used for neighbour CBP lookups below. */
5743 static const uint8_t block_idx_x[16] = {
5744     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5746 static const uint8_t block_idx_y[16] = {
5747     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5749 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the CABAC luma coded_block_pattern (one bit per 8x8 block).
 * For each 8x8 block the context is derived from whether the left and
 * top neighbouring 8x8 blocks (possibly in the A/B neighbour MBs,
 * cbp_a/cbp_b) have coded coefficients.
 */
5756 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5761     if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5763         tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5766     for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
         /* coordinates of the top-left 4x4 block of this 8x8 block */
5771         x = block_idx_x[4*i8x8];
5772         y = block_idx_y[4*i8x8];
5776         else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5777             cbp_a = h->left_cbp;
5778             tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5784         /* No need to test for skip as we put 0 for skip block */
5785         /* No need to test for IPCM as we put 1 for IPCM block */
             /* left neighbour 8x8 block: wrap x-1 into the neighbour MB */
5787             int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5788             if( ((cbp_a >> i8x8a)&0x01) == 0 )
             /* top neighbour 8x8 block: wrap y-1 into the neighbour MB */
5793             int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5794             if( ((cbp_b >> i8x8b)&0x01) == 0 )
5798         if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the CABAC chroma coded_block_pattern (0, 1 or 2).
 * 0 = no chroma coeffs, 1 = DC only, 2 = DC + AC. Contexts are taken
 * from the chroma CBP of the left/top neighbours (bits 4-5 of their cbp).
 */
5804 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5808     cbp_a = (h->left_cbp>>4)&0x03;
5809     cbp_b = (h-> top_cbp>>4)&0x03;
     /* first bin: any chroma coefficients at all? */
5812     if( cbp_a > 0 ) ctx++;
5813     if( cbp_b > 0 ) ctx += 2;
5814     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
     /* second bin: DC-only (1) vs DC+AC (2), neighbour ctx reuses 77+ base */
5818     if( cbp_a == 2 ) ctx++;
5819     if( cbp_b == 2 ) ctx += 2;
5820     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes the CABAC mb_qp_delta as a signed value.
 * The unary-coded magnitude uses contexts 60+ctx, where the first ctx
 * depends on whether the previous MB had a nonzero qp delta.
 * Even counts map to positive deltas, odd counts to negative ones.
 */
5822 static int decode_cabac_mb_dqp( H264Context *h) {
5823     MpegEncContext * const s = &h->s;
     /* address of the previously decoded MB (wraps to end of prev row) */
5829         mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5831         mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5833     if( h->last_qscale_diff != 0 )
5836     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5842         if(val > 102) //prevent infinite loop
     /* odd magnitude -> negative delta */
5849         return -(val + 1)/2;
/**
 * Decodes a CABAC P-slice sub_mb_type (index into p_sub_mb_type_info)
 * as a short binary tree over states 21..23.
 */
5851 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5852     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5854     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5856     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a CABAC B-slice sub_mb_type (index into b_sub_mb_type_info,
 * 0..12) as a binary tree over states 36..39.
 */
5860 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5862     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5863         return 0; /* B_Direct_8x8 */
5864     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5865         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5867     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5868         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5869             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
     /* remaining 8x4/4x8 variants: two suffix bins on state 39 */
5872     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5873     type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes the CABAC transform_size_8x8_flag; the context depends on how
 * many neighbouring MBs already use the 8x8 transform
 * (h->neighbor_transform_size, set up by fill_caches).
 */
5877 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5878     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a CABAC ref_idx for block n of the given list.
 * Context comes from the ref indices cached for the left/top 4x4
 * neighbours; in B slices a neighbour decoded as direct does not count.
 * The value is unary-coded over states 54+ctx; values >= 32 are treated
 * as corrupt input and clamped to 0 with an error message.
 */
5881 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5882     int refa = h->ref_cache[list][scan8[n] - 1];
5883     int refb = h->ref_cache[list][scan8[n] - 8];
5887     if( h->slice_type == B_TYPE) {
         /* direct-predicted neighbours don't contribute to the context */
5888         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5890         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5899     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
     /* sanity limit: H.264 allows at most 32 reference frames per list */
5905     if(ref >= 32 /*h->ref_list[list]*/){
5906         av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5907         return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one CABAC motion vector difference component.
 * @param l 0 for the x component (ctx base 40), 1 for y (ctx base 47)
 * The context for the first bin depends on the summed |mvd| of the
 * left/top neighbours (amvd); the magnitude uses up to 9 unary bins,
 * then switches to an exp-Golomb-style bypass suffix. The final bypass
 * bin carries the sign.
 */
5913 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5914     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5915                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5916     int ctxbase = (l == 0) ? 40 : 47;
5921     else if( amvd > 32 )
5926     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
     /* unary prefix, at most 9 context-coded bins */
5931     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
     /* exp-Golomb suffix in bypass mode for large magnitudes */
5939         while( get_cabac_bypass( &h->cabac ) ) {
5943                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5948             if( get_cabac_bypass( &h->cabac ) )
     /* sign bin: negates mvd when set */
5952     return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the context index for the coded_block_flag of block idx in
 * block category cat (0=luma DC, 1/2=luma AC/4x4, 3=chroma DC,
 * 4=chroma AC). nza/nzb are the "neighbour has coefficients" flags of
 * the left/top neighbour blocks, read from left_cbp/top_cbp for DC
 * categories and from the non_zero_count cache otherwise.
 * @return ctx (0..3 from the neighbours) + 4 * cat
 */
5955 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
     /* luma DC: bit 8 of the neighbour's cbp encodes its DC flag */
5960         nza = h->left_cbp&0x100;
5961         nzb = h-> top_cbp&0x100;
5962     } else if( cat == 1 || cat == 2 ) {
5963         nza = h->non_zero_count_cache[scan8[idx] - 1];
5964         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5965     } else if( cat == 3 ) {
         /* chroma DC: bits 6..7 of the neighbour's cbp, indexed by iCbCr */
5966         nza = (h->left_cbp>>(6+idx))&0x01;
5967         nzb = (h-> top_cbp>>(6+idx))&0x01;
5970         nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5971         nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5980     return ctx + 4 * cat;
/* Context offsets for the last_significant_coeff_flag of 8x8 blocks:
 * maps each of the 63 scan positions to one of 9 contexts.
 * __attribute((used)) keeps the table when only asm references it. */
5983 static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
5984     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5985     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5986     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5987     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one CABAC-coded residual block into *block.
 * @param cat   block category, see the comment table below (0..5)
 * @param n     block index within the MB (meaning depends on cat)
 * @param scantable zigzag/field scan mapping coeff order -> position
 * @param qmul  dequant table, or NULL to store raw levels (DC blocks,
 *              dequantized later)
 * @param max_coeff number of coefficients in this block type
 *
 * Stages: coded_block_flag -> significance map (+ last flags) ->
 * levels in reverse scan order (unary prefix, exp-Golomb bypass suffix,
 * bypass sign). Non-zero-count caches and cbp_table are updated as a
 * side effect so later blocks get correct contexts.
 */
5990 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5991     const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
     /* per-category context bases, [0]=frame, [1]=field coding */
5992     static const int significant_coeff_flag_offset[2][6] = {
5993       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5994       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5996     static const int last_coeff_flag_offset[2][6] = {
5997       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5998       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6000     static const int coeff_abs_level_m1_offset[6] = {
6001         227+0, 227+10, 227+20, 227+30, 227+39, 426
     /* significance-map context per 8x8 scan position, frame/field */
6003     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
6004       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6005         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6006         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6007        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6008       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6009         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6010         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6011         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6017     int coeff_count = 0;
6020     int abslevelgt1 = 0;
6022     uint8_t *significant_coeff_ctx_base;
6023     uint8_t *last_coeff_ctx_base;
6024     uint8_t *abs_level_m1_ctx_base;
     /* copy the CABAC state to the stack so the compiler can keep it in
      * registers inside this hot loop; written back before returning */
6027 #define CABAC_ON_STACK
6029 #ifdef CABAC_ON_STACK
6032     cc.range     = h->cabac.range;
6033     cc.low       = h->cabac.low;
6034     cc.bytestream= h->cabac.bytestream;
6036 #define CC &h->cabac
6040     /* cat: 0-> DC 16x16  n = 0
6041      *      1-> AC 16x16  n = luma4x4idx
6042      *      2-> Luma4x4   n = luma4x4idx
6043      *      3-> DC Chroma n = iCbCr
6044      *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6045      *      5-> Luma8x8   n = 4 * luma8x8idx
6048     /* read coded block flag */
6050         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
         /* no coefficients: clear the nz count and bail out early */
6051             if( cat == 1 || cat == 2 )
6052                 h->non_zero_count_cache[scan8[n]] = 0;
6054                 h->non_zero_count_cache[scan8[16+n]] = 0;
6055 #ifdef CABAC_ON_STACK
6056             h->cabac.range     = cc.range     ;
6057             h->cabac.low       = cc.low       ;
6058             h->cabac.bytestream= cc.bytestream;
6064     significant_coeff_ctx_base = h->cabac_state
6065         + significant_coeff_flag_offset[MB_FIELD][cat];
6066     last_coeff_ctx_base = h->cabac_state
6067         + last_coeff_flag_offset[MB_FIELD][cat];
6068     abs_level_m1_ctx_base = h->cabac_state
6069         + coeff_abs_level_m1_offset[cat];
     /* significance map: records positions of nonzero coeffs in index[],
      * stopping early when the last_coeff flag fires */
6072 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6073         for(last= 0; last < coefs; last++) { \
6074             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6075             if( get_cabac( CC, sig_ctx )) { \
6076                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6077                 index[coeff_count++] = last; \
6078                 if( get_cabac( CC, last_ctx ) ) { \
         /* the final position is implicitly significant */ \
6084         if( last == max_coeff -1 ) {\
6085             index[coeff_count++] = last;\
6087         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
     /* x86 asm fast paths where PIC register pressure allows */
6088 #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
6089         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
6091         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
6093         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6095         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6098     assert(coeff_count > 0);
     /* record "has coefficients" for deblocking / later contexts */
6101         h->cbp_table[mb_xy] |= 0x100;
6102     else if( cat == 1 || cat == 2 )
6103         h->non_zero_count_cache[scan8[n]] = coeff_count;
6105         h->cbp_table[mb_xy] |= 0x40 << n;
6107         h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6110         fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
     /* levels, reverse scan order: context depends on how many levels
      * equal to 1 / greater than 1 were seen so far */
6113     for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
6114         uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6115         int j= scantable[index[coeff_count]];
6117         if( get_cabac( CC, ctx ) == 0 ) {
         /* |level| == 1: only the sign remains */
6119                 block[j] = get_cabac_bypass_sign( CC, -1);
6121                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
6127             ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6128             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
         /* magnitudes >= 15 continue with an exp-Golomb bypass suffix */
6132             if( coeff_abs >= 15 ) {
6134                 while( get_cabac_bypass( CC ) ) {
6140                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
6146                 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
6147                 else                         block[j] =  coeff_abs;
6149                 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6150                 else                         block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
     /* write the stack copy of the CABAC state back */
6156 #ifdef CABAC_ON_STACK
6157             h->cabac.range     = cc.range     ;
6158             h->cabac.low       = cc.low       ;
6159             h->cabac.bytestream= cc.bytestream;
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * In non-MBAFF frames these are simply the MBs directly above and to the
 * left; in MBAFF frames the addresses are corrected per MB pair when the
 * current pair's field/frame coding differs from the neighbour pair's.
 */
6164 static void inline compute_mb_neighbors(H264Context *h)
6166     MpegEncContext * const s = &h->s;
6167     const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6168     h->top_mb_xy     = mb_xy - s->mb_stride;
6169     h->left_mb_xy[0] = mb_xy - 1;
     /* MBAFF: work in MB-pair coordinates (pair top = mb_y & ~1) */
6171         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
6172         const int top_pair_xy      = pair_xy     - s->mb_stride;
6173         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6174         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6175         const int curr_mb_frame_flag = !MB_FIELD;
6176         const int bottom = (s->mb_y & 1);
     /* move the top neighbour up one MB when pairing rules require it */
6178               ? !curr_mb_frame_flag // bottom macroblock
6179               : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6181             h->top_mb_xy -= s->mb_stride;
6183         if (left_mb_frame_flag != curr_mb_frame_flag) {
6184             h->left_mb_xy[0] = pair_xy - 1;
6191 * decodes a macroblock
6192 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6194 static int decode_mb_cabac(H264Context *h) {
6195 MpegEncContext * const s = &h->s;
6196 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6197 int mb_type, partition_count, cbp = 0;
6198 int dct8x8_allowed= h->pps.transform_8x8_mode;
6200 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
6202 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6203 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6205 /* a skipped mb needs the aff flag from the following mb */
6206 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6207 predict_field_decoding_flag(h);
6208 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6209 skip = h->next_mb_skipped;
6211 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6212 /* read skip flags */
6214 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6215 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6216 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6217 if(h->next_mb_skipped)
6218 predict_field_decoding_flag(h);
6220 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6225 h->cbp_table[mb_xy] = 0;
6226 h->chroma_pred_mode_table[mb_xy] = 0;
6227 h->last_qscale_diff = 0;
6234 if( (s->mb_y&1) == 0 )
6236 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6238 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6240 h->prev_mb_skipped = 0;
6242 compute_mb_neighbors(h);
6243 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6244 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6248 if( h->slice_type == B_TYPE ) {
6250 partition_count= b_mb_type_info[mb_type].partition_count;
6251 mb_type= b_mb_type_info[mb_type].type;
6254 goto decode_intra_mb;
6256 } else if( h->slice_type == P_TYPE ) {
6258 partition_count= p_mb_type_info[mb_type].partition_count;
6259 mb_type= p_mb_type_info[mb_type].type;
6262 goto decode_intra_mb;
6265 assert(h->slice_type == I_TYPE);
6267 partition_count = 0;
6268 cbp= i_mb_type_info[mb_type].cbp;
6269 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6270 mb_type= i_mb_type_info[mb_type].type;
6273 mb_type |= MB_TYPE_INTERLACED;
6275 h->slice_table[ mb_xy ]= h->slice_num;
6277 if(IS_INTRA_PCM(mb_type)) {
6281 // We assume these blocks are very rare so we dont optimize it.
6282 // FIXME The two following lines get the bitstream position in the cabac
6283 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6284 ptr= h->cabac.bytestream;
6285 if(h->cabac.low&0x1) ptr--;
6287 if(h->cabac.low&0x1FF) ptr--;
6290 // The pixels are stored in the same order as levels in h->mb array.
6291 for(y=0; y<16; y++){
6292 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6293 for(x=0; x<16; x++){
6294 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
6295 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6299 const int index= 256 + 4*(y&3) + 32*(y>>2);
6301 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
6302 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6306 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6308 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
6309 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6313 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6315 // All blocks are present
6316 h->cbp_table[mb_xy] = 0x1ef;
6317 h->chroma_pred_mode_table[mb_xy] = 0;
6318 // In deblocking, the quantizer is 0
6319 s->current_picture.qscale_table[mb_xy]= 0;
6320 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6321 // All coeffs are present
6322 memset(h->non_zero_count[mb_xy], 16, 16);
6323 s->current_picture.mb_type[mb_xy]= mb_type;
6328 h->ref_count[0] <<= 1;
6329 h->ref_count[1] <<= 1;
6332 fill_caches(h, mb_type, 0);
6334 if( IS_INTRA( mb_type ) ) {
6336 if( IS_INTRA4x4( mb_type ) ) {
6337 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6338 mb_type |= MB_TYPE_8x8DCT;
6339 for( i = 0; i < 16; i+=4 ) {
6340 int pred = pred_intra_mode( h, i );
6341 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6342 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6345 for( i = 0; i < 16; i++ ) {
6346 int pred = pred_intra_mode( h, i );
6347 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6349 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6352 write_back_intra_pred_mode(h);
6353 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6355 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6356 if( h->intra16x16_pred_mode < 0 ) return -1;
6358 h->chroma_pred_mode_table[mb_xy] =
6359 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6361 pred_mode= check_intra_pred_mode( h, pred_mode );
6362 if( pred_mode < 0 ) return -1;
6363 h->chroma_pred_mode= pred_mode;
6364 } else if( partition_count == 4 ) {
6365 int i, j, sub_partition_count[4], list, ref[2][4];
6367 if( h->slice_type == B_TYPE ) {
6368 for( i = 0; i < 4; i++ ) {
6369 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6370 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6371 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6373 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
6374 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
6375 pred_direct_motion(h, &mb_type);
6376 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6377 for( i = 0; i < 4; i++ )
6378 if( IS_DIRECT(h->sub_mb_type[i]) )
6379 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6383 for( i = 0; i < 4; i++ ) {
6384 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6385 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6386 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6390 for( list = 0; list < 2; list++ ) {
6391 if( h->ref_count[list] > 0 ) {
6392 for( i = 0; i < 4; i++ ) {
6393 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6394 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6395 if( h->ref_count[list] > 1 )
6396 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6402 h->ref_cache[list][ scan8[4*i]+1 ]=
6403 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6409 dct8x8_allowed = get_dct8x8_allowed(h);
6411 for(list=0; list<2; list++){
6413 if(IS_DIRECT(h->sub_mb_type[i])){
6414 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6417 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6419 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6420 const int sub_mb_type= h->sub_mb_type[i];
6421 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6422 for(j=0; j<sub_partition_count[i]; j++){
6425 const int index= 4*i + block_width*j;
6426 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6427 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6428 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6430 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6431 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6432 tprintf("final mv:%d %d\n", mx, my);
6434 if(IS_SUB_8X8(sub_mb_type)){
6436 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6438 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6441 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6443 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6444 }else if(IS_SUB_8X4(sub_mb_type)){
6445 mv_cache[ 1 ][0]= mx;
6446 mv_cache[ 1 ][1]= my;
6448 mvd_cache[ 1 ][0]= mx - mpx;
6449 mvd_cache[ 1 ][1]= my - mpy;
6450 }else if(IS_SUB_4X8(sub_mb_type)){
6451 mv_cache[ 8 ][0]= mx;
6452 mv_cache[ 8 ][1]= my;
6454 mvd_cache[ 8 ][0]= mx - mpx;
6455 mvd_cache[ 8 ][1]= my - mpy;
6457 mv_cache[ 0 ][0]= mx;
6458 mv_cache[ 0 ][1]= my;
6460 mvd_cache[ 0 ][0]= mx - mpx;
6461 mvd_cache[ 0 ][1]= my - mpy;
6464 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6465 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6466 p[0] = p[1] = p[8] = p[9] = 0;
6467 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6471 } else if( IS_DIRECT(mb_type) ) {
6472 pred_direct_motion(h, &mb_type);
6473 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6474 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6475 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6477 int list, mx, my, i, mpx, mpy;
6478 if(IS_16X16(mb_type)){
6479 for(list=0; list<2; list++){
6480 if(IS_DIR(mb_type, 0, list)){
6481 if(h->ref_count[list] > 0 ){
6482 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6483 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6486 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6488 for(list=0; list<2; list++){
6489 if(IS_DIR(mb_type, 0, list)){
6490 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6492 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6493 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6494 tprintf("final mv:%d %d\n", mx, my);
6496 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6497 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6499 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6502 else if(IS_16X8(mb_type)){
6503 for(list=0; list<2; list++){
6504 if(h->ref_count[list]>0){
6506 if(IS_DIR(mb_type, i, list)){
6507 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6508 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6510 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6514 for(list=0; list<2; list++){
6516 if(IS_DIR(mb_type, i, list)){
6517 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6518 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6519 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6520 tprintf("final mv:%d %d\n", mx, my);
6522 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6523 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6525 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6526 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6531 assert(IS_8X16(mb_type));
6532 for(list=0; list<2; list++){
6533 if(h->ref_count[list]>0){
6535 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6536 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6537 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6539 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6543 for(list=0; list<2; list++){
6545 if(IS_DIR(mb_type, i, list)){
6546 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6547 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6548 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6550 tprintf("final mv:%d %d\n", mx, my);
6551 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6552 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6554 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6555 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6562 if( IS_INTER( mb_type ) ) {
6563 h->chroma_pred_mode_table[mb_xy] = 0;
6564 write_back_motion( h, mb_type );
6567 if( !IS_INTRA16x16( mb_type ) ) {
6568 cbp = decode_cabac_mb_cbp_luma( h );
6569 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6572 h->cbp_table[mb_xy] = h->cbp = cbp;
6574 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6575 if( decode_cabac_mb_transform_size( h ) )
6576 mb_type |= MB_TYPE_8x8DCT;
6578 s->current_picture.mb_type[mb_xy]= mb_type;
6580 if( cbp || IS_INTRA16x16( mb_type ) ) {
6581 const uint8_t *scan, *scan8x8, *dc_scan;
6584 if(IS_INTERLACED(mb_type)){
6585 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6586 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6587 dc_scan= luma_dc_field_scan;
6589 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6590 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6591 dc_scan= luma_dc_zigzag_scan;
6594 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6595 if( dqp == INT_MIN ){
6596 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6600 if(((unsigned)s->qscale) > 51){
6601 if(s->qscale<0) s->qscale+= 52;
6602 else s->qscale-= 52;
6604 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6606 if( IS_INTRA16x16( mb_type ) ) {
6608 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6609 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6612 for( i = 0; i < 16; i++ ) {
6613 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6614 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6618 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6622 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6623 if( cbp & (1<<i8x8) ) {
6624 if( IS_8x8DCT(mb_type) ) {
6625 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6626 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6629 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6630 const int index = 4*i8x8 + i4x4;
6631 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6633 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6635 //STOP_TIMER("decode_residual")
6638 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6639 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6646 for( c = 0; c < 2; c++ ) {
6647 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6648 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6655 for( c = 0; c < 2; c++ ) {
6656 for( i = 0; i < 4; i++ ) {
6657 const int index = 16 + 4 * c + i;
6658 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6659 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6664 uint8_t * const nnz= &h->non_zero_count_cache[0];
6665 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6666 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6669 uint8_t * const nnz= &h->non_zero_count_cache[0];
6670 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6671 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6672 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6673 h->last_qscale_diff = 0;
6676 s->current_picture.qscale_table[mb_xy]= s->qscale;
6677 write_back_non_zero_count(h);
6680 h->ref_count[0] >>= 1;
6681 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge.
 * pix points at the first pixel to the right of the edge; bS[4] holds the
 * boundary strength of each 4-row segment of the 16-row edge, qp the
 * (already averaged) luma QP of the two adjacent macroblocks.
 * NOTE(review): this chunk of the file elides some original lines
 * (declarations of i/d/tc, else branches, closing braces); the code below
 * is left byte-identical, only comments were added. */
6688 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* the alpha/beta/tc0 tables are biased by +52 so that negative
 * qp+offset values still index validly */
6690 const int index_a = qp + h->slice_alpha_c0_offset;
6691 const int alpha = (alpha_table+52)[index_a];
6692 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* bS < 4 path: per-segment clipping value from tc0_table, tc = -1 marks
 * a segment that must not be filtered; actual work is delegated to the
 * DSP routine */
6697 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6698 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6700 /* 16px edge length, because bS=4 is triggered by being at
6701 * the edge of an intra MB, so all 4 bS are the same */
6702 for( d = 0; d < 16; d++ ) {
6703 const int p0 = pix[-1];
6704 const int p1 = pix[-2];
6705 const int p2 = pix[-3];
6707 const int q0 = pix[0];
6708 const int q1 = pix[1];
6709 const int q2 = pix[2];
/* edge is only filtered when the local gradient looks like a blocking
 * artifact rather than a real image edge */
6711 if( FFABS( p0 - q0 ) < alpha &&
6712 FFABS( p1 - p0 ) < beta &&
6713 FFABS( q1 - q0 ) < beta ) {
/* strong (bS==4) filtering: extra threshold selects between the
 * 3-tap and the wider smoothing filters on each side */
6715 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6716 if( FFABS( p2 - p0 ) < beta)
6718 const int p3 = pix[-4];
6720 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6721 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6722 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6725 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6727 if( FFABS( q2 - q0 ) < beta)
6729 const int q3 = pix[3];
6731 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6732 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6733 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6736 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* fallback: only p0/q0 are touched */
6740 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6741 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6743 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge; all pixel work is delegated to the
 * DSP chroma loop-filter routines (normal vs intra/strong variant).
 * NOTE(review): some original lines are elided in this chunk (tc/loop
 * declarations, branch/closing braces); code left byte-identical. */
6749 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* tables biased by +52 so negative qp+offset indexes stay valid */
6751 const int index_a = qp + h->slice_alpha_c0_offset;
6752 const int alpha = (alpha_table+52)[index_a];
6753 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma uses tc0+1 (and 0 = "don't filter"), matching the DSP
 * routine's convention — note the luma variant uses tc0/-1 instead */
6758 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6759 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6761 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the first vertical luma edge of an MBAFF macroblock pair.
 * Unlike filter_mb_edgev there are 8 boundary strengths (bS[8]) and two
 * QPs (qp[2], one per neighbouring field/frame MB), so the filter runs
 * scalar, one row at a time.
 * NOTE(review): several original lines are elided here (declarations of
 * i/qp_index/index_a/alpha/beta/tc/i_delta, else branches, closing
 * braces); the code is left byte-identical, only comments were added. */
6765 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6767 for( i = 0; i < 16; i++, pix += stride) {
/* map the row index to one of the 8 bS entries; the mapping differs
 * between field and frame decoding of the pair */
6773 int bS_index = (i >> 1);
6776 bS_index |= (i & 1);
6779 if( bS[bS_index] == 0 ) {
/* pick which neighbour's averaged QP applies to this row */
6783 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6784 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6785 alpha = (alpha_table+52)[index_a];
6786 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (bS < 4) filtering, scalar per-row version */
6788 if( bS[bS_index] < 4 ) {
6789 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6790 const int p0 = pix[-1];
6791 const int p1 = pix[-2];
6792 const int p2 = pix[-3];
6793 const int q0 = pix[0];
6794 const int q1 = pix[1];
6795 const int q2 = pix[2];
6797 if( FFABS( p0 - q0 ) < alpha &&
6798 FFABS( p1 - p0 ) < beta &&
6799 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 are corrected only when their side is smooth enough */
6803 if( FFABS( p2 - p0 ) < beta ) {
6804 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6807 if( FFABS( q2 - q0 ) < beta ) {
6808 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6812 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6813 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6814 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6815 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS == 4) filtering branch */
6818 const int p0 = pix[-1];
6819 const int p1 = pix[-2];
6820 const int p2 = pix[-3];
6822 const int q0 = pix[0];
6823 const int q1 = pix[1];
6824 const int q2 = pix[2];
6826 if( FFABS( p0 - q0 ) < alpha &&
6827 FFABS( p1 - p0 ) < beta &&
6828 FFABS( q1 - q0 ) < beta ) {
6830 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6831 if( FFABS( p2 - p0 ) < beta)
6833 const int p3 = pix[-4];
6835 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6836 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6837 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6840 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6842 if( FFABS( q2 - q0 ) < beta)
6844 const int q3 = pix[3];
6846 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6847 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6848 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6851 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* fallback: only p0/q0 are touched */
6855 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6856 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6858 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock the first vertical chroma edge of an MBAFF macroblock pair:
 * 8 chroma rows, 8 boundary strengths, two QPs — processed scalar,
 * one row per iteration (chroma filter only ever touches p0/q0).
 * NOTE(review): some original lines are elided (bS_index/qp_index/
 * alpha/beta declarations, else branches, closing braces); code left
 * byte-identical, only comments added. */
6863 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6865 for( i = 0; i < 8; i++, pix += stride) {
6873 if( bS[bS_index] == 0 ) {
/* choose the neighbour QP for this row (field vs frame pairing) */
6877 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6878 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6879 alpha = (alpha_table+52)[index_a];
6880 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filtering: chroma clipping value is tc0 + 1 */
6882 if( bS[bS_index] < 4 ) {
6883 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6884 const int p0 = pix[-1];
6885 const int p1 = pix[-2];
6886 const int q0 = pix[0];
6887 const int q1 = pix[1];
6889 if( FFABS( p0 - q0 ) < alpha &&
6890 FFABS( p1 - p0 ) < beta &&
6891 FFABS( q1 - q0 ) < beta ) {
6892 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6894 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6895 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6896 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS == 4) filtering: fixed 3-tap smoothing of p0/q0 */
6899 const int p0 = pix[-1];
6900 const int p1 = pix[-2];
6901 const int q0 = pix[0];
6902 const int q1 = pix[1];
6904 if( FFABS( p0 - q0 ) < alpha &&
6905 FFABS( p1 - p0 ) < beta &&
6906 FFABS( q1 - q0 ) < beta ) {
6908 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6909 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6910 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (pix points at the first row below
 * the edge, so neighbours are addressed in multiples of pix_next=stride).
 * Mirrors filter_mb_edgev: bS<4 is delegated to the DSP vertical loop
 * filter, bS==4 (intra edge) does strong filtering in C for all 16
 * columns.
 * NOTE(review): some original lines are elided (i/d/tc declarations,
 * else branches, closing braces); code left byte-identical. */
6916 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* tables biased by +52 so negative qp+offset indexes stay valid */
6918 const int index_a = qp + h->slice_alpha_c0_offset;
6919 const int alpha = (alpha_table+52)[index_a];
6920 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6921 const int pix_next = stride;
/* bS < 4: per-segment tc0, -1 = segment not filtered; DSP does the rest */
6926 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6927 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6929 /* 16px edge length, see filter_mb_edgev */
6930 for( d = 0; d < 16; d++ ) {
6931 const int p0 = pix[-1*pix_next];
6932 const int p1 = pix[-2*pix_next];
6933 const int p2 = pix[-3*pix_next];
6934 const int q0 = pix[0];
6935 const int q1 = pix[1*pix_next];
6936 const int q2 = pix[2*pix_next];
6938 if( FFABS( p0 - q0 ) < alpha &&
6939 FFABS( p1 - p0 ) < beta &&
6940 FFABS( q1 - q0 ) < beta ) {
6942 const int p3 = pix[-4*pix_next];
6943 const int q3 = pix[ 3*pix_next];
/* strong filter: wide smoothing when both thresholds pass */
6945 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6946 if( FFABS( p2 - p0 ) < beta) {
6948 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6949 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6950 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6953 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6955 if( FFABS( q2 - q0 ) < beta) {
6957 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6958 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6959 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6962 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* fallback: only p0/q0 are touched */
6966 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6967 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6969 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge; work is delegated to the DSP
 * vertical chroma loop-filter routines (normal vs intra/strong).
 * NOTE(review): some original lines are elided (tc/loop declarations,
 * branch/closing braces); code left byte-identical. */
6976 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
/* tables biased by +52 so negative qp+offset indexes stay valid */
6978 const int index_a = qp + h->slice_alpha_c0_offset;
6979 const int alpha = (alpha_table+52)[index_a];
6980 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* chroma convention: tc0 + 1, with 0 meaning "don't filter" */
6985 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6986 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6988 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock. Falls back to the full
 * filter_mb() at picture borders or when the optimized boundary-strength
 * DSP routine is unavailable; otherwise computes all boundary strengths
 * up-front (fixed 4/3 for intra, dsp.h264_loop_filter_strength for
 * inter) and applies the edge filters. Not valid for MBAFF (asserted).
 * NOTE(review): this chunk elides several original lines (returns, else
 * branches, the FILTER() invocations and the function tail past the
 * IS_8x8DCT else-if); code left byte-identical, only comments added. */
6992 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6993 MpegEncContext * const s = &h->s;
6995 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* border MBs / missing DSP routine: take the general slow path */
6997 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
6998 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
7001 assert(!FRAME_MBAFF);
7003 mb_xy = mb_x + mb_y*s->mb_stride;
7004 mb_type = s->current_picture.mb_type[mb_xy];
/* gather luma/chroma QPs of this MB and its left/top neighbours, then
 * average across each edge as the filter expects */
7005 qp = s->current_picture.qscale_table[mb_xy];
7006 qp0 = s->current_picture.qscale_table[mb_xy-1];
7007 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
7008 qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
7009 qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
7010 qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
7011 qp0 = (qp + qp0 + 1) >> 1;
7012 qp1 = (qp + qp1 + 1) >> 1;
7013 qpc0 = (qpc + qpc0 + 1) >> 1;
7014 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold the filter is a no-op, so skip it entirely */
7015 qp_thresh = 15 - h->slice_alpha_c0_offset;
7016 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
7017 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: boundary strengths are constant (4 on MB edges, 3 inside),
 * 8x8 transform skips the interior 4x4 edges */
7020 if( IS_INTRA(mb_type) ) {
7021 int16_t bS4[4] = {4,4,4,4};
7022 int16_t bS3[4] = {3,3,3,3};
7023 if( IS_8x8DCT(mb_type) ) {
7024 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7025 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7026 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7027 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7029 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
7030 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
7031 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
7032 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
7033 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
7034 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
7035 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
7036 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only MB edge and middle edge exist (8x8 plane) */
7038 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
7039 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
7040 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
7041 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
7042 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7043 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
7044 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
7045 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS[dir][edge][segment]; bSv views each 4x int16_t
 * group as one uint64_t for cheap whole-edge tests/stores */
7048 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
7049 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
7051 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
7053 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*/ /* tell the DSP routine which edges can skip the
 * motion-vector comparison (large partitions have no mv change) */
7055 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
7056 (mb_type & MB_TYPE_16x8) ? 1 : 0;
7057 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
7058 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
7060 int step = IS_8x8DCT(mb_type) ? 2 : 1;
7061 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
7062 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
7063 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force bS=4 on the shared MB edge */
7065 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
7066 bSv[0][0] = 0x0004000400040004ULL;
7067 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
7068 bSv[1][0] = 0x0004000400040004ULL;
/* apply one luma edge (and chroma on even edges) per invocation;
 * edge 0 uses the cross-MB averaged QP, inner edges the MB QP */
7070 #define FILTER(hv,dir,edge)\
7071 if(bSv[dir][edge]) {\
7072 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
7074 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7075 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
7081 } else if( IS_8x8DCT(mb_type) ) {
/* Full (slow-path) deblocking of one macroblock: handles MBAFF pairs,
 * field/frame mixes and slice-boundary rules that filter_mb_fast cannot.
 * Computes boundary strength bS per 4-sample segment for every vertical
 * (dir==0) and horizontal (dir==1) edge, then calls the edge filters.
 * NOTE(review): this chunk elides many original lines (declarations of
 * bS/qp/chroma_qp/dir/edge/i/j/l/v, several else branches and closing
 * braces); the code below is left byte-identical, only comments added. */
7100 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7101 MpegEncContext * const s = &h->s;
7102 const int mb_xy= mb_x + mb_y*s->mb_stride;
7103 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs compare vertical mv components against 2 instead of 4 */
7104 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7105 int first_vertical_edge_done = 0;
7107 /* FIXME: A given frame may occupy more than one position in
7108 * the reference list. So ref2frm should be populated with
7109 * frame numbers, not indices. */
/* +2 bias so ref indices -2/-1 (unused/PCM markers) index validly */
7110 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7111 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7113 //for sufficiently low qp, filtering wouldn't do anything
7114 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7116 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7117 int qp = s->current_picture.qscale_table[mb_xy];
7119 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7120 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7126 // left mb is in picture
7127 && h->slice_table[mb_xy-1] != 255
7128 // and current and left pair do not have the same interlaced type
7129 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7130 // and left mb is in the same slice if deblocking_filter == 2
7131 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7132 /* First vertical edge is different in MBAFF frames
7133 * There are 8 different bS to compute and 2 different Qp
7135 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7136 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7140 int mb_qp, mbn0_qp, mbn1_qp;
7142 first_vertical_edge_done = 1;
7144 if( IS_INTRA(mb_type) )
7145 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7147 for( i = 0; i < 8; i++ ) {
/* which of the two left-pair MBs faces this row depends on whether
 * the current MB is field- or frame-coded */
7148 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7150 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7152 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7153 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7154 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* two QP pairs: current MB averaged with each left-pair MB */
7161 mb_qp = s->current_picture.qscale_table[mb_xy];
7162 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7163 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7164 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7165 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7166 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7167 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7168 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7169 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7172 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7173 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7174 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7175 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7176 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7178 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7179 for( dir = 0; dir < 2; dir++ )
/* mbm = the neighbour across edge 0 (left for dir 0, top for dir 1) */
7182 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7183 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table==255 means the neighbour is outside the picture, so
 * start from the first interior edge */
7184 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7186 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7187 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7188 // how often to recheck mv-based bS when iterating between edges
7189 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7190 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7191 // how often to recheck mv-based bS when iterating along each edge
7192 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the MBAFF code above already handled edge 0 of dir 0 */
7194 if (first_vertical_edge_done) {
7196 first_vertical_edge_done = 0;
7199 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7202 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7203 && !IS_INTERLACED(mb_type)
7204 && IS_INTERLACED(mbm_type)
7206 // This is a special case in the norm where the filtering must
7207 // be done twice (one each of the field) even if we are in a
7208 // frame macroblock.
7210 static const int nnz_idx[4] = {4,5,6,3};
7211 unsigned int tmp_linesize = 2 * linesize;
7212 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7213 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter the top edge once against each field MB of the pair above */
7218 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7219 if( IS_INTRA(mb_type) ||
7220 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7221 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7223 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7224 for( i = 0; i < 4; i++ ) {
7225 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7226 mbn_nnz[nnz_idx[i]] != 0 )
7232 // Do not use s->qscale as luma quantizer because it has not the same
7233 // value in IPCM macroblocks.
7234 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7235 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7236 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7237 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7238 chroma_qp = ( h->chroma_qp +
7239 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7240 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7241 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular per-edge loop: edge 0 faces the neighbour MB, edges 1..3
 * are interior edges of the current MB */
7248 for( edge = start; edge < edges; edge++ ) {
7249 /* mbn_xy: neighbor macroblock */
7250 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7251 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* the 8x8 transform has no coefficients across odd 4x4 edges */
7255 if( (edge&1) && IS_8x8DCT(mb_type) )
7258 if( IS_INTRA(mb_type) ||
7259 IS_INTRA(mbn_type) ) {
7262 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7263 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7272 bS[0] = bS[1] = bS[2] = bS[3] = value;
7277 if( edge & mask_edge ) {
/* this edge lies inside a partition: no mv/ref discontinuity */
7278 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7281 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7282 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge shares one partition pair: one mv/ref comparison */
7285 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7286 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7287 int bn_idx= b_idx - (dir ? 8:1);
7289 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7290 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7291 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7292 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7294 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4-sample segment from nnz and mv/ref */
7300 for( i = 0; i < 4; i++ ) {
7301 int x = dir == 0 ? edge : i;
7302 int y = dir == 0 ? i : edge;
7303 int b_idx= 8 + 4 + x + 8*y;
7304 int bn_idx= b_idx - (dir ? 8:1);
7306 if( h->non_zero_count_cache[b_idx] != 0 ||
7307 h->non_zero_count_cache[bn_idx] != 0 ) {
7313 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7314 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7315 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7316 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7324 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7329 // Do not use s->qscale as luma quantizer because it has not the same
7330 // value in IPCM macroblocks.
7331 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7332 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7333 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7334 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7336 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* chroma plane is half-size: only even luma edges have a chroma edge */
7337 if( (edge&1) == 0 ) {
7338 int chroma_qp = ( h->chroma_qp +
7339 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7340 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7341 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7344 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7345 if( (edge&1) == 0 ) {
7346 int chroma_qp = ( h->chroma_qp +
7347 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7348 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7349 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7356 static int decode_slice(H264Context *h){
7357 MpegEncContext * const s = &h->s;
7358 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7362 if( h->pps.cabac ) {
7366 align_get_bits( &s->gb );
7369 ff_init_cabac_states( &h->cabac);
7370 ff_init_cabac_decoder( &h->cabac,
7371 s->gb.buffer + get_bits_count(&s->gb)/8,
7372 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7373 /* calculate pre-state */
7374 for( i= 0; i < 460; i++ ) {
7376 if( h->slice_type == I_TYPE )
7377 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7379 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7382 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7384 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7389 int ret = decode_mb_cabac(h);
7391 //STOP_TIMER("decode_mb_cabac")
7393 if(ret>=0) hl_decode_mb(h);
7395 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7398 if(ret>=0) ret = decode_mb_cabac(h);
7400 if(ret>=0) hl_decode_mb(h);
7403 eos = get_cabac_terminate( &h->cabac );
7405 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
7406 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7407 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7411 if( ++s->mb_x >= s->mb_width ) {
7413 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7420 if( eos || s->mb_y >= s->mb_height ) {
7421 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7422 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7429 int ret = decode_mb_cavlc(h);
7431 if(ret>=0) hl_decode_mb(h);
7433 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7435 ret = decode_mb_cavlc(h);
7437 if(ret>=0) hl_decode_mb(h);
7442 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7443 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7448 if(++s->mb_x >= s->mb_width){
7450 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7455 if(s->mb_y >= s->mb_height){
7456 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7458 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7459 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7463 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7470 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7471 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7472 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7473 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7477 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7486 for(;s->mb_y < s->mb_height; s->mb_y++){
7487 for(;s->mb_x < s->mb_width; s->mb_x++){
7488 int ret= decode_mb(h);
7493 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7494 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7499 if(++s->mb_x >= s->mb_width){
7501 if(++s->mb_y >= s->mb_height){
7502 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7503 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7507 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7514 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7515 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7516 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7520 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7527 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7530 return -1; //not reached
/**
 * Parses an SEI "unregistered user data" payload.
 * Recognizes the x264 encoder version string and stores the build number in
 * h->x264_build (used elsewhere for encoder-specific bug workarounds).
 * @param size payload size in bytes; bytes beyond the local buffer are skipped
 */
7533 static int decode_unregistered_user_data(H264Context *h, int size){
7534 MpegEncContext * const s = &h->s;
7535 uint8_t user_data[16+256];
/* read at most sizeof(user_data)-1 bytes so the buffer can stay NUL-terminated */
7541 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7542 user_data[i]= get_bits(&s->gb, 8);
/* first 16 bytes are the UUID; the printable payload starts at offset 16 */
7546 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7547 if(e==1 && build>=0)
7548 h->x264_build= build;
7550 if(s->avctx->debug & FF_DEBUG_BUGS)
7551 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any remaining payload bytes that did not fit in user_data */
7554 skip_bits(&s->gb, 8);
/**
 * Decodes SEI messages until the bitstream is nearly exhausted.
 * Payload type and size are each coded as a run of 0xFF bytes plus one
 * final non-0xFF byte (values are summed per the H.264 SEI syntax).
 */
7559 static int decode_sei(H264Context *h){
7560 MpegEncContext * const s = &h->s;
/* loop while at least one more (type, size) pair could be present */
7562 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payloadType: each 0xFF byte adds 255, last byte terminates */
7567 type+= show_bits(&s->gb, 8);
7568 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payloadSize with the same encoding */
7572 size+= show_bits(&s->gb, 8);
7573 }while(get_bits(&s->gb, 8) == 255);
7577 if(decode_unregistered_user_data(h, size) < 0)
/* unrecognized payload types are skipped in full */
7581 skip_bits(&s->gb, 8*size);
7584 //FIXME check bits here
7585 align_get_bits(&s->gb);
7591 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7592 MpegEncContext * const s = &h->s;
7594 cpb_count = get_ue_golomb(&s->gb) + 1;
7595 get_bits(&s->gb, 4); /* bit_rate_scale */
7596 get_bits(&s->gb, 4); /* cpb_size_scale */
7597 for(i=0; i<cpb_count; i++){
7598 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7599 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7600 get_bits1(&s->gb); /* cbr_flag */
7602 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7603 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7604 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7605 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Decodes the VUI (Video Usability Information) appendix of an SPS.
 * Stores sample aspect ratio, timing info and the bitstream-restriction
 * reorder depth into *sps; most other fields are parsed and discarded.
 * @return 0 on success, negative on an illegal aspect ratio or reorder count
 */
7608 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7609 MpegEncContext * const s = &h->s;
7610 int aspect_ratio_info_present_flag;
7611 unsigned int aspect_ratio_idc;
7612 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7614 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7616 if( aspect_ratio_info_present_flag ) {
7617 aspect_ratio_idc= get_bits(&s->gb, 8);
/* idc 255 (EXTENDED_SAR) carries an explicit num/den pair */
7618 if( aspect_ratio_idc == EXTENDED_SAR ) {
7619 sps->sar.num= get_bits(&s->gb, 16);
7620 sps->sar.den= get_bits(&s->gb, 16);
/* idc 1..13 index the predefined pixel_aspect[] table */
7621 }else if(aspect_ratio_idc < 14){
7622 sps->sar= pixel_aspect[aspect_ratio_idc];
7624 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7631 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7633 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7634 get_bits1(&s->gb); /* overscan_appropriate_flag */
7637 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7638 get_bits(&s->gb, 3); /* video_format */
7639 get_bits1(&s->gb); /* video_full_range_flag */
7640 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7641 get_bits(&s->gb, 8); /* colour_primaries */
7642 get_bits(&s->gb, 8); /* transfer_characteristics */
7643 get_bits(&s->gb, 8); /* matrix_coefficients */
7647 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7648 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7649 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7652 sps->timing_info_present_flag = get_bits1(&s->gb);
7653 if(sps->timing_info_present_flag){
7654 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7655 sps->time_scale = get_bits_long(&s->gb, 32);
7656 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice (NAL and VCL variants); both are skipped */
7659 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7660 if(nal_hrd_parameters_present_flag)
7661 decode_hrd_parameters(h, sps);
7662 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7663 if(vcl_hrd_parameters_present_flag)
7664 decode_hrd_parameters(h, sps);
7665 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7666 get_bits1(&s->gb); /* low_delay_hrd_flag */
7667 get_bits1(&s->gb); /* pic_struct_present_flag */
7669 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7670 if(sps->bitstream_restriction_flag){
7671 unsigned int num_reorder_frames;
7672 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7673 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7674 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7675 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7676 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7677 num_reorder_frames= get_ue_golomb(&s->gb);
7678 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* cap reorder depth at 16: larger values indicate a broken stream */
7680 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7681 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7685 sps->num_reorder_frames= num_reorder_frames;
/**
 * Decodes one quantization scaling list (4x4 when size==16, else 8x8).
 * If the list is absent from the bitstream, fallback_list is copied; an
 * initial delta producing 0 selects the JVT default list (jvt_list) instead.
 * @param factors output array of `size` weights, written via the zigzag scan
 */
7691 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7692 const uint8_t *jvt_list, const uint8_t *fallback_list){
7693 MpegEncContext * const s = &h->s;
7694 int i, last = 8, next = 8;
/* scan order differs between 4x4 and 8x8 matrices */
7695 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7696 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7697 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7699 for(i=0;i<size;i++){
/* delta-coded update relative to the previous entry, wrapped to 8 bits */
7701 next = (last + get_se_golomb(&s->gb)) & 0xff;
7702 if(!i && !next){ /* matrix not written, we use the preset one */
7703 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the previous value" for remaining entries */
7706 last = factors[scan[i]] = next ? next : last;
/**
 * Decodes the full set of scaling matrices (six 4x4, two 8x8) for an SPS or
 * PPS. Each list falls back either to the SPS matrices (when parsing a PPS
 * and the SPS carried matrices) or to the JVT defaults, per the spec's
 * inter-list prediction chain (each chroma list predicts from the previous).
 */
7710 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7711 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7712 MpegEncContext * const s = &h->s;
/* PPS-level lists may inherit from SPS-level lists if those were present */
7713 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7714 const uint8_t *fallback[4] = {
7715 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7716 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7717 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7718 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq_scaling_matrix_present_flag / pic_scaling_matrix_present_flag */
7720 if(get_bits1(&s->gb)){
7721 sps->scaling_matrix_present |= is_sps;
7722 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7723 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7724 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7725 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7726 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7727 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists only exist for SPS or when the PPS enables 8x8 transforms */
7728 if(is_sps || pps->transform_8x8_mode){
7729 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7730 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7732 } else if(fallback_sps) {
7733 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7734 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/**
 * Decodes a Sequence Parameter Set NAL unit into h->sps_buffer[sps_id].
 * Validates sps_id, picture dimensions, POC configuration and reference
 * frame count; optionally parses High-profile extensions and VUI.
 * @return 0 on success, negative on an invalid/overflowing field
 */
7738 static inline int decode_seq_parameter_set(H264Context *h){
7739 MpegEncContext * const s = &h->s;
7740 int profile_idc, level_idc;
7741 unsigned int sps_id, tmp, mb_width, mb_height;
7745 profile_idc= get_bits(&s->gb, 8);
7746 get_bits1(&s->gb); //constraint_set0_flag
7747 get_bits1(&s->gb); //constraint_set1_flag
7748 get_bits1(&s->gb); //constraint_set2_flag
7749 get_bits1(&s->gb); //constraint_set3_flag
7750 get_bits(&s->gb, 4); // reserved
7751 level_idc= get_bits(&s->gb, 8);
7752 sps_id= get_ue_golomb(&s->gb);
7754 if (sps_id >= MAX_SPS_COUNT){
7755 // ok it has gone out of hand, someone is sending us bad stuff.
7756 av_log(h->s.avctx, AV_LOG_ERROR, "illegal sps_id (%d)\n", sps_id);
7760 sps= &h->sps_buffer[ sps_id ];
7761 sps->profile_idc= profile_idc;
7762 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth and scaling matrices */
7764 if(sps->profile_idc >= 100){ //high profile
7765 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7766 get_bits1(&s->gb); //residual_color_transform_flag
7767 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7768 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7769 sps->transform_bypass = get_bits1(&s->gb);
7770 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7772 sps->scaling_matrix_present = 0;
7774 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7775 sps->poc_type= get_ue_golomb(&s->gb);
/* POC type 0: explicit LSBs; type 1: delta-based; type 2: from frame_num */
7777 if(sps->poc_type == 0){ //FIXME #define
7778 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7779 } else if(sps->poc_type == 1){//FIXME #define
7780 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7781 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7782 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7783 tmp= get_ue_golomb(&s->gb);
/* bound the cycle length by the fixed offset_for_ref_frame[] capacity */
7785 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7786 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7789 sps->poc_cycle_length= tmp;
7791 for(i=0; i<sps->poc_cycle_length; i++)
7792 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7793 }else if(sps->poc_type != 2){
7794 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7798 tmp= get_ue_golomb(&s->gb);
7799 if(tmp > MAX_PICTURE_COUNT-2){
7800 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7802 sps->ref_frame_count= tmp;
7803 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* dimensions are coded in macroblock units, minus one */
7804 mb_width= get_ue_golomb(&s->gb) + 1;
7805 mb_height= get_ue_golomb(&s->gb) + 1;
7806 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7807 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7808 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7811 sps->mb_width = mb_width;
7812 sps->mb_height= mb_height;
7814 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7815 if(!sps->frame_mbs_only_flag)
7816 sps->mb_aff= get_bits1(&s->gb);
7820 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
/* interlace support is a compile-time option (see ALLOW_INTERLACE) */
7822 #ifndef ALLOW_INTERLACE
7824 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7826 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7827 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7829 sps->crop= get_bits1(&s->gb);
7831 sps->crop_left = get_ue_golomb(&s->gb);
7832 sps->crop_right = get_ue_golomb(&s->gb);
7833 sps->crop_top = get_ue_golomb(&s->gb);
7834 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping shifts the picture origin, which is only partly handled */
7835 if(sps->crop_left || sps->crop_top){
7836 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7842 sps->crop_bottom= 0;
7845 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7846 if( sps->vui_parameters_present_flag )
7847 decode_vui_parameters(h, sps);
7849 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7850 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7851 sps_id, sps->profile_idc, sps->level_idc,
7853 sps->ref_frame_count,
7854 sps->mb_width, sps->mb_height,
7855 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7856 sps->direct_8x8_inference_flag ? "8B8" : "",
7857 sps->crop_left, sps->crop_right,
7858 sps->crop_top, sps->crop_bottom,
7859 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Decodes a Picture Parameter Set NAL unit into h->pps_buffer[pps_id].
 * Validates pps_id/sps_id, rejects FMO (slice groups > 1), clamps reference
 * counts, and parses the optional trailing High-profile extension
 * (transform_8x8_mode + PPS scaling matrices) when bits remain.
 * @param bit_length total payload length in bits, used to detect the extension
 * @return 0 on success, negative on an out-of-range id
 */
7865 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7866 MpegEncContext * const s = &h->s;
7867 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7870 if(pps_id>=MAX_PPS_COUNT){
7871 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
7874 pps = &h->pps_buffer[pps_id];
7876 tmp= get_ue_golomb(&s->gb);
7877 if(tmp>=MAX_SPS_COUNT){
7878 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7883 pps->cabac= get_bits1(&s->gb);
7884 pps->pic_order_present= get_bits1(&s->gb);
7885 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (flexible macroblock ordering) is not implemented; the spec syntax
 * for the slice-group map types is quoted below for reference */
7886 if(pps->slice_group_count > 1 ){
7887 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7888 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7889 switch(pps->mb_slice_group_map_type){
7892 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7893 |    run_length[ i ]                               |1  |ue(v) |
7898 |   for( i = 0; i < num_slice_groups_minus1; i++ ) |   |      |
7900 |    top_left_mb[ i ]                              |1  |ue(v) |
7901 |    bottom_right_mb[ i ]                          |1  |ue(v) |
7909 |   slice_group_change_direction_flag              |1  |u(1)  |
7910 |   slice_group_change_rate_minus1                 |1  |ue(v) |
7915 |   slice_group_id_cnt_minus1                      |1  |ue(v) |
7916 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
7918 |    slice_group_id[ i ]                           |1  |u(v)  |
7923 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7924 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned compare also catches the ref_count==0 underflow case */
7925 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7926 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7927 pps->ref_count[0]= pps->ref_count[1]= 1;
7931 pps->weighted_pred= get_bits1(&s->gb);
7932 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7933 pps->init_qp= get_se_golomb(&s->gb) + 26;
7934 pps->init_qs= get_se_golomb(&s->gb) + 26;
7935 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7936 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7937 pps->constrained_intra_pred= get_bits1(&s->gb);
7938 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7940 pps->transform_8x8_mode= 0;
7941 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default all scaling factors to flat 16 until/unless matrices are parsed */
7942 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7943 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* remaining bits indicate the optional High-profile PPS extension */
7945 if(get_bits_count(&s->gb) < bit_length){
7946 pps->transform_8x8_mode= get_bits1(&s->gb);
7947 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7948 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7951 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7952 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7953 pps_id, pps->sps_id,
7954 pps->cabac ? "CABAC" : "CAVLC",
7955 pps->slice_group_count,
7956 pps->ref_count[0], pps->ref_count[1],
7957 pps->weighted_pred ? "weighted" : "",
7958 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7959 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7960 pps->constrained_intra_pred ? "CONSTR" : "",
7961 pps->redundant_pic_cnt_present ? "REDU" : "",
7962 pps->transform_8x8_mode ? "8x8DCT" : ""
7970 * finds the end of the current frame in the bitstream.
7971 * @return the position of the first byte of the next frame, or -1
/**
 * Scans the buffer byte-by-byte for access-unit boundaries using a rolling
 * 32-bit start-code state. NAL types 1/2/5 (slices) start or end a frame
 * depending on the first_mb_in_slice bit; types 7/8/9 (SPS/PPS/AUD) end one.
 */
7973 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7976 ParseContext *pc = &(h->s.parse_context);
7977 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7978 // mb_addr= pc->mb_addr - 1;
7980 for(i=0; i<=buf_size; i++){
/* masked state matches start code 00 00 01 + nal_unit_type 1, 2 or 5 */
7981 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7982 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7983 if(pc->frame_start_found){
7984 // If there isn't one more byte in the buffer
7985 // the test on first_mb_in_slice cannot be done yet
7986 // do it at next call.
7987 if (i >= buf_size) break;
/* high bit set means ue(v) first_mb_in_slice == 0, i.e. a new frame */
7988 if (buf[i] & 0x80) {
7989 // first_mb_in_slice is 0, probably the first nal of a new
7991 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7993 pc->frame_start_found= 0;
7997 pc->frame_start_found = 1;
/* SPS/PPS/access-unit-delimiter terminate a pending frame */
7999 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
8000 if(pc->frame_start_found){
8002 pc->frame_start_found= 0;
8007 state= (state<<8) | buf[i];
8011 return END_NOT_FOUND;
8014 #ifdef CONFIG_H264_PARSER
/**
 * AVCodecParser callback: accumulates input until find_frame_end() locates a
 * complete frame, then returns that frame via *poutbuf.
 */
8015 static int h264_parse(AVCodecParserContext *s,
8016 AVCodecContext *avctx,
8017 uint8_t **poutbuf, int *poutbuf_size,
8018 const uint8_t *buf, int buf_size)
8020 H264Context *h = s->priv_data;
8021 ParseContext *pc = &h->s.parse_context;
8024 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame buffers partial frames across calls; <0 means "need more" */
8026 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
8032 *poutbuf = (uint8_t *)buf;
8033 *poutbuf_size = buf_size;
/**
 * AVCodecParser split callback: returns the offset where in-band extradata
 * (SPS/PPS headers) ends and frame data begins, or 0 if no SPS is found.
 */
8037 static int h264_split(AVCodecContext *avctx,
8038 const uint8_t *buf, int buf_size)
8041 uint32_t state = -1;
8044 for(i=0; i<=buf_size; i++){
/* 0x107 == start code + NAL type 7 (SPS) */
8045 if((state&0xFFFFFF1F) == 0x107)
8047 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
8049 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes preceding the start code */
8051 while(i>4 && buf[i-5]==0) i--;
8056 state= (state<<8) | buf[i];
8060 #endif /* CONFIG_H264_PARSER */
/**
 * Iterates over all NAL units in buf, handling both AVC (length-prefixed,
 * h->is_avc) and Annex-B (start-code) framing, unescapes each unit via
 * decode_nal() and dispatches on nal_unit_type (slices, DPA/DPB/DPC
 * partitions, SEI, SPS, PPS, ...). Updates POC/reference state afterwards.
 * @return number of bytes consumed, or negative on error
 */
8062 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
8063 MpegEncContext * const s = &h->s;
8064 AVCodecContext * const avctx= s->avctx;
8068 for(i=0; i<50; i++){
8069 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
8073 s->current_picture_ptr= NULL;
8082 if(buf_index >= buf_size) break;
/* AVC framing: big-endian length prefix of h->nal_length_size bytes */
8084 for(i = 0; i < h->nal_length_size; i++)
8085 nalsize = (nalsize << 8) | buf[buf_index++];
8086 if(nalsize <= 1 || nalsize > buf_size){
8091 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
8096 // start code prefix search
8097 for(; buf_index + 3 < buf_size; buf_index++){
8098 // this should allways succeed in the first iteration
8099 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
8103 if(buf_index+3 >= buf_size) break;
/* unescape the NAL payload (removes emulation-prevention bytes) */
8108 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
8109 if (ptr==NULL || dst_length <= 0){
/* strip trailing zero bytes before computing the RBSP bit length */
8112 while(ptr[dst_length - 1] == 0 && dst_length > 1)
8114 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
8116 if(s->avctx->debug&FF_DEBUG_STARTCODE){
8117 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
8120 if (h->is_avc && (nalsize != consumed))
8121 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
8123 buf_index += consumed;
/* skip non-reference NALs when the caller requested frame dropping */
8125 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
8126 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
8129 switch(h->nal_unit_type){
8131 idr(h); //FIXME ensure we don't loose some frames if there is reordering
8133 init_get_bits(&s->gb, ptr, bit_length);
8135 h->inter_gb_ptr= &s->gb;
8136 s->data_partitioning = 0;
8138 if(decode_slice_header(h) < 0){
8139 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8142 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* only decode the slice if it survives all skip_frame / hurry_up filters */
8143 if(h->redundant_pic_count==0 && s->hurry_up < 5
8144 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8145 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8146 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8147 && avctx->skip_frame < AVDISCARD_ALL)
/* data partitioning: DPA carries the header, DPB intra, DPC inter data */
8151 init_get_bits(&s->gb, ptr, bit_length);
8153 h->inter_gb_ptr= NULL;
8154 s->data_partitioning = 1;
8156 if(decode_slice_header(h) < 0){
8157 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8161 init_get_bits(&h->intra_gb, ptr, bit_length);
8162 h->intra_gb_ptr= &h->intra_gb;
8165 init_get_bits(&h->inter_gb, ptr, bit_length);
8166 h->inter_gb_ptr= &h->inter_gb;
8168 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8169 && s->context_initialized
8171 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8172 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8173 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8174 && avctx->skip_frame < AVDISCARD_ALL)
8178 init_get_bits(&s->gb, ptr, bit_length);
8182 init_get_bits(&s->gb, ptr, bit_length);
8183 decode_seq_parameter_set(h);
8185 if(s->flags& CODEC_FLAG_LOW_DELAY)
8188 if(avctx->has_b_frames < 2)
8189 avctx->has_b_frames= !s->low_delay;
8192 init_get_bits(&s->gb, ptr, bit_length);
8194 decode_picture_parameter_set(h, bit_length);
8198 case NAL_END_SEQUENCE:
8199 case NAL_END_STREAM:
8200 case NAL_FILLER_DATA:
8202 case NAL_AUXILIARY_SLICE:
8205 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8209 if(!s->current_picture_ptr) return buf_index; //no frame
8211 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8212 s->current_picture_ptr->pict_type= s->pict_type;
/* carry POC / frame_num state forward for the next picture's computation */
8214 h->prev_frame_num_offset= h->frame_num_offset;
8215 h->prev_frame_num= h->frame_num;
8216 if(s->current_picture_ptr->reference){
8217 h->prev_poc_msb= h->poc_msb;
8218 h->prev_poc_lsb= h->poc_lsb;
8220 if(s->current_picture_ptr->reference)
8221 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8231 * returns the number of bytes consumed for building the current frame
/**
 * Returns how many input bytes were consumed to build the current frame,
 * compensating for the parse-context buffering in truncated mode and
 * clamping the result to sane bounds.
 */
8233 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8234 if(s->flags&CODEC_FLAG_TRUNCATED){
/* subtract bytes still held in the parse context from a previous call */
8235 pos -= s->parse_context.last_index;
8236 if(pos<0) pos=0; // FIXME remove (unneeded?)
8240 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
8241 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback. Handles truncated-input reassembly,
 * one-time avcC extradata parsing (AVC/MP4 framing), decodes all NAL units
 * in buf, then reorders decoded pictures from decode order into display
 * order using the delayed_pic queue before returning one frame in *data.
 * @return bytes consumed, or negative on error
 */
8247 static int decode_frame(AVCodecContext *avctx,
8248 void *data, int *data_size,
8249 uint8_t *buf, int buf_size)
8251 H264Context *h = avctx->priv_data;
8252 MpegEncContext *s = &h->s;
8253 AVFrame *pict = data;
8256 s->flags= avctx->flags;
8257 s->flags2= avctx->flags2;
8259 /* no supplementary picture */
8260 if (buf_size == 0) {
8264 if(s->flags&CODEC_FLAG_TRUNCATED){
8265 int next= find_frame_end(h, buf, buf_size);
8267 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8269 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* parse the avcC extradata exactly once (SPS/PPS with 2-byte lengths) */
8272 if(h->is_avc && !h->got_avcC) {
8273 int i, cnt, nalsize;
8274 unsigned char *p = avctx->extradata;
8275 if(avctx->extradata_size < 7) {
8276 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8280 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8283 /* sps and pps in the avcC always have length coded with 2 bytes,
8284 so put a fake nal_length_size = 2 while parsing them */
8285 h->nal_length_size = 2;
8286 // Decode sps from avcC
8287 cnt = *(p+5) & 0x1f; // Number of sps
8289 for (i = 0; i < cnt; i++) {
8290 nalsize = AV_RB16(p) + 2;
8291 if(decode_nal_units(h, p, nalsize) < 0) {
8292 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8297 // Decode pps from avcC
8298 cnt = *(p++); // Number of pps
8299 for (i = 0; i < cnt; i++) {
8300 nalsize = AV_RB16(p) + 2;
8301 if(decode_nal_units(h, p, nalsize) != nalsize) {
8302 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8307 // Now store right nal length size, that will be use to parse all other nals
8308 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8309 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata; feed it once */
8313 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
8314 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8318 buf_index=decode_nal_units(h, buf, buf_size);
8322 //FIXME do something with unavailable reference frames
8324 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8325 if(!s->current_picture_ptr){
8326 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8331 Picture *out = s->current_picture_ptr;
8332 #if 0 //decode order
8333 *data_size = sizeof(AVFrame);
8335 /* Sort B-frames into display order */
8336 Picture *cur = s->current_picture_ptr;
8337 Picture *prev = h->delayed_output_pic;
8338 int i, pics, cross_idr, out_of_order, out_idx;
/* VUI bitstream restriction gives the exact reorder depth when present */
8340 if(h->sps.bitstream_restriction_flag
8341 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8342 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8347 while(h->delayed_pic[pics]) pics++;
8349 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
8351 h->delayed_pic[pics++] = cur;
8352 if(cur->reference == 0)
8356 for(i=0; h->delayed_pic[i]; i++)
8357 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the queued picture with the lowest POC before the next keyframe */
8360 out = h->delayed_pic[0];
8362 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8363 if(h->delayed_pic[i]->poc < out->poc){
8364 out = h->delayed_pic[i];
8368 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8369 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8371 else if(prev && pics <= s->avctx->has_b_frames)
8373 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8375 ((!cross_idr && prev && out->poc > prev->poc + 2)
8376 || cur->pict_type == B_TYPE)))
/* heuristically grow the reorder depth when out-of-order output is seen */
8379 s->avctx->has_b_frames++;
8382 else if(out_of_order)
8385 if(out_of_order || pics > s->avctx->has_b_frames){
8386 for(i=out_idx; h->delayed_pic[i]; i++)
8387 h->delayed_pic[i] = h->delayed_pic[i+1];
8393 *data_size = sizeof(AVFrame);
8394 if(prev && prev != out && prev->reference == 1)
8395 prev->reference = 0;
8396 h->delayed_output_pic = out;
8400 *pict= *(AVFrame*)out;
8402 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8405 assert(pict->data[0] || !*data_size);
8406 ff_print_debug_info(s, pict);
8407 //printf("out %d\n", (int)pict->data[0]);
8410 /* Return the Picture timestamp as the frame number */
8411 /* we substract 1 because it is added on utils.c */
8412 avctx->frame_number = s->picture_number - 1;
8414 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor is available only if it lies inside the picture
 * and belongs to the same slice (checked via the slice_table).
 * Index layout: 0=top-left, 1=top, 2=top-right, 3=left, 4/5=fixed.
 */
8417 static inline void fill_mb_avail(H264Context *h){
8418 MpegEncContext * const s = &h->s;
8419 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8422 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8423 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8424 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8430 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8431 h->mb_avail[4]= 1; //FIXME move out
8432 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): self-test code — this appears to be the body of a test
 * main() (likely guarded by #ifdef TEST) whose opening is not visible in
 * this chunk; confirm against the full file before restructuring.
 * It exercises exp-Golomb coding round-trips, the 4x4 (I)DCT, the
 * quantizer, and NAL escape/unescape (encode_nal/decode_nal). */
8438 #define SIZE (COUNT*40)
8444 //    int int_temp[10000];
8446 AVCodecContext avctx;
8448 dsputil_init(&dsp, &avctx);
8450 init_put_bits(&pb, temp, SIZE);
8451 printf("testing unsigned exp golomb\n");
8452 for(i=0; i<COUNT; i++){
8454 set_ue_golomb(&pb, i);
8455 STOP_TIMER("set_ue_golomb");
8457 flush_put_bits(&pb);
/* read back and verify every unsigned value round-trips */
8459 init_get_bits(&gb, temp, 8*SIZE);
8460 for(i=0; i<COUNT; i++){
8463 s= show_bits(&gb, 24);
8466 j= get_ue_golomb(&gb);
8468 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8471 STOP_TIMER("get_ue_golomb");
8475 init_put_bits(&pb, temp, SIZE);
8476 printf("testing signed exp golomb\n");
8477 for(i=0; i<COUNT; i++){
8479 set_se_golomb(&pb, i - COUNT/2);
8480 STOP_TIMER("set_se_golomb");
8482 flush_put_bits(&pb);
8484 init_get_bits(&gb, temp, 8*SIZE);
8485 for(i=0; i<COUNT; i++){
8488 s= show_bits(&gb, 24);
8491 j= get_se_golomb(&gb);
8492 if(j != i - COUNT/2){
8493 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8496 STOP_TIMER("get_se_golomb");
8499 printf("testing 4x4 (I)DCT\n");
8502 uint8_t src[16], ref[16];
8503 uint64_t error= 0, max_error=0;
8505 for(i=0; i<COUNT; i++){
8507 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8508 for(j=0; j<16; j++){
8509 ref[j]= random()%255;
8510 src[j]= random()%255;
8513 h264_diff_dct_c(block, src, ref, 4);
/* simulate quantization rounding before the inverse transform */
8516 for(j=0; j<16; j++){
8517 //            printf("%d ", block[j]);
8518 block[j]= block[j]*4;
8519 if(j&1) block[j]= (block[j]*4 + 2)/5;
8520 if(j&4) block[j]= (block[j]*4 + 2)/5;
8524 s->dsp.h264_idct_add(ref, block, 4);
8525 /*        for(j=0; j<16; j++){
8526 printf("%d ", ref[j]);
8530 for(j=0; j<16; j++){
8531 int diff= FFABS(src[j] - ref[j]);
8534 max_error= FFMAX(max_error, diff);
8537 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8539 printf("testing quantizer\n");
8540 for(qp=0; qp<52; qp++){
8542 src1_block[i]= src2_block[i]= random()%255;
8546 printf("Testing NAL layer\n");
8548 uint8_t bitstream[COUNT];
8549 uint8_t nal[COUNT*2];
8551 memset(&h, 0, sizeof(H264Context));
8553 for(i=0; i<COUNT; i++){
/* random nonzero payload, then punch in zero bytes to force escaping */
8561 for(j=0; j<COUNT; j++){
8562 bitstream[j]= (random() % 255) + 1;
8565 for(j=0; j<zeros; j++){
8566 int pos= random() % COUNT;
8567 while(bitstream[pos] == 0){
8576 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8578 printf("encoding failed\n");
8582 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8586 if(out_length != COUNT){
8587 printf("incorrect length %d %d\n", out_length, COUNT);
8591 if(consumed != nal_length){
8592 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8596 if(memcmp(bitstream, out, COUNT)){
8597 printf("missmatch\n");
8602 printf("Testing RBSP\n");
/**
 * AVCodec close callback: releases the RBSP scratch buffer and per-context
 * tables before the generic MpegEncContext teardown.
 */
8610 static int decode_end(AVCodecContext *avctx)
8612 H264Context *h = avctx->priv_data;
8613 MpegEncContext *s = &h->s;
8615 av_freep(&h->rbsp_buffer);
8616 free_tables(h); //FIXME cleanup init stuff perhaps
8619 //    memset(h, 0, sizeof(H264Context));
/* AVCodec registration entry for the H.264 decoder (initializer continues
 * beyond this excerpt). */
8625 AVCodec h264_decoder = {
8629 sizeof(H264Context),
8634 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8638 #ifdef CONFIG_H264_PARSER
8639 AVCodecParser h264_parser = {
8641 sizeof(H264Context),