2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
/* Poison the mpegvideo.h field names that must not be used directly here:
 * h264 derives interlaced-dct and intra state per-MB from mb_type instead. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* scan positions of the luma/chroma DC coefficient blocks within a MB. */
#define LUMA_DC_BLOCK_INDEX 25
#define CHROMA_DC_BLOCK_INDEX 26

/* number of bits resolved per step of each CAVLC table lookup. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6

/* maximum number of SPS/PPS ids permitted by the bitstream syntax. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* upper bound of memory management control operations per slice header. */
#define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
/**
 * Picture parameter set
 */
typedef struct PPS{
    int sps_id;                 ///< id of the SPS this PPS refers to
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    int transform_8x8_mode;     ///< transform_8x8_mode_flag
    uint8_t scaling_matrix4[6][16]; ///< 4x4 dequant scaling lists (zigzag order)
    uint8_t scaling_matrix8[2][64]; ///< 8x8 dequant scaling lists (intra/inter Y)
}PPS;
/**
 * Memory management control operation opcode.
 */
typedef enum MMCOOpcode{
    MMCO_END=0,
    MMCO_SHORT2UNUSED,
    MMCO_LONG2UNUSED,
    MMCO_SHORT2LONG,
    MMCO_SET_MAX_LONG,
    MMCO_RESET,
    MMCO_LONG,
} MMCOOpcode;

/**
 * Memory management control operation.
 */
typedef struct MMCO{
    MMCOOpcode opcode;
    int short_frame_num;  ///< argument for MMCO_SHORT2UNUSED / MMCO_SHORT2LONG
    int long_index;       ///< argument for the long-term reference operations
} MMCO;
/* H264Context — per-decoder state of the H.264 / AVC codec.
 * NOTE(review): many fields of the original struct (and its closing brace)
 * are missing from this extract; comments annotate the surviving fields. */
typedef struct H264Context{
/* NAL unit type codes as used by the bitstream (subset visible here). */
#define NAL_IDR_SLICE 5
#define NAL_END_SEQUENCE 10
#define NAL_END_STREAM 11
#define NAL_FILLER_DATA 12
#define NAL_SPS_EXT 13
#define NAL_AUXILIARY_SLICE 19
    uint8_t *rbsp_buffer;    ///< buffer for the unescaped RBSP payload
    int rbsp_buffer_size;

    /* Used to parse AVC variant of h264 */
    int is_avc; ///< this flag is != 0 if codec is avc1
    int got_avcC; ///< flag used to parse avcC data only once
    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)

    int prev_mb_skipped; //FIXME remove (IMHO not used)

    /* intra prediction state */
    int chroma_pred_mode;
    int intra16x16_pred_mode;

    int8_t intra4x4_pred_mode_cache[5*8];  ///< scan8-addressed cache incl. neighbours
    int8_t (*intra4x4_pred_mode)[8];       ///< per-MB stored modes
    void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
    void (*pred8x8 [4+3])(uint8_t *src, int stride);
    void (*pred16x16[4+3])(uint8_t *src, int stride);
    /* bitmasks of available neighbouring samples for intra prediction */
    unsigned int topleft_samples_available;
    unsigned int top_samples_available;
    unsigned int topright_samples_available;
    unsigned int left_samples_available;
    uint8_t (*top_borders[2])[16+2*8];
    uint8_t left_border[2*(17+2*9)];

    /* non zero coeff count cache.
     * is 64 if not available. */
    uint8_t non_zero_count_cache[6*8] __align8;
    uint8_t (*non_zero_count)[16];

    /* Motion vector cache. */
    int16_t mv_cache[2][5*8][2] __align8;
    int8_t ref_cache[2][5*8] __align8;
#define LIST_NOT_USED -1 //FIXME rename?
#define PART_NOT_AVAILABLE -2

    /* is 1 if the specific list MV&references are set to 0,0,-2. */
    int mv_cache_clean[2];

    /* number of neighbors (top and/or left) that used 8x8 dct */
    int neighbor_transform_size;

    /* block_offset[ 0..23] for frame macroblocks
     * block_offset[24..47] for field macroblocks */
    int block_offset[2*(16+8)];

    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
    int b_stride; //FIXME use s->b4_stride

    int unknown_svq3_flag;
    int next_slice_index;

    /* parameter set storage; "current" copies below are the active ones */
    SPS sps_buffer[MAX_SPS_COUNT];
    SPS sps; ///< current sps

    PPS pps_buffer[MAX_PPS_COUNT];
    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?

    /* dequantization tables, one set per qp and scaling list */
    uint32_t dequant4_buffer[6][52][16];
    uint32_t dequant8_buffer[2][52][64];
    uint32_t (*dequant4_coeff[6])[16];
    uint32_t (*dequant8_coeff[2])[64];
    int dequant_coeff_pps; ///< reinit tables when pps changes

    uint8_t *slice_table_base;
    uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
    int slice_type_fixed;

    //interlacing specific flags
    int mb_field_decoding_flag;

    /* picture order count (POC) state */
    int delta_poc_bottom;
    int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
    int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
    int frame_num_offset; ///< for POC type 2
    int prev_frame_num_offset; ///< for POC type 2
    int prev_frame_num; ///< frame_num of the last pic for POC type 1/2

    /* frame_num for frames or 2*frame_num for field pics. */
    /* max_frame_num or 2*max_frame_num for field pics. */

    //Weighted pred stuff
    int use_weight_chroma;
    int luma_log2_weight_denom;
    int chroma_log2_weight_denom;
    int luma_weight[2][16];
    int luma_offset[2][16];
    int chroma_weight[2][16][2];
    int chroma_offset[2][16][2];
    int implicit_weight[16][16];

    /* deblocking filter slice-header parameters */
    int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
    int slice_alpha_c0_offset;
    int slice_beta_offset;

    int redundant_pic_count;

    /* B-frame direct mode state */
    int direct_spatial_mv_pred;
    int dist_scale_factor[16];
    int map_col_to_list0[2][16];

    /* num_ref_idx_l0/1_active_minus1 + 1 */
    int ref_count[2];// FIXME split for AFF
    Picture *short_ref[32];
    Picture *long_ref[32];
    Picture default_ref_list[2][32];
    Picture ref_list[2][32]; //FIXME size?
    Picture field_ref_list[2][32]; //FIXME size?
    Picture *delayed_pic[16]; //FIXME size?
    Picture *delayed_output_pic;

    /* memory management control operations buffer. */
    MMCO mmco[MAX_MMCO_COUNT];

    int long_ref_count; ///< number of actual long term references
    int short_ref_count; ///< number of actual short term references

    /* separate bit readers for data-partitioned slices */
    GetBitContext intra_gb;
    GetBitContext inter_gb;
    GetBitContext *intra_gb_ptr;
    GetBitContext *inter_gb_ptr;

    DCTELEM mb[16*24] __align8;  ///< residual coefficients of the current MB

    /* CABAC state */
    uint8_t cabac_state[460];

    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */

    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
    uint8_t *chroma_pred_mode_table;
    int last_qscale_diff;
    int16_t (*mvd_table[2])[2];           ///< per-MB motion vector differences (CABAC)
    int16_t mvd_cache[2][5*8][2] __align8;
    uint8_t *direct_table;                ///< per-8x8-block direct-mode flags
    uint8_t direct_cache[5*8];

    /* scan orders, possibly permuted when 'transform bypass' style qp==0 is used */
    uint8_t zigzag_scan[16];
    uint8_t field_scan[16];
    const uint8_t *zigzag_scan_q0;
    const uint8_t *field_scan_q0;
/* CAVLC tables (coeff_token, total_zeros, run_before), built once at init. */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];

/* forward declarations; the SVQ3 decoder shares these helpers. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16 bit MV components into one 32 bit word so a whole motion
 * vector can be moved with a single load/store.  The component order
 * depends on endianness so the packed word maps onto int16_t[2] storage.
 * NOTE: the extracted copy was missing the #else/#endif of this
 * conditional, leaving two unconditional returns — restored here.
 */
static inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
#else
    return (a&0xFFFF) + (b<<16);
#endif
}
390 * @param h height of the rectangle, should be a constant
391 * @param w width of the rectangle, should be a constant
392 * @param size the size of val (1 or 4), should be a constant
394 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
395 uint8_t *p= (uint8_t*)vp;
396 assert(size==1 || size==4);
401 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
402 assert((stride&(w-1))==0);
403 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
406 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
407 }else if(w==2 && h==4){
408 *(uint16_t*)(p + 0*stride)=
409 *(uint16_t*)(p + 1*stride)=
410 *(uint16_t*)(p + 2*stride)=
411 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
412 }else if(w==4 && h==1){
413 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
414 }else if(w==4 && h==2){
415 *(uint32_t*)(p + 0*stride)=
416 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
417 }else if(w==4 && h==4){
418 *(uint32_t*)(p + 0*stride)=
419 *(uint32_t*)(p + 1*stride)=
420 *(uint32_t*)(p + 2*stride)=
421 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
422 }else if(w==8 && h==1){
424 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
425 }else if(w==8 && h==2){
426 *(uint32_t*)(p + 0 + 0*stride)=
427 *(uint32_t*)(p + 4 + 0*stride)=
428 *(uint32_t*)(p + 0 + 1*stride)=
429 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
430 }else if(w==8 && h==4){
431 *(uint64_t*)(p + 0*stride)=
432 *(uint64_t*)(p + 1*stride)=
433 *(uint64_t*)(p + 2*stride)=
434 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 }else if(w==16 && h==2){
436 *(uint64_t*)(p + 0+0*stride)=
437 *(uint64_t*)(p + 8+0*stride)=
438 *(uint64_t*)(p + 0+1*stride)=
439 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
440 }else if(w==16 && h==4){
441 *(uint64_t*)(p + 0+0*stride)=
442 *(uint64_t*)(p + 8+0*stride)=
443 *(uint64_t*)(p + 0+1*stride)=
444 *(uint64_t*)(p + 8+1*stride)=
445 *(uint64_t*)(p + 0+2*stride)=
446 *(uint64_t*)(p + 8+2*stride)=
447 *(uint64_t*)(p + 0+3*stride)=
448 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Fills the per-macroblock neighbour caches (intra prediction modes,
 * non-zero coefficient counts, CBP, motion vectors, reference indices,
 * mvd and direct-mode flags) from the already decoded neighbour MBs.
 * @param mb_type type of the current macroblock
 * @param for_deblock nonzero when filling for the deblocking filter, which
 *        may use neighbours from other slices
 * NOTE(review): this extracted copy is missing many lines of the original
 * body; several conditionals below are visibly incomplete — annotated
 * where noticed, restored nowhere (code left byte-identical).
 */
static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];

    //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
    // the actual condition is whether we're on the edge of a slice,
    // and even then the intra and nnz parts are unnecessary.
    if(for_deblock && h->slice_num == 1)
    /* NOTE(review): the body of the if above (presumably a 'return;') was
     * lost in extraction */

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it

    /* raster-order addresses of the neighbouring macroblocks */
    top_xy = mb_xy - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

        /* MBAFF: remap neighbour addresses to the right field/frame MB of
         * each vertical pair (pair address = x + (y&~1)*stride) */
        const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy = pair_xy - s->mb_stride;
        const int topleft_pair_xy = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        /* NOTE(review): the 'if (bottom' heads of the next three
         * conditionals were lost in extraction; only the ternary arms and
         * the adjustments survive */
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            top_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
            topleft_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
            topright_xy -= s->mb_stride;
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                left_xy[1] += s->mb_stride;

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

        /* deblocking: a neighbour counts as present if its slice_table
         * entry is valid (< 255), i.e. other slices' MBs count too */
        topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
        topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

        /* prediction: only same-slice neighbours are available */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* sample-availability bitmasks for intra prediction; bits are
         * cleared below for each unavailable neighbour */
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            /* import the neighbours' 4x4 intra modes into the scan8 cache;
             * stored index 3 really holds the last top-row mode */
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    /* non-zero coefficient counts of the top neighbour (luma + chroma) */
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
        /* neighbour missing: 0 for CABAC inter (counts as empty), else 64
         * which means "not available" */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* same import for the two left neighbour halves */
    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

        /* coded block patterns of the top/left neighbours */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

//FIXME direct mb can skip much of this
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<1+(h->slice_type==B_TYPE); list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/ /* NOTE(review): comment terminator restored; it was lost in extraction */
            h->mv_cache_clean[list]= 0;

            /* MVs and reference indices of the top neighbour row */
            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

            //FIXME unify cleanup or sth
            /* MVs / refs of the upper half of the left neighbour */
            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* MVs / refs of the lower half of the left neighbour */
            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                assert((!left_type[0]) == (!left_type[1]));

            /* topleft/topright are only needed for spatial direct / deblock */
            if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))

            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* mark the cache slots that have no real neighbour */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* XXX beurk, Load mvd */
            /* CABAC motion vector difference caches, same layout as above */
            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;

            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;

            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;

            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;

            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            /* direct-mode flags for B slices */
            if(h->slice_type == B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                if(IS_DIRECT(left_type[0])){
                    h->direct_cache[scan8[0] - 1 + 0*8]=
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                }else if(IS_8X8(left_type[0])){
                    int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
                    h->direct_cache[scan8[0] - 1 + 0*8]=
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

    /* how many of the top/left neighbours used the 8x8 transform */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
    /* NOTE(review): the function's closing brace was lost in extraction */
875 static inline void write_back_intra_pred_mode(H264Context *h){
876 MpegEncContext * const s = &h->s;
877 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
879 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
880 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
881 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
882 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
883 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
884 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
885 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
889 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
891 static inline int check_intra4x4_pred_mode(H264Context *h){
892 MpegEncContext * const s = &h->s;
893 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
894 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
897 if(!(h->top_samples_available&0x8000)){
899 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
904 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
909 if(!(h->left_samples_available&0x8000)){
911 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
913 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
916 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
922 } //FIXME cleanup like next
925 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
927 static inline int check_intra_pred_mode(H264Context *h, int mode){
928 MpegEncContext * const s = &h->s;
929 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
930 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
932 if(mode < 0 || mode > 6) {
933 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
937 if(!(h->top_samples_available&0x8000)){
940 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 if(!(h->left_samples_available&0x8000)){
948 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
957 * gets the predicted intra4x4 prediction mode.
959 static inline int pred_intra_mode(H264Context *h, int n){
960 const int index8= scan8[n];
961 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
962 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
963 const int min= FFMIN(left, top);
965 tprintf("mode:%d %d min:%d\n", left ,top, min);
967 if(min<0) return DC_PRED;
971 static inline void write_back_non_zero_count(H264Context *h){
972 MpegEncContext * const s = &h->s;
973 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
975 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
976 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
977 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
978 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
979 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
980 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
981 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
983 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
984 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
985 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
987 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
988 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
989 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
993 * gets the predicted number of non zero coefficients.
994 * @param n block index
996 static inline int pred_non_zero_count(H264Context *h, int n){
997 const int index8= scan8[n];
998 const int left= h->non_zero_count_cache[index8 - 1];
999 const int top = h->non_zero_count_cache[index8 - 8];
1002 if(i<64) i= (i+1)>>1;
1004 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
1009 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1010 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1012 if(topright_ref != PART_NOT_AVAILABLE){
1013 *C= h->mv_cache[list][ i - 8 + part_width ];
1014 return topright_ref;
1016 tprintf("topright MV not available\n");
1018 *C= h->mv_cache[list][ i - 8 - 1 ];
1019 return h->ref_cache[list][ i - 8 - 1 ];
1024 * gets the predicted MV.
1025 * @param n the block index
1026 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1027 * @param mx the x component of the predicted motion vector
1028 * @param my the y component of the predicted motion vector
/**
 * Median motion-vector prediction for block n (H.264 spec 8.4.1.3).
 * @param n the block index
 * @param part_width partition width in 4x4 block units (1, 2 or 4)
 * @param mx/my receive the predicted motion vector components
 * NOTE(review): several source lines appear elided in this extract
 * (single-match branches and left-only fallback are incomplete).
 */
1030 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1031 const int index8= scan8[n];
1032 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1033 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1034 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1035 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1037 int diagonal_ref, match_count;
1039 assert(part_width==1 || part_width==2 || part_width==4);
1049 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
1050 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1051 tprintf("pred_motion match_count=%d\n", match_count);
1052 if(match_count > 1){ //most common
1053 *mx= mid_pred(A[0], B[0], C[0]);
1054 *my= mid_pred(A[1], B[1], C[1]);
1055 }else if(match_count==1){
/* exactly one neighbour matches: take its MV directly (branches elided) */
1059 }else if(top_ref==ref){
/* no match: left-only fallback when top and diagonal are unavailable,
 * otherwise the full median */
1067 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1071 *mx= mid_pred(A[0], B[0], C[0]);
1072 *my= mid_pred(A[1], B[1], C[1]);
1076 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1080 * gets the directionally predicted 16x8 MV.
1081 * @param n the block index
1082 * @param mx the x component of the predicted motion vector
1083 * @param my the y component of the predicted motion vector
/**
 * Directional 16x8 MV prediction: prefer the single neighbour in the
 * partition's direction (top for the upper half, left for the lower),
 * falling back to the general median prediction.
 * NOTE(review): the bodies of the matching branches are elided here.
 */
1086 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1087 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1088 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1090 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* lower 16x8 half: use the left neighbour of row 8 */
1098 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1099 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1101 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1103 if(left_ref == ref){
/* fallback: standard median prediction over a width-4 partition */
1111 pred_motion(h, n, 4, list, ref, mx, my);
1115 * gets the directionally predicted 8x16 MV.
1116 * @param n the block index
1117 * @param mx the x component of the predicted motion vector
1118 * @param my the y component of the predicted motion vector
/**
 * Directional 8x16 MV prediction: prefer the left neighbour for the
 * left half and the diagonal (top-right) neighbour for the right half,
 * falling back to the general median prediction.
 * NOTE(review): the bodies of the matching branches are elided here.
 */
1120 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1122 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1123 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1125 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1127 if(left_ref == ref){
/* right 8x16 half: use the diagonal candidate of block 4 */
1136 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1138 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1140 if(diagonal_ref == ref){
/* fallback: standard median prediction over a width-2 partition */
1148 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P_SKIP macroblocks: the MV is zero when either
 * neighbour is unavailable or is a zero-MV reference-0 block,
 * otherwise the ordinary median prediction with ref 0 is used.
 */
1151 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1152 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1153 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1155 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1157 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1158 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1159 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* otherwise: regular median prediction, list 0, ref 0 */
1165 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct-mode distance scale factors
 * (H.264 spec 8.4.1.2.3) for every list-0 reference, from the POC
 * distances between the current picture and the two reference pictures.
 */
1170 static inline void direct_dist_scale_factor(H264Context * const h){
1171 const int poc = h->s.current_picture_ptr->poc;
1172 const int poc1 = h->ref_list[1][0].poc;
1174 for(i=0; i<h->ref_count[0]; i++){
1175 int poc0 = h->ref_list[0][i].poc;
1176 int td = clip(poc1 - poc0, -128, 127);
1177 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs: no scaling possible, use the neutral factor 256 */
1178 h->dist_scale_factor[i] = 256;
1180 int tb = clip(poc - poc0, -128, 127);
1181 int tx = (16384 + (ABS(td) >> 1)) / td;
1182 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1186 static inline void direct_ref_list_init(H264Context * const h){
1187 MpegEncContext * const s = &h->s;
1188 Picture * const ref1 = &h->ref_list[1][0];
1189 Picture * const cur = s->current_picture_ptr;
1191 if(cur->pict_type == I_TYPE)
1192 cur->ref_count[0] = 0;
1193 if(cur->pict_type != B_TYPE)
1194 cur->ref_count[1] = 0;
1195 for(list=0; list<2; list++){
1196 cur->ref_count[list] = h->ref_count[list];
1197 for(j=0; j<h->ref_count[list]; j++)
1198 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1200 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1202 for(list=0; list<2; list++){
1203 for(i=0; i<ref1->ref_count[list]; i++){
1204 const int poc = ref1->ref_poc[list][i];
1205 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1206 for(j=0; j<h->ref_count[list]; j++)
1207 if(h->ref_list[list][j].poc == poc){
1208 h->map_col_to_list0[list][i] = j;
/**
 * Fills mv_cache/ref_cache for a B-slice direct-mode macroblock,
 * using either spatial or temporal direct prediction depending on
 * h->direct_spatial_mv_pred (H.264 spec 8.4.1.2).
 * @param mb_type in/out: refined with the derived partition/type flags
 * NOTE(review): some source lines appear elided in this extract.
 */
1215 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1216 MpegEncContext * const s = &h->s;
1217 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1218 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1219 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1220 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* MVs and reference indices of the co-located macroblock */
1221 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1222 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1223 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1224 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1225 const int is_b8x8 = IS_8X8(*mb_type);
/* choose partition sizes matching the co-located macroblock */
1229 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1230 /* FIXME save sub mb types from previous frames (or derive from MVs)
1231 * so we know exactly what block size to use */
1232 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1233 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1234 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1235 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1236 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1238 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1239 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1242 *mb_type |= MB_TYPE_DIRECT2;
1244 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1246 if(h->direct_spatial_mv_pred){
1251 /* ref = min(neighbors) */
1252 for(list=0; list<2; list++){
1253 int refa = h->ref_cache[list][scan8[0] - 1];
1254 int refb = h->ref_cache[list][scan8[0] - 8];
1255 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1257 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1259 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1261 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour in either list: zero MV with ref 0 in both */
1267 if(ref[0] < 0 && ref[1] < 0){
1268 ref[0] = ref[1] = 0;
1269 mv[0][0] = mv[0][1] =
1270 mv[1][0] = mv[1][1] = 0;
1272 for(list=0; list<2; list++){
1274 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1276 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the type flags */
1281 *mb_type &= ~MB_TYPE_P0L1;
1282 sub_mb_type &= ~MB_TYPE_P0L1;
1283 }else if(ref[0] < 0){
1284 *mb_type &= ~MB_TYPE_P0L0;
1285 sub_mb_type &= ~MB_TYPE_P0L0;
1288 if(IS_16X16(*mb_type)){
1289 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1290 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* co-located block is a (near-)zero-MV ref-0 block: force zero MVs */
1291 if(!IS_INTRA(mb_type_col)
1292 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1293 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1294 && (h->x264_build>33 || !h->x264_build)))){
1296 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1298 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1302 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* per-8x8 handling: only direct sub-blocks are filled for B_8x8 MBs */
1308 for(i8=0; i8<4; i8++){
1309 const int x8 = i8&1;
1310 const int y8 = i8>>1;
1312 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1314 h->sub_mb_type[i8] = sub_mb_type;
1316 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1317 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1318 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1319 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1322 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1323 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1324 && (h->x264_build>33 || !h->x264_build)))){
1325 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1326 if(IS_SUB_8X8(sub_mb_type)){
1327 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1328 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1330 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1332 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1335 for(i4=0; i4<4; i4++){
1336 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1337 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1339 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1341 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1347 }else{ /* direct temporal mv pred */
1348 if(IS_16X16(*mb_type)){
/* list 1 always points at the co-located picture (ref 0) */
1349 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1350 if(IS_INTRA(mb_type_col)){
1351 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1352 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1353 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1355 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1356 : h->map_col_to_list0[1][l1ref1[0]];
1357 const int dist_scale_factor = h->dist_scale_factor[ref0];
1358 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* scale the co-located MV by POC distance; L1 MV is the difference */
1360 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1361 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1362 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1363 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1364 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1367 for(i8=0; i8<4; i8++){
1368 const int x8 = i8&1;
1369 const int y8 = i8>>1;
1370 int ref0, dist_scale_factor;
1371 const int16_t (*l1mv)[2]= l1mv0;
1373 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1375 h->sub_mb_type[i8] = sub_mb_type;
1376 if(IS_INTRA(mb_type_col)){
1377 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1378 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1379 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1380 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located reference into the current list 0 */
1384 ref0 = l1ref0[x8 + y8*h->b8_stride];
1386 ref0 = h->map_col_to_list0[0][ref0];
1388 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1391 dist_scale_factor = h->dist_scale_factor[ref0];
1393 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1394 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1395 if(IS_SUB_8X8(sub_mb_type)){
1396 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1397 int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1398 int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1399 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1400 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1402 for(i4=0; i4<4; i4++){
1403 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1404 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1405 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1406 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1407 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1408 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock MV/ref caches back into the frame-wide
 * motion_val/ref_index arrays (and the CABAC mvd/direct tables).
 * NOTE(review): several loop-header lines appear elided in this extract.
 */
1415 static inline void write_back_motion(H264Context *h, int mb_type){
1416 MpegEncContext * const s = &h->s;
1417 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1418 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1421 for(list=0; list<2; list++){
/* list unused by this MB type: clear the stored values */
1423 if(!USES_LIST(mb_type, list)){
1424 if(1){ //FIXME skip or never read if mb_type doesn't use it
1426 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1427 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1429 if( h->pps.cabac ) {
1430 /* FIXME needed ? */
1432 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1433 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1437 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1438 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list used: copy MVs (and CABAC mv deltas) 8 bytes at a time */
1445 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1446 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1448 if( h->pps.cabac ) {
1450 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1451 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1455 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1456 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
/* record per-8x8 direct flags for CABAC context modelling in B slices */
1460 if(h->slice_type == B_TYPE && h->pps.cabac){
1461 if(IS_8X8(mb_type)){
1462 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1463 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1464 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1470 * Decodes a network abstraction layer unit.
1471 * @param consumed is the number of bytes used as input
1472 * @param length is the length of the array
1473 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1474 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a NAL unit: parses the header byte and removes 00 00 03
 * emulation-prevention escapes from the RBSP payload.
 * @param consumed receives the number of input bytes used
 * @param dst_length receives the number of decoded bytes
 * @return decoded bytes; may be src+1 when no escapes were present
 * NOTE(review): some source lines appear elided in this extract.
 */
1476 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1480 // src[0]&0x80; //forbidden bit
1481 h->nal_ref_idc= src[0]>>5;
1482 h->nal_unit_type= src[0]&0x1F;
1486 for(i=0; i<length; i++)
1487 printf("%2X ", src[i]);
/* scan for the first 00 00 0x (x<=3) sequence */
1489 for(i=0; i+1<length; i+=2){
1490 if(src[i]) continue;
1491 if(i>0 && src[i-1]==0) i--;
1492 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1494 /* startcode, so we must be past the end */
1501 if(i>=length-1){ //no escaped 0
1502 *dst_length= length;
1503 *consumed= length+1; //+1 for the header
1507 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1508 dst= h->rbsp_buffer;
1510 //printf("decoding esc\n");
1513 //remove escapes (very rare 1:2^22)
1514 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1515 if(src[si+2]==3){ //escape
1520 }else //next start code
1524 dst[di++]= src[si++];
1528 *consumed= si + 1;//+1 for the header
1529 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1535 * @param src the data which should be escaped
1536 * @param dst the target buffer, dst+1 == src is allowed as a special case
1537 * @param length the length of the src data
1538 * @param dst_length the length of the dst array
1539 * @returns length of escaped data in bytes or -1 if an error occured
/**
 * Encodes a NAL unit: writes the header byte and inserts 00 00 03
 * emulation-prevention escapes into the payload.
 * @param dst the target buffer; dst+1 == src is allowed as a special case
 * @return length of the escaped data in bytes, or -1 on error
 * NOTE(review): some source lines appear elided in this extract.
 */
1541 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1542 int i, escape_count, si, di;
1546 assert(dst_length>0);
1548 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1550 if(length==0) return 1;
/* first pass: count how many escapes will be needed */
1553 for(i=0; i<length; i+=2){
1554 if(src[i]) continue;
1555 if(i>0 && src[i-1]==0)
1557 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
/* no escapes: straight copy after the header byte */
1563 if(escape_count==0){
1565 memcpy(dst+1, src, length);
1569 if(length + escape_count + 1> dst_length)
1572 //this should be damn rare (hopefully)
1574 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1575 temp= h->rbsp_buffer;
1576 //printf("encoding esc\n");
/* second pass: copy, inserting the 0x03 escape byte after 00 00 */
1581 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1582 temp[di++]= 0; si++;
1583 temp[di++]= 0; si++;
1585 temp[di++]= src[si++];
1588 temp[di++]= src[si++];
1590 memcpy(dst+1, temp, length+escape_count);
1592 assert(di == length+escape_count);
1598 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
/**
 * Writes the RBSP trailing bits: a 1 bit followed by zero padding to the
 * next byte boundary (exactly the inverse of MPEG-4 padding).
 * NOTE(review): the line writing the leading 1 bit is elided in this extract.
 */
1600 static void encode_rbsp_trailing(PutBitContext *pb){
1603 length= (-put_bits_count(pb))&7;
1604 if(length) put_bits(pb, length, 0);
1609 * identifies the exact end of the bitstream
1610 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream from the RBSP trailing bits.
 * @return the length of the trailing, or 0 if damaged
 * NOTE(review): the body computing v is elided in this extract.
 */
1612 static int decode_rbsp_trailing(uint8_t *src){
1616 tprintf("rbsp trailing %X\n", v);
1626 * idct tranforms the 16 dc values and dequantize them.
1627 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform of the 16 luma DC values, with
 * dequantization merged into the final stage.
 * @param qp quantization parameter
 * NOTE(review): loop headers and half the butterfly stores are elided here.
 */
1629 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1632 int temp[16]; //FIXME check if this is a good idea
/* DC coefficients sit at these offsets inside the 16x16 block layout */
1633 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1634 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1636 //memset(block, 64, 2*256);
/* horizontal butterflies into temp[] */
1639 const int offset= y_offset[i];
1640 const int z0= block[offset+stride*0] + block[offset+stride*4];
1641 const int z1= block[offset+stride*0] - block[offset+stride*4];
1642 const int z2= block[offset+stride*1] - block[offset+stride*5];
1643 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterflies, scaled by qmul and rounded */
1652 const int offset= x_offset[i];
1653 const int z0= temp[4*0+i] + temp[4*2+i];
1654 const int z1= temp[4*0+i] - temp[4*2+i];
1655 const int z2= temp[4*1+i] - temp[4*3+i];
1656 const int z3= temp[4*1+i] + temp[4*3+i];
1658 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1659 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1660 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1661 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1667 * dct tranforms the 16 dc values.
1668 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side).
 * NOTE(review): loop headers and half the butterfly stores are elided here.
 */
1670 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1671 // const int qmul= dequant_coeff[qp][0];
1673 int temp[16]; //FIXME check if this is a good idea
1674 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1675 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal butterflies into temp[] */
1678 const int offset= y_offset[i];
1679 const int z0= block[offset+stride*0] + block[offset+stride*4];
1680 const int z1= block[offset+stride*0] - block[offset+stride*4];
1681 const int z2= block[offset+stride*1] - block[offset+stride*5];
1682 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical butterflies, halved on output */
1691 const int offset= x_offset[i];
1692 const int z0= temp[4*0+i] + temp[4*2+i];
1693 const int z1= temp[4*0+i] - temp[4*2+i];
1694 const int z2= temp[4*1+i] - temp[4*3+i];
1695 const int z3= temp[4*1+i] + temp[4*3+i];
1697 block[stride*0 +offset]= (z0 + z3)>>1;
1698 block[stride*2 +offset]= (z1 + z2)>>1;
1699 block[stride*8 +offset]= (z1 - z2)>>1;
1700 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform plus dequantization of the chroma DC values.
 * NOTE(review): the lines computing e (presumably a+b) are elided here.
 */
1708 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1709 const int stride= 16*2;
1710 const int xStride= 16;
1713 a= block[stride*0 + xStride*0];
1714 b= block[stride*0 + xStride*1];
1715 c= block[stride*1 + xStride*0];
1716 d= block[stride*1 + xStride*1];
/* 2x2 butterfly output scaled by qmul */
1723 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1724 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1725 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1726 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * 2x2 forward Hadamard transform of the chroma DC values (encoder side).
 * NOTE(review): the lines computing e (presumably a+b) are elided here.
 */
1730 static void chroma_dc_dct_c(DCTELEM *block){
1731 const int stride= 16*2;
1732 const int xStride= 16;
1735 a= block[stride*0 + xStride*0];
1736 b= block[stride*0 + xStride*1];
1737 c= block[stride*1 + xStride*0];
1738 d= block[stride*1 + xStride*1];
1745 block[stride*0 + xStride*0]= (a+c);
1746 block[stride*0 + xStride*1]= (e+b);
1747 block[stride*1 + xStride*0]= (a-c);
1748 block[stride*1 + xStride*1]= (e-b);
1753 * gets the chroma qp.
/* maps the luma QP (plus the PPS chroma offset) to the chroma QP via the
 * standard lookup table, clipping the index into the valid 0..51 range */
1755 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1757 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * 4x4 forward integer transform of the difference src1 - src2
 * (encoder residual transform).
 * NOTE(review): the loop headers are elided in this extract.
 */
1762 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1764 //FIXME try int temp instead of block
/* row pass on the pixel differences */
1767 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1768 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1769 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1770 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1771 const int z0= d0 + d3;
1772 const int z3= d0 - d3;
1773 const int z1= d1 + d2;
1774 const int z2= d1 - d2;
1776 block[0 + 4*i]= z0 + z1;
1777 block[1 + 4*i]= 2*z3 + z2;
1778 block[2 + 4*i]= z0 - z1;
1779 block[3 + 4*i]= z3 - 2*z2;
/* column pass on the transformed rows */
1783 const int z0= block[0*4 + i] + block[3*4 + i];
1784 const int z3= block[0*4 + i] - block[3*4 + i];
1785 const int z1= block[1*4 + i] + block[2*4 + i];
1786 const int z2= block[1*4 + i] - block[2*4 + i];
1788 block[0*4 + i]= z0 + z1;
1789 block[1*4 + i]= 2*z3 + z2;
1790 block[2*4 + i]= z0 - z1;
1791 block[3*4 + i]= z3 - 2*z2;
1796 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1797 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 block with dead-zone rounding (encoder side).
 * @param intra selects the larger intra dead-zone bias (1/3 vs 1/6)
 * @param seperate_dc when set, the DC coefficient uses its own bias/shift
 * @return index of the last non-zero coefficient
 * NOTE(review): several source lines appear elided in this extract.
 */
1798 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1800 const int * const quant_table= quant_coeff[qscale];
1801 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* values within +-threshold1 quantize to zero; threshold2 is the
 * two-sided version used with the unsigned-compare trick below */
1802 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1803 const unsigned int threshold2= (threshold1<<1);
/* separate-DC path: luma DC uses a 2-bit-smaller effective shift */
1809 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1810 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1811 const unsigned int dc_threshold2= (dc_threshold1<<1);
1813 int level= block[0]*quant_coeff[qscale+18][0];
1814 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1816 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1819 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1822 // last_non_zero = i;
/* chroma DC variant: 1-bit-larger effective shift */
1827 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1828 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1829 const unsigned int dc_threshold2= (dc_threshold1<<1);
1831 int level= block[0]*quant_table[0];
1832 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1834 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1837 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1840 // last_non_zero = i;
/* AC coefficients in scan order */
1853 const int j= scantable[i];
1854 int level= block[j]*quant_table[j];
1856 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1857 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1858 if(((unsigned)(level+threshold1))>threshold2){
1860 level= (bias + level)>>QUANT_SHIFT;
1863 level= (bias - level)>>QUANT_SHIFT;
1872 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicate the row of pixels above
 * the block into all four rows. topright is unused.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top= ((uint32_t*)(src-stride))[0];
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= top;
}
/**
 * 4x4 horizontal intra prediction: replicate each row's left neighbour
 * pixel across the row. topright is unused.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= src[-1+y*stride]*0x01010101U;
}
/**
 * 4x4 DC intra prediction: fill the block with the rounded average of
 * the 4 top and 4 left neighbour pixels. topright is unused.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 4;  /* rounding term for the >>3 */
    int i;
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[i-stride] + src[-1+i*stride];
    fill= (uint32_t)(sum>>3) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 left-DC intra prediction: fill with the rounded average of the
 * 4 left neighbours (used when the top row is unavailable).
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 2, i;
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[-1+i*stride];
    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 top-DC intra prediction: fill with the rounded average of the
 * 4 top neighbours (used when the left column is unavailable).
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 2, i;
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[i-stride];
    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 flat-DC intra prediction with the mid value 128
 * (used when no neighbours are available).
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= 128U*0x01010101U;
}
/* Loads the 4 top-right neighbour pixels into locals t4..t7
 * (used by the directional 4x4 predictors below). */
1926 #define LOAD_TOP_RIGHT_EDGE\
1927 const int t4= topright[0];\
1928 const int t5= topright[1];\
1929 const int t6= topright[2];\
1930 const int t7= topright[3];\
/* Loads the 4 left neighbour pixels into locals l0..l3. */
1932 #define LOAD_LEFT_EDGE\
1933 const int l0= src[-1+0*stride];\
1934 const int l1= src[-1+1*stride];\
1935 const int l2= src[-1+2*stride];\
1936 const int l3= src[-1+3*stride];\
/* Loads the 4 top neighbour pixels into locals t0..t3. */
1938 #define LOAD_TOP_EDGE\
1939 const int t0= src[ 0-1*stride];\
1940 const int t1= src[ 1-1*stride];\
1941 const int t2= src[ 2-1*stride];\
1942 const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal down-right intra prediction (3-tap filtered diagonals).
 * NOTE(review): the LOAD_*_EDGE invocations and half the stores are
 * elided in this extract.
 */
1944 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1945 const int lt= src[-1-1*stride];
1949 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1951 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1954 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1958 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1961 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1963 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1964 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/**
 * 4x4 diagonal down-left intra prediction (3-tap filtered diagonals,
 * using the top and top-right edges).
 * NOTE(review): the LOAD_*_EDGE invocations and some stores are elided here.
 */
1967 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1972 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1974 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1977 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1981 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1984 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1986 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1987 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/**
 * 4x4 vertical-right intra prediction: mixes 2-tap averages and
 * 3-tap filtered values from the top and left edges.
 * NOTE(review): the LOAD_*_EDGE invocations and some stores are elided here.
 */
1990 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1991 const int lt= src[-1-1*stride];
/* l3 is loaded by the macro but not used by this mode */
1994 const __attribute__((unused)) int unu= l3;
1997 src[1+2*stride]=(lt + t0 + 1)>>1;
1999 src[2+2*stride]=(t0 + t1 + 1)>>1;
2001 src[3+2*stride]=(t1 + t2 + 1)>>1;
2002 src[3+0*stride]=(t2 + t3 + 1)>>1;
2004 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2006 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2008 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2009 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2010 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2011 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/**
 * 4x4 vertical-left intra prediction from the top and top-right edges.
 * NOTE(review): the LOAD_*_EDGE invocations and some stores are elided here.
 */
2014 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
/* t7 is loaded by the macro but not used by this mode */
2017 const __attribute__((unused)) int unu= t7;
2019 src[0+0*stride]=(t0 + t1 + 1)>>1;
2021 src[0+2*stride]=(t1 + t2 + 1)>>1;
2023 src[1+2*stride]=(t2 + t3 + 1)>>1;
2025 src[2+2*stride]=(t3 + t4+ 1)>>1;
2026 src[3+2*stride]=(t4 + t5+ 1)>>1;
2027 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2029 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2031 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2033 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2034 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/**
 * 4x4 horizontal-up intra prediction from the left edge only.
 * NOTE(review): the LOAD_LEFT_EDGE invocation and the trailing l3
 * replication stores are elided in this extract.
 */
2037 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2040 src[0+0*stride]=(l0 + l1 + 1)>>1;
2041 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2043 src[0+1*stride]=(l1 + l2 + 1)>>1;
2045 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2047 src[0+2*stride]=(l2 + l3 + 1)>>1;
2049 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/**
 * 4x4 horizontal-down intra prediction: mixes 2-tap averages and
 * 3-tap filtered values from the left, top-left and top edges.
 * NOTE(review): the LOAD_*_EDGE invocations and some stores are elided here.
 */
2058 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2059 const int lt= src[-1-1*stride];
/* t3 is loaded by the macro but not used by this mode */
2062 const __attribute__((unused)) int unu= t3;
2065 src[2+1*stride]=(lt + l0 + 1)>>1;
2067 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2068 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2069 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2071 src[2+2*stride]=(l0 + l1 + 1)>>1;
2073 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2075 src[2+3*stride]=(l1 + l2+ 1)>>1;
2077 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2078 src[0+3*stride]=(l2 + l3 + 1)>>1;
2079 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: replicate the 16 pixels above
 * the block into all 16 rows (copied 4 bytes at a time).
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *top= (const uint32_t*)(src-stride);
    int y, x;

    for(y=0; y<16; y++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= top[x];
}
/**
 * 16x16 horizontal intra prediction: replicate each row's left
 * neighbour pixel across the whole row.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y, x;

    for(y=0; y<16; y++){
        const uint32_t v= src[-1+y*stride]*0x01010101U;
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= v;
    }
}
/**
 * 16x16 DC intra prediction: fill with the rounded average of the
 * 16 left and 16 top neighbour pixels.
 * NOTE(review): the accumulation loops (including the top-row sum)
 * are elided in this extract.
 */
2108 static void pred16x16_dc_c(uint8_t *src, int stride){
2112 dc+= src[-1+i*stride];
/* (left sum + top sum + 16) / 32, replicated into all 4 bytes */
2119 dc= 0x01010101*((dc + 16)>>5);
2121 for(i=0; i<16; i++){
2122 ((uint32_t*)(src+i*stride))[0]=
2123 ((uint32_t*)(src+i*stride))[1]=
2124 ((uint32_t*)(src+i*stride))[2]=
2125 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 left-DC intra prediction: fill with the rounded average of the
 * 16 left neighbours (used when the top row is unavailable).
 * NOTE(review): the accumulation loop header is elided in this extract.
 */
2129 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2133 dc+= src[-1+i*stride];
/* (left sum + 8) / 16, replicated into all 4 bytes */
2136 dc= 0x01010101*((dc + 8)>>4);
2138 for(i=0; i<16; i++){
2139 ((uint32_t*)(src+i*stride))[0]=
2140 ((uint32_t*)(src+i*stride))[1]=
2141 ((uint32_t*)(src+i*stride))[2]=
2142 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 top-DC intra prediction: fill with the rounded average of the
 * 16 top neighbours (used when the left column is unavailable).
 * NOTE(review): the accumulation loop is elided in this extract.
 */
2146 static void pred16x16_top_dc_c(uint8_t *src, int stride){
/* (top sum + 8) / 16, replicated into all 4 bytes */
2152 dc= 0x01010101*((dc + 8)>>4);
2154 for(i=0; i<16; i++){
2155 ((uint32_t*)(src+i*stride))[0]=
2156 ((uint32_t*)(src+i*stride))[1]=
2157 ((uint32_t*)(src+i*stride))[2]=
2158 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 flat-DC intra prediction with the mid value 128
 * (used when no neighbours are available).
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int y, x;

    for(y=0; y<16; y++)
        for(x=0; x<4; x++)
            ((uint32_t*)(src+y*stride))[x]= 0x01010101U*128U;
}
/**
 * 16x16 plane intra prediction shared by H.264 and SVQ3: fits a linear
 * gradient to the border pixels and fills the block with it.
 * @param svq3 selects the SVQ3 variant of the slope scaling (with H/V
 *        swapped for bit-exactness)
 * NOTE(review): some source lines (branch structure, clamped stores)
 * are elided in this extract.
 */
2173 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2176 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2177 const uint8_t * const src0 = src+7-stride;
2178 const uint8_t *src1 = src+8*stride-1;
2179 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
/* weighted sums of border differences give the horizontal and vertical slopes */
2180 int H = src0[1] - src0[-1];
2181 int V = src1[0] - src2[ 0];
2182 for(k=2; k<=8; ++k) {
2183 src1 += stride; src2 -= stride;
2184 H += k*(src0[k] - src0[-k]);
2185 V += k*(src1[0] - src2[ 0]);
/* SVQ3 scaling variant */
2188 H = ( 5*(H/4) ) / 16;
2189 V = ( 5*(V/4) ) / 16;
2191 /* required for 100% accuracy */
2192 i = H; H = V; V = i;
/* H.264 scaling */
2194 H = ( 5*H+32 ) >> 6;
2195 V = ( 5*V+32 ) >> 6;
2198 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2199 for(j=16; j>0; --j) {
2202 for(i=-16; i<0; i+=4) {
2203 src[16+i] = cm[ (b ) >> 5 ];
2204 src[17+i] = cm[ (b+ H) >> 5 ];
2205 src[18+i] = cm[ (b+2*H) >> 5 ];
2206 src[19+i] = cm[ (b+3*H) >> 5 ];
/* H.264 16x16 plane prediction: the shared helper with svq3 disabled. */
2213 static void pred16x16_plane_c(uint8_t *src, int stride){
2214 pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 chroma vertical intra prediction: replicate the 8 pixels above
 * into every row.
 * NOTE(review): the row loop header is elided in this extract.
 */
2217 static void pred8x8_vertical_c(uint8_t *src, int stride){
2219 const uint32_t a= ((uint32_t*)(src-stride))[0];
2220 const uint32_t b= ((uint32_t*)(src-stride))[1];
2223 ((uint32_t*)(src+i*stride))[0]= a;
2224 ((uint32_t*)(src+i*stride))[1]= b;
/**
 * 8x8 chroma horizontal intra prediction: replicate each row's left
 * neighbour pixel across the row.
 * NOTE(review): the row loop header is elided in this extract.
 */
2228 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2232 ((uint32_t*)(src+i*stride))[0]=
2233 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 intra DC fallback: fill with mid-grey 128 when no neighbours
 * are available. */
2237 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2241 ((uint32_t*)(src+i*stride))[0]=
2242 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 (chroma) intra DC prediction from the left column only:
 * dc0 averages left pixels of rows 0-3, dc2 of rows 4-7; each average
 * fills its own 8x4 half of the block. */
2246 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2252 dc0+= src[-1+i*stride];
2253 dc2+= src[-1+(i+4)*stride];
2255 dc0= 0x01010101*((dc0 + 2)>>2);
2256 dc2= 0x01010101*((dc2 + 2)>>2);
/* Top half: rows 0-3 get dc0. */
2259 ((uint32_t*)(src+i*stride))[0]=
2260 ((uint32_t*)(src+i*stride))[1]= dc0;
/* Bottom half: rows 4-7 get dc2. */
2263 ((uint32_t*)(src+i*stride))[0]=
2264 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 (chroma) intra DC prediction from the top row only:
 * dc0 averages top pixels of columns 0-3, dc1 of columns 4-7; each
 * average fills its own 4x8 half of the block. */
2268 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2274 dc0+= src[i-stride];
2275 dc1+= src[4+i-stride];
2277 dc0= 0x01010101*((dc0 + 2)>>2);
2278 dc1= 0x01010101*((dc1 + 2)>>2);
/* Left half gets dc0, right half dc1 (both loops write all rows —
 * the two loop headers are missing from this chunk). */
2281 ((uint32_t*)(src+i*stride))[0]= dc0;
2282 ((uint32_t*)(src+i*stride))[1]= dc1;
2285 ((uint32_t*)(src+i*stride))[0]= dc0;
2286 ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 (chroma) intra DC prediction using both top and left neighbours.
 * The block is split into four 4x4 quadrants: top-left uses top+left
 * (dc0), top-right uses top only (dc1), bottom-left uses left only
 * (dc2), bottom-right uses the combined average (dc3). */
2291 static void pred8x8_dc_c(uint8_t *src, int stride){
2293 int dc0, dc1, dc2, dc3;
2297 dc0+= src[-1+i*stride] + src[i-stride];
2298 dc1+= src[4+i-stride];
2299 dc2+= src[-1+(i+4)*stride];
/* dc3 must be computed before dc1/dc2 are overwritten with their
 * replicated byte patterns below. */
2301 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2302 dc0= 0x01010101*((dc0 + 4)>>3);
2303 dc1= 0x01010101*((dc1 + 2)>>2);
2304 dc2= 0x01010101*((dc2 + 2)>>2);
/* Rows 0-3: quadrants dc0 | dc1. */
2307 ((uint32_t*)(src+i*stride))[0]= dc0;
2308 ((uint32_t*)(src+i*stride))[1]= dc1;
/* Rows 4-7: quadrants dc2 | dc3. */
2311 ((uint32_t*)(src+i*stride))[0]= dc2;
2312 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 (chroma) intra plane prediction: same linear-ramp scheme as the
 * 16x16 version but with 8x8 geometry — H/V gradients from the top
 * row (src0) and left column (src1/src2), output clamped via cm. */
2316 static void pred8x8_plane_c(uint8_t *src, int stride){
2319 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2320 const uint8_t * const src0 = src+3-stride;
2321 const uint8_t *src1 = src+4*stride-1;
2322 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2323 int H = src0[1] - src0[-1];
2324 int V = src1[0] - src2[ 0];
2325 for(k=2; k<=4; ++k) {
2326 src1 += stride; src2 -= stride;
2327 H += k*(src0[k] - src0[-k]);
2328 V += k*(src1[0] - src2[ 0]);
2330 H = ( 17*H+16 ) >> 5;
2331 V = ( 17*V+16 ) >> 5;
/* a = 16*(bottom-left + top-right + 1) - 3*(V+H): the plane offset. */
2333 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2334 for(j=8; j>0; --j) {
2337 src[0] = cm[ (b ) >> 5 ];
2338 src[1] = cm[ (b+ H) >> 5 ];
2339 src[2] = cm[ (b+2*H) >> 5 ];
2340 src[3] = cm[ (b+3*H) >> 5 ];
2341 src[4] = cm[ (b+4*H) >> 5 ];
2342 src[5] = cm[ (b+5*H) >> 5 ];
2343 src[6] = cm[ (b+6*H) >> 5 ];
2344 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma intra predictors below.
 * SRC(x,y) addresses a pixel relative to the block origin. */
2349 #define SRC(x,y) src[(x)+(y)*stride]
/* PL(y) body: 3-tap [1 2 1] filter over the left-column neighbour at
 * row y (the "#define PL(y)" line itself is missing from this chunk). */
2351 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Loads the filtered left neighbours l0..l7 into const locals. */
2352 #define PREDICT_8x8_LOAD_LEFT \
2353 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2354 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2355 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2356 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* PT(x) body: 3-tap filter over the top-row neighbour at column x. */
2359 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Loads the filtered top neighbours t0..t7. */
2360 #define PREDICT_8x8_LOAD_TOP \
2361 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2362 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2363 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2364 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2365 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* PTR(x) body: same 3-tap filter for the top-right extension t8..t15. */
2368 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Loads t8..t15; when the top-right block is unavailable they are all
 * replicated from the last available top pixel SRC(7,-1). */
2369 #define PREDICT_8x8_LOAD_TOPRIGHT \
2370 int t8, t9, t10, t11, t12, t13, t14, t15; \
2371 if(has_topright) { \
2372 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2373 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2374 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
2376 #define PREDICT_8x8_LOAD_TOPLEFT \
2377 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fills the 8x8 block with the replicated 32-bit DC word v. */
2379 #define PREDICT_8x8_DC(v) \
2381 for( y = 0; y < 8; y++ ) { \
2382 ((uint32_t*)src)[0] = \
2383 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC fallback: fill with mid-grey 0x80 per byte. */
2387 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2389 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left neighbours only. */
2391 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_LOAD_LEFT;
2394 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top neighbours only. */
2397 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_TOP;
2400 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction averaging all 16 filtered top+left
 * neighbours. */
2403 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2405 PREDICT_8x8_LOAD_LEFT;
2406 PREDICT_8x8_LOAD_TOP;
2407 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2408 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with the filtered
 * left neighbour l<y>, replicated to 8 bytes. */
2411 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2413 PREDICT_8x8_LOAD_LEFT;
2414 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2415 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2416 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: the filtered top row t0..t7 is written
 * into row 0 and then copied into rows 1-7 as one 64-bit load/store.
 * NOTE(review): the lines writing t0..t7 into src row 0 (2423-2430 of
 * the original) are missing from this chunk — confirm. */
2419 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2422 PREDICT_8x8_LOAD_TOP;
2431 for( y = 1; y < 8; y++ )
2432 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: each anti-diagonal of the
 * block gets one 3-tap filtered value from the top / top-right
 * neighbours t0..t15. */
2434 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2436 PREDICT_8x8_LOAD_TOP;
2437 PREDICT_8x8_LOAD_TOPRIGHT;
2438 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2439 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2440 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2441 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2442 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2443 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2444 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2445 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2446 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2447 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2448 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2449 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2450 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2451 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2452 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: diagonals below the main
 * one come from the left neighbours l0..l7, the main diagonal from the
 * top-left corner, and diagonals above it from the top row t0..t7. */
2454 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2456 PREDICT_8x8_LOAD_TOP;
2457 PREDICT_8x8_LOAD_LEFT;
2458 PREDICT_8x8_LOAD_TOPLEFT;
2459 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2460 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2461 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2462 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2463 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2464 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2465 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2466 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2467 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2468 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2469 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2470 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2471 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2473 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: even positions along each
 * predicted column use 2-tap half-pel averages, odd positions use
 * 3-tap filtered values; left neighbours feed the lower-left part. */
2476 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2478 PREDICT_8x8_LOAD_TOP;
2479 PREDICT_8x8_LOAD_LEFT;
2480 PREDICT_8x8_LOAD_TOPLEFT;
2481 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2482 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2483 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2484 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2485 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2486 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2487 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2488 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2489 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2490 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2491 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2492 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2493 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2494 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2495 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2496 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2497 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2498 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2499 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2500 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2501 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2502 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: alternates 2-tap averages and
 * 3-tap filtered values of the left column (plus top-left and the
 * first top samples for the upper rows). */
2504 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2506 PREDICT_8x8_LOAD_TOP;
2507 PREDICT_8x8_LOAD_LEFT;
2508 PREDICT_8x8_LOAD_TOPLEFT;
2509 SRC(0,7)= (l6 + l7 + 1) >> 1;
2510 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2511 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2512 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2513 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2514 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2515 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2516 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2517 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2518 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2519 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2520 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2521 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2522 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2523 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2524 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2525 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2526 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2527 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2528 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2529 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2530 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: even rows use 2-tap half-pel
 * averages of the top / top-right neighbours, odd rows the 3-tap
 * filtered values. */
2532 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2534 PREDICT_8x8_LOAD_TOP;
2535 PREDICT_8x8_LOAD_TOPRIGHT;
2536 SRC(0,0)= (t0 + t1 + 1) >> 1;
2537 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2538 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2539 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2540 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2541 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2542 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2543 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2544 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2545 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2546 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2547 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2548 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2549 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2550 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2551 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2552 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2553 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2554 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2555 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2556 SRC(7,6)= (t10 + t11 + 1) >> 1;
2557 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates down the left
 * column; positions past the last left neighbour are padded with l7. */
2559 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2561 PREDICT_8x8_LOAD_LEFT;
2562 SRC(0,0)= (l0 + l1 + 1) >> 1;
2563 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2564 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2565 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2566 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2567 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2568 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2569 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2570 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2571 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2572 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2573 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2574 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2575 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* Remaining bottom-right region has no further left samples: pad
 * everything with l7. */
2576 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2577 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2578 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2579 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* The neighbour-loading helpers are local to the predictors above. */
2581 #undef PREDICT_8x8_LOAD_LEFT
2582 #undef PREDICT_8x8_LOAD_TOP
2583 #undef PREDICT_8x8_LOAD_TOPLEFT
2584 #undef PREDICT_8x8_LOAD_TOPRIGHT
2585 #undef PREDICT_8x8_DC
/**
 * Motion-compensates one partition in one direction (list).
 * Reads the quarter-pel MV from mv_cache, derives luma (1/4-pel) and
 * chroma (1/8-pel) source pointers, and applies qpix_op / chroma_op
 * (either put or avg variants, chosen by the caller).
 * When the full-pel position reaches outside the reference picture
 * (unless the EMU_EDGE flag suppresses padded edges), the source is
 * first copied into edge_emu_buffer with replicated borders.
 */
2591 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2592 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2593 int src_x_offset, int src_y_offset,
2594 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2595 MpegEncContext * const s = &h->s;
2596 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2597 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2598 const int luma_xy= (mx&3) + ((my&3)<<2);
2599 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2600 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2601 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2602 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2603 int extra_height= extra_width;
2605 const int full_mx= mx>>2;
2606 const int full_my= my>>2;
2607 const int pic_width = 16*s->mb_width;
2608 const int pic_height = 16*s->mb_height;
2610 assert(pic->data[0]);
/* Sub-pel interpolation reads 3 extra pixels around the block, so
 * shrink the usable padded area accordingly. */
2612 if(mx&7) extra_width -= 3;
2613 if(my&7) extra_height -= 3;
2615 if( full_mx < 0-extra_width
2616 || full_my < 0-extra_height
2617 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2618 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2619 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2620 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
/* delta: offset to the second half of a non-square partition. */
2624 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2626 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2629 if(s->flags&CODEC_FLAG_GRAY) return;
2632 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2633 src_cb= s->edge_emu_buffer;
2635 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2639 src_cr= s->edge_emu_buffer;
2641 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition.
 * Runs mc_dir_part with the "put" functions for the first used list;
 * if both lists are used, the second pass switches to the "avg"
 * functions so the two predictions are averaged in place.
 */
2644 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2645 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2646 int x_offset, int y_offset,
2647 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2648 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2649 int list0, int list1){
2650 MpegEncContext * const s = &h->s;
2651 qpel_mc_func *qpix_op= qpix_put;
2652 h264_chroma_mc_func chroma_op= chroma_put;
/* x/y offsets are in chroma pixels; luma uses twice the step. */
2654 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2655 dest_cb += x_offset + y_offset*s->uvlinesize;
2656 dest_cr += x_offset + y_offset*s->uvlinesize;
2657 x_offset += 8*s->mb_x;
2658 y_offset += 8*s->mb_y;
2661 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2662 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2663 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2664 qpix_op, chroma_op);
/* Second list averages on top of the first prediction. */
2667 chroma_op= chroma_avg;
2671 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2672 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2673 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2674 qpix_op, chroma_op);
/**
 * Weighted (explicit or implicit) motion compensation for one
 * partition. Bi-predicted partitions render list1 into the
 * obmc_scratchpad temp planes and then blend with the biweight
 * functions; single-list partitions apply the per-reference
 * weight/offset in place.
 */
2678 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2679 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2680 int x_offset, int y_offset,
2681 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2682 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2683 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2684 int list0, int list1){
2685 MpegEncContext * const s = &h->s;
2687 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2688 dest_cb += x_offset + y_offset*s->uvlinesize;
2689 dest_cr += x_offset + y_offset*s->uvlinesize;
2690 x_offset += 8*s->mb_x;
2691 y_offset += 8*s->mb_y;
2694 /* don't optimize for luma-only case, since B-frames usually
2695 * use implicit weights => chroma too. */
2696 uint8_t *tmp_cb = s->obmc_scratchpad;
2697 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2698 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2699 int refn0 = h->ref_cache[0][ scan8[n] ];
2700 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 prediction goes straight to dest, list1 to the temp planes. */
2702 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2703 dest_y, dest_cb, dest_cr,
2704 x_offset, y_offset, qpix_put, chroma_put);
2705 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2706 tmp_y, tmp_cb, tmp_cr,
2707 x_offset, y_offset, qpix_put, chroma_put);
/* use_weight==2: implicit weights (sum to 64, log2 denom 5, no
 * offsets); otherwise explicit per-reference weights and offsets. */
2709 if(h->use_weight == 2){
2710 int weight0 = h->implicit_weight[refn0][refn1];
2711 int weight1 = 64 - weight0;
2712 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2713 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2714 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
2716 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2717 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2718 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2719 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2720 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2721 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2722 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2723 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2724 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
/* Single-list case: predict, then weight in place. */
2727 int list = list1 ? 1 : 0;
2728 int refn = h->ref_cache[list][ scan8[n] ];
2729 Picture *ref= &h->ref_list[list][refn];
2730 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2732 qpix_put, chroma_put);
2734 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2735 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2736 if(h->use_weight_chroma){
2737 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2738 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2739 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2740 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard MC.
 * Weighted MC is used for explicit weighting (use_weight==1), or for
 * implicit weighting (use_weight==2) when the bi-pred weights are not
 * the trivial 32/32 split (which plain averaging already produces).
 */
2745 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2747 int x_offset, int y_offset,
2748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2750 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2751 int list0, int list1){
2752 if((h->use_weight==2 && list0 && list1
2753 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2754 || h->use_weight==1)
2755 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2756 x_offset, y_offset, qpix_put, chroma_put,
2757 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2759 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2760 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Performs inter motion compensation for a whole macroblock: splits it
 * into partitions according to mb_type (16x16, 16x8, 8x16, or 8x8 with
 * sub-partitions) and calls mc_part for each with the appropriately
 * sized qpel/chroma function tables and weight function slots.
 */
2763 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2764 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2765 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2766 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2767 MpegEncContext * const s = &h->s;
2768 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2769 const int mb_type= s->current_picture.mb_type[mb_xy];
2771 assert(IS_INTER(mb_type));
2773 if(IS_16X16(mb_type)){
2774 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2775 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2776 &weight_op[0], &weight_avg[0],
2777 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2778 }else if(IS_16X8(mb_type)){
2779 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2780 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2781 &weight_op[1], &weight_avg[1],
2782 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2783 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2784 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2785 &weight_op[1], &weight_avg[1],
2786 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2787 }else if(IS_8X16(mb_type)){
2788 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2789 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2790 &weight_op[2], &weight_avg[2],
2791 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2792 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2793 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2794 &weight_op[2], &weight_avg[2],
2795 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 macroblock: iterate the four 8x8 sub-blocks (loop header not
 * visible in this chunk), each possibly sub-partitioned further. */
2799 assert(IS_8X8(mb_type));
2802 const int sub_mb_type= h->sub_mb_type[i];
2804 int x_offset= (i&1)<<2;
2805 int y_offset= (i&2)<<1;
2807 if(IS_SUB_8X8(sub_mb_type)){
2808 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2809 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2810 &weight_op[3], &weight_avg[3],
2811 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2812 }else if(IS_SUB_8X4(sub_mb_type)){
2813 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2814 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2815 &weight_op[4], &weight_avg[4],
2816 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2817 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2818 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2819 &weight_op[4], &weight_avg[4],
2820 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2821 }else if(IS_SUB_4X8(sub_mb_type)){
2822 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2823 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2824 &weight_op[5], &weight_avg[5],
2825 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2826 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2827 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2828 &weight_op[5], &weight_avg[5],
2829 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2832 assert(IS_SUB_4X4(sub_mb_type));
2834 int sub_x_offset= x_offset + 2*(j&1);
2835 int sub_y_offset= y_offset + (j&2);
2836 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2837 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2838 &weight_op[6], &weight_avg[6],
2839 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/**
 * Builds the static CAVLC tables (coeff_token, total_zeros, run_before
 * and their chroma-DC variants) once per process; the static `done`
 * flag guards against re-initialisation.
 */
2846 static void decode_init_vlc(H264Context *h){
2847 static int done = 0;
2853 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2854 &chroma_dc_coeff_token_len [0], 1, 1,
2855 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2858 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2859 &coeff_token_len [i][0], 1, 1,
2860 &coeff_token_bits[i][0], 1, 1, 1);
2864 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2865 &chroma_dc_total_zeros_len [i][0], 1, 1,
2866 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2868 for(i=0; i<15; i++){
2869 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2870 &total_zeros_len [i][0], 1, 1,
2871 &total_zeros_bits[i][0], 1, 1, 1);
2875 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2876 &run_len [i][0], 1, 1,
2877 &run_bits[i][0], 1, 1, 1);
/* Runs >= 7 share one longer table. */
2879 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2880 &run_len [6][0], 1, 1,
2881 &run_bits[6][0], 1, 1, 1);
 * Sets the intra prediction function pointers.
/**
 * Fills the 4x4, 8x8-luma, 8x8-chroma and 16x16 intra-prediction
 * dispatch tables with the C reference implementations.
 */
2888 static void init_pred_ptrs(H264Context *h){
2889 // MpegEncContext * const s = &h->s;
2891 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2892 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2893 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2894 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2895 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2896 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2897 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2898 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2899 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2900 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2901 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2902 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2904 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2905 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2906 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2907 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2908 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2909 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2910 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2911 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2912 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2913 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2914 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2915 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2917 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2918 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2919 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2920 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2921 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2922 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2923 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 shares the 8x8-chroma mode indices. */
2925 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2926 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2927 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2928 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2929 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2930 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2931 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Frees all per-context tables allocated by alloc_tables (and the MC
 * scratchpad); av_freep NULLs the pointers so a later re-alloc is safe.
 */
2934 static void free_tables(H264Context *h){
2935 av_freep(&h->intra4x4_pred_mode);
2936 av_freep(&h->chroma_pred_mode_table);
2937 av_freep(&h->cbp_table);
2938 av_freep(&h->mvd_table[0]);
2939 av_freep(&h->mvd_table[1]);
2940 av_freep(&h->direct_table);
2941 av_freep(&h->non_zero_count);
2942 av_freep(&h->slice_table_base);
2943 av_freep(&h->top_borders[1]);
2944 av_freep(&h->top_borders[0]);
/* slice_table points into slice_table_base — clear the alias too. */
2945 h->slice_table= NULL;
2947 av_freep(&h->mb2b_xy);
2948 av_freep(&h->mb2b8_xy);
2950 av_freep(&h->s.obmc_scratchpad);
/**
 * Builds the 8x8 dequantisation tables for all 52 QP values from the
 * PPS scaling matrices. If both scaling matrices are identical the
 * second table aliases the first instead of being recomputed.
 */
2953 static void init_dequant8_coeff_table(H264Context *h){
2955 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2956 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2958 for(i=0; i<2; i++ ){
2959 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2960 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2964 for(q=0; q<52; q++){
2965 int shift = div6[q];
2968 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2969 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Builds the six 4x4 dequantisation tables (one per scaling-matrix
 * slot) for all 52 QP values. Tables whose scaling matrices match an
 * earlier slot alias that slot's buffer. The coefficients are stored
 * transposed when a non-reference IDCT is in use.
 */
2974 static void init_dequant4_coeff_table(H264Context *h){
2976 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2977 for(i=0; i<6; i++ ){
2978 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2980 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2981 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2988 for(q=0; q<52; q++){
2989 int shift = div6[q] + 2;
2992 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2993 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2994 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initialises all dequant tables; with transform bypass enabled the
 * QP==0 entries are forced to the identity scale 1<<6 so lossless
 * blocks pass through unscaled.
 */
2999 static void init_dequant_tables(H264Context *h){
3001 init_dequant4_coeff_table(h);
3002 if(h->pps.transform_8x8_mode)
3003 init_dequant8_coeff_table(h);
3004 if(h->sps.transform_bypass){
3007 h->dequant4_coeff[i][0][x] = 1<<6;
3008 if(h->pps.transform_8x8_mode)
3011 h->dequant8_coeff[i][0][x] = 1<<6;
 * needs width/height
/**
 * Allocates all per-picture-size decoder tables (mb_width/mb_height
 * must already be set) and precomputes the mb->b/b8 index maps.
 * Returns 0 on success; CHECKED_ALLOCZ jumps to the (not visible here)
 * fail path on OOM.
 */
3020 static int alloc_tables(H264Context *h){
3021 MpegEncContext * const s = &h->s;
/* One extra MB row so edge macroblocks have addressable neighbours. */
3022 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3025 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3027 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3028 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3029 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3030 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3031 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3033 if( h->pps.cabac ) {
3034 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3035 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3036 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3037 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so (-1,-1) neighbours of
 * MB (0,0) are valid addresses. */
3040 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3041 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3043 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3044 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3045 for(y=0; y<s->mb_height; y++){
3046 for(x=0; x<s->mb_width; x++){
3047 const int mb_xy= x + y*s->mb_stride;
3048 const int b_xy = 4*x + 4*y*h->b_stride;
3049 const int b8_xy= 2*x + 2*y*h->b8_stride;
3051 h->mb2b_xy [mb_xy]= b_xy;
3052 h->mb2b8_xy[mb_xy]= b8_xy;
/* Allocated lazily in frame_start once linesize is known. */
3056 s->obmc_scratchpad = NULL;
3058 if(!h->dequant4_coeff[0])
3059 init_dequant_tables(h);
/**
 * Context setup shared by decoder (and encoder) init: copies the
 * dimensions/codec id from the AVCodecContext and seeds the PPS
 * scaling matrices with the flat default value 16.
 */
3067 static void common_init(H264Context *h){
3068 MpegEncContext * const s = &h->s;
3070 s->width = s->avctx->width;
3071 s->height = s->avctx->height;
3072 s->codec_id= s->avctx->codec->id;
/* -1 == "no PPS dequant tables built yet". */
3076 h->dequant_coeff_pps= -1;
3077 s->unrestricted_mv=1;
3078 s->decode=1; //FIXME
3080 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3081 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects AVCC-style extradata (first
 * byte == 1 means length-prefixed NALs rather than Annex-B).
 * NOTE(review): the handling following the extradata check is not
 * visible in this chunk.
 */
3084 static int decode_init(AVCodecContext *avctx){
3085 H264Context *h= avctx->priv_data;
3086 MpegEncContext * const s = &h->s;
3088 MPV_decode_defaults(s);
3093 s->out_format = FMT_H264;
3094 s->workaround_bugs= avctx->workaround_bugs;
3097 // s->decode_mb= ff_h263_decode_mb;
3099 avctx->pix_fmt= PIX_FMT_YUV420P;
3103 if(avctx->extradata_size > 0 && avctx->extradata &&
3104 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the MPV frame and error resilience, then
 * precomputes the per-block destination offsets (entries 0-23 for the
 * frame case, 24-47 with doubled row stride for the field case) and
 * lazily allocates the bipred scratchpad now that linesize is known.
 */
3114 static int frame_start(H264Context *h){
3115 MpegEncContext * const s = &h->s;
3118 if(MPV_frame_start(s, s->avctx) < 0)
3120 ff_er_frame_start(s);
3122 assert(s->linesize && s->uvlinesize);
3124 for(i=0; i<16; i++){
3125 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3126 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3129 h->block_offset[16+i]=
3130 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3131 h->block_offset[24+16+i]=
3132 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3135 /* can't be in alloc_tables because linesize isn't known there.
3136 * FIXME: redo bipred weight to not require extra buffer? */
3137 if(!s->obmc_scratchpad)
3138 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3140 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves the right column (left_border, as seen by the *next* macroblock) and
 * the bottom row (top_borders, as seen by the macroblock *below*) of the
 * just-deblocked macroblock, so intra prediction / deblocking of neighbours
 * can use pre-filter samples. Chroma is skipped under CODEC_FLAG_GRAY.
 * NOTE(review): elided listing — loop bodies/braces are partially missing. */
3144 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3145 MpegEncContext * const s = &h->s;
3149 src_cb -= uvlinesize;
3150 src_cr -= uvlinesize;
3152 // There are two lines saved: the line above the top macroblock of a pair,
3153 // and the line above the bottom macroblock
/* left_border[0] is the corner sample, taken from the previously saved
 * top border rather than from the source rows. */
3154 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3155 for(i=1; i<17; i++){
3156 h->left_border[i]= src_y[15+i* linesize];
/* Bottom luma row saved as two 8-byte chunks. */
3159 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3162 if(!(s->flags&CODEC_FLAG_GRAY)){
/* Chroma corners and columns; Cb lives at +17, Cr at +17+9 in left_border. */
3163 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3164 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3166 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3167 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3169 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3170 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swaps (xchg=1) or restores (xchg=0) the saved unfiltered border samples with
 * the in-picture samples around the current macroblock. Intra prediction must
 * see *unfiltered* neighbour samples, while the picture itself holds the
 * deblocked ones; calling this before and after prediction toggles between
 * the two views. Edge rows/columns are skipped when there is no neighbour
 * (deblock_left / deblock_top).
 * NOTE(review): elided listing — the XCHG macro body and several braces are
 * not visible. */
3174 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3175 MpegEncContext * const s = &h->s;
3178 int deblock_left = (s->mb_x > 0);
3179 int deblock_top = (s->mb_y > 0);
/* Step back one row and one column so the border samples are addressable. */
3181 src_y -= linesize + 1;
3182 src_cb -= uvlinesize + 1;
3183 src_cr -= uvlinesize + 1;
3185 #define XCHG(a,b,t,xchg)\
3192 for(i = !deblock_top; i<17; i++){
3193 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* Top row: the first 8 bytes honour the xchg flag, the rest is always
 * swapped (a copy when xchg==1 is equivalent to the exchange here). */
3198 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3199 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* Top-right neighbour samples, needed by diagonal intra prediction modes. */
3200 if(s->mb_x+1 < s->mb_width){
3201 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3205 if(!(s->flags&CODEC_FLAG_GRAY)){
3207 for(i = !deblock_top; i<9; i++){
3208 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3209 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3213 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3214 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): saves the borders of a whole macroblock
 * *pair* (32 luma rows, 16 chroma rows) using both rows of top_borders[0/1].
 * NOTE(review): elided listing — some loop braces are missing from view. */
3219 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3220 MpegEncContext * const s = &h->s;
3223 src_y -= 2 * linesize;
3224 src_cb -= 2 * uvlinesize;
3225 src_cr -= 2 * uvlinesize;
3227 // There are two lines saved: the line above the top macroblock of a pair,
3228 // and the line above the bottom macroblock
/* Two corner samples (one per saved top line), then 32 luma column samples. */
3229 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3230 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3231 for(i=2; i<34; i++){
3232 h->left_border[i]= src_y[15+i* linesize];
/* Bottom two luma rows of the pair (rows 32 and 33 relative to src_y). */
3235 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3236 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3237 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3238 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3240 if(!(s->flags&CODEC_FLAG_GRAY)){
/* Chroma corners/columns: Cb at +34, Cr at +34+18 in left_border. */
3241 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3242 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3243 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3244 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3245 for(i=2; i<18; i++){
3246 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3247 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3249 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3250 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3251 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3252 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): swaps/restores saved border samples for a
 * macroblock pair (34 luma left samples, two saved top rows). Semantics of the
 * xchg flag match xchg_mb_border().
 * NOTE(review): elided listing — the XCHG macro body and braces are missing
 * from view. */
3256 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3257 MpegEncContext * const s = &h->s;
3260 int deblock_left = (s->mb_x > 0);
3261 int deblock_top = (s->mb_y > 0);
3263 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* Step back two rows (pair has two saved top lines) and one column. */
3265 src_y -= 2 * linesize + 1;
3266 src_cb -= 2 * uvlinesize + 1;
3267 src_cr -= 2 * uvlinesize + 1;
3269 #define XCHG(a,b,t,xchg)\
/* Skip the first two entries when there is no top neighbour. */
3276 for(i = (!deblock_top)<<1; i<34; i++){
3277 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3282 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3283 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3284 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3285 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3288 if(!(s->flags&CODEC_FLAG_GRAY)){
3290 for(i = (!deblock_top) << 1; i<18; i++){
3291 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3292 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3296 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3297 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3298 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3299 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* High-level decode of one macroblock: computes destination pointers into the
 * current picture, then (a) copies raw samples for PCM macroblocks, (b) runs
 * intra prediction + inverse transform for intra macroblocks (with border
 * swapping so prediction sees unfiltered neighbours when deblocking is on),
 * or (c) runs motion compensation for inter macroblocks; finally adds the
 * luma/chroma residuals and applies the in-loop deblocking filter. Handles
 * both the H.264 and SVQ3 transform paths, plus MBAFF pair deblocking.
 * NOTE(review): this listing is elided — closing braces, else branches and
 * several statements are missing between the visible lines. */
3304 static void hl_decode_mb(H264Context *h){
3305 MpegEncContext * const s = &h->s;
3306 const int mb_x= s->mb_x;
3307 const int mb_y= s->mb_y;
3308 const int mb_xy= mb_x + mb_y*s->mb_stride;
3309 const int mb_type= s->current_picture.mb_type[mb_xy];
3310 uint8_t *dest_y, *dest_cb, *dest_cr;
3311 int linesize, uvlinesize /*dct_offset*/;
3313 int *block_offset = &h->block_offset[0];
3314 const unsigned int bottom = mb_y & 1;
/* Lossless bypass: qscale 0 with the SPS transform_bypass flag set. */
3315 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3316 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3321 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3322 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3323 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* Field decoding: double the stride and use the field block_offset table
 * (entries 24..47); bottom-field MBs are shifted up to interleave rows. */
3325 if (h->mb_field_decoding_flag) {
3326 linesize = s->linesize * 2;
3327 uvlinesize = s->uvlinesize * 2;
3328 block_offset = &h->block_offset[24];
3329 if(mb_y&1){ //FIXME move out of this func?
3330 dest_y -= s->linesize*15;
3331 dest_cb-= s->uvlinesize*7;
3332 dest_cr-= s->uvlinesize*7;
3335 linesize = s->linesize;
3336 uvlinesize = s->uvlinesize;
3337 // dct_offset = s->linesize * 16;
/* Pick the residual-add function: plain pixel add in bypass mode, else the
 * 8x8 or 4x4 IDCT-add depending on the transform size of this MB. */
3340 idct_add = transform_bypass
3341 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3342 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
/* --- I_PCM: samples were parsed into h->mb; copy them out verbatim. --- */
3344 if (IS_INTRA_PCM(mb_type)) {
3347 // The pixels are stored in h->mb array in the same order as levels,
3348 // copy them in output in the correct order.
3349 for(i=0; i<16; i++) {
3350 for (y=0; y<4; y++) {
3351 for (x=0; x<4; x++) {
3352 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3356 for(i=16; i<16+4; i++) {
3357 for (y=0; y<4; y++) {
3358 for (x=0; x<4; x++) {
3359 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3363 for(i=20; i<20+4; i++) {
3364 for (y=0; y<4; y++) {
3365 for (x=0; x<4; x++) {
3366 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* --- Intra: swap in unfiltered borders, predict, transform, swap back. --- */
3371 if(IS_INTRA(mb_type)){
3372 if(h->deblocking_filter) {
3373 if (h->mb_aff_frame) {
3375 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3377 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3381 if(!(s->flags&CODEC_FLAG_GRAY)){
3382 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3383 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3386 if(IS_INTRA4x4(mb_type)){
/* 8x8 transform: four 8x8 blocks, prediction availability passed via the
 * topleft/topright sample-availability bitmasks. */
3388 if(IS_8x8DCT(mb_type)){
3389 for(i=0; i<16; i+=4){
3390 uint8_t * const ptr= dest_y + block_offset[i];
3391 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3392 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3393 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3394 if(h->non_zero_count_cache[ scan8[i] ])
3395 idct_add(ptr, h->mb + i*16, linesize);
3398 for(i=0; i<16; i++){
3399 uint8_t * const ptr= dest_y + block_offset[i];
3401 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* Diagonal modes need the top-right samples; when unavailable, replicate
 * the top-right-most available sample into a stack temporary. */
3404 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3405 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3406 assert(mb_y || linesize <= block_offset[i]);
3407 if(!topright_avail){
3408 tr= ptr[3 - linesize]*0x01010101;
3409 topright= (uint8_t*) &tr;
3411 topright= ptr + 4 - linesize;
3415 h->pred4x4[ dir ](ptr, topright, linesize);
3416 if(h->non_zero_count_cache[ scan8[i] ]){
3417 if(s->codec_id == CODEC_ID_H264)
3418 idct_add(ptr, h->mb + i*16, linesize);
3420 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* Intra 16x16: full-MB prediction plus a separate luma DC transform. */
3425 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3426 if(s->codec_id == CODEC_ID_H264){
3427 if(!transform_bypass)
3428 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3430 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3432 if(h->deblocking_filter) {
3433 if (h->mb_aff_frame) {
3435 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3436 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3437 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3439 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3443 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* --- Inter: motion compensation (H.264 path only; SVQ3 MC is elsewhere). --- */
3446 }else if(s->codec_id == CODEC_ID_H264){
3447 hl_motion(h, dest_y, dest_cb, dest_cr,
3448 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3449 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3450 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* --- Luma residual for non-intra4x4 MBs (intra4x4 added it above). --- */
3454 if(!IS_INTRA4x4(mb_type)){
3455 if(s->codec_id == CODEC_ID_H264){
3456 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3457 for(i=0; i<16; i+=di){
3458 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3459 uint8_t * const ptr= dest_y + block_offset[i];
3460 idct_add(ptr, h->mb + i*16, linesize);
3464 for(i=0; i<16; i++){
3465 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3466 uint8_t * const ptr= dest_y + block_offset[i];
3467 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* --- Chroma residual: DC dequant/IDCT first, then per-block AC add. --- */
3473 if(!(s->flags&CODEC_FLAG_GRAY)){
3474 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3475 if(!transform_bypass){
3476 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3477 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3479 if(s->codec_id == CODEC_ID_H264){
3480 for(i=16; i<16+4; i++){
3481 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3482 uint8_t * const ptr= dest_cb + block_offset[i];
3483 idct_add(ptr, h->mb + i*16, uvlinesize);
3486 for(i=20; i<20+4; i++){
3487 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3488 uint8_t * const ptr= dest_cr + block_offset[i];
3489 idct_add(ptr, h->mb + i*16, uvlinesize);
3493 for(i=16; i<16+4; i++){
3494 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3495 uint8_t * const ptr= dest_cb + block_offset[i];
3496 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3499 for(i=20; i<20+4; i++){
3500 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3501 uint8_t * const ptr= dest_cr + block_offset[i];
3502 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* --- Deblocking: MBAFF pairs are filtered once per pair (on the bottom
 * MB), filtering top then bottom; otherwise filter this MB directly. --- */
3508 if(h->deblocking_filter) {
3509 if (h->mb_aff_frame) {
3510 const int mb_y = s->mb_y - 1;
3511 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3512 const int mb_xy= mb_x + mb_y*s->mb_stride;
3513 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3514 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* Debug canary: detects unintended writes to a fixed chroma sample. */
3515 uint8_t tmp = s->current_picture.data[1][384];
3516 if (!bottom) return;
3517 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3518 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3519 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3521 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3522 // TODO deblock a pair
3525 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3526 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3527 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3528 if (tmp != s->current_picture.data[1][384]) {
3529 tprintf("modified pixel 8,1 (1)\n");
3533 tprintf("call mbaff filter_mb\n");
3534 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3535 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3536 if (tmp != s->current_picture.data[1][384]) {
3537 tprintf("modified pixel 8,1 (2)\n");
3540 tprintf("call filter_mb\n");
3541 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3542 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3543 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3549 * fills the default_ref_list.
/* Builds the default reference picture lists (H.264 spec 8.2.4.2). For B
 * slices, short-term refs are first sorted by POC, then L0 runs from the
 * nearest-past picture backwards and L1 from the nearest-future forwards;
 * long-term refs are appended by index. For P slices, short-term refs are
 * taken in short_ref order (most recent first) followed by long-term refs.
 * When L0 and L1 would be identical, the first two L1 entries are swapped.
 * NOTE(review): elided listing — braces, the P-slice else branch, and the
 * return are missing from view. */
3551 static int fill_default_ref_list(H264Context *h){
3552 MpegEncContext * const s = &h->s;
3554 int smallest_poc_greater_than_current = -1;
3555 Picture sorted_short_ref[32];
3557 if(h->slice_type==B_TYPE){
3561 /* sort frame according to poc in B slice */
/* Selection sort: repeatedly pick the smallest POC above the last chosen. */
3562 for(out_i=0; out_i<h->short_ref_count; out_i++){
3564 int best_poc=INT_MAX;
3566 for(i=0; i<h->short_ref_count; i++){
3567 const int poc= h->short_ref[i]->poc;
3568 if(poc > limit && poc < best_poc){
3574 assert(best_i != INT_MIN);
3577 sorted_short_ref[out_i]= *h->short_ref[best_i];
3578 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* Remember where "future" pictures start in the sorted array. */
3579 if (-1 == smallest_poc_greater_than_current) {
3580 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3581 smallest_poc_greater_than_current = out_i;
3587 if(s->picture_structure == PICT_FRAME){
3588 if(h->slice_type==B_TYPE){
3590 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3592 // find the largest poc
/* L0 (step +1) walks past pictures then future; L1 (step -1) the reverse. */
3593 for(list=0; list<2; list++){
3596 int step= list ? -1 : 1;
3598 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3599 while(j<0 || j>= h->short_ref_count){
3600 if(j != -99 && step == (list ? -1 : 1))
3603 j= smallest_poc_greater_than_current + (step>>1);
/* reference==3 means both fields are usable as reference. */
3605 if(sorted_short_ref[j].reference != 3) continue;
3606 h->default_ref_list[list][index ]= sorted_short_ref[j];
3607 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* Long-term references follow, identified by their long-term index. */
3610 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3611 if(h->long_ref[i] == NULL) continue;
3612 if(h->long_ref[i]->reference != 3) continue;
3614 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3615 h->default_ref_list[ list ][index++].pic_id= i;;
3618 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3619 // swap the two first elements of L1 when
3620 // L0 and L1 are identical
3621 Picture temp= h->default_ref_list[1][0];
3622 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3623 h->default_ref_list[1][1] = temp;
3626 if(index < h->ref_count[ list ])
3627 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P-slice path: short-term refs in decoding-recency order, then long-term. */
3631 for(i=0; i<h->short_ref_count; i++){
3632 if(h->short_ref[i]->reference != 3) continue; //FIXME: reference-field (PAFF) handling missing here
3633 h->default_ref_list[0][index ]= *h->short_ref[i];
3634 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3636 for(i = 0; i < 16; i++){
3637 if(h->long_ref[i] == NULL) continue;
3638 if(h->long_ref[i]->reference != 3) continue;
3639 h->default_ref_list[0][index ]= *h->long_ref[i];
3640 h->default_ref_list[0][index++].pic_id= i;;
3642 if(index < h->ref_count[0])
3643 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3646 if(h->slice_type==B_TYPE){
3648 //FIXME: second-field handling not implemented
3652 for (i=0; i<h->ref_count[0]; i++) {
3653 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3655 if(h->slice_type==B_TYPE){
3656 for (i=0; i<h->ref_count[1]; i++) {
/* NOTE(review): this trace prints data[0] from list 0 while labelling it
 * List1 — presumably a copy/paste slip in the debug line; verify. */
3657 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3664 static void print_short_term(H264Context *h);
3665 static void print_long_term(H264Context *h);
/* Parses ref_pic_list_reordering() from the slice header (H.264 spec 7.3.3.1)
 * and applies it: starting from the default lists, each reordering op moves a
 * short-term (identified by a picture-number delta against curr_pic_num) or a
 * long-term (identified by long_term_pic_idx) reference to the current index,
 * shifting the intervening entries down. Afterwards, holes (entries with no
 * data) are patched with the current picture, and B-slice direct-mode tables
 * are initialized.
 * NOTE(review): elided listing — braces, `break`s and the return are missing
 * from view. */
3667 static int decode_ref_pic_list_reordering(H264Context *h){
3668 MpegEncContext * const s = &h->s;
3671 print_short_term(h);
3673 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3675 for(list=0; list<2; list++){
3676 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0 / _l1 */
3678 if(get_bits1(&s->gb)){
/* pred is the running picNumLXPred; starts at the current picture number. */
3679 int pred= h->curr_pic_num;
3681 for(index=0; ; index++){
3682 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3685 Picture *ref = NULL;
/* idc 3 terminates the reordering loop. */
3687 if(reordering_of_pic_nums_idc==3)
3690 if(index >= h->ref_count[list]){
3691 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3695 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term, delta subtracted/added to pred (mod max_pic_num). */
3696 if(reordering_of_pic_nums_idc<2){
3697 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3699 if(abs_diff_pic_num >= h->max_pic_num){
3700 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3704 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3705 else pred+= abs_diff_pic_num;
3706 pred &= h->max_pic_num - 1;
3708 for(i= h->short_ref_count-1; i>=0; i--){
3709 ref = h->short_ref[i];
3710 assert(ref->reference == 3);
3711 assert(!ref->long_ref);
3712 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3716 ref->pic_id= ref->frame_num;
/* idc 2: long-term reference selected by its long-term index. */
3718 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3719 ref = h->long_ref[pic_id];
3720 ref->pic_id= pic_id;
3721 assert(ref->reference == 3);
3722 assert(ref->long_ref);
3727 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3728 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Find the old position of ref (if any), then shift entries down and
 * insert it at the current index. */
3730 for(i=index; i+1<h->ref_count[list]; i++){
3731 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3734 for(; i > index; i--){
3735 h->ref_list[list][i]= h->ref_list[list][i-1];
3737 h->ref_list[list][index]= *ref;
3740 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3746 if(h->slice_type!=B_TYPE) break;
/* Replace empty entries with the current picture so later code never
 * dereferences a NULL frame. */
3748 for(list=0; list<2; list++){
3749 for(index= 0; index < h->ref_count[list]; index++){
3750 if(!h->ref_list[list][index].data[0])
3751 h->ref_list[list][index]= s->current_picture;
3753 if(h->slice_type!=B_TYPE) break;
3756 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3757 direct_dist_scale_factor(h);
3758 direct_ref_list_init(h);
/* Parses pred_weight_table() (H.264 spec 7.3.3.2): explicit weighted
 * prediction weights/offsets for each reference in each list. Entries without
 * a per-ref flag get the default weight (1 << log2_weight_denom) and offset 0.
 * Sets h->use_weight / h->use_weight_chroma when any entry is non-default.
 * NOTE(review): elided listing — braces, the chroma else branch and the
 * return are missing from view. */
3762 static int pred_weight_table(H264Context *h){
3763 MpegEncContext * const s = &h->s;
3765 int luma_def, chroma_def;
3768 h->use_weight_chroma= 0;
3769 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3770 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3771 luma_def = 1<<h->luma_log2_weight_denom;
3772 chroma_def = 1<<h->chroma_log2_weight_denom;
3774 for(list=0; list<2; list++){
3775 for(i=0; i<h->ref_count[list]; i++){
3776 int luma_weight_flag, chroma_weight_flag;
3778 luma_weight_flag= get_bits1(&s->gb);
3779 if(luma_weight_flag){
3780 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3781 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* Any non-default weight/offset enables the weighted-prediction path. */
3782 if( h->luma_weight[list][i] != luma_def
3783 || h->luma_offset[list][i] != 0)
3786 h->luma_weight[list][i]= luma_def;
3787 h->luma_offset[list][i]= 0;
3790 chroma_weight_flag= get_bits1(&s->gb);
3791 if(chroma_weight_flag){
/* j indexes the two chroma planes (Cb, Cr). */
3794 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3795 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3796 if( h->chroma_weight[list][i][j] != chroma_def
3797 || h->chroma_offset[list][i][j] != 0)
3798 h->use_weight_chroma= 1;
3803 h->chroma_weight[list][i][j]= chroma_def;
3804 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry weights for list 1. */
3808 if(h->slice_type != B_TYPE) break;
3810 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Computes the implicit weighted-prediction table for B slices (H.264 spec
 * 8.4.2.3.2): for every (ref0, ref1) pair the weight is derived from the
 * temporal distances tb/td between the current picture and the references,
 * falling back to equal weighting (32/32) outside the spec's valid range.
 * The single-reference symmetric case (ref POCs bracketing the current POC
 * equally) disables weighting entirely.
 * NOTE(review): elided listing — braces and an early return are missing. */
3814 static void implicit_weight_table(H264Context *h){
3815 MpegEncContext * const s = &h->s;
3817 int cur_poc = s->current_picture_ptr->poc;
3819 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3820 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3822 h->use_weight_chroma= 0;
/* use_weight==2 selects the implicit (table-driven) weighting mode. */
3827 h->use_weight_chroma= 2;
3828 h->luma_log2_weight_denom= 5;
3829 h->chroma_log2_weight_denom= 5;
3832 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3833 int poc0 = h->ref_list[0][ref0].poc;
3834 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3835 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances per the spec; tx approximates 16384/td. */
3836 int td = clip(poc1 - poc0, -128, 127);
3838 int tb = clip(cur_poc - poc0, -128, 127);
3839 int tx = (16384 + (ABS(td) >> 1)) / td;
3840 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3841 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3842 h->implicit_weight[ref0][ref1] = 32;
3844 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3846 h->implicit_weight[ref0][ref1] = 32;
/* Drops the reference status of a picture unless it is still pending output
 * (delayed_output_pic or in the delayed_pic queue).
 * NOTE(review): the body is almost entirely elided in this listing — the
 * actual reference clearing is not visible; confirm against full source. */
3851 static inline void unreference_pic(H264Context *h, Picture *pic){
3854 if(pic == h->delayed_output_pic)
3857 for(i = 0; h->delayed_pic[i]; i++)
3858 if(pic == h->delayed_pic[i]){
3866 * instantaneous decoder refresh.
/* Handles an IDR: unreferences and clears every long-term and short-term
 * reference picture, resetting both counters to zero (H.264 spec 8.2.5.1). */
3868 static void idr(H264Context *h){
3871 for(i=0; i<16; i++){
3872 if (h->long_ref[i] != NULL) {
3873 unreference_pic(h, h->long_ref[i]);
3874 h->long_ref[i]= NULL;
3877 h->long_ref_count=0;
3879 for(i=0; i<h->short_ref_count; i++){
3880 unreference_pic(h, h->short_ref[i]);
3881 h->short_ref[i]= NULL;
3883 h->short_ref_count=0;
3886 /* forget old pics after a seek */
/* AVCodec flush callback: clears the delayed-output queue and the current
 * picture's reference flag so decoding can restart cleanly after a seek.
 * NOTE(review): elided listing — the loop header over delayed_pic and the
 * call that clears the reference lists are not visible here. */
3887 static void flush_dpb(AVCodecContext *avctx){
3888 H264Context *h= avctx->priv_data;
3891 h->delayed_pic[i]= NULL;
3892 h->delayed_output_pic= NULL;
3894 if(h->s.current_picture_ptr)
3895 h->s.current_picture_ptr->reference= 0;
3900 * @return the removed picture or NULL if an error occurs
/* Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array and decrementing the count. */
3902 static Picture * remove_short(H264Context *h, int frame_num){
3903 MpegEncContext * const s = &h->s;
3906 if(s->avctx->debug&FF_DEBUG_MMCO)
3907 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3909 for(i=0; i<h->short_ref_count; i++){
3910 Picture *pic= h->short_ref[i];
3911 if(s->avctx->debug&FF_DEBUG_MMCO)
3912 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3913 if(pic->frame_num == frame_num){
3914 h->short_ref[i]= NULL;
/* Shift the remaining pointers down to close the gap. */
3915 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3916 h->short_ref_count--;
3925 * @return the removed picture or NULL if an error occurs
/* Removes the long-term reference at index i (if any), returning it and
 * decrementing long_ref_count only when a picture was actually present. */
3927 static Picture * remove_long(H264Context *h, int i){
3930 pic= h->long_ref[i];
3931 h->long_ref[i]= NULL;
3932 if(pic) h->long_ref_count--;
3938 * print short term list
/* Debug helper: dumps the short-term reference list (index, frame_num, POC,
 * data pointer) when FF_DEBUG_MMCO is enabled; otherwise a no-op. */
3940 static void print_short_term(H264Context *h) {
3942 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3943 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3944 for(i=0; i<h->short_ref_count; i++){
3945 Picture *pic= h->short_ref[i];
3946 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3952 * print long term list
/* Debug helper: dumps the 16-slot long-term reference array when
 * FF_DEBUG_MMCO is enabled; otherwise a no-op. */
3954 static void print_long_term(H264Context *h) {
3956 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3957 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3958 for(i = 0; i < 16; i++){
3959 Picture *pic= h->long_ref[i];
3961 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3968 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO operations (H.264 spec 8.2.5.4): un-reference or
 * re-mark short/long-term pictures, cap the long-term index, or reset all
 * references. Finally inserts the current picture at the head of the
 * short-term list unless an MMCO marked it long-term.
 * NOTE(review): elided listing — case labels (e.g. MMCO_LONG, MMCO_RESET),
 * braces and the return are missing from view. */
3970 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3971 MpegEncContext * const s = &h->s;
3973 int current_is_long=0;
3976 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3977 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3979 for(i=0; i<mmco_count; i++){
3980 if(s->avctx->debug&FF_DEBUG_MMCO)
3981 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3983 switch(mmco[i].opcode){
3984 case MMCO_SHORT2UNUSED:
3985 pic= remove_short(h, mmco[i].short_frame_num);
3987 unreference_pic(h, pic);
3988 else if(s->avctx->debug&FF_DEBUG_MMCO)
3989 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3991 case MMCO_SHORT2LONG:
/* Free any picture already occupying the target long-term slot first. */
3992 pic= remove_long(h, mmco[i].long_index);
3993 if(pic) unreference_pic(h, pic);
3995 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3996 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3997 h->long_ref_count++;
3999 case MMCO_LONG2UNUSED:
4000 pic= remove_long(h, mmco[i].long_index);
4002 unreference_pic(h, pic);
4003 else if(s->avctx->debug&FF_DEBUG_MMCO)
4004 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* (MMCO_LONG, per the visible code below: current picture becomes long-term.) */
4007 pic= remove_long(h, mmco[i].long_index);
4008 if(pic) unreference_pic(h, pic);
4010 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4011 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4012 h->long_ref_count++;
4016 case MMCO_SET_MAX_LONG:
4017 assert(mmco[i].long_index <= 16);
4018 // just remove the long term which index is greater than new max
4019 for(j = mmco[i].long_index; j<16; j++){
4020 pic = remove_long(h, j);
4021 if (pic) unreference_pic(h, pic);
/* (Reset path: drop every short-term and long-term reference.) */
4025 while(h->short_ref_count){
4026 pic= remove_short(h, h->short_ref[0]->frame_num);
4027 unreference_pic(h, pic);
4029 for(j = 0; j < 16; j++) {
4030 pic= remove_long(h, j);
4031 if(pic) unreference_pic(h, pic);
/* Sliding-window default: push the current picture onto the short-term
 * list head. A duplicate frame_num indicates an illegal buffer state. */
4038 if(!current_is_long){
4039 pic= remove_short(h, s->current_picture_ptr->frame_num);
4041 unreference_pic(h, pic);
4042 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4045 if(h->short_ref_count)
4046 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4048 h->short_ref[0]= s->current_picture_ptr;
4049 h->short_ref[0]->long_ref=0;
4050 h->short_ref_count++;
4053 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header (H.264 spec 7.3.3.3):
 * for IDR slices reads no_output_of_prior_pics / long_term_reference flags;
 * otherwise reads adaptive MMCO operations until MMCO_END, or synthesizes a
 * sliding-window MMCO_SHORT2UNUSED when the DPB is full.
 * NOTE(review): elided listing — braces and the return are missing. */
4058 static int decode_ref_pic_marking(H264Context *h){
4059 MpegEncContext * const s = &h->s;
4062 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: bit 1 -> broken_link 0, bit 0 -> -1. */
4063 s->broken_link= get_bits1(&s->gb) -1;
/* long_term_reference_flag: -1 when clear, 0 (slot 0) when set. */
4064 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4065 if(h->mmco[0].long_index == -1)
4068 h->mmco[0].opcode= MMCO_LONG;
4072 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4073 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4074 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4076 h->mmco[i].opcode= opcode;
4077 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, wrapped modulo max_frame_num. */
4078 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4079 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4080 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4084 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4085 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4086 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4087 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4092 if(opcode > MMCO_LONG){
4093 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4096 if(opcode == MMCO_END)
/* Sliding window: DPB full -> drop the oldest short-term reference. */
4101 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4103 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4104 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4105 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Computes the picture order count for the current picture (H.264 spec
 * 8.2.1), covering all three poc_type modes: 0 (explicit LSB with MSB
 * wrap tracking), 1 (frame_num-based with per-cycle offsets from the SPS),
 * and 2 (POC derived directly from frame_num / nal_ref_idc). Stores the
 * field POCs and the frame POC (min of the two) in the current picture.
 * NOTE(review): elided listing — braces, poc_type==2 assignments and the
 * return are missing from view. */
4115 static int init_poc(H264Context *h){
4116 MpegEncContext * const s = &h->s;
4117 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4120 if(h->nal_unit_type == NAL_IDR_SLICE){
4121 h->frame_num_offset= 0;
/* frame_num wrapped -> advance the offset by one full frame_num period. */
4123 if(h->frame_num < h->prev_frame_num)
4124 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4126 h->frame_num_offset= h->prev_frame_num_offset;
4129 if(h->sps.poc_type==0){
4130 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4132 if(h->nal_unit_type == NAL_IDR_SLICE){
/* MSB wrap detection per spec 8.2.1.1. */
4137 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4138 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4139 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4140 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4142 h->poc_msb = h->prev_poc_msb;
4143 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4145 field_poc[1] = h->poc_msb + h->poc_lsb;
4146 if(s->picture_structure == PICT_FRAME)
4147 field_poc[1] += h->delta_poc_bottom;
4148 }else if(h->sps.poc_type==1){
4149 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4152 if(h->sps.poc_cycle_length != 0)
4153 abs_frame_num = h->frame_num_offset + h->frame_num;
/* Non-reference pictures count as half a step earlier in the cycle. */
4157 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4160 expected_delta_per_poc_cycle = 0;
4161 for(i=0; i < h->sps.poc_cycle_length; i++)
4162 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4164 if(abs_frame_num > 0){
4165 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4166 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4168 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4169 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4170 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4174 if(h->nal_ref_idc == 0)
4175 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4177 field_poc[0] = expectedpoc + h->delta_poc[0];
4178 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4180 if(s->picture_structure == PICT_FRAME)
4181 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC follows decoding order; non-ref pictures lag by one. */
4184 if(h->nal_unit_type == NAL_IDR_SLICE){
4187 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4188 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4194 if(s->picture_structure != PICT_BOTTOM_FIELD)
4195 s->current_picture_ptr->field_poc[0]= field_poc[0];
4196 if(s->picture_structure != PICT_TOP_FIELD)
4197 s->current_picture_ptr->field_poc[1]= field_poc[1];
4198 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4199 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
// NOTE(review): this listing is elided — the embedded original line numbers
// (4205, 4206, 4208, ...) jump, so braces/else-branches/returns between them
// are missing from this view. Code below is byte-identical to the listing;
// only comments were added. Confirm against the full file before editing.
4205 * decodes a slice header.
4206 * this will allso call MPV_common_init() and frame_start() as needed
4208 static int decode_slice_header(H264Context *h){
4209 MpegEncContext * const s = &h->s;
4210 int first_mb_in_slice, pps_id;
4211 int num_ref_idx_active_override_flag;
// Maps the coded slice_type value (0..4) to the internal picture-type enums.
4212 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4214 int default_ref_list_done = 0;
// A slice is a reference (and not droppable) iff nal_ref_idc is nonzero.
4216 s->current_picture.reference= h->nal_ref_idc != 0;
4217 s->dropable= h->nal_ref_idc == 0;
4219 first_mb_in_slice= get_ue_golomb(&s->gb);
4221 slice_type= get_ue_golomb(&s->gb);
4223 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "all slices of this picture have the same type"
// (the 5..9 range); the flag records that so later slices can rely on it.
4228 h->slice_type_fixed=1;
4230 h->slice_type_fixed=0;
4232 slice_type= slice_type_map[ slice_type ];
// For I slices, or follow-up slices of the same type, the default
// reference list built here is already final.
4233 if (slice_type == I_TYPE
4234 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4235 default_ref_list_done = 1;
4237 h->slice_type= slice_type;
4239 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4241 pps_id= get_ue_golomb(&s->gb);
4243 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
// PPS/SPS validity is checked via sentinel fields that are zero only for
// never-initialized parameter-set slots.
4246 h->pps= h->pps_buffer[pps_id];
4247 if(h->pps.slice_group_count == 0){
4248 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4252 h->sps= h->sps_buffer[ h->pps.sps_id ];
4253 if(h->sps.log2_max_frame_num == 0){
4254 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// Dequant tables depend on the PPS; rebuild only when the PPS changed.
4258 if(h->dequant_coeff_pps != pps_id){
4259 h->dequant_coeff_pps = pps_id;
4260 init_dequant_tables(h);
4263 s->mb_width= h->sps.mb_width;
// In field/MBAFF coding (frame_mbs_only_flag == 0) the SPS mb_height
// counts MB pairs, hence the factor (2 - flag).
4264 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4266 h->b_stride= s->mb_width*4 + 1;
4267 h->b8_stride= s->mb_width*2 + 1;
// Cropping amounts are in units of 2 luma samples (4 vertically for
// field-coded content).
4269 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4270 if(h->sps.frame_mbs_only_flag)
4271 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4273 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4275 if (s->context_initialized
4276 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4280 if (!s->context_initialized) {
4281 if (MPV_common_init(s) < 0)
// If the IDCT is the plain C one, use the spec scan order directly;
// otherwise permute the scan tables to match the optimized IDCT's
// coefficient layout (the T() macro swaps the 2-bit halves of the index).
4284 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4285 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4286 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4289 for(i=0; i<16; i++){
4290 #define T(x) (x>>2) | ((x<<2) & 0xF)
4291 h->zigzag_scan[i] = T(zigzag_scan[i]);
4292 h-> field_scan[i] = T( field_scan[i]);
// qp==0 scan tables: with transform bypass the unpermuted spec order must
// be used, otherwise they alias the (possibly permuted) tables above.
4295 if(h->sps.transform_bypass){ //FIXME same ugly
4296 h->zigzag_scan_q0 = zigzag_scan;
4297 h->field_scan_q0 = field_scan;
4299 h->zigzag_scan_q0 = h->zigzag_scan;
4300 h->field_scan_q0 = h->field_scan;
4305 s->avctx->width = s->width;
4306 s->avctx->height = s->height;
4307 s->avctx->sample_aspect_ratio= h->sps.sar;
4308 if(!s->avctx->sample_aspect_ratio.den)
4309 s->avctx->sample_aspect_ratio.den = 1;
4311 if(h->sps.timing_info_present_flag){
4312 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
// First slice of the picture: start a new frame.
4316 if(h->slice_num == 0){
4317 if(frame_start(h) < 0)
4321 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4322 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4324 h->mb_aff_frame = 0;
4325 if(h->sps.frame_mbs_only_flag){
4326 s->picture_structure= PICT_FRAME;
4328 if(get_bits1(&s->gb)) { //field_pic_flag
4329 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4331 s->picture_structure= PICT_FRAME;
// In MBAFF streams first_mb_in_slice addresses MB pairs, so double it.
4332 first_mb_in_slice <<= h->sps.mb_aff;
4333 h->mb_aff_frame = h->sps.mb_aff;
4337 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4338 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4339 if(s->mb_y >= s->mb_height){
// Picture numbering: fields use 2*frame_num and one extra bit of range.
4343 if(s->picture_structure==PICT_FRAME){
4344 h->curr_pic_num= h->frame_num;
4345 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4347 h->curr_pic_num= 2*h->frame_num;
4348 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4351 if(h->nal_unit_type == NAL_IDR_SLICE){
4352 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax depends on the SPS pic_order_cnt_type (0 or 1).
4355 if(h->sps.poc_type==0){
4356 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4358 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4359 h->delta_poc_bottom= get_se_golomb(&s->gb);
4363 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4364 h->delta_poc[0]= get_se_golomb(&s->gb);
4366 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4367 h->delta_poc[1]= get_se_golomb(&s->gb);
4372 if(h->pps.redundant_pic_cnt_present){
4373 h->redundant_pic_count= get_ue_golomb(&s->gb);
4376 //set defaults, might be overriden a few line later
4377 h->ref_count[0]= h->pps.ref_count[0];
4378 h->ref_count[1]= h->pps.ref_count[1];
4380 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4381 if(h->slice_type == B_TYPE){
4382 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4384 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4386 if(num_ref_idx_active_override_flag){
4387 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4388 if(h->slice_type==B_TYPE)
4389 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4391 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4392 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4398 if(!default_ref_list_done){
4399 fill_default_ref_list(h);
4402 if(decode_ref_pic_list_reordering(h) < 0)
// Explicit weighted prediction for P/SP (weighted_pred) or B with
// weighted_bipred_idc==1; implicit weights for B with idc==2.
4405 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4406 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4407 pred_weight_table(h);
4408 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4409 implicit_weight_table(h);
4413 if(s->current_picture.reference)
4414 decode_ref_pic_marking(h);
4416 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4417 h->cabac_init_idc = get_ue_golomb(&s->gb);
4419 h->last_qscale_diff = 0;
4420 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4421 if(s->qscale<0 || s->qscale>51){
4422 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4425 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4426 //FIXME qscale / qp ... stuff
4427 if(h->slice_type == SP_TYPE){
4428 get_bits1(&s->gb); /* sp_for_switch_flag */
4430 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4431 get_se_golomb(&s->gb); /* slice_qs_delta */
4434 h->deblocking_filter = 1;
4435 h->slice_alpha_c0_offset = 0;
4436 h->slice_beta_offset = 0;
4437 if( h->pps.deblocking_filter_parameters_present ) {
4438 h->deblocking_filter= get_ue_golomb(&s->gb);
// Bitstream uses 0 = filter on, 1 = filter off; internal convention is
// the opposite, hence the XOR for values < 2 (value 2 stays as-is).
4439 if(h->deblocking_filter < 2)
4440 h->deblocking_filter^= 1; // 1<->0
4442 if( h->deblocking_filter ) {
4443 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4444 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// User-requested loop-filter skipping overrides the stream's setting.
4447 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4448 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4449 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4450 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4451 h->deblocking_filter= 0;
4454 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
// NOTE(review): the literal '?' below is not compilable C — the bit count
// (Ceil(Log2(PicSizeInMapUnits / SliceGroupChangeRate + 1)) per the spec)
// was never filled in. Presumably this line sits inside a disabled
// (#if 0 / FMO) region in the full file — confirm before touching it.
4455 slice_group_change_cycle= get_bits(&s->gb, ?);
4460 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4461 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4463 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4465 av_get_pict_type_char(h->slice_type),
4466 pps_id, h->frame_num,
4467 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4468 h->ref_count[0], h->ref_count[1],
4470 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4472 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads the CAVLC level_prefix: a unary code (run of zeros terminated by a
// one). The cached 32 bits are scanned with av_log2 to find the leading one;
// the listing is elided here (the return of the prefix length is not visible).
4482 static inline int get_level_prefix(GetBitContext *gb){
4486 OPEN_READER(re, gb);
4487 UPDATE_CACHE(re, gb);
4488 buf=GET_CACHE(re, gb);
// log = position of the terminating one-bit, counted from the MSB side.
4490 log= 32 - av_log2(buf);
4492 print_bin(buf>>(32-log), log);
4493 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the unary code including its terminating bit.
4496 LAST_SKIP_BITS(re, gb, log);
4497 CLOSE_READER(re, gb);
// Decides whether the 8x8 transform may be used for the current MB:
// disallowed when any sub-partition is smaller than 8x8, or when a DIRECT
// sub-partition is present without direct_8x8_inference. The surrounding
// loop and return are elided from this listing.
4502 static inline int get_dct8x8_allowed(H264Context *h){
4505 if(!IS_SUB_8X8(h->sub_mb_type[i])
4506 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
// NOTE(review): listing elided — several lines (declarations, returns,
// closing braces) between the embedded original line numbers are missing.
4513 * decodes a residual block.
4514 * @param n block index
4515 * @param scantable scantable
4516 * @param max_coeff number of coefficients in the block
4517 * @return <0 if an error occured
4519 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4520 MpegEncContext * const s = &h->s;
// Selects which of the 4 coeff_token VLC tables to use, keyed on the
// predicted number of nonzero coefficients of the neighbours.
4521 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4523 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4525 //FIXME put trailing_onex into the context
// coeff_token packs total_coeff (>>2) and trailing_ones (&3).
// Chroma DC has its own VLC; luma DC and AC blocks use the context-
// selected table above.
4527 if(n == CHROMA_DC_BLOCK_INDEX){
4528 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4529 total_coeff= coeff_token>>2;
4531 if(n == LUMA_DC_BLOCK_INDEX){
4532 total_coeff= pred_non_zero_count(h, 0);
4533 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4534 total_coeff= coeff_token>>2;
4536 total_coeff= pred_non_zero_count(h, n);
4537 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4538 total_coeff= coeff_token>>2;
4539 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4543 //FIXME set last_non_zero?
4548 trailing_ones= coeff_token&3;
4549 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4550 assert(total_coeff<=16);
// Trailing ones carry only a sign bit each: 0 -> +1, 1 -> -1.
4552 for(i=0; i<trailing_ones; i++){
4553 level[i]= 1 - 2*get_bits1(gb);
4557 int level_code, mask;
// First non-trailing-one level: suffix_length starts at 1 only for blocks
// with many coefficients and few trailing ones.
4558 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4559 int prefix= get_level_prefix(gb);
4561 //first coefficient has suffix_length equal to 0 or 1
4562 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4564 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4566 level_code= (prefix<<suffix_length); //part
4567 }else if(prefix==14){
4569 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4571 level_code= prefix + get_bits(gb, 4); //part
4572 }else if(prefix==15){
4573 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4574 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4576 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// If fewer than 3 trailing ones were coded, levels +-1 are impossible
// here, so the code space is shifted by 2.
4580 if(trailing_ones < 3) level_code += 2;
// Unfold the even/odd level_code into a signed level:
// even -> positive, odd -> negative (branchless via the sign mask).
4585 mask= -(level_code&1);
4586 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4589 //remaining coefficients have suffix_length > 0
4590 for(;i<total_coeff;i++) {
// When a decoded level exceeds this threshold, suffix_length grows
// (adaptive suffix length per the CAVLC spec).
4591 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4592 prefix = get_level_prefix(gb);
4594 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4595 }else if(prefix==15){
4596 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4598 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4601 mask= -(level_code&1);
4602 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4603 if(level_code > suffix_limit[suffix_length])
// total_zeros is only coded when the block is not completely full.
4608 if(total_coeff == max_coeff)
4611 if(n == CHROMA_DC_BLOCK_INDEX)
4612 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4614 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place coefficients from the highest scan position downwards, reading a
// run_before between each pair. Two copies of the loop exist: one without
// dequantization (qmul == NULL path, e.g. DC blocks) ...
4617 coeff_num = zeros_left + total_coeff - 1;
4618 j = scantable[coeff_num];
4620 block[j] = level[0];
4621 for(i=1;i<total_coeff;i++) {
4624 else if(zeros_left < 7){
4625 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4627 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4629 zeros_left -= run_before;
4630 coeff_num -= 1 + run_before;
4631 j= scantable[ coeff_num ];
// ... and one applying the dequant multiplier with rounding (+32 >> 6).
4636 block[j] = (level[0] * qmul[j] + 32)>>6;
4637 for(i=1;i<total_coeff;i++) {
4640 else if(zeros_left < 7){
4641 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4643 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4645 zeros_left -= run_before;
4646 coeff_num -= 1 + run_before;
4647 j= scantable[ coeff_num ];
4649 block[j]= (level[i] * qmul[j] + 32)>>6;
4654 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// NOTE(review): listing elided — mb_type initialization and several
// braces/else lines between the embedded line numbers are missing.
4662 * decodes a P_SKIP or B_SKIP macroblock
4664 static void decode_mb_skip(H264Context *h){
4665 MpegEncContext * const s = &h->s;
4666 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped MB has no residual: clear all nonzero-coefficient counts.
4669 memset(h->non_zero_count[mb_xy], 0, 16);
4670 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// MBAFF: the field/frame decision for a skipped pair's top MB is coded here.
4672 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4673 h->mb_field_decoding_flag= get_bits1(&s->gb);
4675 if(h->mb_field_decoding_flag)
4676 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct prediction.
4678 if( h->slice_type == B_TYPE )
4680 // just for fill_caches. pred_direct_motion will set the real mb_type
4681 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4683 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4684 pred_direct_motion(h, &mb_type);
4686 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4687 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP: 16x16 prediction from ref 0 with the P-skip predicted MV.
4693 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4695 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4696 pred_pskip_motion(h, &mx, &my);
4697 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4698 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4700 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// Commit the per-MB state used by deblocking and neighbour prediction.
4703 write_back_motion(h, mb_type);
4704 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4705 s->current_picture.qscale_table[mb_xy]= s->qscale;
4706 h->slice_table[ mb_xy ]= h->slice_num;
4707 h->prev_mb_skipped= 1;
// NOTE(review): listing elided — many lines (returns, braces, else branches,
// local declarations) between the embedded line numbers are missing. Code
// below is byte-identical to the listing; only comments were added.
4711 * decodes a macroblock
4712 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4714 static int decode_mb_cavlc(H264Context *h){
4715 MpegEncContext * const s = &h->s;
4716 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4717 int mb_type, partition_count, cbp;
4718 int dct8x8_allowed= h->pps.transform_8x8_mode;
4720 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4722 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4723 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip handling: P/SP/B slices carry an mb_skip_run before each MB ---
4725 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4726 if(s->mb_skip_run==-1)
4727 s->mb_skip_run= get_ue_golomb(&s->gb);
4729 if (s->mb_skip_run--) {
// MBAFF: field flag is read at the pair's first (top) MB, or after a skip.
4734 if(h->mb_aff_frame){
4735 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4736 h->mb_field_decoding_flag = get_bits1(&s->gb);
4738 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4740 h->prev_mb_skipped= 0;
// --- mb_type: value ranges overlap across slice types; out-of-range
// values in B/P slices fall through to the intra table (decode_intra_mb).
4742 mb_type= get_ue_golomb(&s->gb);
4743 if(h->slice_type == B_TYPE){
4745 partition_count= b_mb_type_info[mb_type].partition_count;
4746 mb_type= b_mb_type_info[mb_type].type;
4749 goto decode_intra_mb;
4751 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4753 partition_count= p_mb_type_info[mb_type].partition_count;
4754 mb_type= p_mb_type_info[mb_type].type;
4757 goto decode_intra_mb;
4760 assert(h->slice_type == I_TYPE);
4763 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra16x16 types encode their cbp and prediction mode in the table.
4767 cbp= i_mb_type_info[mb_type].cbp;
4768 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4769 mb_type= i_mb_type_info[mb_type].type;
4772 if(h->mb_field_decoding_flag)
4773 mb_type |= MB_TYPE_INTERLACED;
4775 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow, byte-aligned ---
4777 if(IS_INTRA_PCM(mb_type)){
4780 // we assume these blocks are very rare so we dont optimize it
4781 align_get_bits(&s->gb);
4783 // The pixels are stored in the same order as levels in h->mb array.
4784 for(y=0; y<16; y++){
4785 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4786 for(x=0; x<16; x++){
4787 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4788 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4792 const int index= 256 + 4*(y&3) + 32*(y>>2);
4794 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4795 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4799 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4801 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4802 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4806 // In deblocking, the quantizer is 0
4807 s->current_picture.qscale_table[mb_xy]= 0;
4808 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4809 // All coeffs are present
4810 memset(h->non_zero_count[mb_xy], 16, 16);
4812 s->current_picture.mb_type[mb_xy]= mb_type;
4816 fill_caches(h, mb_type, 0);
// --- intra prediction modes ---
4819 if(IS_INTRA(mb_type)){
4820 // init_top_left_availability(h);
4821 if(IS_INTRA4x4(mb_type)){
4824 if(dct8x8_allowed && get_bits1(&s->gb)){
4825 mb_type |= MB_TYPE_8x8DCT;
4829 // fill_intra4x4_pred_table(h);
// Each 4x4 (or 8x8) block: either the predicted mode is used (1 bit),
// or a 3-bit remainder selects among the other 8 modes.
4830 for(i=0; i<16; i+=di){
4831 const int mode_coded= !get_bits1(&s->gb);
4832 const int predicted_mode= pred_intra_mode(h, i);
4836 const int rem_mode= get_bits(&s->gb, 3);
4837 if(rem_mode<predicted_mode)
4842 mode= predicted_mode;
4846 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4848 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4850 write_back_intra_pred_mode(h);
4851 if( check_intra4x4_pred_mode(h) < 0)
4854 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4855 if(h->intra16x16_pred_mode < 0)
4858 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4860 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4861 if(h->chroma_pred_mode < 0)
// --- 8x8 partitions: per-partition sub_mb_type, refs and MVs ---
4863 }else if(partition_count==4){
4864 int i, j, sub_partition_count[4], list, ref[2][4];
4866 if(h->slice_type == B_TYPE){
4868 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4869 if(h->sub_mb_type[i] >=13){
4870 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4873 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4874 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct sub-block triggers direct-motion prediction for the MB;
// the corner caches are invalidated so they get re-predicted.
4876 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4877 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4878 pred_direct_motion(h, &mb_type);
4879 h->ref_cache[0][scan8[4]] =
4880 h->ref_cache[1][scan8[4]] =
4881 h->ref_cache[0][scan8[12]] =
4882 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4885 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4887 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4888 if(h->sub_mb_type[i] >=4){
4889 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4892 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4893 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// First pass: reference indices per 8x8 partition and list.
4897 for(list=0; list<2; list++){
4898 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4899 if(ref_count == 0) continue;
4900 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4904 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4905 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4906 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4915 dct8x8_allowed = get_dct8x8_allowed(h);
// Second pass: motion vector differences per sub-partition; the decoded
// MV is replicated into the cache according to the sub-block shape.
4917 for(list=0; list<2; list++){
4918 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4919 if(ref_count == 0) continue;
4922 if(IS_DIRECT(h->sub_mb_type[i])) {
4923 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4926 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4927 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4929 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4930 const int sub_mb_type= h->sub_mb_type[i];
4931 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4932 for(j=0; j<sub_partition_count[i]; j++){
4934 const int index= 4*i + block_width*j;
4935 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4936 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4937 mx += get_se_golomb(&s->gb);
4938 my += get_se_golomb(&s->gb);
4939 tprintf("final mv:%d %d\n", mx, my);
4941 if(IS_SUB_8X8(sub_mb_type)){
4942 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4943 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4944 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4945 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4946 }else if(IS_SUB_8X4(sub_mb_type)){
4947 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4948 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4949 }else if(IS_SUB_4X8(sub_mb_type)){
4950 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4951 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4953 assert(IS_SUB_4X4(sub_mb_type));
4954 mv_cache[ 0 ][0]= mx;
4955 mv_cache[ 0 ][1]= my;
4959 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- whole-MB direct, or 16x16 / 16x8 / 8x16 inter prediction ---
4965 }else if(IS_DIRECT(mb_type)){
4966 pred_direct_motion(h, &mb_type);
4967 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4969 int list, mx, my, i;
4970 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4971 if(IS_16X16(mb_type)){
4972 for(list=0; list<2; list++){
4973 if(h->ref_count[list]>0){
4974 if(IS_DIR(mb_type, 0, list)){
4975 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4976 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4978 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4981 for(list=0; list<2; list++){
4982 if(IS_DIR(mb_type, 0, list)){
4983 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4984 mx += get_se_golomb(&s->gb);
4985 my += get_se_golomb(&s->gb);
4986 tprintf("final mv:%d %d\n", mx, my);
4988 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4990 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4993 else if(IS_16X8(mb_type)){
4994 for(list=0; list<2; list++){
4995 if(h->ref_count[list]>0){
4997 if(IS_DIR(mb_type, i, list)){
4998 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4999 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5001 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5005 for(list=0; list<2; list++){
5007 if(IS_DIR(mb_type, i, list)){
5008 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5009 mx += get_se_golomb(&s->gb);
5010 my += get_se_golomb(&s->gb);
5011 tprintf("final mv:%d %d\n", mx, my);
5013 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5015 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5019 assert(IS_8X16(mb_type));
5020 for(list=0; list<2; list++){
5021 if(h->ref_count[list]>0){
5023 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5024 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5025 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5027 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5031 for(list=0; list<2; list++){
5033 if(IS_DIR(mb_type, i, list)){
5034 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5035 mx += get_se_golomb(&s->gb);
5036 my += get_se_golomb(&s->gb);
5037 tprintf("final mv:%d %d\n", mx, my);
5039 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5041 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5047 if(IS_INTER(mb_type))
5048 write_back_motion(h, mb_type);
// --- coded block pattern (not present for Intra16x16, which embeds it) ---
5050 if(!IS_INTRA16x16(mb_type)){
5051 cbp= get_ue_golomb(&s->gb);
5053 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5057 if(IS_INTRA4x4(mb_type))
5058 cbp= golomb_to_intra4x4_cbp[cbp];
5060 cbp= golomb_to_inter_cbp[cbp];
// transform_size_8x8_flag for inter MBs with luma residual.
5063 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5064 if(get_bits1(&s->gb))
5065 mb_type |= MB_TYPE_8x8DCT;
5067 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual decoding ---
5069 if(cbp || IS_INTRA16x16(mb_type)){
5070 int i8x8, i4x4, chroma_idx;
5071 int chroma_qp, dquant;
5072 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5073 const uint8_t *scan, *dc_scan;
5075 // fill_non_zero_count_cache(h);
// Field MBs use the field scan; *_q0 tables are the unpermuted variants
// for qscale 0 (transform bypass).
5077 if(IS_INTERLACED(mb_type)){
5078 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5079 dc_scan= luma_dc_field_scan;
5081 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5082 dc_scan= luma_dc_zigzag_scan;
5085 dquant= get_se_golomb(&s->gb);
5087 if( dquant > 25 || dquant < -26 ){
5088 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps around modulo 52 per the spec.
5092 s->qscale += dquant;
5093 if(((unsigned)s->qscale) > 51){
5094 if(s->qscale<0) s->qscale+= 52;
5095 else s->qscale-= 52;
5098 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
// Intra16x16: separate DC block (16 coeffs) + 15-coeff AC blocks.
5099 if(IS_INTRA16x16(mb_type)){
5100 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5101 return -1; //FIXME continue if partitioned and other return -1 too
5104 assert((cbp&15) == 0 || (cbp&15) == 15);
5107 for(i8x8=0; i8x8<4; i8x8++){
5108 for(i4x4=0; i4x4<4; i4x4++){
5109 const int index= i4x4 + 4*i8x8;
5110 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5116 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-Intra16x16 luma: one cbp bit per 8x8; 8x8 DCT blocks are decoded
// as four interleaved 4x4 CAVLC scans (zigzag_scan8x8_cavlc).
5119 for(i8x8=0; i8x8<4; i8x8++){
5120 if(cbp & (1<<i8x8)){
5121 if(IS_8x8DCT(mb_type)){
5122 DCTELEM *buf = &h->mb[64*i8x8];
5124 for(i4x4=0; i4x4<4; i4x4++){
5125 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5126 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5129 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5130 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5132 for(i4x4=0; i4x4<4; i4x4++){
5133 const int index= i4x4 + 4*i8x8;
5135 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5141 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5142 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks (4 coeffs each, no dequant here) then AC blocks.
5148 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5149 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5155 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5156 for(i4x4=0; i4x4<4; i4x4++){
5157 const int index= 16 + 4*chroma_idx + i4x4;
5158 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5164 uint8_t * const nnz= &h->non_zero_count_cache[0];
5165 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5166 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5169 uint8_t * const nnz= &h->non_zero_count_cache[0];
5170 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5171 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5172 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5174 s->current_picture.qscale_table[mb_xy]= s->qscale;
5175 write_back_non_zero_count(h);
// CABAC mb_field_decoding_flag: context (0..2) counts how many of the left
// and above MB pairs (same slice) are field-coded. The ctx++ bodies are
// elided from this listing.
5180 static int decode_cabac_field_decoding_flag(H264Context *h) {
5181 MpegEncContext * const s = &h->s;
5182 const int mb_x = s->mb_x;
// &~1: address the top MB of the current pair.
5183 const int mb_y = s->mb_y & ~1;
5184 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5185 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5187 unsigned int ctx = 0;
5189 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5192 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5196 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC intra mb_type decoding, shared between I/P/B slices via ctx_base.
// Returns 0 for I4x4, 25 for I_PCM, or 1..24 encoding the Intra16x16
// variant (pred mode + cbp). Several lines (ctx setup, return) are elided.
5199 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5200 uint8_t *state= &h->cabac_state[ctx_base];
5204 MpegEncContext * const s = &h->s;
5205 const int mba_xy = h->left_mb_xy[0];
5206 const int mbb_xy = h->top_mb_xy;
// In intra slices the first bin's context depends on whether the left/top
// neighbours are themselves non-I4x4 intra MBs.
5208 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5210 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5212 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5213 return 0; /* I4x4 */
5216 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5217 return 0; /* I4x4 */
// The terminate bin distinguishes I_PCM from Intra16x16.
5220 if( get_cabac_terminate( &h->cabac ) )
5221 return 25; /* PCM */
5223 mb_type = 1; /* I16x16 */
// Intra16x16 type = 1 + pred_mode + 4*cbp_chroma + 12*(cbp_luma != 0).
5224 if( get_cabac( &h->cabac, &state[1] ) )
5225 mb_type += 12; /* cbp_luma != 0 */
5227 if( get_cabac( &h->cabac, &state[2] ) ) {
5228 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5229 mb_type += 4 * 2; /* cbp_chroma == 2 */
5231 mb_type += 4 * 1; /* cbp_chroma == 1 */
5233 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5235 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
// CABAC mb_type decoding dispatcher per slice type. For B slices the first
// context depends on left/top neighbours being non-skip, non-direct; the
// 4-bit suffix tree selects among the bi-predictive 16x16/16x8/8x16 types.
// Several lines (ctx declaration, braces) are elided from this listing.
5240 static int decode_cabac_mb_type( H264Context *h ) {
5241 MpegEncContext * const s = &h->s;
5243 if( h->slice_type == I_TYPE ) {
5244 return decode_cabac_intra_mb_type(h, 3, 1);
5245 } else if( h->slice_type == P_TYPE ) {
// P slice: a small binary tree over states 14..17 selects
// 16x16 / 8x8 / 8x16 / 16x8, else fall through to intra types (+5).
5246 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5248 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5249 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5250 return 0; /* P_L0_D16x16; */
5252 return 3; /* P_8x8; */
5254 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5255 return 2; /* P_L0_D8x16; */
5257 return 1; /* P_L0_D16x8; */
5260 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5262 } else if( h->slice_type == B_TYPE ) {
5263 const int mba_xy = h->left_mb_xy[0];
5264 const int mbb_xy = h->top_mb_xy;
5268 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5269 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5271 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5272 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5275 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5276 return 0; /* B_Direct_16x16 */
5278 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5279 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5282 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5283 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5284 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5285 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5287 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
// bits 13..15 are escapes: intra types, B_L1_L0_8x16, B_8x8.
5288 else if( bits == 13 ) {
5289 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5290 } else if( bits == 14 )
5291 return 11; /* B_L1_L0_8x16 */
5292 else if( bits == 15 )
5293 return 22; /* B_8x8 */
// Values 8..12 get one more suffix bin appended.
5295 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5296 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5298 /* TODO SI/SP frames? */
// CABAC mb_skip_flag: context (0..2) counts left/top neighbours (same slice)
// that are NOT skipped; P/SP and B slices use different state banks (11 / 24).
// The ctx declaration and increment bodies are elided from this listing.
5303 static int decode_cabac_mb_skip( H264Context *h) {
5304 MpegEncContext * const s = &h->s;
5305 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5306 const int mba_xy = mb_xy - 1;
5307 const int mbb_xy = mb_xy - s->mb_stride;
5310 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5312 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5315 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5316 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5318 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
// CABAC intra4x4 pred mode: one bin (state 68) selects "use predicted mode";
// otherwise a 3-bit fixed-length code (state 69) gives rem_intra4x4_pred_mode,
// adjusted past the predicted mode. Returns are elided from this listing.
5321 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5324 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5327 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5328 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5329 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode so all 8 remaining modes are reachable.
5331 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode (0..3) with truncated-unary binarization.
 * ctx for the first bin depends on whether the left/top neighbours used a
 * non-zero chroma mode; remaining bins use ctx 64+3.
 * NOTE(review): excerpt — ctx init and return statements are among the
 * missing lines. */
5337 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5338 const int mba_xy = h->left_mb_xy[0];
5339 const int mbb_xy = h->top_mb_xy;
5343 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5344 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5347 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5350 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5353 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5355 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4 luma block index (raster within the
 * 8x8-then-4x4 scan) to its x/y position, and back from (x,y) to index.
 * NOTE(review): the rows of block_idx_xy are missing from this excerpt. */
5361 static const uint8_t block_idx_x[16] = {
5362 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5364 static const uint8_t block_idx_y[16] = {
5365 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5367 static const uint8_t block_idx_xy[4][4] = {
/* Decodes the 4-bit luma coded_block_pattern, one bin per 8x8 block
 * (ctx base 73). The context for each bin depends on the cbp bits of the
 * spatially neighbouring 8x8 blocks (left/top, possibly in adjacent MBs).
 * NOTE(review): excerpt — loop body braces, cbp accumulation and the
 * default cbp_a/cbp_b initialization are among the missing lines. */
5374 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5375 MpegEncContext * const s = &h->s;
5380 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* (x,y) of the top-left 4x4 block of this 8x8 partition */
5386 x = block_idx_x[4*i8x8];
5387 y = block_idx_y[4*i8x8];
5391 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5392 cbp_a = h->left_cbp;
5393 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5398 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5400 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5403 /* No need to test for skip as we put 0 for skip block */
5404 /* No need to test for IPCM as we put 1 for IPCM block */
/* left neighbour 8x8: wrap x-1 into the neighbour MB's coordinates */
5406 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5407 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5412 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5413 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5417 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decodes the chroma part of coded_block_pattern (0: none, 1: DC only,
 * 2: DC+AC), using ctx base 77. First bin: any chroma coeffs at all;
 * second bin (contexts shifted by +4 in the missing lines, presumably —
 * TODO confirm against full source): DC-only vs DC+AC. Context increments
 * come from the neighbours' chroma cbp (bits 4-5 of left/top cbp). */
5423 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5427 cbp_a = (h->left_cbp>>4)&0x03;
5428 cbp_b = (h-> top_cbp>>4)&0x03;
5431 if( cbp_a > 0 ) ctx++;
5432 if( cbp_b > 0 ) ctx += 2;
5433 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: neighbours with full (DC+AC) chroma raise the context */
5437 if( cbp_a == 2 ) ctx++;
5438 if( cbp_b == 2 ) ctx += 2;
5439 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta (ctx base 60) as a unary code, then maps the
 * unsigned value to a signed delta (even -> positive, odd -> negative).
 * Returns INT_MIN-style failure is handled by the caller; the val>52 guard
 * stops a corrupted stream from looping forever.
 * NOTE(review): excerpt — ctx selection, the even-value return and the
 * error return are among the missing lines. */
5441 static int decode_cabac_mb_dqp( H264Context *h) {
5442 MpegEncContext * const s = &h->s;
/* previous MB in decoding order (wraps to end of previous row) */
5448 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5450 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
/* ctx 1 if the previous MB had a non-zero dqp and coded coefficients */
5452 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5455 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5461 if(val > 52) //prevent infinite loop
/* odd unary values map to negative deltas */
5468 return -(val + 1)/2;
/* Decodes sub_mb_type for a P macroblock (ctx 21-23); result indexes
 * p_sub_mb_type_info. NOTE(review): excerpt — the return statements for
 * each branch are among the missing lines. */
5470 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5471 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5473 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5475 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes sub_mb_type for a B macroblock (ctx 36-39); result indexes
 * b_sub_mb_type_info. Prefix bins distinguish direct / one-list 8x8 /
 * larger classes, suffix bins (all ctx 39) select the exact type.
 * NOTE(review): excerpt — the type base assignments and final return are
 * among the missing lines. */
5479 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5481 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5482 return 0; /* B_Direct_8x8 */
5483 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5484 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5486 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5487 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5488 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two suffix bins refine the remaining sub-types */
5491 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5492 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context (399 + 0..2) is the number of
 * neighbouring MBs that already use the 8x8 transform. */
5496 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5497 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx for block n of the given list as a unary code
 * (ctx base 54). Context increment derives from the left/top cached
 * ref indices; in B slices a neighbour only counts if it is not a
 * direct-predicted block. NOTE(review): excerpt — ctx arithmetic, the
 * loop body and the return are among the missing lines. */
5500 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5501 int refa = h->ref_cache[list][scan8[n] - 1];
5502 int refb = h->ref_cache[list][scan8[n] - 8];
5506 if( h->slice_type == B_TYPE) {
5507 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5509 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5518 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decodes one motion-vector-difference component (l=0: x, ctx base 40;
 * l=1: y, ctx base 47). UEG3 binarization: a context-coded prefix
 * (ctx depends on the magnitude of neighbouring MVDs), then a bypass-coded
 * exp-Golomb suffix for large values, then a bypass sign bit.
 * NOTE(review): excerpt — amvd thresholds, mvd init and the suffix
 * accumulation are among the missing lines. */
5528 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5529 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5530 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5531 int ctxbase = (l == 0) ? 40 : 47;
5536 else if( amvd > 32 )
5541 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, capped at 9 before switching to the bypass suffix */
5546 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb suffix: count leading 1-bits... */
5554 while( get_cabac_bypass( &h->cabac ) ) {
/* ...then read that many mantissa bits */
5559 if( get_cabac_bypass( &h->cabac ) )
/* sign bit */
5563 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Computes the coded_block_flag context (0..3) for block idx of category
 * cat, offset by 4*cat. nza/nzb are the "neighbour has non-zero coeffs"
 * flags, fetched from different places per category:
 *   cat 0 (luma DC): bit 8 of the neighbour cbp
 *   cat 1/2 (luma AC / 4x4): non_zero_count_cache
 *   cat 3 (chroma DC): per-plane bits 6..7 of the neighbour cbp
 *   cat 4 (chroma AC): non_zero_count_cache, chroma offsets
 * NOTE(review): excerpt — the ctx combination from nza/nzb is among the
 * missing lines. */
5567 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5572 nza = h->left_cbp&0x100;
5573 nzb = h-> top_cbp&0x100;
5574 } else if( cat == 1 || cat == 2 ) {
5575 nza = h->non_zero_count_cache[scan8[idx] - 1];
5576 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5577 } else if( cat == 3 ) {
5578 nza = (h->left_cbp>>(6+idx))&0x01;
5579 nzb = (h-> top_cbp>>(6+idx))&0x01;
5582 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5583 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5592 return ctx + 4 * cat;
/* Decodes one CABAC-coded residual block: coded_block_flag, the
 * significance map (with separate contexts for frame/field and 8x8),
 * then coefficient magnitudes (context-coded up to 14, bypass exp-Golomb
 * beyond) and bypass sign bits, applying dequantization via qmul unless
 * qmul is NULL (DC blocks, dequantized later).
 * NOTE(review): numbered listing excerpt — several lines (cbf skip for
 * cat 0/3/5, parts of the significance macro, loop tails) are missing. */
5595 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5596 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* frame vs field (MBAFF) use disjoint significance context banks */
5597 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5598 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5599 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5600 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5601 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* 8x8 blocks share significance contexts between several positions */
5602 static const int significant_coeff_flag_offset_8x8[63] = {
5603 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5604 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5605 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5606 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5608 static const int last_coeff_flag_offset_8x8[63] = {
5609 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5610 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5611 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5612 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5618 int coeff_count = 0;
5621 int abslevelgt1 = 0;
5623 uint8_t *significant_coeff_ctx_base;
5624 uint8_t *last_coeff_ctx_base;
5625 uint8_t *abs_level_m1_ctx_base;
5627 /* cat: 0-> DC 16x16 n = 0
5628 * 1-> AC 16x16 n = luma4x4idx
5629 * 2-> Luma4x4 n = luma4x4idx
5630 * 3-> DC Chroma n = iCbCr
5631 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5632 * 5-> Luma8x8 n = 4 * luma8x8idx
5635 /* read coded block flag */
5637 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* nothing coded: clear the non-zero-count cache entry and bail out */
5638 if( cat == 1 || cat == 2 )
5639 h->non_zero_count_cache[scan8[n]] = 0;
5641 h->non_zero_count_cache[scan8[16+n]] = 0;
5647 significant_coeff_ctx_base = h->cabac_state
5648 + significant_coeff_flag_offset[cat]
5649 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5650 last_coeff_ctx_base = h->cabac_state
5651 + last_significant_coeff_flag_offset[cat]
5652 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5653 abs_level_m1_ctx_base = h->cabac_state
5654 + coeff_abs_level_m1_offset[cat];
/* significance map: one significant_coeff_flag per position; each set
 * position also reads last_significant_coeff_flag to allow early exit */
5657 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5658 for(last= 0; last < coefs; last++) { \
5659 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5660 if( get_cabac( &h->cabac, sig_ctx )) { \
5661 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5662 index[coeff_count++] = last; \
5663 if( get_cabac( &h->cabac, last_ctx ) ) { \
5669 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5670 last_coeff_flag_offset_8x8[last] );
5672 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* the final position has no significance flag: implicitly significant */
5674 if( last == max_coeff -1 ) {
5675 index[coeff_count++] = last;
5677 assert(coeff_count > 0);
/* record non-zero status for neighbouring-context decisions */
5680 h->cbp_table[mb_xy] |= 0x100;
5681 else if( cat == 1 || cat == 2 )
5682 h->non_zero_count_cache[scan8[n]] = coeff_count;
5684 h->cbp_table[mb_xy] |= 0x40 << n;
5686 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5689 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* levels are decoded in reverse scan order */
5692 for( i = coeff_count - 1; i >= 0; i-- ) {
5693 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5694 int j= scantable[index[i]];
5696 if( get_cabac( &h->cabac, ctx ) == 0 ) {
/* |level| == 1: only the bypass sign bit remains */
5698 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5701 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5702 else block[j] = ( qmul[j] + 32) >> 6;
/* |level| > 1: unary up to 14, then bypass exp-Golomb escape */
5708 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5709 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5713 if( coeff_abs >= 15 ) {
5715 while( get_cabac_bypass( &h->cabac ) ) {
5716 coeff_abs += 1 << j;
5721 if( get_cabac_bypass( &h->cabac ) )
5722 coeff_abs += 1 << j ;
5727 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5728 else block[j] = coeff_abs;
5730 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5731 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The simple raster-order neighbours are adjusted for MBAFF: when the
 * current and neighbouring MB pairs differ in frame/field coding, the
 * effective neighbour is shifted within its pair. */
5740 void inline compute_mb_neighboors(H264Context *h)
5742 MpegEncContext * const s = &h->s;
5743 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5744 h->top_mb_xy = mb_xy - s->mb_stride;
5745 h->left_mb_xy[0] = mb_xy - 1;
5746 if(h->mb_aff_frame){
5747 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5748 const int top_pair_xy = pair_xy - s->mb_stride;
5749 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5750 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5751 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5752 const int bottom = (s->mb_y & 1);
/* pick the upper MB of the top pair where frame/field coding requires it */
5754 ? !curr_mb_frame_flag // bottom macroblock
5755 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5757 h->top_mb_xy -= s->mb_stride;
5759 if (left_mb_frame_flag != curr_mb_frame_flag) {
5760 h->left_mb_xy[0] = pair_xy - 1;
/* Top-level CABAC macroblock decode: skip flag, MBAFF field flag, mb_type,
 * intra prediction modes or inter motion info, cbp, optional 8x8 transform
 * flag, mb_qp_delta, and all residual blocks.
 * NOTE(review): numbered listing excerpt — many lines (closing braces,
 * variable declarations, some else branches) are missing; code kept
 * byte-identical. */
5767 * decodes a macroblock
5768 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5770 static int decode_mb_cabac(H264Context *h) {
5771 MpegEncContext * const s = &h->s;
5772 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5773 int mb_type, partition_count, cbp = 0;
5774 int dct8x8_allowed= h->pps.transform_8x8_mode;
5776 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5778 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5779 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5780 /* read skip flags */
5781 if( decode_cabac_mb_skip( h ) ) {
/* skipped MB: reset per-MB state used as CABAC context by later MBs */
5784 h->cbp_table[mb_xy] = 0;
5785 h->chroma_pred_mode_table[mb_xy] = 0;
5786 h->last_qscale_diff = 0;
/* MBAFF: field flag is decoded on the top MB of a pair (or inherited) */
5792 if(h->mb_aff_frame){
5793 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5794 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5796 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5798 h->prev_mb_skipped = 0;
5800 compute_mb_neighboors(h);
5801 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5802 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the raw CABAC mb_type index through the per-slice tables */
5806 if( h->slice_type == B_TYPE ) {
5808 partition_count= b_mb_type_info[mb_type].partition_count;
5809 mb_type= b_mb_type_info[mb_type].type;
5812 goto decode_intra_mb;
5814 } else if( h->slice_type == P_TYPE ) {
5816 partition_count= p_mb_type_info[mb_type].partition_count;
5817 mb_type= p_mb_type_info[mb_type].type;
5820 goto decode_intra_mb;
5823 assert(h->slice_type == I_TYPE);
5825 partition_count = 0;
5826 cbp= i_mb_type_info[mb_type].cbp;
5827 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5828 mb_type= i_mb_type_info[mb_type].type;
5830 if(h->mb_field_decoding_flag)
5831 mb_type |= MB_TYPE_INTERLACED;
5833 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: raw samples follow, read directly from the bytestream */
5835 if(IS_INTRA_PCM(mb_type)) {
5839 // We assume these blocks are very rare so we dont optimize it.
5840 // FIXME The two following lines get the bitstream position in the cabac
5841 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5842 ptr= h->cabac.bytestream;
5843 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5845 // The pixels are stored in the same order as levels in h->mb array.
5846 for(y=0; y<16; y++){
5847 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5848 for(x=0; x<16; x++){
5849 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5850 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5854 const int index= 256 + 4*(y&3) + 32*(y>>2);
5856 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5857 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5861 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5863 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5864 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the arithmetic decoder after the raw bytes */
5868 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5870 // All blocks are present
5871 h->cbp_table[mb_xy] = 0x1ef;
5872 h->chroma_pred_mode_table[mb_xy] = 0;
5873 // In deblocking, the quantizer is 0
5874 s->current_picture.qscale_table[mb_xy]= 0;
5875 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5876 // All coeffs are present
5877 memset(h->non_zero_count[mb_xy], 16, 16);
5878 s->current_picture.mb_type[mb_xy]= mb_type;
5882 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5884 if( IS_INTRA( mb_type ) ) {
5886 if( IS_INTRA4x4( mb_type ) ) {
5887 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5888 mb_type |= MB_TYPE_8x8DCT;
5889 for( i = 0; i < 16; i+=4 ) {
5890 int pred = pred_intra_mode( h, i );
5891 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5892 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5895 for( i = 0; i < 16; i++ ) {
5896 int pred = pred_intra_mode( h, i );
5897 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5899 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5902 write_back_intra_pred_mode(h);
5903 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5905 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5906 if( h->intra16x16_pred_mode < 0 ) return -1;
5908 h->chroma_pred_mode_table[mb_xy] =
5909 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5911 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5912 if( h->chroma_pred_mode < 0 ) return -1;
/* --- 8x8 sub-macroblock partitions --- */
5913 } else if( partition_count == 4 ) {
5914 int i, j, sub_partition_count[4], list, ref[2][4];
5916 if( h->slice_type == B_TYPE ) {
5917 for( i = 0; i < 4; i++ ) {
5918 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5919 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5920 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5922 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5923 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5924 pred_direct_motion(h, &mb_type);
5925 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5926 for( i = 0; i < 4; i++ )
5927 if( IS_DIRECT(h->sub_mb_type[i]) )
5928 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5932 for( i = 0; i < 4; i++ ) {
5933 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5934 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5935 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 partition */
5939 for( list = 0; list < 2; list++ ) {
5940 if( h->ref_count[list] > 0 ) {
5941 for( i = 0; i < 4; i++ ) {
5942 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5943 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5944 if( h->ref_count[list] > 1 )
5945 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5951 h->ref_cache[list][ scan8[4*i]+1 ]=
5952 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5958 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition */
5960 for(list=0; list<2; list++){
5962 if(IS_DIRECT(h->sub_mb_type[i])){
5963 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5966 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5968 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5969 const int sub_mb_type= h->sub_mb_type[i];
5970 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5971 for(j=0; j<sub_partition_count[i]; j++){
5974 const int index= 4*i + block_width*j;
5975 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5976 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5977 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5979 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5980 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5981 tprintf("final mv:%d %d\n", mx, my);
/* replicate mv/mvd across the cache cells covered by the sub-partition */
5983 if(IS_SUB_8X8(sub_mb_type)){
5984 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5985 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5986 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5987 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5989 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5990 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5991 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5992 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5993 }else if(IS_SUB_8X4(sub_mb_type)){
5994 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5995 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5997 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5998 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5999 }else if(IS_SUB_4X8(sub_mb_type)){
6000 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6001 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6003 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6004 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6006 assert(IS_SUB_4X4(sub_mb_type));
6007 mv_cache[ 0 ][0]= mx;
6008 mv_cache[ 0 ][1]= my;
6010 mvd_cache[ 0 ][0]= mx - mpx;
6011 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this partition: zero the 2x2 mv/mvd cells */
6015 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6016 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6017 p[0] = p[1] = p[8] = p[9] = 0;
6018 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- direct 16x16 --- */
6022 } else if( IS_DIRECT(mb_type) ) {
6023 pred_direct_motion(h, &mb_type);
6024 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6025 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6026 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
6028 int list, mx, my, i, mpx, mpy;
6029 if(IS_16X16(mb_type)){
6030 for(list=0; list<2; list++){
6031 if(IS_DIR(mb_type, 0, list)){
6032 if(h->ref_count[list] > 0 ){
6033 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6034 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6037 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6039 for(list=0; list<2; list++){
6040 if(IS_DIR(mb_type, 0, list)){
6041 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6043 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6044 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6045 tprintf("final mv:%d %d\n", mx, my);
6047 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6048 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6050 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6053 else if(IS_16X8(mb_type)){
6054 for(list=0; list<2; list++){
6055 if(h->ref_count[list]>0){
6057 if(IS_DIR(mb_type, i, list)){
6058 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6059 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6061 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6065 for(list=0; list<2; list++){
6067 if(IS_DIR(mb_type, i, list)){
6068 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6069 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6070 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6071 tprintf("final mv:%d %d\n", mx, my);
6073 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6074 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6076 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6077 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6082 assert(IS_8X16(mb_type));
6083 for(list=0; list<2; list++){
6084 if(h->ref_count[list]>0){
6086 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6087 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6088 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6090 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6094 for(list=0; list<2; list++){
6096 if(IS_DIR(mb_type, i, list)){
6097 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6098 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6099 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6101 tprintf("final mv:%d %d\n", mx, my);
6102 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6103 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6105 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6106 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6113 if( IS_INTER( mb_type ) ) {
6114 h->chroma_pred_mode_table[mb_xy] = 0;
6115 write_back_motion( h, mb_type );
/* --- coded block pattern (I16x16 carries its cbp in the mb_type) --- */
6118 if( !IS_INTRA16x16( mb_type ) ) {
6119 cbp = decode_cabac_mb_cbp_luma( h );
6120 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6123 h->cbp_table[mb_xy] = cbp;
6125 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6126 if( decode_cabac_mb_transform_size( h ) )
6127 mb_type |= MB_TYPE_8x8DCT;
6129 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
6131 if( cbp || IS_INTRA16x16( mb_type ) ) {
6132 const uint8_t *scan, *dc_scan;
6135 if(IS_INTERLACED(mb_type)){
6136 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6137 dc_scan= luma_dc_field_scan;
6139 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6140 dc_scan= luma_dc_zigzag_scan;
6143 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6144 if( dqp == INT_MIN ){
6145 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale into the legal 0..51 range */
6149 if(((unsigned)s->qscale) > 51){
6150 if(s->qscale<0) s->qscale+= 52;
6151 else s->qscale-= 52;
6153 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6155 if( IS_INTRA16x16( mb_type ) ) {
6157 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6158 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6161 for( i = 0; i < 16; i++ ) {
6162 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6163 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6167 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6171 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6172 if( cbp & (1<<i8x8) ) {
6173 if( IS_8x8DCT(mb_type) ) {
6174 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6175 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6178 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6179 const int index = 4*i8x8 + i4x4;
6180 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6181 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6185 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6186 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6193 for( c = 0; c < 2; c++ ) {
6194 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6195 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6202 for( c = 0; c < 2; c++ ) {
6203 for( i = 0; i < 4; i++ ) {
6204 const int index = 16 + 4 * c + i;
6205 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6206 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6211 uint8_t * const nnz= &h->non_zero_count_cache[0];
6212 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6213 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6216 uint8_t * const nnz= &h->non_zero_count_cache[0];
6217 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6218 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6219 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6222 s->current_picture.qscale_table[mb_xy]= s->qscale;
6223 write_back_non_zero_count(h);
/* Deblocks one vertical luma edge (16 pixels tall). bS<4 is delegated to
 * the DSP h_loop_filter_luma with per-4px tc0 values; bS==4 (intra edge)
 * runs the strong filter inline, conditionally modifying up to 3 pixels
 * on each side of the edge.
 * NOTE(review): excerpt — tc[] declaration, the bS<4/==4 branch lines and
 * the pix advance are among the missing lines. */
6229 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6231 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6232 const int alpha = alpha_table[index_a];
6233 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* tc = -1 marks "no filtering" for the DSP routine */
6238 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6239 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6241 /* 16px edge length, because bS=4 is triggered by being at
6242 * the edge of an intra MB, so all 4 bS are the same */
6243 for( d = 0; d < 16; d++ ) {
6244 const int p0 = pix[-1];
6245 const int p1 = pix[-2];
6246 const int p2 = pix[-3];
6248 const int q0 = pix[0];
6249 const int q1 = pix[1];
6250 const int q2 = pix[2];
6252 if( ABS( p0 - q0 ) < alpha &&
6253 ABS( p1 - p0 ) < beta &&
6254 ABS( q1 - q0 ) < beta ) {
/* strong (3-tap) vs weak filtering decision per side */
6256 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6257 if( ABS( p2 - p0 ) < beta)
6259 const int p3 = pix[-4];
6261 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6262 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6263 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6266 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6268 if( ABS( q2 - q0 ) < beta)
6270 const int q3 = pix[3];
6272 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6273 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6274 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6277 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6281 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6282 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6284 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: bS<4 uses the DSP chroma loop filter
 * with tc0+1 clipping values; bS==4 uses the intra chroma filter.
 * NOTE(review): excerpt — tc[] declaration and branch lines are missing. */
6290 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6292 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6293 const int alpha = alpha_table[index_a];
6294 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6299 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6300 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6302 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the vertical luma edge between MBAFF macroblock pairs,
 * one row at a time (alpha/beta/tc can differ per row because the two
 * sides may use different QPs, passed in qp[2]).
 * NOTE(review): excerpt — several branch/brace lines are missing. */
6306 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6308 for( i = 0; i < 16; i++, pix += stride) {
/* bS index mapping differs for field vs frame decoding of the pair */
6314 int bS_index = (i >> 1);
6315 if (h->mb_field_decoding_flag) {
6317 bS_index |= (i & 1);
6320 if( bS[bS_index] == 0 ) {
6324 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6325 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6326 alpha = alpha_table[index_a];
6327 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal filtering (bS 1..3): clipped delta on p0/q0, optional p1/q1 */
6330 if( bS[bS_index] < 4 ) {
6331 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6332 /* 4px edge length */
6333 const int p0 = pix[-1];
6334 const int p1 = pix[-2];
6335 const int p2 = pix[-3];
6336 const int q0 = pix[0];
6337 const int q1 = pix[1];
6338 const int q2 = pix[2];
6340 if( ABS( p0 - q0 ) < alpha &&
6341 ABS( p1 - p0 ) < beta &&
6342 ABS( q1 - q0 ) < beta ) {
6346 if( ABS( p2 - p0 ) < beta ) {
6347 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6350 if( ABS( q2 - q0 ) < beta ) {
6351 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6355 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6356 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6357 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6358 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filtering (bS == 4, intra edge) */
6361 /* 4px edge length */
6362 const int p0 = pix[-1];
6363 const int p1 = pix[-2];
6364 const int p2 = pix[-3];
6366 const int q0 = pix[0];
6367 const int q1 = pix[1];
6368 const int q2 = pix[2];
6370 if( ABS( p0 - q0 ) < alpha &&
6371 ABS( p1 - p0 ) < beta &&
6372 ABS( q1 - q0 ) < beta ) {
6374 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6375 if( ABS( p2 - p0 ) < beta)
6377 const int p3 = pix[-4];
6379 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6380 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6381 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6384 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6386 if( ABS( q2 - q0 ) < beta)
6388 const int q3 = pix[3];
6390 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6391 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6392 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6395 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6399 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6400 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6402 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks the vertical chroma edge between MBAFF macroblock pairs,
 * one row at a time (8 chroma rows); per-row qp selection as in the luma
 * variant. bS<4 applies the clipped chroma delta, bS==4 the intra chroma
 * filter. NOTE(review): excerpt — the bS_index computation and some
 * braces are among the missing lines. */
6407 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6409 for( i = 0; i < 8; i++, pix += stride) {
6417 if( bS[bS_index] == 0 ) {
6421 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6422 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6423 alpha = alpha_table[index_a];
6424 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6425 if( bS[bS_index] < 4 ) {
6426 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6427 /* 2px edge length (because we use same bS than the one for luma) */
6428 const int p0 = pix[-1];
6429 const int p1 = pix[-2];
6430 const int q0 = pix[0];
6431 const int q1 = pix[1];
6433 if( ABS( p0 - q0 ) < alpha &&
6434 ABS( p1 - p0 ) < beta &&
6435 ABS( q1 - q0 ) < beta ) {
6436 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6438 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6439 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6440 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: intra strong chroma filter (modifies p0/q0 only) */
6443 const int p0 = pix[-1];
6444 const int p1 = pix[-2];
6445 const int q0 = pix[0];
6446 const int q1 = pix[1];
6448 if( ABS( p0 - q0 ) < alpha &&
6449 ABS( p1 - p0 ) < beta &&
6450 ABS( q1 - q0 ) < beta ) {
6452 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6453 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6454 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge: same structure as filter_mb_edgev
 * but pixels are addressed via row offsets (pix_next = stride). bS<4 is
 * delegated to the DSP v_loop_filter_luma; bS==4 runs the strong filter
 * inline. NOTE(review): excerpt — tc[] declaration, branch lines and the
 * pix advance are among the missing lines. */
6460 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6462 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6463 const int alpha = alpha_table[index_a];
6464 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6465 const int pix_next = stride;
6470 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6471 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6473 /* 16px edge length, see filter_mb_edgev */
6474 for( d = 0; d < 16; d++ ) {
6475 const int p0 = pix[-1*pix_next];
6476 const int p1 = pix[-2*pix_next];
6477 const int p2 = pix[-3*pix_next];
6478 const int q0 = pix[0];
6479 const int q1 = pix[1*pix_next];
6480 const int q2 = pix[2*pix_next];
6482 if( ABS( p0 - q0 ) < alpha &&
6483 ABS( p1 - p0 ) < beta &&
6484 ABS( q1 - q0 ) < beta ) {
6486 const int p3 = pix[-4*pix_next];
6487 const int q3 = pix[ 3*pix_next];
/* strong vs weak filtering per side, as in the vertical variant */
6489 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6490 if( ABS( p2 - p0 ) < beta) {
6492 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6493 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6494 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6497 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6499 if( ABS( q2 - q0 ) < beta) {
6501 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6502 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6503 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6506 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6510 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6511 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6513 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* filter_mb_edgech():
 * Deblocks one horizontal chroma edge (8 pixels). Same alpha/beta derivation
 * as the luma filters; the actual pixel work is delegated to the DSP layer.
 * NOTE(review): elided listing — the bS<4 / intra branch structure around
 * lines 6529-6532 is not visible here. */
6520 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6522 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6523 const int alpha = alpha_table[index_a];
6524 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma uses tc0+1 as clipping threshold; 0 presumably disables the group —
 * confirm against h264_v_loop_filter_chroma */
6529 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6530 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6532 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* filter_mb():
 * Applies the in-loop deblocking filter to one decoded macroblock: computes
 * the boundary strength (bS) of every internal and external edge from intra
 * flags, coded-coefficient (nnz) state, reference frames and motion vectors,
 * then calls the per-edge luma/chroma filter helpers. Handles the MBAFF
 * special cases where a frame MB borders a field MB.
 * NOTE(review): elided listing — several declarations, condition headers,
 * else branches and closing braces are missing from this excerpt. */
6536 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6537 MpegEncContext * const s = &h->s;
6538 const int mb_xy= mb_x + mb_y*s->mb_stride;
6539 int first_vertical_edge_done = 0;
6541 /* FIXME: A given frame may occupy more than one position in
6542 * the reference list. So ref2frm should be populated with
6543 * frame numbers, not indices. */
/* Maps a cached reference index (offset by +2 so that the "none" entries
 * land on the leading -1 slots) to a frame identifier for bS comparison. */
6544 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/* MBAFF mixed-interlace left edge: condition head elided above this line. */
6547 // left mb is in picture
6548 && h->slice_table[mb_xy-1] != 255
6549 // and current and left pair do not have the same interlaced type
6550 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6551 // and left mb is in the same slice if deblocking_filter == 2
6552 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6553 /* First vertical edge is different in MBAFF frames
6554 * There are 8 different bS to compute and 2 different Qp
6561 first_vertical_edge_done = 1;
/* Compute one bS per 2-pixel group (8 groups) against the matching
 * left neighbour of the MB pair. */
6562 for( i = 0; i < 8; i++ ) {
6564 int b_idx= 8 + 4 + 8*y;
6565 int bn_idx= b_idx - 1;
6567 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6569 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6570 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6572 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6573 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6574 h->non_zero_count_cache[bn_idx] != 0 ) {
/* Otherwise bS depends on reference/motion differences (threshold: one
 * full-pel, i.e. 4 quarter-pel units). */
6579 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6580 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6581 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6582 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6589 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6590 // Do not use s->qscale as luma quantizer because it has not the same
6591 // value in IPCM macroblocks.
6592 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6593 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6594 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6595 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6596 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6597 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6600 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6601 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6602 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6603 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6604 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6607 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6608 for( dir = 0; dir < 2; dir++ )
6611 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6612 const int mb_type = s->current_picture.mb_type[mb_xy];
6613 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table value 255 marks "no macroblock there": skip the outer edge. */
6614 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* 16x16 skip MBs have no internal edges to filter. */
6616 const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
6617 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6618 // how often to recheck mv-based bS when iterating between edges
6619 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6620 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6621 // how often to recheck mv-based bS when iterating along each edge
6622 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6624 if (first_vertical_edge_done) {
6626 first_vertical_edge_done = 0;
6629 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6633 for( edge = start; edge < edges; edge++ ) {
6634 /* mbn_xy: neighbor macroblock */
6635 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6636 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform leaves no coefficients on the odd 4x4 edges. */
6640 if( (edge&1) && IS_8x8DCT(mb_type) )
6643 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6644 && !IS_INTERLACED(mb_type)
6645 && IS_INTERLACED(mbn_type)
6647 // This is a special case in the norm where the filtering must
6648 // be done twice (one each of the field) even if we are in a
6649 // frame macroblock.
6651 unsigned int tmp_linesize = 2 * linesize;
6652 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6653 int mbn_xy = mb_xy - 2 * s->mb_stride;
6657 if( IS_INTRA(mb_type) ||
6658 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6659 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6662 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6665 // Do not use s->qscale as luma quantizer because it has not the same
6666 // value in IPCM macroblocks.
6667 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6668 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6669 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6670 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6671 chroma_qp = ( h->chroma_qp +
6672 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6673 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6674 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field of the neighbouring pair */
6677 mbn_xy += s->mb_stride;
6678 if( IS_INTRA(mb_type) ||
6679 IS_INTRA(mbn_type) ) {
6680 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6683 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6686 // Do not use s->qscale as luma quantizer because it has not the same
6687 // value in IPCM macroblocks.
6688 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6689 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6690 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6691 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6692 chroma_qp = ( h->chroma_qp +
6693 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6694 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6695 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* normal (non-special-case) bS derivation for this edge */
6698 if( IS_INTRA(mb_type) ||
6699 IS_INTRA(mbn_type) ) {
6702 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6703 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6712 bS[0] = bS[1] = bS[2] = bS[3] = value;
6717 if( edge & mask_edge ) {
6718 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6721 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6722 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6723 int bn_idx= b_idx - (dir ? 8:1);
6725 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6726 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6727 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6728 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6730 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4px-group bS: coded coefficients or ref/mv mismatch decide */
6736 for( i = 0; i < 4; i++ ) {
6737 int x = dir == 0 ? edge : i;
6738 int y = dir == 0 ? i : edge;
6739 int b_idx= 8 + 4 + x + 8*y;
6740 int bn_idx= b_idx - (dir ? 8:1);
6742 if( h->non_zero_count_cache[b_idx] != 0 ||
6743 h->non_zero_count_cache[bn_idx] != 0 ) {
6749 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6750 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6751 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6752 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6760 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6765 // Do not use s->qscale as luma quantizer because it has not the same
6766 // value in IPCM macroblocks.
6767 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6768 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6769 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6770 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* apply: vertical edges at x = 4*edge (chroma on even edges only,
 * because chroma has half resolution) */
6772 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6773 if( (edge&1) == 0 ) {
6774 int chroma_qp = ( h->chroma_qp +
6775 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6776 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6777 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6780 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6781 if( (edge&1) == 0 ) {
6782 int chroma_qp = ( h->chroma_qp +
6783 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6784 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6785 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* decode_slice():
 * Main slice decoding loop. Chooses the entropy decoder (CABAC or CAVLC)
 * from the PPS, decodes macroblocks until the slice ends, reconstructs each
 * MB via hl_decode_mb() and reports the decoded region to the error
 * resilience layer (ff_er_add_slice).
 * Returns 0 on success, -1 on error.
 * NOTE(review): elided listing — loop headers, else branches, #if guards and
 * closing braces are not shown in this excerpt. */
6792 static int decode_slice(H264Context *h){
6793 MpegEncContext * const s = &h->s;
6794 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6798 if( h->pps.cabac ) {
/* CABAC path: the arithmetic coder starts byte-aligned after the header. */
6802 align_get_bits( &s->gb );
6805 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6806 ff_init_cabac_decoder( &h->cabac,
6807 s->gb.buffer + get_bits_count(&s->gb)/8,
6808 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6809 /* calculate pre-state */
/* Derive the initial context states from the QP-dependent init tables
 * (I-slice table vs. P/B table selected by cabac_init_idc). */
6810 for( i= 0; i < 460; i++ ) {
6812 if( h->slice_type == I_TYPE )
6813 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6815 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: states 1..63 -> MPS 0, 64..126 -> MPS 1 */
6818 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6820 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6824 int ret = decode_mb_cabac(h);
6827 if(ret>=0) hl_decode_mb(h);
6829 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
/* In MBAFF, macroblocks come in vertical pairs: decode the partner MB. */
6830 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6833 if(ret>=0) ret = decode_mb_cabac(h);
6835 if(ret>=0) hl_decode_mb(h);
6838 eos = get_cabac_terminate( &h->cabac );
/* bytestream may legally overrun by one byte; more than that is an error */
6840 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6841 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6842 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6846 if( ++s->mb_x >= s->mb_width ) {
6848 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6850 if(h->mb_aff_frame) {
6855 if( eos || s->mb_y >= s->mb_height ) {
6856 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6857 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6864 int ret = decode_mb_cavlc(h);
6866 if(ret>=0) hl_decode_mb(h);
6868 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6870 ret = decode_mb_cavlc(h);
6872 if(ret>=0) hl_decode_mb(h);
6877 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6878 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6883 if(++s->mb_x >= s->mb_width){
6885 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6887 if(h->mb_aff_frame) {
6890 if(s->mb_y >= s->mb_height){
6891 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* ending exactly on the last bit is a clean slice end; otherwise error */
6893 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6894 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6898 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6905 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6906 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6907 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6908 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6912 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below appears to be a disabled/alternate decode
 * path (it calls decode_mb() and uses s->gb without '&'); the surrounding
 * preprocessor guards are not visible in this excerpt. */
6921 for(;s->mb_y < s->mb_height; s->mb_y++){
6922 for(;s->mb_x < s->mb_width; s->mb_x++){
6923 int ret= decode_mb(h);
6928 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6929 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6934 if(++s->mb_x >= s->mb_width){
6936 if(++s->mb_y >= s->mb_height){
6937 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6938 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6942 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line is garbled in this listing ("s->?gb",
 * "s->gb?."); by symmetry with the line after it, it should read
 * if(get_bits_count(s->gb) >= s->gb.size_in_bits){ — fix at source. */
6949 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6950 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6951 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6955 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6962 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6965 return -1; //not reached
/* decode_unregistered_user_data():
 * Parses an SEI "user data unregistered" payload of 'size' bytes. The first
 * 16 bytes are the UUID; the ASCII text after it is scanned for an x264
 * version banner so workarounds for known encoder bugs can be keyed on
 * h->x264_build. Remaining payload bytes are skipped.
 * NOTE(review): elided listing — local declarations, the size check and the
 * string NUL-termination line are not visible here. */
6968 static int decode_unregistered_user_data(H264Context *h, int size){
6969 MpegEncContext * const s = &h->s;
/* 16-byte UUID + up to 256 bytes of payload text */
6970 uint8_t user_data[16+256];
6976 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6977 user_data[i]= get_bits(&s->gb, 8);
/* match only the leading "x264 - core %d" part of the banner */
6981 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6982 if(e==1 && build>=0)
6983 h->x264_build= build;
6985 if(s->avctx->debug & FF_DEBUG_BUGS)
6986 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume whatever part of the payload was not buffered above */
6989 skip_bits(&s->gb, 8);
6994 static int decode_sei(H264Context *h){
6995 MpegEncContext * const s = &h->s;
6997 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7002 type+= show_bits(&s->gb, 8);
7003 }while(get_bits(&s->gb, 8) == 255);
7007 size+= show_bits(&s->gb, 8);
7008 }while(get_bits(&s->gb, 8) == 255);
7012 if(decode_unregistered_user_data(h, size) < 0);
7016 skip_bits(&s->gb, 8*size);
7019 //FIXME check bits here
7020 align_get_bits(&s->gb);
7026 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7027 MpegEncContext * const s = &h->s;
7029 cpb_count = get_ue_golomb(&s->gb) + 1;
7030 get_bits(&s->gb, 4); /* bit_rate_scale */
7031 get_bits(&s->gb, 4); /* cpb_size_scale */
7032 for(i=0; i<cpb_count; i++){
7033 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7034 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7035 get_bits1(&s->gb); /* cbr_flag */
7037 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7038 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7039 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7040 get_bits(&s->gb, 5); /* time_offset_length */
/* decode_vui_parameters():
 * Parses the optional VUI (Video Usability Information) block of an SPS:
 * sample aspect ratio, overscan/colour description (skipped), timing info,
 * HRD parameters and bitstream restrictions. Only the fields the decoder
 * uses are stored in 'sps'; the rest are read and discarded.
 * NOTE(review): elided listing — closing braces and the final return are
 * not visible in this excerpt. */
7043 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7044 MpegEncContext * const s = &h->s;
7045 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7046 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7048 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7050 if( aspect_ratio_info_present_flag ) {
7051 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; idc < 16 indexes the preset table */
7052 if( aspect_ratio_idc == EXTENDED_SAR ) {
7053 sps->sar.num= get_bits(&s->gb, 16);
7054 sps->sar.den= get_bits(&s->gb, 16);
7055 }else if(aspect_ratio_idc < 16){
7056 sps->sar= pixel_aspect[aspect_ratio_idc];
7058 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7065 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7067 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7068 get_bits1(&s->gb); /* overscan_appropriate_flag */
7071 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7072 get_bits(&s->gb, 3); /* video_format */
7073 get_bits1(&s->gb); /* video_full_range_flag */
7074 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7075 get_bits(&s->gb, 8); /* colour_primaries */
7076 get_bits(&s->gb, 8); /* transfer_characteristics */
7077 get_bits(&s->gb, 8); /* matrix_coefficients */
7081 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7082 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7083 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7086 sps->timing_info_present_flag = get_bits1(&s->gb);
7087 if(sps->timing_info_present_flag){
7088 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7089 sps->time_scale = get_bits_long(&s->gb, 32);
7090 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice (NAL and VCL variants); both are skipped */
7093 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7094 if(nal_hrd_parameters_present_flag)
7095 decode_hrd_parameters(h, sps);
7096 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7097 if(vcl_hrd_parameters_present_flag)
7098 decode_hrd_parameters(h, sps);
7099 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7100 get_bits1(&s->gb); /* low_delay_hrd_flag */
7101 get_bits1(&s->gb); /* pic_struct_present_flag */
7103 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7104 if(sps->bitstream_restriction_flag){
7105 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7106 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7107 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7108 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7109 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames is the only restriction field the decoder keeps */
7110 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7111 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* decode_scaling_list():
 * Reads one quantization scaling list of 'size' (16 or 64) entries. Entries
 * are delta-coded (signed Exp-Golomb) along the zig-zag scan, each new value
 * reduced mod 256. A leading delta that yields 0 selects the JVT default
 * list; an absent list (flag bit 0) selects the prediction fallback.
 * NOTE(review): elided listing — the 'else' before the loop and the
 * 'if(next)' / 'break;' lines inside it are not visible here. */
7117 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7118 const uint8_t *jvt_list, const uint8_t *fallback_list){
7119 MpegEncContext * const s = &h->s;
7120 int i, last = 8, next = 8;
7121 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7122 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7123 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7125 for(i=0;i<size;i++){
7127 next = (last + get_se_golomb(&s->gb)) & 0xff;
7128 if(!i && !next){ /* matrix not written, we use the preset one */
7129 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value" from here on */
7132 last = factors[scan[i]] = next ? next : last;
/* decode_scaling_matrices():
 * Reads the full set of scaling matrices for an SPS or PPS: six 4x4 lists
 * (intra/inter x Y/Cb/Cr) and, when the 8x8 transform can be used, two 8x8
 * lists. Fallback rules per the spec: a PPS falls back to the SPS matrices
 * when the SPS transmitted any; otherwise to the flat/JVT defaults, with
 * later lists of the same kind falling back to the previous one. */
7136 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7137 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7138 MpegEncContext * const s = &h->s;
7139 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* fallback source for the first list of each kind */
7140 const uint8_t *fallback[4] = {
7141 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7142 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7143 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7144 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7146 if(get_bits1(&s->gb)){
/* remember that this SPS carried matrices, for future PPS fallback */
7147 sps->scaling_matrix_present |= is_sps;
7148 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7149 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7150 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7151 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7152 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7153 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7154 if(is_sps || pps->transform_8x8_mode){
7155 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7156 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7158 } else if(fallback_sps) {
/* no PPS matrices transmitted: inherit the SPS set wholesale */
7159 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7160 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* decode_seq_parameter_set():
 * Parses an SPS NAL unit into h->sps_buffer[sps_id]: profile/level, frame
 * numbering and POC configuration, reference frame count, picture
 * dimensions (in MBs), interlacing flags, cropping and optional VUI.
 * Returns 0 on success, -1 on error.
 * NOTE(review): elided listing — some error 'return -1;' lines, braces and
 * default assignments are not visible in this excerpt. */
7164 static inline int decode_seq_parameter_set(H264Context *h){
7165 MpegEncContext * const s = &h->s;
7166 int profile_idc, level_idc;
7170 profile_idc= get_bits(&s->gb, 8);
7171 get_bits1(&s->gb); //constraint_set0_flag
7172 get_bits1(&s->gb); //constraint_set1_flag
7173 get_bits1(&s->gb); //constraint_set2_flag
7174 get_bits1(&s->gb); //constraint_set3_flag
7175 get_bits(&s->gb, 4); // reserved
7176 level_idc= get_bits(&s->gb, 8);
7177 sps_id= get_ue_golomb(&s->gb);
7179 sps= &h->sps_buffer[ sps_id ];
7180 sps->profile_idc= profile_idc;
7181 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, transform bypass and
 * optional scaling matrices before the common fields. */
7183 if(sps->profile_idc >= 100){ //high profile
7184 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7185 get_bits1(&s->gb); //residual_color_transform_flag
7186 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7187 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7188 sps->transform_bypass = get_bits1(&s->gb);
7189 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7191 sps->scaling_matrix_present = 0;
7193 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7194 sps->poc_type= get_ue_golomb(&s->gb);
/* picture order count: type 0 carries an explicit LSB, type 1 a cyclic
 * offset table, type 2 nothing extra */
7196 if(sps->poc_type == 0){ //FIXME #define
7197 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7198 } else if(sps->poc_type == 1){//FIXME #define
7199 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7200 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7201 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7202 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7204 for(i=0; i<sps->poc_cycle_length; i++)
7205 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7207 if(sps->poc_type > 2){
7208 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7212 sps->ref_frame_count= get_ue_golomb(&s->gb);
7213 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7214 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7216 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7217 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7218 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7219 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7220 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7223 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7224 if(!sps->frame_mbs_only_flag)
7225 sps->mb_aff= get_bits1(&s->gb);
7229 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7231 sps->crop= get_bits1(&s->gb);
7233 sps->crop_left = get_ue_golomb(&s->gb);
7234 sps->crop_right = get_ue_golomb(&s->gb);
7235 sps->crop_top = get_ue_golomb(&s->gb);
7236 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping shifts the picture origin, which this decoder does
 * not fully implement — warn the user */
7237 if(sps->crop_left || sps->crop_top){
7238 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7244 sps->crop_bottom= 0;
7247 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7248 if( sps->vui_parameters_present_flag )
7249 decode_vui_parameters(h, sps);
7251 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7252 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7253 sps_id, sps->profile_idc, sps->level_idc,
7255 sps->ref_frame_count,
7256 sps->mb_width, sps->mb_height,
7257 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7258 sps->direct_8x8_inference_flag ? "8B8" : "",
7259 sps->crop_left, sps->crop_right,
7260 sps->crop_top, sps->crop_bottom,
7261 sps->vui_parameters_present_flag ? "VUI" : ""
/* decode_picture_parameter_set():
 * Parses a PPS NAL unit into h->pps_buffer[pps_id]: entropy-coding mode,
 * reference counts, QP offsets, deblocking/weighting flags and (when extra
 * bits remain) the 8x8-transform flag plus PPS scaling matrices. FMO
 * (slice_group_count > 1) is parsed enough to warn but not supported.
 * Returns 0 on success (return lines elided in this excerpt). */
7267 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7268 MpegEncContext * const s = &h->s;
7269 int pps_id= get_ue_golomb(&s->gb);
7270 PPS *pps= &h->pps_buffer[pps_id];
7272 pps->sps_id= get_ue_golomb(&s->gb);
7273 pps->cabac= get_bits1(&s->gb);
7274 pps->pic_order_present= get_bits1(&s->gb);
7275 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7276 if(pps->slice_group_count > 1 ){
7277 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7278 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7279 switch(pps->mb_slice_group_map_type){
/* The table rows below are quoted H.264 spec syntax for the unimplemented
 * FMO map types; in the full file they sit inside disabled/comment text. */
7282 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7283 | run_length[ i ] |1 |ue(v) |
7288 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7290 | top_left_mb[ i ] |1 |ue(v) |
7291 | bottom_right_mb[ i ] |1 |ue(v) |
7299 | slice_group_change_direction_flag |1 |u(1) |
7300 | slice_group_change_rate_minus1 |1 |ue(v) |
7305 | slice_group_id_cnt_minus1 |1 |ue(v) |
7306 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7308 | slice_group_id[ i ] |1 |u(v) |
7313 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7314 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7315 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7316 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7320 pps->weighted_pred= get_bits1(&s->gb);
7321 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7322 pps->init_qp= get_se_golomb(&s->gb) + 26;
7323 pps->init_qs= get_se_golomb(&s->gb) + 26;
7324 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7325 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7326 pps->constrained_intra_pred= get_bits1(&s->gb);
7327 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* default: flat (all-16) scaling matrices, overridden below if present */
7329 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7330 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing fields (Fidelity Range Extensions) */
7332 if(get_bits_count(&s->gb) < bit_length){
7333 pps->transform_8x8_mode= get_bits1(&s->gb);
7334 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7335 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7338 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7339 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7340 pps_id, pps->sps_id,
7341 pps->cabac ? "CABAC" : "CAVLC",
7342 pps->slice_group_count,
7343 pps->ref_count[0], pps->ref_count[1],
7344 pps->weighted_pred ? "weighted" : "",
7345 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7346 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7347 pps->constrained_intra_pred ? "CONSTR" : "",
7348 pps->redundant_pic_cnt_present ? "REDU" : "",
7349 pps->transform_8x8_mode ? "8x8DCT" : ""
/**
7357 * finds the end of the current frame in the bitstream.
 * Scans for NAL start codes with a 32-bit shift-register state machine.
 * Slice NALs (types 1, 2, 5 -> states 0x101/0x102/0x105) begin a frame; a
 * second slice whose first_mb_in_slice is 0 (top bit of the payload byte
 * set, since it is ue(v)-coded) or an SPS/PPS/AUD NAL (7/8/9) ends it.
7358 * @return the position of the first byte of the next frame, or -1
 */
7360 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7363 ParseContext *pc = &(h->s.parse_context);
7364 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7365 // mb_addr= pc->mb_addr - 1;
7367 for(i=0; i<=buf_size; i++){
/* mask 0xFFFFFF1F: 00 00 01 start code + NAL type (nal_ref_idc ignored) */
7368 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7369 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7370 if(pc->frame_start_found){
7371 // If there isn't one more byte in the buffer
7372 // the test on first_mb_in_slice cannot be done yet
7373 // do it at next call.
7374 if (i >= buf_size) break;
7375 if (buf[i] & 0x80) {
7376 // first_mb_in_slice is 0, probably the first nal of a new
7378 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7380 pc->frame_start_found= 0;
7384 pc->frame_start_found = 1;
/* SPS/PPS/AUD after slice data also terminates the current frame */
7386 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7387 if(pc->frame_start_found){
7389 pc->frame_start_found= 0;
7394 state= (state<<8) | buf[i];
7398 return END_NOT_FOUND;
/* h264_parse():
 * AVCodecParser callback: locates the frame boundary via find_frame_end()
 * and uses ff_combine_frame() to buffer partial frames across calls. On a
 * complete frame, *poutbuf/*poutbuf_size describe it; otherwise they are
 * cleared (the elided branch handles the END_NOT_FOUND case). */
7401 static int h264_parse(AVCodecParserContext *s,
7402 AVCodecContext *avctx,
7403 uint8_t **poutbuf, int *poutbuf_size,
7404 const uint8_t *buf, int buf_size)
7406 H264Context *h = s->priv_data;
7407 ParseContext *pc = &h->s.parse_context;
7410 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame may retarget buf/buf_size to its internal buffer */
7412 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7418 *poutbuf = (uint8_t *)buf;
7419 *poutbuf_size = buf_size;
/* h264_split():
 * AVCodecParser split callback: returns the size of the leading
 * out-of-band header portion (SPS/PPS) of the buffer, i.e. the offset of
 * the first non-parameter-set NAL once an SPS (type 7, state 0x107) has
 * been seen; 0 if no split point is found (elided return lines). */
7423 static int h264_split(AVCodecContext *avctx,
7424 const uint8_t *buf, int buf_size)
/* state starts at all-ones so no false start code match on the first bytes */
7427 uint32_t state = -1;
7430 for(i=0; i<=buf_size; i++){
7431 if((state&0xFFFFFF1F) == 0x107)
7433 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* any start code that is not SPS/PPS/AUD ends the header region */
7435 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over zero padding preceding the start code */
7437 while(i>4 && buf[i-5]==0) i--;
7442 state= (state<<8) | buf[i];
/*
 * Decodes every NAL unit found in buf[0..buf_size) and reconstructs
 * the picture data they carry.  Handles both Annex-B (start-code
 * delimited) and AVC/MP4 (length-prefixed, h->is_avc) packaging.
 * NOTE(review): fragment -- many original lines (declarations, the
 * outer loop header, braces, several case labels) are elided.
 */
7448 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7449 MpegEncContext * const s = &h->s;
7450 AVCodecContext * const avctx= s->avctx;
/* debug hex dump of the first 50 input bytes; buf_size is not checked
 * here -- presumably this is inside a compiled-out/debug-only branch */
7454 for(i=0; i<50; i++){
7455 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7459 s->current_picture_ptr= NULL;
7468 if(buf_index >= buf_size) break;
/* AVC mode: read the big-endian NAL length prefix byte by byte */
7470 for(i = 0; i < h->nal_length_size; i++)
7471 nalsize = (nalsize << 8) | buf[buf_index++];
7473 // start code prefix search
7474 for(; buf_index + 3 < buf_size; buf_index++){
7475 // this should allways succeed in the first iteration
7476 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7480 if(buf_index+3 >= buf_size) break;
/* unescape the NAL: strip emulation-prevention bytes into h's rbsp buffer */
7485 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
/* drop a trailing zero byte, then trim the rbsp trailing bits to get
 * the exact payload length in bits */
7486 if(ptr[dst_length - 1] == 0) dst_length--;
7487 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7489 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7490 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
/* sanity check: the declared AVC length should match what was consumed */
7493 if (h->is_avc && (nalsize != consumed))
7494 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7496 buf_index += consumed;
/* skip non-reference NALs when the user asked to drop them */
7498 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7499 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7502 switch(h->nal_unit_type){
/* (elided case label, presumably NAL_IDR_SLICE) */
7504 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* (elided case, presumably NAL_SLICE: non-partitioned coded slice) */
7506 init_get_bits(&s->gb, ptr, bit_length);
7508 h->inter_gb_ptr= &s->gb;
7509 s->data_partitioning = 0;
7511 if(decode_slice_header(h) < 0){
7512 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* decode the slice only if it passes all the skip/discard filters */
7515 if(h->redundant_pic_count==0 && s->hurry_up < 5
7516 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7517 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7518 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7519 && avctx->skip_frame < AVDISCARD_ALL)
/* (elided case, presumably NAL_DPA: data partition A -- slice header) */
7523 init_get_bits(&s->gb, ptr, bit_length);
7525 h->inter_gb_ptr= NULL;
7526 s->data_partitioning = 1;
7528 if(decode_slice_header(h) < 0){
7529 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* (elided case, presumably NAL_DPB: intra-coefficient partition) */
7533 init_get_bits(&h->intra_gb, ptr, bit_length);
7534 h->intra_gb_ptr= &h->intra_gb;
/* (elided case, presumably NAL_DPC: inter-coefficient partition) */
7537 init_get_bits(&h->inter_gb, ptr, bit_length);
7538 h->inter_gb_ptr= &h->inter_gb;
/* with all partitions present, run the slice through the same filters */
7540 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7542 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7543 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7544 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7545 && avctx->skip_frame < AVDISCARD_ALL)
/* (elided case, presumably NAL_SEI) */
7549 init_get_bits(&s->gb, ptr, bit_length);
/* (elided case, presumably NAL_SPS) */
7553 init_get_bits(&s->gb, ptr, bit_length);
7554 decode_seq_parameter_set(h);
7556 if(s->flags& CODEC_FLAG_LOW_DELAY)
7559 if(avctx->has_b_frames < 2)
7560 avctx->has_b_frames= !s->low_delay;
/* (elided case, presumably NAL_PPS) */
7563 init_get_bits(&s->gb, ptr, bit_length);
7565 decode_picture_parameter_set(h, bit_length);
/* NAL types that carry no picture data are ignored */
7569 case NAL_END_SEQUENCE:
7570 case NAL_END_STREAM:
7571 case NAL_FILLER_DATA:
7573 case NAL_AUXILIARY_SLICE:
7576 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7580 if(!s->current_picture_ptr) return buf_index; //no frame
/* bookkeeping on the finished picture: type, keyframe flag, POC state */
7582 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7583 s->current_picture_ptr->pict_type= s->pict_type;
7584 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7586 h->prev_frame_num_offset= h->frame_num_offset;
7587 h->prev_frame_num= h->frame_num;
/* POC msb/lsb history is only advanced for reference pictures */
7588 if(s->current_picture_ptr->reference){
7589 h->prev_poc_msb= h->poc_msb;
7590 h->prev_poc_lsb= h->poc_lsb;
/* apply memory-management control operations (sliding window / MMCO) */
7592 if(s->current_picture_ptr->reference)
7593 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/*
 * Returns the number of input bytes consumed for the current frame,
 * clamped so the caller neither loops forever nor reads past buf_size.
 * NOTE(review): fragment -- the non-truncated branch and the return
 * are elided from this view.
 */
7603 * returns the number of bytes consumed for building the current frame
7605 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* in CODEC_FLAG_TRUNCATED mode part of the buffer belongs to the next
 * frame; subtract what the parse context already accounted for */
7606 if(s->flags&CODEC_FLAG_TRUNCATED){
7607 pos -= s->parse_context.last_index;
7608 if(pos<0) pos=0; // FIXME remove (unneeded?)
7612 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7613 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * AVCodec.decode callback: decodes one access unit from buf and, once
 * the reorder buffer permits, emits one picture in display order via
 * *data / *data_size.  Also performs one-time parsing of AVC 'avcC'
 * extradata (out-of-band SPS/PPS).
 * NOTE(review): fragment -- many original lines (locals, braces,
 * returns, several branches) are elided from this view.
 */
7619 static int decode_frame(AVCodecContext *avctx,
7620 void *data, int *data_size,
7621 uint8_t *buf, int buf_size)
7623 H264Context *h = avctx->priv_data;
7624 MpegEncContext *s = &h->s;
7625 AVFrame *pict = data;
7628 s->flags= avctx->flags;
7629 s->flags2= avctx->flags2;
7631 /* no supplementary picture */
7632 if (buf_size == 0) {
/* truncated mode: reassemble a full frame across calls before decoding */
7636 if(s->flags&CODEC_FLAG_TRUNCATED){
7637 int next= find_frame_end(h, buf, buf_size);
7639 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7641 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* first call with MP4-style packaging: parse SPS/PPS from the avcC box */
7644 if(h->is_avc && !h->got_avcC) {
7645 int i, cnt, nalsize;
7646 unsigned char *p = avctx->extradata;
7647 if(avctx->extradata_size < 7) {
7648 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7652 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7655 /* sps and pps in the avcC always have length coded with 2 bytes,
7656 so put a fake nal_length_size = 2 while parsing them */
7657 h->nal_length_size = 2;
7658 // Decode sps from avcC
7659 cnt = *(p+5) & 0x1f; // Number of sps
7661 for (i = 0; i < cnt; i++) {
7662 nalsize = BE_16(p) + 2;
7663 if(decode_nal_units(h, p, nalsize) < 0) {
7664 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7669 // Decode pps from avcC
7670 cnt = *(p++); // Number of pps
7671 for (i = 0; i < cnt; i++) {
7672 nalsize = BE_16(p) + 2;
/* NOTE(review): the sps loop above checks "< 0" but this one checks
 * "!= nalsize" -- inconsistent success criteria; verify which is
 * intended against decode_nal_units' return contract */
7673 if(decode_nal_units(h, p, nalsize) != nalsize) {
7674 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7679 // Now store right nal length size, that will be use to parse all other nals
7680 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7681 // Do not reparse avcC
/* Annex-B extradata (e.g. from a raw .h264 file): decode it once */
7685 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7686 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7690 buf_index=decode_nal_units(h, buf, buf_size);
7694 //FIXME do something with unavailable reference frames
7696 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7697 if(!s->current_picture_ptr){
7698 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7703 Picture *out = s->current_picture_ptr;
7704 #if 0 //decode order
7705 *data_size = sizeof(AVFrame);
7707 /* Sort B-frames into display order */
7708 Picture *cur = s->current_picture_ptr;
7709 Picture *prev = h->delayed_output_pic;
7714 int dropped_frame = 0;
/* honour the stream's declared reorder depth if larger than current */
7717 if(h->sps.bitstream_restriction_flag
7718 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7719 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* append the newly decoded picture to the delay queue */
7723 while(h->delayed_pic[pics]) pics++;
7724 h->delayed_pic[pics++] = cur;
7725 if(cur->reference == 0)
7728 for(i=0; h->delayed_pic[i]; i++)
7729 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the queued picture with the lowest POC (stopping at keyframes) */
7732 out = h->delayed_pic[0];
7733 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7734 if(h->delayed_pic[i]->poc < out->poc){
7735 out = h->delayed_pic[i];
/* detect POC going backwards across the previously output picture */
7739 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7740 if(prev && pics <= s->avctx->has_b_frames)
/* grow the delay heuristically when out-of-order output is observed */
7742 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7744 ((!cross_idr && prev && out->poc > prev->poc + 2)
7745 || cur->pict_type == B_TYPE)))
7748 s->avctx->has_b_frames++;
7751 else if(out_of_order)
/* remove the chosen picture from the delay queue */
7754 if(out_of_order || pics > s->avctx->has_b_frames){
7755 dropped_frame = (out != h->delayed_pic[out_idx]);
7756 for(i=out_idx; h->delayed_pic[i]; i++)
7757 h->delayed_pic[i] = h->delayed_pic[i+1];
7760 if(prev == out && !dropped_frame)
7763 *data_size = sizeof(AVFrame);
/* the previously output picture is no longer needed as a reference */
7764 if(prev && prev != out && prev->reference == 1)
7765 prev->reference = 0;
7766 h->delayed_output_pic = out;
7770 *pict= *(AVFrame*)out;
7772 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7775 assert(pict->data[0] || !*data_size);
7776 ff_print_debug_info(s, pict);
7777 //printf("out %d\n", (int)pict->data[0]);
7780 /* Return the Picture timestamp as the frame number */
7781 /* we substract 1 because it is added on utils.c */
7782 avctx->frame_number = s->picture_number - 1;
7784 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fills h->mb_avail[] with the availability of neighbouring
 * macroblocks of the current one: [0]=top-left, [1]=top,
 * [2]=top-right, [3]=left.  A neighbour is available only when it
 * exists inside the frame AND belongs to the same slice.
 * NOTE(review): fragment -- the guard around entries 0..2 (presumably
 * an "s->mb_y" check for a row above) is elided; the meaning of
 * entries 4 and 5 is not shown here -- confirm against the callers.
 */
7787 static inline void fill_mb_avail(H264Context *h){
7788 MpegEncContext * const s = &h->s;
7789 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7792 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7793 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7794 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7800 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7801 h->mb_avail[4]= 1; //FIXME move out
7802 h->mb_avail[5]= 0; //FIXME move out
/*
 * Built-in self test (the enclosing main() and its #ifdef TEST guard
 * are elided from this view).  Exercises, in order: unsigned and
 * signed Exp-Golomb coding round-trips, the 4x4 (I)DCT, the
 * quantizer, and the NAL escape/unescape layer.
 */
7808 #define SIZE (COUNT*40)
7814 //    int int_temp[10000];
7816 AVCodecContext avctx;
7818 dsputil_init(&dsp, &avctx);
/* --- round-trip test: unsigned Exp-Golomb codes 0..COUNT-1 --- */
7820 init_put_bits(&pb, temp, SIZE);
7821 printf("testing unsigned exp golomb\n");
7822 for(i=0; i<COUNT; i++){
7824 set_ue_golomb(&pb, i);
7825 STOP_TIMER("set_ue_golomb");
7827 flush_put_bits(&pb);
7829 init_get_bits(&gb, temp, 8*SIZE);
7830 for(i=0; i<COUNT; i++){
7833 s= show_bits(&gb, 24);
7836 j= get_ue_golomb(&gb);
7838 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7841 STOP_TIMER("get_ue_golomb");
/* --- round-trip test: signed Exp-Golomb codes centred on zero --- */
7845 init_put_bits(&pb, temp, SIZE);
7846 printf("testing signed exp golomb\n");
7847 for(i=0; i<COUNT; i++){
7849 set_se_golomb(&pb, i - COUNT/2);
7850 STOP_TIMER("set_se_golomb");
7852 flush_put_bits(&pb);
7854 init_get_bits(&gb, temp, 8*SIZE);
7855 for(i=0; i<COUNT; i++){
7858 s= show_bits(&gb, 24);
7861 j= get_se_golomb(&gb);
7862 if(j != i - COUNT/2){
7863 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7866 STOP_TIMER("get_se_golomb");
/* --- forward DCT + scaling + inverse DCT error measurement --- */
7869 printf("testing 4x4 (I)DCT\n");
7872 uint8_t src[16], ref[16];
7873 uint64_t error= 0, max_error=0;
7875 for(i=0; i<COUNT; i++){
7877 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7878 for(j=0; j<16; j++){
7879 ref[j]= random()%255;
7880 src[j]= random()%255;
7883 h264_diff_dct_c(block, src, ref, 4);
/* normalise coefficients; the *4/5 factors presumably undo the
 * transform's non-uniform row/column gains -- confirm against the
 * matching dct scale tables */
7886 for(j=0; j<16; j++){
7887 //            printf("%d ", block[j]);
7888 block[j]= block[j]*4;
7889 if(j&1) block[j]= (block[j]*4 + 2)/5;
7890 if(j&4) block[j]= (block[j]*4 + 2)/5;
7894 s->dsp.h264_idct_add(ref, block, 4);
7895 /*        for(j=0; j<16; j++){
7896 printf("%d ", ref[j]);
/* accumulate absolute reconstruction error over all samples */
7900 for(j=0; j<16; j++){
7901 int diff= ABS(src[j] - ref[j]);
7904 max_error= FFMAX(max_error, diff);
7907 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer test over every legal QP (0..51) --- */
7909 printf("testing quantizer\n");
7910 for(qp=0; qp<52; qp++){
7912 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round trip with injected zero runs --- */
7916 printf("Testing NAL layer\n");
7918 uint8_t bitstream[COUNT];
7919 uint8_t nal[COUNT*2];
7921 memset(&h, 0, sizeof(H264Context));
7923 for(i=0; i<COUNT; i++){
/* start from a bitstream with no zero bytes ... */
7931 for(j=0; j<COUNT; j++){
7932 bitstream[j]= (random() % 255) + 1;
/* ... then plant 'zeros' zero bytes at distinct random positions */
7935 for(j=0; j<zeros; j++){
7936 int pos= random() % COUNT;
7937 while(bitstream[pos] == 0){
7946 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7948 printf("encoding failed\n");
7952 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
/* decoded payload must match the original in length and content, and
 * the decoder must consume exactly what the encoder produced */
7956 if(out_length != COUNT){
7957 printf("incorrect length %d %d\n", out_length, COUNT);
7961 if(consumed != nal_length){
7962 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7966 if(memcmp(bitstream, out, COUNT)){
7967 printf("missmatch\n");
7972 printf("Testing RBSP\n");
/*
 * AVCodec.close callback: releases the per-context RBSP scratch
 * buffer and the decoder lookup tables.
 * NOTE(review): fragment -- braces and the return are elided.
 */
7980 static int decode_end(AVCodecContext *avctx)
7982 H264Context *h = avctx->priv_data;
7983 MpegEncContext *s = &h->s;
7985 av_freep(&h->rbsp_buffer);
7986 free_tables(h); //FIXME cleanup init stuff perhaps
7989 //    memset(h, 0, sizeof(H264Context));
/*
 * Public decoder descriptor registered with libavcodec.
 * NOTE(review): fragment -- most initializer fields (name, type,
 * codec id, init/close/decode callbacks) are elided from this view;
 * the visible capability flags advertise direct rendering, truncated
 * input handling and delayed (reordered) output.
 */
7995 AVCodec h264_decoder = {
7999 sizeof(H264Context),
8004 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8008 AVCodecParser h264_parser = {
8010 sizeof(H264Context),