2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
/* NOTE(review): this is a fragment of the SPS struct — the enclosing
 * `typedef struct SPS{ ... }SPS;` lines and several fields are missing
 * from this sampled copy of the file. Field names mirror the H.264
 * bitstream syntax element names. */
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present; ///< nonzero if scaling lists were present in the bitstream
94 uint8_t scaling_matrix4[6][16]; ///< 4x4 scaling lists
95 uint8_t scaling_matrix8[2][64]; ///< 8x8 scaling lists
99 * Picture parameter set
/* NOTE(review): fragment of the PPS struct — the enclosing
 * `typedef struct PPS{ ... }PPS;` lines are missing from this sample.
 * Field names mirror the H.264 bitstream syntax element names. */
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16]; ///< 4x4 scaling lists
118 uint8_t scaling_matrix8[2][64]; ///< 8x8 scaling lists
122 * Memory management control operation opcode.
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
146 typedef struct H264Context{
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
164 uint8_t *rbsp_buffer;
165 int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 uint8_t non_zero_count_cache[6*8] __align8;
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
208 int16_t mv_cache[2][5*8][2] __align8;
209 int8_t ref_cache[2][5*8] __align8;
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DCTELEM mb[16*24] __align8;
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
354 int16_t (*mvd_table[2])[2];
355 int16_t mvd_cache[2][5*8][2] __align8;
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 const uint8_t *zigzag_scan_q0;
362 const uint8_t *field_scan_q0;
367 static VLC coeff_token_vlc[4];
368 static VLC chroma_dc_coeff_token_vlc;
370 static VLC total_zeros_vlc[15];
371 static VLC chroma_dc_total_zeros_vlc[3];
373 static VLC run_vlc[6];
376 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
377 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
378 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one 32-bit word so that a pair can be
 * read/written with a single 32-bit memory access; which argument lands
 * in the low half depends on host byte order (WORDS_BIGENDIAN).
 * NOTE(review): the #else / #endif / closing-brace lines are missing
 * from this sampled copy of the file.
 */
380 static inline uint32_t pack16to32(int a, int b){
381 #ifdef WORDS_BIGENDIAN
382 return (b&0xFFFF) + (a<<16);
384 return (a&0xFFFF) + (b<<16);
390 * @param h height of the rectangle, should be a constant
391 * @param w width of the rectangle, should be a constant
392 * @param size the size of val (1 or 4), should be a constant
/* Fills a w x h rectangle at vp (row pitch = stride bytes) with val,
 * fully unrolled per (w,h) case for speed. For size==1 the low byte of
 * val is replicated across the word (val*0x0101 / *0x01010101 / ...);
 * for size==4 val is stored as-is in 32-bit units.
 * NOTE(review): several case branches (w==1, w==2&&h==2, the first
 * store of the 8x1 case) and the trailing else/assert/closing brace are
 * missing from this sampled copy of the file. */
394 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
395 uint8_t *p= (uint8_t*)vp;
396 assert(size==1 || size==4);
/* the unrolled stores below require vp aligned to min(w, STRIDE_ALIGN)
 * and a stride that is a multiple of the row width */
401 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
402 assert((stride&(w-1))==0);
403 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
406 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
407 }else if(w==2 && h==4){
408 *(uint16_t*)(p + 0*stride)=
409 *(uint16_t*)(p + 1*stride)=
410 *(uint16_t*)(p + 2*stride)=
411 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
412 }else if(w==4 && h==1){
413 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
414 }else if(w==4 && h==2){
415 *(uint32_t*)(p + 0*stride)=
416 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
417 }else if(w==4 && h==4){
418 *(uint32_t*)(p + 0*stride)=
419 *(uint32_t*)(p + 1*stride)=
420 *(uint32_t*)(p + 2*stride)=
421 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
422 }else if(w==8 && h==1){
424 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
425 }else if(w==8 && h==2){
426 *(uint32_t*)(p + 0 + 0*stride)=
427 *(uint32_t*)(p + 4 + 0*stride)=
428 *(uint32_t*)(p + 0 + 1*stride)=
429 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
/* 64-bit stores: for size==4 the pattern 0x0100000001 places val in both
 * 32-bit halves of the word */
430 }else if(w==8 && h==4){
431 *(uint64_t*)(p + 0*stride)=
432 *(uint64_t*)(p + 1*stride)=
433 *(uint64_t*)(p + 2*stride)=
434 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 }else if(w==16 && h==2){
436 *(uint64_t*)(p + 0+0*stride)=
437 *(uint64_t*)(p + 8+0*stride)=
438 *(uint64_t*)(p + 0+1*stride)=
439 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
440 }else if(w==16 && h==4){
441 *(uint64_t*)(p + 0+0*stride)=
442 *(uint64_t*)(p + 8+0*stride)=
443 *(uint64_t*)(p + 0+1*stride)=
444 *(uint64_t*)(p + 8+1*stride)=
445 *(uint64_t*)(p + 0+2*stride)=
446 *(uint64_t*)(p + 8+2*stride)=
447 *(uint64_t*)(p + 0+3*stride)=
448 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero-coefficient counts, motion-vector/reference caches, CABAC mvd
 * cache, B-frame direct cache) from the already-decoded top/left/topleft/
 * topright macroblocks, before decoding or deblocking the current MB.
 * @param mb_type macroblock type of the current MB
 * @param for_deblock nonzero when the caches are filled for the
 *        deblocking filter rather than for decoding (neighbours across
 *        slice boundaries are then allowed, see slice_table test below)
 * NOTE(review): many lines of this function are missing from this
 * sampled copy of the file (several if/else headers, closing braces and
 * whole branches); the comments below annotate only the visible code.
 */
453 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
454 MpegEncContext * const s = &h->s;
455 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
456 int topleft_xy, top_xy, topright_xy, left_xy[2];
457 int topleft_type, top_type, topright_type, left_type[2];
461 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
462 // the actual condition is whether we're on the edge of a slice,
463 // and even then the intra and nnz parts are unnecessary.
464 if(for_deblock && h->slice_num == 1)
467 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive / frame-coded) neighbour addresses */
469 top_xy = mb_xy - s->mb_stride;
470 topleft_xy = top_xy - 1;
471 topright_xy= top_xy + 1;
472 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF neighbour derivation: macroblocks come in vertical frame/field
 * pairs, so the effective neighbours depend on whether the current MB and
 * each neighbour pair are frame- or field-coded, and on whether the
 * current MB is the top or bottom one of its pair */
482 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
483 const int top_pair_xy = pair_xy - s->mb_stride;
484 const int topleft_pair_xy = top_pair_xy - 1;
485 const int topright_pair_xy = top_pair_xy + 1;
486 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
487 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
488 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
489 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
490 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
491 const int bottom = (s->mb_y & 1);
492 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
494 ? !curr_mb_frame_flag // bottom macroblock
495 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
497 top_xy -= s->mb_stride;
500 ? !curr_mb_frame_flag // bottom macroblock
501 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
503 topleft_xy -= s->mb_stride;
506 ? !curr_mb_frame_flag // bottom macroblock
507 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
509 topright_xy -= s->mb_stride;
511 if (left_mb_frame_flag != curr_mb_frame_flag) {
512 left_xy[1] = left_xy[0] = pair_xy - 1;
513 if (curr_mb_frame_flag) {
534 left_xy[1] += s->mb_stride;
/* remember the resolved neighbour addresses for later use */
547 h->top_mb_xy = top_xy;
548 h->left_mb_xy[0] = left_xy[0];
549 h->left_mb_xy[1] = left_xy[1];
/* neighbour MB types: for deblocking any decoded MB counts
 * (slice_table < 255), for decoding only MBs of the current slice
 * (slice_table == slice_num) are considered available */
551 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
552 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
553 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
554 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
555 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
557 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
558 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
559 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
560 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
561 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra prediction: start from "all samples available" bitmasks and
 * clear bits for neighbours that are absent or (with constrained intra
 * prediction) not intra-coded */
564 if(IS_INTRA(mb_type)){
565 h->topleft_samples_available=
566 h->top_samples_available=
567 h->left_samples_available= 0xFFFF;
568 h->topright_samples_available= 0xEEEA;
570 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
571 h->topleft_samples_available= 0xB3FF;
572 h->top_samples_available= 0x33FF;
573 h->topright_samples_available= 0x26EA;
576 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
577 h->topleft_samples_available&= 0xDF5F;
578 h->left_samples_available&= 0x5F5F;
582 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
583 h->topleft_samples_available&= 0x7FFF;
585 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
586 h->topright_samples_available&= 0xFBFF;
/* load the bottom row of the top neighbour's intra4x4 modes into the
 * cache row above the current MB (cache is laid out per scan8[]) */
588 if(IS_INTRA4x4(mb_type)){
589 if(IS_INTRA4x4(top_type)){
590 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
591 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
592 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
593 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
596 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
601 h->intra4x4_pred_mode_cache[4+8*0]=
602 h->intra4x4_pred_mode_cache[5+8*0]=
603 h->intra4x4_pred_mode_cache[6+8*0]=
604 h->intra4x4_pred_mode_cache[7+8*0]= pred;
607 if(IS_INTRA4x4(left_type[i])){
608 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
609 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
612 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
617 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
618 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-coefficient-count cache: import top/left neighbour nnz values
 * (luma edge blocks plus the chroma entries), or the 0/64 "unavailable"
 * markers when the neighbour cannot be used */
633 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
635 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
636 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
637 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
638 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
640 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
641 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
643 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
644 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
647 h->non_zero_count_cache[4+8*0]=
648 h->non_zero_count_cache[5+8*0]=
649 h->non_zero_count_cache[6+8*0]=
650 h->non_zero_count_cache[7+8*0]=
652 h->non_zero_count_cache[1+8*0]=
653 h->non_zero_count_cache[2+8*0]=
655 h->non_zero_count_cache[1+8*3]=
656 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
660 for (i=0; i<2; i++) {
662 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
663 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
664 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
665 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
667 h->non_zero_count_cache[3+8*1 + 2*8*i]=
668 h->non_zero_count_cache[3+8*2 + 2*8*i]=
669 h->non_zero_count_cache[0+8*1 + 8*i]=
670 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* coded-block-pattern of the top/left neighbours (used by CABAC) */
677 h->top_cbp = h->cbp_table[top_xy];
678 } else if(IS_INTRA(mb_type)) {
685 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
686 } else if(IS_INTRA(mb_type)) {
692 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
695 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter prediction: fill the motion-vector and reference-index caches
 * from the neighbours, one pass per reference list (two lists only for
 * B slices) */
700 //FIXME direct mb can skip much of this
701 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
703 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
704 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
705 /*if(!h->mv_cache_clean[list]){
706 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
707 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
708 h->mv_cache_clean[list]= 1;
712 h->mv_cache_clean[list]= 0;
/* top neighbour: copy its bottom row of MVs / refs into cache row -1 */
714 if(IS_INTER(top_type)){
715 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
716 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
717 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
718 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
719 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
720 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
721 h->ref_cache[list][scan8[0] + 0 - 1*8]=
722 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
723 h->ref_cache[list][scan8[0] + 2 - 1*8]=
724 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
726 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
727 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
728 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
729 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
730 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
733 //FIXME unify cleanup or sth
/* left neighbours: copy their rightmost MV column into cache column -1 */
734 if(IS_INTER(left_type[0])){
735 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
736 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
737 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
739 h->ref_cache[list][scan8[0] - 1 + 0*8]=
740 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
742 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
743 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
744 h->ref_cache[list][scan8[0] - 1 + 0*8]=
745 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
748 if(IS_INTER(left_type[1])){
749 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
750 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
751 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
752 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
753 h->ref_cache[list][scan8[0] - 1 + 2*8]=
754 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
756 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
757 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
758 h->ref_cache[list][scan8[0] - 1 + 2*8]=
759 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
760 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright corners are only needed for MV prediction, not for
 * deblocking or spatial direct mode */
763 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
766 if(IS_INTER(topleft_type)){
767 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
768 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
769 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
770 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
772 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
773 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
776 if(IS_INTER(topright_type)){
777 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
778 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
779 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
780 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
782 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
783 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* mark interior cache slots that never have valid neighbours */
787 h->ref_cache[list][scan8[5 ]+1] =
788 h->ref_cache[list][scan8[7 ]+1] =
789 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
790 h->ref_cache[list][scan8[4 ]] =
791 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
792 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
793 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
794 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
795 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
796 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: mirror the same neighbour layout for the motion-vector
 * difference (mvd) cache */
799 /* XXX beurk, Load mvd */
800 if(IS_INTER(topleft_type)){
801 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
802 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
804 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
807 if(IS_INTER(top_type)){
808 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
809 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
810 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
811 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
812 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
814 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
815 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
816 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
817 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
819 if(IS_INTER(left_type[0])){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
824 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
825 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
827 if(IS_INTER(left_type[1])){
828 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
829 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
830 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
832 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
833 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
835 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
836 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
837 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
838 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
839 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache which neighbouring 8x8 partitions used direct mode */
841 if(h->slice_type == B_TYPE){
842 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
844 if(IS_DIRECT(top_type)){
845 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
846 }else if(IS_8X8(top_type)){
847 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
848 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
849 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
851 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
855 if(IS_DIRECT(left_type[0])){
856 h->direct_cache[scan8[0] - 1 + 0*8]=
857 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
858 }else if(IS_8X8(left_type[0])){
859 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
860 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
861 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
863 h->direct_cache[scan8[0] - 1 + 0*8]=
864 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* count how many of the top/left neighbours used the 8x8 transform
 * (used to select CABAC context for transform_size_8x8_flag) */
872 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the current MB's intra4x4 prediction modes from the decode cache
 * back into the per-macroblock table, so later MBs can read them as their
 * top/left neighbour modes. Only the edge entries (right column, bottom
 * row of the scan8-layout cache) need to survive.
 * NOTE(review): the line storing entry [7] and the closing brace are
 * missing from this sampled copy of the file.
 */
875 static inline void write_back_intra_pred_mode(H264Context *h){
876 MpegEncContext * const s = &h->s;
877 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
879 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
880 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
881 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
882 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
883 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
884 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
885 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
889 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * When the top (resp. left) samples are unavailable, each cached intra4x4
 * mode is remapped through the top[] (resp. left[]) table; modes that have
 * no legal substitute map to a negative entry, which is reported via
 * av_log as an error (the enclosing error-return lines are missing from
 * this sampled copy of the file — presumably the function returns an
 * error code there; TODO confirm against the full source).
 */
891 static inline int check_intra4x4_pred_mode(H264Context *h){
892 MpegEncContext * const s = &h->s;
893 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
894 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
897 if(!(h->top_samples_available&0x8000)){
899 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
/* substitute the mode that works without the missing neighbour */
904 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
909 if(!(h->left_samples_available&0x8000)){
911 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
913 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
916 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
922 } //FIXME cleanup like next
925 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * Same idea as check_intra4x4_pred_mode but for a single whole-block
 * (chroma / 16x16) prediction mode: the mode is range-checked (0..6) and
 * remapped through top[]/left[] when the corresponding samples are
 * unavailable; errors are reported via av_log.
 * NOTE(review): the remap/return lines are missing from this sampled
 * copy of the file.
 */
927 static inline int check_intra_pred_mode(H264Context *h, int mode){
928 MpegEncContext * const s = &h->s;
929 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
930 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
932 if(mode < 0 || mode > 6) {
933 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
937 if(!(h->top_samples_available&0x8000)){
940 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 if(!(h->left_samples_available&0x8000)){
948 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
957 * gets the predicted intra4x4 prediction mode.
/**
 * gets the predicted intra4x4 prediction mode.
 * Prediction is the smaller of the left and top neighbour modes; if
 * either neighbour is unavailable (negative cache entry) the predictor
 * falls back to DC_PRED.
 * @param n 4x4 block index (scan8 order)
 */
959 static inline int pred_intra_mode(H264Context *h, int n){
960 const int index8= scan8[n];
961 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
962 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
963 const int min= FFMIN(left, top);
965 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* negative means at least one neighbour is unavailable */
967 if(min<0) return DC_PRED;
/**
 * Copies the current MB's non-zero-coefficient counts from the decode
 * cache back into the per-macroblock table (right column and bottom row
 * for luma, plus the chroma entries 7..12), so later MBs can read them
 * as their top/left neighbour nnz values.
 */
971 static inline void write_back_non_zero_count(H264Context *h){
972 MpegEncContext * const s = &h->s;
973 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
975 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
976 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
977 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
978 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
979 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
980 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
981 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
983 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
984 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
985 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
987 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
988 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
989 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
993 * gets the predicted number of non zero coefficients.
994 * @param n block index
/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 * NOTE(review): the lines combining left/top into `i` are missing from
 * this sampled copy; from the visible code, `i` is halved (rounding up)
 * when below 64 — presumably averaging the two neighbour counts when
 * both are available (64 is the "not available" marker per the cache's
 * declaration comment); TODO confirm against the full source.
 */
996 static inline int pred_non_zero_count(H264Context *h, int n){
997 const int index8= scan8[n];
998 const int left= h->non_zero_count_cache[index8 - 1];
999 const int top = h->non_zero_count_cache[index8 - 8];
1002 if(i<64) i= (i+1)>>1;
1004 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal-neighbour motion vector used for MV prediction:
 * the top-right block of the partition if it is available, otherwise the
 * top-left block as fallback.
 * @param C receives a pointer to the chosen neighbour's MV
 * @param i scan8 index of the current partition's first block
 * @param part_width partition width in 4x4 blocks
 * @return the reference index of the chosen neighbour
 */
1009 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1010 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1012 if(topright_ref != PART_NOT_AVAILABLE){
1013 *C= h->mv_cache[list][ i - 8 + part_width ];
1014 return topright_ref;
1016 tprintf("topright MV not available\n");
/* fall back to the top-left neighbour */
1018 *C= h->mv_cache[list][ i - 8 - 1 ];
1019 return h->ref_cache[list][ i - 8 - 1 ];
1024 * gets the predicted MV.
1025 * @param n the block index
1026 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1027 * @param mx the x component of the predicted motion vector
1028 * @param my the y component of the predicted motion vector
/**
 * Computes the predicted motion vector for a partition: the median of
 * the left (A), top (B) and diagonal (C) neighbour MVs, with special
 * cases when exactly one neighbour shares the target reference index or
 * when only the left neighbour is available.
 * @param n the block index (scan8 order)
 * @param part_width the width of the partition in 4x4 blocks (1, 2 or 4)
 * @param list reference list (0 or 1)
 * @param ref reference index the prediction is for
 * @param mx receives the x component of the predicted motion vector
 * @param my receives the y component of the predicted motion vector
 * NOTE(review): several branch bodies are missing from this sampled
 * copy of the file; comments below annotate only the visible code.
 */
1030 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1031 const int index8= scan8[n];
1032 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1033 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1034 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1035 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1037 int diagonal_ref, match_count;
1039 assert(part_width==1 || part_width==2 || part_width==4);
/* C is the top-right neighbour, or top-left if top-right is unavailable */
1049 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* how many neighbours use the same reference picture as this partition */
1050 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1051 tprintf("pred_motion match_count=%d\n", match_count);
1052 if(match_count > 1){ //most common
1053 *mx= mid_pred(A[0], B[0], C[0]);
1054 *my= mid_pred(A[1], B[1], C[1]);
1055 }else if(match_count==1){
1059 }else if(top_ref==ref){
/* no neighbour matched: use left MV if it is the only available one,
 * otherwise fall back to the component-wise median */
1067 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1071 *mx= mid_pred(A[0], B[0], C[0]);
1072 *my= mid_pred(A[1], B[1], C[1]);
1076 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1080 * gets the directionally predicted 16x8 MV.
1081 * @param n the block index
1082 * @param mx the x component of the predicted motion vector
1083 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for a 16x8 partition:
 * top partition (n==0) prefers the top neighbour B, bottom partition
 * prefers the left neighbour A; otherwise falls back to pred_motion().
 * @param n the block index
 * @param mx/my receive the predicted motion vector components
 */
1085 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1087 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1088 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1090 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1098 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1099 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1101 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1103 if(left_ref == ref){
/* directional predictor did not match -> standard median prediction */
1111 pred_motion(h, n, 4, list, ref, mx, my);
1115 * gets the directionally predicted 8x16 MV.
1116 * @param n the block index
1117 * @param mx the x component of the predicted motion vector
1118 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for an 8x16 partition:
 * left partition prefers the left neighbour A, right partition prefers
 * the diagonal neighbour C; otherwise falls back to pred_motion().
 * @param n the block index
 * @param mx/my receive the predicted motion vector components
 */
1120 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1122 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1123 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1125 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1127 if(left_ref == ref){
1136 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1138 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1140 if(diagonal_ref == ref){
/* directional predictor did not match -> standard median prediction */
1148 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-Skip macroblock: zero MV when the top or left
 * neighbour is unavailable or is a zero-MV block with ref 0, otherwise
 * the regular 16x16 median prediction with ref 0.
 */
1151 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1152 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1153 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1155 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1157 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1158 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1159 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* neighbours usable -> median prediction over the whole 16x16 block, list 0, ref 0 */
1165 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes dist_scale_factor[] for temporal direct prediction:
 * for each list-0 reference, scale = clip((tb*tx + 32) >> 6) where
 * tb/td are the clipped POC distances (per the H.264 temporal direct
 * derivation — presumably spec 8.4.1.2.3; verify against the standard).
 */
1170 static inline void direct_dist_scale_factor(H264Context * const h){
1171 const int poc = h->s.current_picture_ptr->poc;
1172 const int poc1 = h->ref_list[1][0].poc;
1174 for(i=0; i<h->ref_count[0]; i++){
1175 int poc0 = h->ref_list[0][i].poc;
1176 int td = clip(poc1 - poc0, -128, 127);
1177 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs -> unity scale (256 == 1.0 in 8.8 fixed point) */
1178 h->dist_scale_factor[i] = 256;
1180 int tb = clip(poc - poc0, -128, 127);
1181 int tx = (16384 + (ABS(td) >> 1)) / td;
1182 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Stores the current picture's ref counts/POCs and builds
 * map_col_to_list0[]: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the list-0 reference with the same POC,
 * or PART_NOT_AVAILABLE when none matches. Skipped for spatial direct.
 */
1186 static inline void direct_ref_list_init(H264Context * const h){
1187 MpegEncContext * const s = &h->s;
1188 Picture * const ref1 = &h->ref_list[1][0];
1189 Picture * const cur = s->current_picture_ptr;
1191 if(cur->pict_type == I_TYPE)
1192 cur->ref_count[0] = 0;
1193 if(cur->pict_type != B_TYPE)
1194 cur->ref_count[1] = 0;
1195 for(list=0; list<2; list++){
1196 cur->ref_count[list] = h->ref_count[list];
1197 for(j=0; j<h->ref_count[list]; j++)
1198 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the mapping below is only needed for temporal direct in B frames */
1200 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1202 for(list=0; list<2; list++){
1203 for(i=0; i<ref1->ref_count[list]; i++){
1204 const int poc = ref1->ref_poc[list][i];
1205 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1206 for(j=0; j<h->ref_count[list]; j++)
1207 if(h->ref_list[list][j].poc == poc){
1208 h->map_col_to_list0[list][i] = j;
/**
 * Fills mv_cache/ref_cache for a B-Direct macroblock (or direct 8x8
 * sub-blocks when is_b8x8), using either spatial or temporal direct
 * prediction. The co-located picture's MVs/refs (l1mv0/1, l1ref0/1) and
 * mb type (mb_type_col) drive the decision; *mb_type/sub_mb_type are
 * updated to the derived partitioning.
 * NOTE(review): many interior lines are elided in this extract —
 * verify details against the full file before relying on them.
 */
1215 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1216 MpegEncContext * const s = &h->s;
1217 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1218 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1219 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1220 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1221 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1222 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1223 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1224 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1225 const int is_b8x8 = IS_8X8(*mb_type);
/* derive the partitioning (mb_type/sub_mb_type) from the co-located MB */
1229 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1230 /* FIXME save sub mb types from previous frames (or derive from MVs)
1231 * so we know exactly what block size to use */
1232 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1233 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1234 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1235 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1236 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1238 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1239 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1242 *mb_type |= MB_TYPE_DIRECT2;
1244 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct: refs from neighbours, MVs from median prediction,
 * zeroed where the co-located block is a near-static ref-0 block ---- */
1246 if(h->direct_spatial_mv_pred){
1251 /* ref = min(neighbors) */
1252 for(list=0; list<2; list++){
1253 int refa = h->ref_cache[list][scan8[0] - 1];
1254 int refb = h->ref_cache[list][scan8[0] - 8];
1255 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1257 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1259 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1261 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour ref in either list -> zero refs and MVs */
1267 if(ref[0] < 0 && ref[1] < 0){
1268 ref[0] = ref[1] = 0;
1269 mv[0][0] = mv[0][1] =
1270 mv[1][0] = mv[1][1] = 0;
1272 for(list=0; list<2; list++){
1274 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1276 mv[list][0] = mv[list][1] = 0;
/* drop the prediction flag of a list with no usable reference */
1281 *mb_type &= ~MB_TYPE_P0L1;
1282 sub_mb_type &= ~MB_TYPE_P0L1;
1283 }else if(ref[0] < 0){
1284 *mb_type &= ~MB_TYPE_P0L0;
1285 sub_mb_type &= ~MB_TYPE_P0L0;
1288 if(IS_16X16(*mb_type)){
1289 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1290 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* co-located block is ref 0 with |mv|<=1 -> force zero MV (moving-edge rule) */
1291 if(!IS_INTRA(mb_type_col)
1292 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1293 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1294 && (h->x264_build>33 || !h->x264_build)))){
1296 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1298 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1302 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* per-8x8 variant of the same fill + zeroing logic */
1308 for(i8=0; i8<4; i8++){
1309 const int x8 = i8&1;
1310 const int y8 = i8>>1;
1312 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1314 h->sub_mb_type[i8] = sub_mb_type;
1316 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1317 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1318 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1319 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1322 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1323 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1324 && (h->x264_build>33 || !h->x264_build)))){
1325 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1326 if(IS_SUB_8X8(sub_mb_type)){
1327 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1328 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1330 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1332 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1335 for(i4=0; i4<4; i4++){
1336 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1337 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1339 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1341 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1347 }else{ /* direct temporal mv pred */
/* temporal direct: list-0 MV = scaled co-located MV, list-1 MV = mv_l0 - mv_col */
1348 if(IS_16X16(*mb_type)){
1349 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1350 if(IS_INTRA(mb_type_col)){
1351 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1352 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1353 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1355 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1356 : h->map_col_to_list0[1][l1ref1[0]];
1357 const int dist_scale_factor = h->dist_scale_factor[ref0];
1358 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1360 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1361 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1362 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1363 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1364 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1367 for(i8=0; i8<4; i8++){
1368 const int x8 = i8&1;
1369 const int y8 = i8>>1;
1370 int ref0, dist_scale_factor;
1371 const int16_t (*l1mv)[2]= l1mv0;
1373 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1375 h->sub_mb_type[i8] = sub_mb_type;
1376 if(IS_INTRA(mb_type_col)){
1377 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1378 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1379 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1380 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located 8x8 block's reference into our list 0 */
1384 ref0 = l1ref0[x8 + y8*h->b8_stride];
1386 ref0 = h->map_col_to_list0[0][ref0];
1388 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1391 dist_scale_factor = h->dist_scale_factor[ref0];
1393 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1394 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1395 if(IS_SUB_8X8(sub_mb_type)){
1396 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1397 int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1398 int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1399 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1400 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1402 for(i4=0; i4<4; i4++){
1403 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1404 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1405 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1406 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1407 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1408 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache/ref_cache, and mvd_cache for
 * CABAC) back into the picture-wide arrays; zeroes/marks-unused the
 * entries of lists the mb_type does not use, and for CABAC B-frames
 * records the per-8x8 direct flags in direct_table.
 */
1415 static inline void write_back_motion(H264Context *h, int mb_type){
1416 MpegEncContext * const s = &h->s;
1417 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1418 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1421 for(list=0; list<2; list++){
1423 if(!USES_LIST(mb_type, list)){
1424 if(1){ //FIXME skip or never read if mb_type doesn't use it
/* 64-bit stores clear two 32-bit MVs at a time */
1426 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1427 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1429 if( h->pps.cabac ) {
1430 /* FIXME needed ? */
1432 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1433 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1437 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1438 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
1445 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1446 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1448 if( h->pps.cabac ) {
1450 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1451 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1455 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1456 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
1460 if(h->slice_type == B_TYPE && h->pps.cabac){
1461 if(IS_8X8(mb_type)){
1462 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1463 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1464 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1470 * Decodes a network abstraction layer unit.
1471 * @param consumed is the number of bytes used as input
1472 * @param length is the length of the array
1473 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1474 * @returns decoded bytes, might be src+1 if no escapes
1476 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1480 // src[0]&0x80; //forbidden bit
1481 h->nal_ref_idc= src[0]>>5;
1482 h->nal_unit_type= src[0]&0x1F;
1486 for(i=0; i<length; i++)
1487 printf("%2X ", src[i]);
1489 for(i=0; i+1<length; i+=2){
1490 if(src[i]) continue;
1491 if(i>0 && src[i-1]==0) i--;
1492 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1494 /* startcode, so we must be past the end */
1501 if(i>=length-1){ //no escaped 0
1502 *dst_length= length;
1503 *consumed= length+1; //+1 for the header
1507 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1508 dst= h->rbsp_buffer;
1510 //printf("decoding esc\n");
1513 //remove escapes (very rare 1:2^22)
1514 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1515 if(src[si+2]==3){ //escape
1520 }else //next start code
1524 dst[di++]= src[si++];
1528 *consumed= si + 1;//+1 for the header
1529 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1535 * @param src the data which should be escaped
1536 * @param dst the target buffer, dst+1 == src is allowed as a special case
1537 * @param length the length of the src data
1538 * @param dst_length the length of the dst array
1539 * @returns length of escaped data in bytes or -1 if an error occured
/**
 * Escapes RBSP data into a NAL unit: writes the header byte, then copies
 * src inserting 03 emulation-prevention bytes after each 00 00 pair that
 * would otherwise form a startcode-like sequence.
 * @param src the data which should be escaped
 * @param dst the target buffer; dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @return length of escaped data in bytes or -1 if an error occurred
 */
1541 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1542 int i, escape_count, si, di;
1546 assert(dst_length>0);
1548 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1550 if(length==0) return 1;
/* first pass: count how many escape bytes will be needed */
1553 for(i=0; i<length; i+=2){
1554 if(src[i]) continue;
1555 if(i>0 && src[i-1]==0)
1557 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
/* fast path: nothing to escape, plain copy after the header byte */
1563 if(escape_count==0){
1565 memcpy(dst+1, src, length);
1569 if(length + escape_count + 1> dst_length)
1572 //this should be damn rare (hopefully)
1574 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1575 temp= h->rbsp_buffer;
1576 //printf("encoding esc\n");
1581 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1582 temp[di++]= 0; si++;
1583 temp[di++]= 0; si++;
1585 temp[di++]= src[si++];
1588 temp[di++]= src[si++];
1590 memcpy(dst+1, temp, length+escape_count);
1592 assert(di == length+escape_count);
1598 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
/* Writes the rbsp_trailing_bits: a 1 bit then 0s up to byte alignment
 * (the 1,10,100,... pattern described in the comment above).
 * NOTE(review): the put_bits(pb, 1, 1) line appears elided in this extract. */
1600 static void encode_rbsp_trailing(PutBitContext *pb){
1603 length= (-put_bits_count(pb))&7;
1604 if(length) put_bits(pb, length, 0);
1609 * identifies the exact end of the bitstream
1610 * @return the length of the trailing, or 0 if damaged
/* Identifies the exact end of the bitstream by locating the trailing
 * stop bit in the last byte; returns 0 if the trailing is damaged.
 * NOTE(review): the body after the tprintf appears elided in this extract. */
1612 static int decode_rbsp_trailing(uint8_t *src){
1616 tprintf("rbsp trailing %X\n", v);
1626 * idct tranforms the 16 dc values and dequantize them.
1627 * @param qp quantization parameter
/**
 * 4x4 Hadamard-style inverse transform of the 16 luma DC values,
 * followed by dequantization with qmul (8-bit rounding: +128 >> 8).
 * Operates in place on the DC positions of the 16x16 block layout
 * (stride 16; x_offset/y_offset map the 4x4 DC grid onto it).
 * @param qp quantization parameter
 */
1629 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1632 int temp[16]; //FIXME check if this is a good idea
1633 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1634 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1636 //memset(block, 64, 2*256);
/* horizontal (row) butterfly pass into temp[] */
1639 const int offset= y_offset[i];
1640 const int z0= block[offset+stride*0] + block[offset+stride*4];
1641 const int z1= block[offset+stride*0] - block[offset+stride*4];
1642 const int z2= block[offset+stride*1] - block[offset+stride*5];
1643 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterfly pass + dequant back into block[] */
1652 const int offset= x_offset[i];
1653 const int z0= temp[4*0+i] + temp[4*2+i];
1654 const int z1= temp[4*0+i] - temp[4*2+i];
1655 const int z2= temp[4*1+i] - temp[4*3+i];
1656 const int z3= temp[4*1+i] + temp[4*3+i];
1658 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1659 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1660 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1661 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1667 * dct tranforms the 16 dc values.
1668 * @param qp quantization parameter ??? FIXME
/**
 * Forward Hadamard-style transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); results are halved (>>1)
 * instead of dequantized. Operates in place on the same DC layout.
 */
1670 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1671 // const int qmul= dequant_coeff[qp][0];
1673 int temp[16]; //FIXME check if this is a good idea
1674 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1675 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal (row) butterfly pass into temp[] */
1678 const int offset= y_offset[i];
1679 const int z0= block[offset+stride*0] + block[offset+stride*4];
1680 const int z1= block[offset+stride*0] - block[offset+stride*4];
1681 const int z2= block[offset+stride*1] - block[offset+stride*5];
1682 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterfly pass, halved, back into block[] */
1691 const int offset= x_offset[i];
1692 const int z0= temp[4*0+i] + temp[4*2+i];
1693 const int z1= temp[4*0+i] - temp[4*2+i];
1694 const int z2= temp[4*1+i] - temp[4*3+i];
1695 const int z3= temp[4*1+i] + temp[4*3+i];
1697 block[stride*0 +offset]= (z0 + z3)>>1;
1698 block[stride*2 +offset]= (z1 + z2)>>1;
1699 block[stride*8 +offset]= (z1 - z2)>>1;
1700 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequantization of the 4 chroma DC values,
 * in place (stride 32, xStride 16 within the chroma block layout).
 * NOTE(review): the lines computing e/f from a,b,c,d appear elided here.
 */
1708 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1709 const int stride= 16*2;
1710 const int xStride= 16;
1713 a= block[stride*0 + xStride*0];
1714 b= block[stride*0 + xStride*1];
1715 c= block[stride*1 + xStride*0];
1716 d= block[stride*1 + xStride*1];
/* 2x2 butterfly results scaled by qmul, >>7 */
1723 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1724 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1725 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1726 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values, in place
 * (encoder counterpart of chroma_dc_dequant_idct_c).
 * NOTE(review): the lines computing e/f from a,b,c,d appear elided here.
 */
1730 static void chroma_dc_dct_c(DCTELEM *block){
1731 const int stride= 16*2;
1732 const int xStride= 16;
1735 a= block[stride*0 + xStride*0];
1736 b= block[stride*0 + xStride*1];
1737 c= block[stride*1 + xStride*0];
1738 d= block[stride*1 + xStride*1];
1745 block[stride*0 + xStride*0]= (a+c);
1746 block[stride*0 + xStride*1]= (e+b);
1747 block[stride*1 + xStride*0]= (a-c);
1748 block[stride*1 + xStride*1]= (e-b);
1753 * gets the chroma qp.
/* Maps a luma qscale (+ the PPS chroma offset, clipped to 0..51) to the
 * chroma QP via the chroma_qp[] lookup table. */
1755 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1757 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Computes the 4x4 H.264 forward transform of the difference
 * src1 - src2, writing coefficients into block[16]:
 * first a row pass on the pixel differences, then a column pass.
 */
1762 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1764 //FIXME try int temp instead of block
/* row pass on the per-pixel differences */
1767 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1768 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1769 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1770 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1771 const int z0= d0 + d3;
1772 const int z3= d0 - d3;
1773 const int z1= d1 + d2;
1774 const int z2= d1 - d2;
1776 block[0 + 4*i]= z0 + z1;
1777 block[1 + 4*i]= 2*z3 + z2;
1778 block[2 + 4*i]= z0 - z1;
1779 block[3 + 4*i]= z3 - 2*z2;
/* column pass over the row-transformed coefficients */
1783 const int z0= block[0*4 + i] + block[3*4 + i];
1784 const int z3= block[0*4 + i] - block[3*4 + i];
1785 const int z1= block[1*4 + i] + block[2*4 + i];
1786 const int z2= block[1*4 + i] - block[2*4 + i];
1788 block[0*4 + i]= z0 + z1;
1789 block[1*4 + i]= 2*z3 + z2;
1790 block[2*4 + i]= z0 - z1;
1791 block[3*4 + i]= z3 - 2*z2;
1796 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1797 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 block in scan order with a dead-zone quantizer:
 * intra uses a larger bias (1/3 of a step) than inter (1/6). When
 * seperate_dc is set, block[0] is quantized with a shifted QUANT_SHIFT
 * (different table/shift for qscale>17 vs. below — the branch selecting
 * between the two DC paths appears elided in this extract).
 * @return index of the last non-zero coefficient
 */
1798 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1800 const int * const quant_table= quant_coeff[qscale];
1801 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold1/2 implement the dead zone: |level| <= threshold quantizes to 0 */
1802 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1803 const unsigned int threshold2= (threshold1<<1);
/* DC path, variant with QUANT_SHIFT-2 */
1809 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1810 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1811 const unsigned int dc_threshold2= (dc_threshold1<<1);
1813 int level= block[0]*quant_coeff[qscale+18][0];
1814 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1816 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1819 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1822 // last_non_zero = i;
/* DC path, variant with QUANT_SHIFT+1 */
1827 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1828 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1829 const unsigned int dc_threshold2= (dc_threshold1<<1);
1831 int level= block[0]*quant_table[0];
1832 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1834 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1837 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1840 // last_non_zero = i;
/* AC coefficients in scan order */
1853 const int j= scantable[i];
1854 int level= block[j]*quant_table[j];
1856 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1857 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
/* single unsigned compare tests both +/- thresholds at once */
1858 if(((unsigned)(level+threshold1))>threshold2){
1860 level= (bias + level)>>QUANT_SHIFT;
1863 level= (bias - level)>>QUANT_SHIFT;
1872 return last_non_zero;
/* 4x4 vertical intra prediction: replicate the 4 pixels above into all rows. */
1875 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
1876 const uint32_t a= ((uint32_t*)(src-stride))[0];
1877 ((uint32_t*)(src+0*stride))[0]= a;
1878 ((uint32_t*)(src+1*stride))[0]= a;
1879 ((uint32_t*)(src+2*stride))[0]= a;
1880 ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal intra prediction: each row filled with its left pixel
 * (byte replicated via *0x01010101). */
1883 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1884 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1885 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1886 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1887 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC intra prediction: fill with the rounded mean of the 4 top and
 * 4 left neighbour pixels. */
1890 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1891 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1892 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
1894 ((uint32_t*)(src+0*stride))[0]=
1895 ((uint32_t*)(src+1*stride))[0]=
1896 ((uint32_t*)(src+2*stride))[0]=
1897 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the left neighbours (top unavailable). */
1900 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1901 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1903 ((uint32_t*)(src+0*stride))[0]=
1904 ((uint32_t*)(src+1*stride))[0]=
1905 ((uint32_t*)(src+2*stride))[0]=
1906 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the top neighbours (left unavailable). */
1909 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1910 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1912 ((uint32_t*)(src+0*stride))[0]=
1913 ((uint32_t*)(src+1*stride))[0]=
1914 ((uint32_t*)(src+2*stride))[0]=
1915 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction with no neighbours available: fill with mid-grey 128. */
1918 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1919 ((uint32_t*)(src+0*stride))[0]=
1920 ((uint32_t*)(src+1*stride))[0]=
1921 ((uint32_t*)(src+2*stride))[0]=
1922 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros declaring the neighbouring edge pixels as locals:
 * t4..t7 = top-right row, l0..l3 = left column, t0..t3 = top row.
 * NOTE(review): blank lines between the macros appear elided in this
 * extract; no comments are inserted between them because each macro
 * line ends in a backslash continuation. */
1926 #define LOAD_TOP_RIGHT_EDGE\
1927 const int t4= topright[0];\
1928 const int t5= topright[1];\
1929 const int t6= topright[2];\
1930 const int t7= topright[3];\
1932 #define LOAD_LEFT_EDGE\
1933 const int l0= src[-1+0*stride];\
1934 const int l1= src[-1+1*stride];\
1935 const int l2= src[-1+2*stride];\
1936 const int l3= src[-1+3*stride];\
1938 #define LOAD_TOP_EDGE\
1939 const int t0= src[ 0-1*stride];\
1940 const int t1= src[ 1-1*stride];\
1941 const int t2= src[ 2-1*stride];\
1942 const int t3= src[ 3-1*stride];\
/* 4x4 diagonal down-right intra prediction: diagonals filled from the
 * 3-tap filtered left/top-left/top neighbours.
 * NOTE(review): several duplicated-diagonal assignment lines appear
 * elided in this extract. */
1944 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1945 const int lt= src[-1-1*stride];
1949 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1951 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1954 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1958 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1961 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1963 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1964 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal down-left intra prediction from the 3-tap filtered
 * top and top-right neighbours (t0..t7). */
1967 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1972 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1974 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1977 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1981 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1984 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1986 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1987 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right intra prediction: 2-tap averages on even diagonals,
 * 3-tap filtered values on odd diagonals, from left/top-left/top pixels. */
1990 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1991 const int lt= src[-1-1*stride];
1994 const __attribute__((unused)) int unu= l3;
1997 src[1+2*stride]=(lt + t0 + 1)>>1;
1999 src[2+2*stride]=(t0 + t1 + 1)>>1;
2001 src[3+2*stride]=(t1 + t2 + 1)>>1;
2002 src[3+0*stride]=(t2 + t3 + 1)>>1;
2004 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2006 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2008 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2009 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2010 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2011 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left intra prediction: 2-tap averages and 3-tap filtered
 * values from the top and top-right neighbours (t0..t6; t7 unused). */
2014 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2017 const __attribute__((unused)) int unu= t7;
2019 src[0+0*stride]=(t0 + t1 + 1)>>1;
2021 src[0+2*stride]=(t1 + t2 + 1)>>1;
2023 src[1+2*stride]=(t2 + t3 + 1)>>1;
2025 src[2+2*stride]=(t3 + t4+ 1)>>1;
2026 src[3+2*stride]=(t4 + t5+ 1)>>1;
2027 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2029 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2031 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2033 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2034 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up intra prediction from the left neighbours (l0..l3);
 * the bottom-right pixels (all l3) appear elided in this extract. */
2037 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2040 src[0+0*stride]=(l0 + l1 + 1)>>1;
2041 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2043 src[0+1*stride]=(l1 + l2 + 1)>>1;
2045 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2047 src[0+2*stride]=(l2 + l3 + 1)>>1;
2049 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down intra prediction: 2-tap averages and 3-tap filtered
 * values from left/top-left/top neighbours (t3 unused). */
2058 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2059 const int lt= src[-1-1*stride];
2062 const __attribute__((unused)) int unu= t3;
2065 src[2+1*stride]=(lt + l0 + 1)>>1;
2067 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2068 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2069 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2071 src[2+2*stride]=(l0 + l1 + 1)>>1;
2073 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2075 src[2+3*stride]=(l1 + l2+ 1)>>1;
2077 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2078 src[0+3*stride]=(l2 + l3 + 1)>>1;
2079 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: replicate the 16 top pixels
 * (as four 32-bit words) into all 16 rows. */
2082 static void pred16x16_vertical_c(uint8_t *src, int stride){
2084 const uint32_t a= ((uint32_t*)(src-stride))[0];
2085 const uint32_t b= ((uint32_t*)(src-stride))[1];
2086 const uint32_t c= ((uint32_t*)(src-stride))[2];
2087 const uint32_t d= ((uint32_t*)(src-stride))[3];
2089 for(i=0; i<16; i++){
2090 ((uint32_t*)(src+i*stride))[0]= a;
2091 ((uint32_t*)(src+i*stride))[1]= b;
2092 ((uint32_t*)(src+i*stride))[2]= c;
2093 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal intra prediction: each row filled with its left pixel. */
2097 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2100 for(i=0; i<16; i++){
2101 ((uint32_t*)(src+i*stride))[0]=
2102 ((uint32_t*)(src+i*stride))[1]=
2103 ((uint32_t*)(src+i*stride))[2]=
2104 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC intra prediction: fill with the rounded mean of the 16 left
 * and 16 top neighbours (the top-row accumulation appears elided here). */
2108 static void pred16x16_dc_c(uint8_t *src, int stride){
2112 dc+= src[-1+i*stride];
2119 dc= 0x01010101*((dc + 16)>>5);
2121 for(i=0; i<16; i++){
2122 ((uint32_t*)(src+i*stride))[0]=
2123 ((uint32_t*)(src+i*stride))[1]=
2124 ((uint32_t*)(src+i*stride))[2]=
2125 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 left neighbours. */
2129 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2133 dc+= src[-1+i*stride];
2136 dc= 0x01010101*((dc + 8)>>4);
2138 for(i=0; i<16; i++){
2139 ((uint32_t*)(src+i*stride))[0]=
2140 ((uint32_t*)(src+i*stride))[1]=
2141 ((uint32_t*)(src+i*stride))[2]=
2142 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the 16 top neighbours
 * (the accumulation loop appears elided in this extract). */
2146 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2152 dc= 0x01010101*((dc + 8)>>4);
2154 for(i=0; i<16; i++){
2155 ((uint32_t*)(src+i*stride))[0]=
2156 ((uint32_t*)(src+i*stride))[1]=
2157 ((uint32_t*)(src+i*stride))[2]=
2158 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours: fill with mid-grey 128. */
2162 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2165 for(i=0; i<16; i++){
2166 ((uint32_t*)(src+i*stride))[0]=
2167 ((uint32_t*)(src+i*stride))[1]=
2168 ((uint32_t*)(src+i*stride))[2]=
2169 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/**
 * 16x16 plane intra prediction: fits a linear gradient (H horizontal,
 * V vertical) to the border pixels and fills the block with clipped
 * values via the cm crop table. The svq3 flag selects SVQ3's variant
 * scaling (including the H/V swap needed for bit-exactness).
 * NOTE(review): loop headers / tail lines appear elided in this extract.
 */
2173 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2176 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2177 const uint8_t * const src0 = src+7-stride;
2178 const uint8_t *src1 = src+8*stride-1;
2179 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2180 int H = src0[1] - src0[-1];
2181 int V = src1[0] - src2[ 0];
/* weighted sums of symmetric border differences */
2182 for(k=2; k<=8; ++k) {
2183 src1 += stride; src2 -= stride;
2184 H += k*(src0[k] - src0[-k]);
2185 V += k*(src1[0] - src2[ 0]);
/* SVQ3 variant scaling */
2188 H = ( 5*(H/4) ) / 16;
2189 V = ( 5*(V/4) ) / 16;
2191 /* required for 100% accuracy */
2192 i = H; H = V; V = i;
/* standard H.264 plane-mode scaling */
2194 H = ( 5*H+32 ) >> 6;
2195 V = ( 5*V+32 ) >> 6;
2198 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2199 for(j=16; j>0; --j) {
2202 for(i=-16; i<0; i+=4) {
2203 src[16+i] = cm[ (b ) >> 5 ];
2204 src[17+i] = cm[ (b+ H) >> 5 ];
2205 src[18+i] = cm[ (b+2*H) >> 5 ];
2206 src[19+i] = cm[ (b+3*H) >> 5 ];
/* H.264 16x16 plane prediction: the compat helper with svq3 disabled. */
2213 static void pred16x16_plane_c(uint8_t *src, int stride){
2214 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical intra prediction: replicate the 8 top pixels. */
2217 static void pred8x8_vertical_c(uint8_t *src, int stride){
2219 const uint32_t a= ((uint32_t*)(src-stride))[0];
2220 const uint32_t b= ((uint32_t*)(src-stride))[1];
2223 ((uint32_t*)(src+i*stride))[0]= a;
2224 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 (chroma) horizontal intra prediction: each row filled with its left pixel. */
2228 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2232 ((uint32_t*)(src+i*stride))[0]=
2233 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 chroma DC prediction with no neighbours: fill with mid-grey (128). */
2237 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2241 ((uint32_t*)(src+i*stride))[0]=
2242 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 chroma DC prediction from the left neighbours only: dc0 averages the
 * top 4 left pixels (used for rows 0-3), dc2 the bottom 4 (rows 4-7). */
2246 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2252 dc0+= src[-1+i*stride];
2253 dc2+= src[-1+(i+4)*stride];
2255 dc0= 0x01010101*((dc0 + 2)>>2);
2256 dc2= 0x01010101*((dc2 + 2)>>2);
/* rows 0-3 */
2259 ((uint32_t*)(src+i*stride))[0]=
2260 ((uint32_t*)(src+i*stride))[1]= dc0;
/* rows 4-7 */
2263 ((uint32_t*)(src+i*stride))[0]=
2264 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 chroma DC prediction from the top neighbours only: dc0 averages the
 * left 4 top pixels (left half columns), dc1 the right 4 (right half). */
2268 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2274 dc0+= src[i-stride];
2275 dc1+= src[4+i-stride];
2277 dc0= 0x01010101*((dc0 + 2)>>2);
2278 dc1= 0x01010101*((dc1 + 2)>>2);
/* both 4-row halves get the same left/right DC pair */
2281 ((uint32_t*)(src+i*stride))[0]= dc0;
2282 ((uint32_t*)(src+i*stride))[1]= dc1;
2285 ((uint32_t*)(src+i*stride))[0]= dc0;
2286 ((uint32_t*)(src+i*stride))[1]= dc1;
/* 8x8 chroma DC prediction using both top and left neighbours.
 * Per H.264, each 4x4 quadrant gets its own DC: top-left from top+left
 * (dc0), top-right from top only (dc1), bottom-left from left only (dc2),
 * bottom-right from the remaining top+left sums (dc3). */
2291 static void pred8x8_dc_c(uint8_t *src, int stride){
2293 int dc0, dc1, dc2, dc3;
2297 dc0+= src[-1+i*stride] + src[i-stride];
2298 dc1+= src[4+i-stride];
2299 dc2+= src[-1+(i+4)*stride];
2301 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2302 dc0= 0x01010101*((dc0 + 4)>>3);
2303 dc1= 0x01010101*((dc1 + 2)>>2);
2304 dc2= 0x01010101*((dc2 + 2)>>2);
/* top half: dc0 | dc1 */
2307 ((uint32_t*)(src+i*stride))[0]= dc0;
2308 ((uint32_t*)(src+i*stride))[1]= dc1;
/* bottom half: dc2 | dc3 */
2311 ((uint32_t*)(src+i*stride))[0]= dc2;
2312 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 chroma plane (gradient) prediction: H and V are the weighted gradients
 * of the top and left neighbour pixels (weights 1..4), scaled per the spec
 * ((17*x+16)>>5); output is clipped through the cm (cropTbl) table. */
2316 static void pred8x8_plane_c(uint8_t *src, int stride){
2319 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2320 const uint8_t * const src0 = src+3-stride;
2321 const uint8_t *src1 = src+4*stride-1;
2322 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2323 int H = src0[1] - src0[-1];
2324 int V = src1[0] - src2[ 0];
2325 for(k=2; k<=4; ++k) {
2326 src1 += stride; src2 -= stride;
2327 H += k*(src0[k] - src0[-k]);
2328 V += k*(src1[0] - src2[ 0]);
2330 H = ( 17*H+16 ) >> 5;
2331 V = ( 17*V+16 ) >> 5;
2333 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2334 for(j=8; j>0; --j) {
2337 src[0] = cm[ (b ) >> 5 ];
2338 src[1] = cm[ (b+ H) >> 5 ];
2339 src[2] = cm[ (b+2*H) >> 5 ];
2340 src[3] = cm[ (b+3*H) >> 5 ];
2341 src[4] = cm[ (b+4*H) >> 5 ];
2342 src[5] = cm[ (b+5*H) >> 5 ];
2343 src[6] = cm[ (b+6*H) >> 5 ];
2344 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) prediction functions.
 * SRC(x,y) addresses a pixel of the current block; the LOAD_* macros compute
 * the low-pass-filtered ((a + 2*b + c + 2)>>2) neighbour samples l0-l7
 * (left column), t0-t7 (top row), t8-t15 (top-right) and lt (top-left),
 * falling back to edge replication when has_topleft/has_topright is 0. */
2349 #define SRC(x,y) src[(x)+(y)*stride]
/* PL(y): filtered left-column sample l<y> (its #define line is elided here). */
2351 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2352 #define PREDICT_8x8_LOAD_LEFT \
2353 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2354 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2355 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2356 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* PT(x): filtered top-row sample t<x> (its #define line is elided here). */
2359 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2360 #define PREDICT_8x8_LOAD_TOP \
2361 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2362 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2363 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2364 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2365 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* PTR(x): filtered top-right sample t<x> (its #define line is elided here). */
2368 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2369 #define PREDICT_8x8_LOAD_TOPRIGHT \
2370 int t8, t9, t10, t11, t12, t13, t14, t15; \
2371 if(has_topright) { \
2372 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2373 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2374 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2376 #define PREDICT_8x8_LOAD_TOPLEFT \
2377 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* PREDICT_8x8_DC(v): fill all 8 rows with the byte-splatted DC word v
 * (the macro's trailing lines are elided in this excerpt). */
2379 #define PREDICT_8x8_DC(v) \
2381 for( y = 0; y < 8; y++ ) { \
2382 ((uint32_t*)src)[0] = \
2383 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with mid-grey (0x80). */
2387 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2389 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left column only. */
2391 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_LOAD_LEFT;
2394 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top row only. */
2397 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_TOP;
2400 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both the filtered left column and top row. */
2403 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2405 PREDICT_8x8_LOAD_LEFT;
2406 PREDICT_8x8_LOAD_TOP;
2407 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2408 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: each row is filled with its filtered
 * left-neighbour sample l<y>, byte-splatted into two 32-bit stores. */
2411 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2413 PREDICT_8x8_LOAD_LEFT;
2414 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2415 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2416 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: row 0 is built from the filtered top samples
 * (the stores for row 0 appear elided in this excerpt), then copied to the
 * remaining 7 rows as a 64-bit word. */
2419 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2422 PREDICT_8x8_LOAD_TOP;
2431 for( y = 1; y < 8; y++ )
2432 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: each anti-diagonal of the block is
 * set to a 3-tap filtered combination of the top/top-right samples t0..t15. */
2434 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2436 PREDICT_8x8_LOAD_TOP;
2437 PREDICT_8x8_LOAD_TOPRIGHT;
2438 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2439 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2440 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2441 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2442 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2443 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2444 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2445 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2446 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2447 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2448 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2449 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2450 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2451 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2452 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: diagonals below the main one use
 * the filtered left samples l0..l7, the main diagonal uses l0/lt/t0, and
 * diagonals above it use the filtered top samples t0..t7. */
2454 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2456 PREDICT_8x8_LOAD_TOP;
2457 PREDICT_8x8_LOAD_LEFT;
2458 PREDICT_8x8_LOAD_TOPLEFT;
2459 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2460 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2461 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2462 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2463 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2464 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2465 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2466 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2467 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2468 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2469 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2470 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2471 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2473 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternates 2-tap ((a+b+1)>>1) and
 * 3-tap ((a+2*b+c+2)>>2) filtered samples along half-pel diagonals, drawing
 * from the left column, top-left corner and top row. */
2476 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2478 PREDICT_8x8_LOAD_TOP;
2479 PREDICT_8x8_LOAD_LEFT;
2480 PREDICT_8x8_LOAD_TOPLEFT;
2481 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2482 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2483 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2484 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2485 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2486 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2487 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2488 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2489 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2490 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2491 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2492 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2493 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2494 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2495 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2496 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2497 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2498 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2499 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2500 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2501 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2502 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: mirror of vertical-right, interleaving
 * 2-tap and 3-tap filtered samples of the left column, top-left and top row. */
2504 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2506 PREDICT_8x8_LOAD_TOP;
2507 PREDICT_8x8_LOAD_LEFT;
2508 PREDICT_8x8_LOAD_TOPLEFT;
2509 SRC(0,7)= (l6 + l7 + 1) >> 1;
2510 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2511 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2512 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2513 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2514 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2515 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2516 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2517 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2518 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2519 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2520 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2521 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2522 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2523 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2524 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2525 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2526 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2527 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2528 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2529 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2530 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: even rows use 2-tap and odd rows 3-tap
 * filtered combinations of the top and top-right samples t0..t12. */
2532 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2534 PREDICT_8x8_LOAD_TOP;
2535 PREDICT_8x8_LOAD_TOPRIGHT;
2536 SRC(0,0)= (t0 + t1 + 1) >> 1;
2537 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2538 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2539 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2540 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2541 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2542 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2543 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2544 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2545 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2546 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2547 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2548 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2549 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2550 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2551 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2552 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2553 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2554 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2555 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2556 SRC(7,6)= (t10 + t11 + 1) >> 1;
2557 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates down the filtered left
 * column l0..l7; positions past the last left sample are clamped to l7. */
2559 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2561 PREDICT_8x8_LOAD_LEFT;
2562 SRC(0,0)= (l0 + l1 + 1) >> 1;
2563 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2564 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2565 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2566 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2567 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2568 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2569 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2570 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2571 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2572 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2573 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2574 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2575 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2576 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2577 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2578 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2579 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2581 #undef PREDICT_8x8_LOAD_LEFT
2582 #undef PREDICT_8x8_LOAD_TOP
2583 #undef PREDICT_8x8_LOAD_TOPLEFT
2584 #undef PREDICT_8x8_LOAD_TOPRIGHT
2585 #undef PREDICT_8x8_DC
/* Motion-compensate one partition in one direction (list 0 or 1).
 * Luma MVs are quarter-pel (mx,my in 1/4 units; luma_xy selects the qpel
 * filter), chroma is eighth-pel (mx&7, my&7 passed to chroma_op). When the
 * full-pel source area extends outside the picture (or EMU_EDGE is forced),
 * pixels are first copied into edge_emu_buffer with edge replication. */
2591 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2592 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2593 int src_x_offset, int src_y_offset,
2594 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2595 MpegEncContext * const s = &h->s;
2596 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2597 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2598 const int luma_xy= (mx&3) + ((my&3)<<2);
2599 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2600 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2601 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2602 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2603 int extra_height= extra_width;
2605 const int full_mx= mx>>2;
2606 const int full_my= my>>2;
2607 const int pic_width = 16*s->mb_width;
2608 const int pic_height = 16*s->mb_height;
2610 assert(pic->data[0]);
/* sub-pel filtering reads 2 extra pixels on each side */
2612 if(mx&7) extra_width -= 3;
2613 if(my&7) extra_height -= 3;
2615 if( full_mx < 0-extra_width
2616 || full_my < 0-extra_height
2617 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2618 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2619 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2620 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
/* second qpix_op call handles the non-square (e.g. 16x8 split) case,
 * offset by delta; emulate flag handling lines appear elided here */
2624 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2626 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2629 if(s->flags&CODEC_FLAG_GRAY) return;
2632 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2633 src_cb= s->edge_emu_buffer;
2635 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2639 src_cr= s->edge_emu_buffer;
2641 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/* Standard (unweighted) motion compensation for one partition.
 * Runs list-0 prediction with the "put" operators, then — for bidirectional
 * blocks — switches to the "avg" operators before the list-1 pass so the two
 * references are averaged into the destination.
 * NOTE(review): the qpix_op= qpix_avg line appears elided in this excerpt. */
2644 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2645 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2646 int x_offset, int y_offset,
2647 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2648 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2649 int list0, int list1){
2650 MpegEncContext * const s = &h->s;
2651 qpel_mc_func *qpix_op= qpix_put;
2652 h264_chroma_mc_func chroma_op= chroma_put;
/* offsets are in 8-pel (chroma) units; luma gets 2x */
2654 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2655 dest_cb += x_offset + y_offset*s->uvlinesize;
2656 dest_cr += x_offset + y_offset*s->uvlinesize;
2657 x_offset += 8*s->mb_x;
2658 y_offset += 8*s->mb_y;
2661 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2662 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2663 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2664 qpix_op, chroma_op);
2667 chroma_op= chroma_avg;
2671 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2672 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2673 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2674 qpix_op, chroma_op);
/* Weighted-prediction motion compensation for one partition.
 * Bidirectional case: list-0 goes to the destination, list-1 into the
 * obmc_scratchpad temp buffers, then the two are combined — with implicit
 * weights (use_weight==2, weight1 = 64-weight0) or explicit per-reference
 * weights/offsets. Unidirectional case: plain MC followed by in-place
 * luma/chroma weighting. */
2678 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2679 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2680 int x_offset, int y_offset,
2681 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2682 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2683 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2684 int list0, int list1){
2685 MpegEncContext * const s = &h->s;
2687 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2688 dest_cb += x_offset + y_offset*s->uvlinesize;
2689 dest_cr += x_offset + y_offset*s->uvlinesize;
2690 x_offset += 8*s->mb_x;
2691 y_offset += 8*s->mb_y;
2694 /* don't optimize for luma-only case, since B-frames usually
2695 * use implicit weights => chroma too. */
2696 uint8_t *tmp_cb = s->obmc_scratchpad;
2697 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2698 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2699 int refn0 = h->ref_cache[0][ scan8[n] ];
2700 int refn1 = h->ref_cache[1][ scan8[n] ];
2702 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2703 dest_y, dest_cb, dest_cr,
2704 x_offset, y_offset, qpix_put, chroma_put);
2705 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2706 tmp_y, tmp_cb, tmp_cr,
2707 x_offset, y_offset, qpix_put, chroma_put);
2709 if(h->use_weight == 2){
2710 int weight0 = h->implicit_weight[refn0][refn1];
2711 int weight1 = 64 - weight0;
2712 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2713 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2714 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
/* explicit bi-directional weighting */
2716 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2717 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2718 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2719 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2720 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2721 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2722 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2723 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2724 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
/* unidirectional: MC then weight in place */
2727 int list = list1 ? 1 : 0;
2728 int refn = h->ref_cache[list][ scan8[n] ];
2729 Picture *ref= &h->ref_list[list][refn];
2730 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2732 qpix_put, chroma_put);
2734 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2735 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2736 if(h->use_weight_chroma){
2737 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2738 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2739 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2740 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch one partition to weighted or standard MC: weighted is used for
 * explicit weighting (use_weight==1) or implicit bi-pred whose weight pair
 * is not the trivial 32/32 average; everything else takes the std path. */
2745 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2747 int x_offset, int y_offset,
2748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2750 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2751 int list0, int list1){
2752 if((h->use_weight==2 && list0 && list1
2753 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2754 || h->use_weight==1)
2755 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2756 x_offset, y_offset, qpix_put, chroma_put,
2757 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2759 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2760 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Perform inter prediction for a whole macroblock: walk the partition tree
 * implied by mb_type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions down to
 * 4x4) and motion-compensate each partition via mc_part, selecting the
 * matching qpel/chroma function size index and weight-operator row. */
2763 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2764 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2765 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2766 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2767 MpegEncContext * const s = &h->s;
2768 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2769 const int mb_type= s->current_picture.mb_type[mb_xy];
2771 assert(IS_INTER(mb_type));
2773 if(IS_16X16(mb_type)){
2774 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2775 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2776 &weight_op[0], &weight_avg[0],
2777 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2778 }else if(IS_16X8(mb_type)){
2779 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2780 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2781 &weight_op[1], &weight_avg[1],
2782 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2783 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2784 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2785 &weight_op[1], &weight_avg[1],
2786 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2787 }else if(IS_8X16(mb_type)){
2788 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2789 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2790 &weight_op[2], &weight_avg[2],
2791 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2792 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2793 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2794 &weight_op[2], &weight_avg[2],
2795 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 split: each of the 4 sub-macroblocks has its own sub_mb_type
 * (the enclosing loop header appears elided in this excerpt) */
2799 assert(IS_8X8(mb_type));
2802 const int sub_mb_type= h->sub_mb_type[i];
2804 int x_offset= (i&1)<<2;
2805 int y_offset= (i&2)<<1;
2807 if(IS_SUB_8X8(sub_mb_type)){
2808 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2809 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2810 &weight_op[3], &weight_avg[3],
2811 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2812 }else if(IS_SUB_8X4(sub_mb_type)){
2813 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2814 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2815 &weight_op[4], &weight_avg[4],
2816 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2817 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2818 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2819 &weight_op[4], &weight_avg[4],
2820 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2821 }else if(IS_SUB_4X8(sub_mb_type)){
2822 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2823 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2824 &weight_op[5], &weight_avg[5],
2825 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2826 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2827 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2828 &weight_op[5], &weight_avg[5],
2829 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2832 assert(IS_SUB_4X4(sub_mb_type));
2834 int sub_x_offset= x_offset + 2*(j&1);
2835 int sub_y_offset= y_offset + (j&2);
2836 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2837 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2838 &weight_op[6], &weight_avg[6],
2839 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Build the static CAVLC tables (coeff_token, total_zeros, run, and their
 * chroma-DC variants) exactly once; the `done` flag guards re-entry.
 * NOTE(review): the `if(!done)` guard and loop headers appear elided here. */
2846 static void decode_init_vlc(H264Context *h){
2847 static int done = 0;
2853 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2854 &chroma_dc_coeff_token_len [0], 1, 1,
2855 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2858 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2859 &coeff_token_len [i][0], 1, 1,
2860 &coeff_token_bits[i][0], 1, 1, 1);
2864 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2865 &chroma_dc_total_zeros_len [i][0], 1, 1,
2866 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2868 for(i=0; i<15; i++){
2869 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2870 &total_zeros_len [i][0], 1, 1,
2871 &total_zeros_bits[i][0], 1, 1, 1);
2875 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2876 &run_len [i][0], 1, 1,
2877 &run_bits[i][0], 1, 1, 1);
/* runs >= 7 share one dedicated table (row 6 of run_len/run_bits) */
2879 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2880 &run_len [6][0], 1, 1,
2881 &run_bits[6][0], 1, 1, 1);
2886 * Sets the intra prediction function pointers.
2888 static void init_pred_ptrs(H264Context *h){
2889 // MpegEncContext * const s = &h->s;
/* 4x4 luma intra prediction modes */
2891 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2892 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2893 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2894 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2895 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2896 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2897 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2898 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2899 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2900 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2901 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2902 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
/* 8x8 luma intra prediction modes (High profile transform_8x8) */
2904 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2905 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2906 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2907 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2908 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2909 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2910 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2911 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2912 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2913 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2914 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2915 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
/* 8x8 chroma intra prediction modes */
2917 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2918 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2919 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2920 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2921 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2922 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2923 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 luma intra prediction modes (same mode indices as chroma 8x8) */
2925 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2926 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2927 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2928 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2929 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2930 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2931 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/* Free all per-stream tables allocated by alloc_tables (av_freep NULLs each
 * pointer, so double-free is safe); slice_table is a derived pointer into
 * slice_table_base and is just cleared. */
2934 static void free_tables(H264Context *h){
2935 av_freep(&h->intra4x4_pred_mode);
2936 av_freep(&h->chroma_pred_mode_table);
2937 av_freep(&h->cbp_table);
2938 av_freep(&h->mvd_table[0]);
2939 av_freep(&h->mvd_table[1]);
2940 av_freep(&h->direct_table);
2941 av_freep(&h->non_zero_count);
2942 av_freep(&h->slice_table_base);
2943 av_freep(&h->top_borders[1]);
2944 av_freep(&h->top_borders[0]);
2945 h->slice_table= NULL;
2947 av_freep(&h->mb2b_xy);
2948 av_freep(&h->mb2b8_xy);
2950 av_freep(&h->s.obmc_scratchpad);
/* Precompute the 8x8 dequant tables for all 52 QP values from the PPS
 * scaling matrices. If both 8x8 matrices are identical the second table
 * aliases the first buffer instead of being recomputed. */
2953 static void init_dequant8_coeff_table(H264Context *h){
2955 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2956 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2958 for(i=0; i<2; i++ ){
2959 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2960 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2964 for(q=0; q<52; q++){
2965 int shift = div6[q];
2968 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2969 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequant tables (6 scaling matrices, 52 QPs); matrices
 * equal to an earlier one alias that earlier buffer instead of recomputing. */
2974 static void init_dequant4_coeff_table(H264Context *h){
2976 for(i=0; i<6; i++ ){
2977 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2979 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2980 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2987 for(q=0; q<52; q++){
2988 int shift = div6[q] + 2;
2991 h->dequant4_coeff[i][q][x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2992 h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequant tables: 4x4 always, 8x8 only when the PPS enables the
 * 8x8 transform; with lossless transform_bypass, QP 0 entries are forced to
 * the identity scale (1<<6). */
2997 static void init_dequant_tables(H264Context *h){
2999 init_dequant4_coeff_table(h);
3000 if(h->pps.transform_8x8_mode)
3001 init_dequant8_coeff_table(h);
3002 if(h->sps.transform_bypass){
3005 h->dequant4_coeff[i][0][x] = 1<<6;
3006 if(h->pps.transform_8x8_mode)
3009 h->dequant8_coeff[i][0][x] = 1<<6;
3016 * needs width/height
/* Allocate all per-stream tables (sized from mb_stride/mb_height) and build
 * the mb->b / mb->b8 index maps. CHECKED_ALLOCZ jumps to a fail label on
 * OOM (label/return lines appear elided in this excerpt). */
3018 static int alloc_tables(H264Context *h){
3019 MpegEncContext * const s = &h->s;
3020 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3023 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3025 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3026 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3027 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3028 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3029 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3031 if( h->pps.cabac ) {
3032 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3033 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3034 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3035 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table skips the first (padding) row+col */
3038 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3039 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3041 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3042 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3043 for(y=0; y<s->mb_height; y++){
3044 for(x=0; x<s->mb_width; x++){
3045 const int mb_xy= x + y*s->mb_stride;
3046 const int b_xy = 4*x + 4*y*h->b_stride;
3047 const int b8_xy= 2*x + 2*y*h->b8_stride;
3049 h->mb2b_xy [mb_xy]= b_xy;
3050 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start once linesize is known */
3054 s->obmc_scratchpad = NULL;
3056 if(!h->dequant4_coeff[0])
3057 init_dequant_tables(h);
/* Initialization shared by decoder and encoder: copy dimensions/codec id
 * from the AVCodecContext and default both PPS scaling matrices to flat 16
 * (i.e. no custom scaling until a PPS overrides them). */
3065 static void common_init(H264Context *h){
3066 MpegEncContext * const s = &h->s;
3068 s->width = s->avctx->width;
3069 s->height = s->avctx->height;
3070 s->codec_id= s->avctx->codec->id;
/* -1 == "no PPS applied yet" for the dequant tables */
3074 h->dequant_coeff_pps= -1;
3075 s->unrestricted_mv=1;
3076 s->decode=1; //FIXME
3078 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3079 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up MpegEncContext defaults, output format and
 * pixel format; a leading extradata byte of 1 indicates AVC ("avcC")
 * bitstream packaging rather than Annex-B start codes. */
3082 static int decode_init(AVCodecContext *avctx){
3083 H264Context *h= avctx->priv_data;
3084 MpegEncContext * const s = &h->s;
3086 MPV_decode_defaults(s);
3091 s->out_format = FMT_H264;
3092 s->workaround_bugs= avctx->workaround_bugs;
3095 // s->decode_mb= ff_h263_decode_mb;
3097 avctx->pix_fmt= PIX_FMT_YUV420P;
3101 if(avctx->extradata_size > 0 && avctx->extradata &&
3102 *(char *)avctx->extradata == 1){
/* Per-frame setup: start the MPV frame and error resilience, precompute the
 * block_offset table (pixel offset of each 4x4 block within the macroblock;
 * entries 24+ appear to be a second, double-stride variant — TODO confirm
 * its exact use against the full source), and lazily allocate the
 * obmc_scratchpad used by weighted bi-prediction. */
3112 static int frame_start(H264Context *h){
3113 MpegEncContext * const s = &h->s;
3116 if(MPV_frame_start(s, s->avctx) < 0)
3118 ff_er_frame_start(s);
3120 assert(s->linesize && s->uvlinesize);
3122 for(i=0; i<16; i++){
3123 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3124 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3127 h->block_offset[16+i]=
3128 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3129 h->block_offset[24+16+i]=
3130 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3133 /* can't be in alloc_tables because linesize isn't known there.
3134 * FIXME: redo bipred weight to not require extra buffer? */
3135 if(!s->obmc_scratchpad)
3136 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3138 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column (into left_border) and bottom row (into
 * top_borders[0]) of the just-decoded macroblock so the deblocking
 * filter of the next macroblock can read the pre-filter samples.
 */
3142 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3143 MpegEncContext * const s = &h->s;
3147 src_cb -= uvlinesize;
3148 src_cr -= uvlinesize;
3150 // There are two lines saved, the line above the top macroblock of a pair,
3151 // and the line above the bottom macroblock
// left_border[0] carries the top-left corner sample over from top_borders.
3152 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3153 for(i=1; i<17; i++){
3154 h->left_border[i]= src_y[15+i* linesize];
// Bottom luma row (16 bytes) saved as two 64-bit stores.
3157 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3158 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// Chroma borders only needed when chroma is decoded (not GRAY mode).
3160 if(!(s->flags&CODEC_FLAG_GRAY)){
3161 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3162 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3164 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3165 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3167 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3168 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swaps (xchg=1) or restores (xchg=0) the saved neighbour border
 * samples with the picture, so intra prediction sees unfiltered
 * neighbours while the deblocked picture is preserved.
 */
3172 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3173 MpegEncContext * const s = &h->s;
3176 int deblock_left = (s->mb_x > 0);
3177 int deblock_top = (s->mb_y > 0);
// Step back one row and one column to address the border samples.
3179 src_y -= linesize + 1;
3180 src_cb -= uvlinesize + 1;
3181 src_cr -= uvlinesize + 1;
3183 #define XCHG(a,b,t,xchg)\
// Left border: skip row 0 when there is no top neighbour.
3190 for(i = !deblock_top; i<17; i++){
3191 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3196 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3197 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// Top-right neighbour samples, only if a macroblock exists to the right.
3198 if(s->mb_x+1 < s->mb_width){
3199 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3203 if(!(s->flags&CODEC_FLAG_GRAY)){
3205 for(i = !deblock_top; i<9; i++){
3206 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3207 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3211 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3212 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border: saves borders for a whole
 * macroblock pair (two stacked MBs), hence the doubled counts and
 * the two top_borders rows [0] and [1].
 */
3217 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3218 MpegEncContext * const s = &h->s;
3221 src_y -= 2 * linesize;
3222 src_cb -= 2 * uvlinesize;
3223 src_cr -= 2 * uvlinesize;
3225 // There are two lines saved, the line above the top macroblock of a pair,
3226 // and the line above the bottom macroblock
3227 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3228 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma rows for the pair (plus the two corner samples above).
3229 for(i=2; i<34; i++){
3230 h->left_border[i]= src_y[15+i* linesize];
3233 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3234 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3235 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3236 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3238 if(!(s->flags&CODEC_FLAG_GRAY)){
3239 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3240 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3241 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3242 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3243 for(i=2; i<18; i++){
3244 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3245 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3247 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3248 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3249 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3250 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border: swaps/restores saved borders for a
 * macroblock pair (34 luma rows, 18 chroma rows, two top_borders rows).
 */
3254 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3255 MpegEncContext * const s = &h->s;
3258 int deblock_left = (s->mb_x > 0);
3259 int deblock_top = (s->mb_y > 0);
3261 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3263 src_y -= 2 * linesize + 1;
3264 src_cb -= 2 * uvlinesize + 1;
3265 src_cr -= 2 * uvlinesize + 1;
3267 #define XCHG(a,b,t,xchg)\
// Skip the two top rows when there is no top neighbour pair.
3274 for(i = (!deblock_top)<<1; i<34; i++){
3275 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3280 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3281 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3282 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3283 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3286 if(!(s->flags&CODEC_FLAG_GRAY)){
3288 for(i = (!deblock_top) << 1; i<18; i++){
3289 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3290 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3294 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3295 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3296 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3297 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level per-macroblock decode: reconstructs one macroblock into
 * the current picture. Handles I_PCM copy-through, intra prediction
 * (4x4 / 8x8 / 16x16), inter motion compensation, residual IDCT-add
 * (with lossless transform-bypass variants), chroma, and finally the
 * in-loop deblocking filter (including the MBAFF pair path).
 */
3302 static void hl_decode_mb(H264Context *h){
3303 MpegEncContext * const s = &h->s;
3304 const int mb_x= s->mb_x;
3305 const int mb_y= s->mb_y;
3306 const int mb_xy= mb_x + mb_y*s->mb_stride;
3307 const int mb_type= s->current_picture.mb_type[mb_xy];
3308 uint8_t *dest_y, *dest_cb, *dest_cr;
3309 int linesize, uvlinesize /*dct_offset*/;
3311 int *block_offset = &h->block_offset[0];
// bottom: 1 when this is the bottom MB of an MBAFF pair row.
3312 const unsigned int bottom = mb_y & 1;
// Lossless mode: QP 0 with qpprime_y_zero_transform_bypass set in the SPS.
3313 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3314 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3319 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3320 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3321 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Field decoding: double the stride and use the field block offsets.
3323 if (h->mb_field_decoding_flag) {
3324 linesize = s->linesize * 2;
3325 uvlinesize = s->uvlinesize * 2;
3326 block_offset = &h->block_offset[24];
3327 if(mb_y&1){ //FIXME move out of this func?
3328 dest_y -= s->linesize*15;
3329 dest_cb-= s->uvlinesize*7;
3330 dest_cr-= s->uvlinesize*7;
3333 linesize = s->linesize;
3334 uvlinesize = s->uvlinesize;
3335 // dct_offset = s->linesize * 16;
// Select residual-add function: plain pixel add in bypass mode,
// 8x8 or 4x4 IDCT-add otherwise.
3338 idct_add = transform_bypass
3339 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3340 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
// I_PCM: raw samples were parsed into h->mb; copy them straight out.
3342 if (IS_INTRA_PCM(mb_type)) {
3345 // The pixels are stored in h->mb array in the same order as levels,
3346 // copy them in output in the correct order.
3347 for(i=0; i<16; i++) {
3348 for (y=0; y<4; y++) {
3349 for (x=0; x<4; x++) {
3350 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3354 for(i=16; i<16+4; i++) {
3355 for (y=0; y<4; y++) {
3356 for (x=0; x<4; x++) {
3357 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3361 for(i=20; i<20+4; i++) {
3362 for (y=0; y<4; y++) {
3363 for (x=0; x<4; x++) {
3364 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3369 if(IS_INTRA(mb_type)){
// Intra prediction must see unfiltered neighbours: temporarily swap
// in the saved pre-deblocking border samples.
3370 if(h->deblocking_filter) {
3371 if (h->mb_aff_frame) {
3373 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3375 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3379 if(!(s->flags&CODEC_FLAG_GRAY)){
3380 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3381 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3384 if(IS_INTRA4x4(mb_type)){
3386 if(IS_8x8DCT(mb_type)){
3387 for(i=0; i<16; i+=4){
3388 uint8_t * const ptr= dest_y + block_offset[i];
3389 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3390 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3391 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3392 if(h->non_zero_count_cache[ scan8[i] ])
3393 idct_add(ptr, h->mb + i*16, linesize);
3396 for(i=0; i<16; i++){
3397 uint8_t * const ptr= dest_y + block_offset[i];
3399 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// These modes read the top-right neighbour; if it's unavailable,
// replicate the last available top sample instead (spec behaviour).
3402 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3403 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3404 assert(mb_y || linesize <= block_offset[i]);
3405 if(!topright_avail){
3406 tr= ptr[3 - linesize]*0x01010101;
3407 topright= (uint8_t*) &tr;
3409 topright= ptr + 4 - linesize;
3413 h->pred4x4[ dir ](ptr, topright, linesize);
3414 if(h->non_zero_count_cache[ scan8[i] ]){
3415 if(s->codec_id == CODEC_ID_H264)
3416 idct_add(ptr, h->mb + i*16, linesize);
3418 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: full-MB prediction + separate luma DC transform.
3423 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3424 if(s->codec_id == CODEC_ID_H264){
3425 if(!transform_bypass)
3426 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3428 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Restore the deblocked border samples swapped in above.
3430 if(h->deblocking_filter) {
3431 if (h->mb_aff_frame) {
3433 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3434 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3435 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3437 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3441 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock: motion compensation (H.264 only; SVQ3 has none here).
3444 }else if(s->codec_id == CODEC_ID_H264){
3445 hl_motion(h, dest_y, dest_cb, dest_cr,
3446 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3447 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3448 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Luma residual for non-intra4x4 MBs (intra4x4 added its residual above).
3452 if(!IS_INTRA4x4(mb_type)){
3453 if(s->codec_id == CODEC_ID_H264){
3454 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3455 for(i=0; i<16; i+=di){
3456 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3457 uint8_t * const ptr= dest_y + block_offset[i];
3458 idct_add(ptr, h->mb + i*16, linesize);
3462 for(i=0; i<16; i++){
3463 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3464 uint8_t * const ptr= dest_y + block_offset[i];
3465 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: 2x2 DC dequant+transform, then per-block 4x4 add.
3471 if(!(s->flags&CODEC_FLAG_GRAY)){
3472 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3473 if(!transform_bypass){
3474 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3475 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3477 if(s->codec_id == CODEC_ID_H264){
3478 for(i=16; i<16+4; i++){
3479 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3480 uint8_t * const ptr= dest_cb + block_offset[i];
3481 idct_add(ptr, h->mb + i*16, uvlinesize);
3484 for(i=20; i<20+4; i++){
3485 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3486 uint8_t * const ptr= dest_cr + block_offset[i];
3487 idct_add(ptr, h->mb + i*16, uvlinesize);
3491 for(i=16; i<16+4; i++){
3492 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3493 uint8_t * const ptr= dest_cb + block_offset[i];
3494 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3497 for(i=20; i<20+4; i++){
3498 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3499 uint8_t * const ptr= dest_cr + block_offset[i];
3500 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking: MBAFF filters a whole pair once the bottom MB is done.
3506 if(h->deblocking_filter) {
3507 if (h->mb_aff_frame) {
// Shadows the outer mb_y on purpose: addresses the top MB of the pair.
3508 const int mb_y = s->mb_y - 1;
3509 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3510 const int mb_xy= mb_x + mb_y*s->mb_stride;
3511 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3512 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// tmp snapshots one chroma sample purely for the debug traces below.
3513 uint8_t tmp = s->current_picture.data[1][384];
3514 if (!bottom) return;
3515 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3516 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3517 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3519 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3520 // TODO deblock a pair
3523 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3524 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3525 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3526 if (tmp != s->current_picture.data[1][384]) {
3527 tprintf("modified pixel 8,1 (1)\n");
3531 tprintf("call mbaff filter_mb\n");
3532 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3533 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3534 if (tmp != s->current_picture.data[1][384]) {
3535 tprintf("modified pixel 8,1 (2)\n");
3538 tprintf("call filter_mb\n");
3539 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3540 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3541 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
 * fills the default_ref_list.
// Builds the default (unreordered) reference picture lists.
// For B slices, short-term refs are sorted by POC and split around the
// current POC (past refs for L0, future for L1); for P slices, short-term
// refs keep decoding order. Long-term refs are appended to either list.
3549 static int fill_default_ref_list(H264Context *h){
3550 MpegEncContext * const s = &h->s;
3552 int smallest_poc_greater_than_current = -1;
3553 Picture sorted_short_ref[32];
3555 if(h->slice_type==B_TYPE){
3559 /* sort frame according to poc in B slice */
// Selection sort by ascending POC; also records the first index whose
// POC is >= current POC (the L0/L1 split point).
3560 for(out_i=0; out_i<h->short_ref_count; out_i++){
3562 int best_poc=INT_MAX;
3564 for(i=0; i<h->short_ref_count; i++){
3565 const int poc= h->short_ref[i]->poc;
3566 if(poc > limit && poc < best_poc){
3572 assert(best_i != INT_MIN);
3575 sorted_short_ref[out_i]= *h->short_ref[best_i];
3576 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3577 if (-1 == smallest_poc_greater_than_current) {
3578 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3579 smallest_poc_greater_than_current = out_i;
3585 if(s->picture_structure == PICT_FRAME){
3586 if(h->slice_type==B_TYPE){
3588 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3590 // find the largest poc
// L0 walks backwards (past pictures first), L1 forwards (future first).
3591 for(list=0; list<2; list++){
3594 int step= list ? -1 : 1;
3596 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3597 while(j<0 || j>= h->short_ref_count){
3598 if(j != -99 && step == (list ? -1 : 1))
3601 j= smallest_poc_greater_than_current + (step>>1);
// Only fully-referenced (both fields) pictures qualify.
3603 if(sorted_short_ref[j].reference != 3) continue;
3604 h->default_ref_list[list][index ]= sorted_short_ref[j];
3605 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term refs follow the short-term ones, indexed by long_term idx.
3608 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3609 if(h->long_ref[i] == NULL) continue;
3610 if(h->long_ref[i]->reference != 3) continue;
3612 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3613 h->default_ref_list[ list ][index++].pic_id= i;;
3616 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3617 // swap the two first elements of L1 when
3618 // L0 and L1 are identical
3619 Picture temp= h->default_ref_list[1][0];
3620 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3621 h->default_ref_list[1][1] = temp;
3624 if(index < h->ref_count[ list ])
3625 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P-slice path: short-term refs in decoding order, then long-term.
3629 for(i=0; i<h->short_ref_count; i++){
3630 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3631 h->default_ref_list[0][index ]= *h->short_ref[i];
3632 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3634 for(i = 0; i < 16; i++){
3635 if(h->long_ref[i] == NULL) continue;
3636 if(h->long_ref[i]->reference != 3) continue;
3637 h->default_ref_list[0][index ]= *h->long_ref[i];
3638 h->default_ref_list[0][index++].pic_id= i;;
3640 if(index < h->ref_count[0])
3641 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3644 if(h->slice_type==B_TYPE){
3646 //FIXME second field balh
3650 for (i=0; i<h->ref_count[0]; i++) {
3651 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3653 if(h->slice_type==B_TYPE){
3654 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): the trace below indexes default_ref_list[0] while printing
// List1 — looks like a copy/paste slip in the debug output only.
3655 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3662 static void print_short_term(H264Context *h);
3663 static void print_long_term(H264Context *h);
/**
 * Parses the ref_pic_list_reordering() syntax and applies the explicit
 * reordering commands to ref_list[0]/[1], starting from the default lists.
 * @return 0 on success, -1 on bitstream errors
 */
3665 static int decode_ref_pic_list_reordering(H264Context *h){
3666 MpegEncContext * const s = &h->s;
3669 print_short_term(h);
3671 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3673 for(list=0; list<2; list++){
3674 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3676 if(get_bits1(&s->gb)){
// pred = picNumLXPred, the running predicted picture number (spec 8.2.4.3.1).
3677 int pred= h->curr_pic_num;
3679 for(index=0; ; index++){
3680 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3683 Picture *ref = NULL;
// idc 3 terminates the reordering loop for this list.
3685 if(reordering_of_pic_nums_idc==3)
3688 if(index >= h->ref_count[list]){
3689 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3693 if(reordering_of_pic_nums_idc<3){
3694 if(reordering_of_pic_nums_idc<2){
// idc 0/1: short-term ref addressed by abs_diff_pic_num below/above pred.
3695 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3697 if(abs_diff_pic_num >= h->max_pic_num){
3698 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3702 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3703 else pred+= abs_diff_pic_num;
3704 pred &= h->max_pic_num - 1;
3706 for(i= h->short_ref_count-1; i>=0; i--){
3707 ref = h->short_ref[i];
3708 assert(ref->reference == 3);
3709 assert(!ref->long_ref);
3710 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3714 ref->pic_id= ref->frame_num;
// idc 2: long-term ref addressed by long_term_pic_idx.
// NOTE(review): pic_id is not range-checked against the 16-entry
// long_ref[] here before indexing — potential OOB on a broken stream; verify.
3716 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3717 ref = h->long_ref[pic_id];
3718 ref->pic_id= pic_id;
3719 assert(ref->reference == 3);
3720 assert(ref->long_ref);
3725 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3726 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the remaining list entries down and insert ref at `index`.
3728 for(i=index; i+1<h->ref_count[list]; i++){
3729 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3732 for(; i > index; i--){
3733 h->ref_list[list][i]= h->ref_list[list][i-1];
3735 h->ref_list[list][index]= *ref;
3738 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3744 if(h->slice_type!=B_TYPE) break;
// Fill any still-empty entries with the current picture (error concealment).
3746 for(list=0; list<2; list++){
3747 for(index= 0; index < h->ref_count[list]; index++){
3748 if(!h->ref_list[list][index].data[0])
3749 h->ref_list[list][index]= s->current_picture;
3751 if(h->slice_type!=B_TYPE) break;
3754 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3755 direct_dist_scale_factor(h);
3756 direct_ref_list_init(h);
/**
 * Parses pred_weight_table(): explicit weighted-prediction weights and
 * offsets for luma and chroma, per list and per reference index.
 * Sets use_weight / use_weight_chroma when any non-default value appears.
 */
3760 static int pred_weight_table(H264Context *h){
3761 MpegEncContext * const s = &h->s;
3763 int luma_def, chroma_def;
3766 h->use_weight_chroma= 0;
3767 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3768 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 in fixed point, i.e. 1 << denom.
3769 luma_def = 1<<h->luma_log2_weight_denom;
3770 chroma_def = 1<<h->chroma_log2_weight_denom;
3772 for(list=0; list<2; list++){
3773 for(i=0; i<h->ref_count[list]; i++){
3774 int luma_weight_flag, chroma_weight_flag;
3776 luma_weight_flag= get_bits1(&s->gb);
3777 if(luma_weight_flag){
3778 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3779 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3780 if( h->luma_weight[list][i] != luma_def
3781 || h->luma_offset[list][i] != 0)
3784 h->luma_weight[list][i]= luma_def;
3785 h->luma_offset[list][i]= 0;
3788 chroma_weight_flag= get_bits1(&s->gb);
3789 if(chroma_weight_flag){
3792 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3793 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3794 if( h->chroma_weight[list][i][j] != chroma_def
3795 || h->chroma_offset[list][i][j] != 0)
3796 h->use_weight_chroma= 1;
3801 h->chroma_weight[list][i][j]= chroma_def;
3802 h->chroma_offset[list][i][j]= 0;
// P slices only have list 0; stop before parsing list 1.
3806 if(h->slice_type != B_TYPE) break;
3808 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Computes implicit weighted-prediction weights for B slices from the
 * POC distances between the current picture and each (ref0, ref1) pair
 * (spec 8.4.2.3.2). Out-of-range scale factors fall back to 32/32.
 */
3812 static void implicit_weight_table(H264Context *h){
3813 MpegEncContext * const s = &h->s;
3815 int cur_poc = s->current_picture_ptr->poc;
// Degenerate case: single symmetric ref pair -> no weighting needed.
3817 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3818 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3820 h->use_weight_chroma= 0;
3825 h->use_weight_chroma= 2;
3826 h->luma_log2_weight_denom= 5;
3827 h->chroma_log2_weight_denom= 5;
3830 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3831 int poc0 = h->ref_list[0][ref0].poc;
3832 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3833 int poc1 = h->ref_list[1][ref1].poc;
// td/tb: clipped temporal distances; tx is the fixed-point reciprocal.
3834 int td = clip(poc1 - poc0, -128, 127);
3836 int tb = clip(cur_poc - poc0, -128, 127);
3837 int tx = (16384 + (ABS(td) >> 1)) / td;
3838 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3839 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3840 h->implicit_weight[ref0][ref1] = 32;
3842 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3844 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops the reference mark from a picture unless it is still queued
 * for delayed output (then the output path keeps it alive).
 */
3849 static inline void unreference_pic(H264Context *h, Picture *pic){
3852 if(pic == h->delayed_output_pic)
3855 for(i = 0; h->delayed_pic[i]; i++)
3856 if(pic == h->delayed_pic[i]){
 * instantaneous decoder refresh.
// Clears the entire DPB reference state: all long-term and short-term
// references are unreferenced and their slots reset, as required at IDR.
3866 static void idr(H264Context *h){
3869 for(i=0; i<16; i++){
3870 if (h->long_ref[i] != NULL) {
3871 unreference_pic(h, h->long_ref[i]);
3872 h->long_ref[i]= NULL;
3875 h->long_ref_count=0;
3877 for(i=0; i<h->short_ref_count; i++){
3878 unreference_pic(h, h->short_ref[i]);
3879 h->short_ref[i]= NULL;
3881 h->short_ref_count=0;
/* forget old pics after a seek */
// AVCodec flush callback: drops the delayed-output queue and clears the
// reference flag on the in-flight picture so decoding restarts cleanly.
3885 static void flush_dpb(AVCodecContext *avctx){
3886 H264Context *h= avctx->priv_data;
3889 h->delayed_pic[i]= NULL;
3890 h->delayed_output_pic= NULL;
3892 if(h->s.current_picture_ptr)
3893 h->s.current_picture_ptr->reference= 0;
 * @return the removed picture or NULL if an error occurs
// Removes the short-term reference with the given frame_num from
// short_ref[], compacting the array; the caller unreferences it.
3900 static Picture * remove_short(H264Context *h, int frame_num){
3901 MpegEncContext * const s = &h->s;
3904 if(s->avctx->debug&FF_DEBUG_MMCO)
3905 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3907 for(i=0; i<h->short_ref_count; i++){
3908 Picture *pic= h->short_ref[i];
3909 if(s->avctx->debug&FF_DEBUG_MMCO)
3910 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3911 if(pic->frame_num == frame_num){
3912 h->short_ref[i]= NULL;
// Close the gap left by the removed entry.
3913 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3914 h->short_ref_count--;
 * @return the removed picture or NULL if an error occurs
// Clears long-term slot i and returns its previous occupant (may be NULL).
3925 static Picture * remove_long(H264Context *h, int i){
3928 pic= h->long_ref[i];
3929 h->long_ref[i]= NULL;
3930 if(pic) h->long_ref_count--;
 * print short term list
// Debug-only dump of short_ref[], gated on FF_DEBUG_MMCO.
3938 static void print_short_term(H264Context *h) {
3940 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3941 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3942 for(i=0; i<h->short_ref_count; i++){
3943 Picture *pic= h->short_ref[i];
3944 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * print long term list
// Debug-only dump of the 16 long_ref[] slots, gated on FF_DEBUG_MMCO.
3952 static void print_long_term(H264Context *h) {
3954 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3955 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3956 for(i = 0; i < 16; i++){
3957 Picture *pic= h->long_ref[i];
3959 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
 * Executes the reference picture marking (memory management control operations).
// Implements spec clause 8.2.5: applies each parsed MMCO to the DPB
// reference lists, then (unless made long-term) pushes the current
// picture onto the front of the short-term list.
3968 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3969 MpegEncContext * const s = &h->s;
3971 int current_is_long=0;
3974 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3975 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3977 for(i=0; i<mmco_count; i++){
3978 if(s->avctx->debug&FF_DEBUG_MMCO)
3979 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3981 switch(mmco[i].opcode){
// MMCO 1: mark a short-term picture as unused for reference.
3982 case MMCO_SHORT2UNUSED:
3983 pic= remove_short(h, mmco[i].short_frame_num);
3985 unreference_pic(h, pic);
3986 else if(s->avctx->debug&FF_DEBUG_MMCO)
3987 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// MMCO 3: move a short-term picture into a long-term slot
// (evicting any previous occupant of that slot first).
3989 case MMCO_SHORT2LONG:
3990 pic= remove_long(h, mmco[i].long_index);
3991 if(pic) unreference_pic(h, pic);
3993 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3994 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3995 h->long_ref_count++;
// MMCO 2: mark a long-term picture as unused for reference.
3997 case MMCO_LONG2UNUSED:
3998 pic= remove_long(h, mmco[i].long_index);
4000 unreference_pic(h, pic);
4001 else if(s->avctx->debug&FF_DEBUG_MMCO)
4002 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// MMCO 6: make the current picture long-term in the given slot.
4005 pic= remove_long(h, mmco[i].long_index);
4006 if(pic) unreference_pic(h, pic);
4008 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4009 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4010 h->long_ref_count++;
// MMCO 4: shrink the maximum long-term frame index.
4014 case MMCO_SET_MAX_LONG:
4015 assert(mmco[i].long_index <= 16);
4016 // just remove the long term which index is greater than new max
4017 for(j = mmco[i].long_index; j<16; j++){
4018 pic = remove_long(h, j);
4019 if (pic) unreference_pic(h, pic);
// MMCO 5: reset — flush all short-term and long-term references.
4023 while(h->short_ref_count){
4024 pic= remove_short(h, h->short_ref[0]->frame_num);
4025 unreference_pic(h, pic);
4027 for(j = 0; j < 16; j++) {
4028 pic= remove_long(h, j);
4029 if(pic) unreference_pic(h, pic);
// Default: current picture becomes the newest short-term reference.
4036 if(!current_is_long){
// Guard against a duplicate frame_num already sitting in the list.
4037 pic= remove_short(h, s->current_picture_ptr->frame_num);
4039 unreference_pic(h, pic);
4040 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4043 if(h->short_ref_count)
4044 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4046 h->short_ref[0]= s->current_picture_ptr;
4047 h->short_ref[0]->long_ref=0;
4048 h->short_ref_count++;
4051 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get the implicit full-reset semantics; otherwise either
 * the explicit adaptive MMCO list is read or a sliding-window removal
 * of the oldest short-term reference is synthesized.
 */
4056 static int decode_ref_pic_marking(H264Context *h){
4057 MpegEncContext * const s = &h->s;
4060 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// broken_link: get_bits1 - 1 maps the flag to 0 / -1.
4061 s->broken_link= get_bits1(&s->gb) -1;
4062 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4063 if(h->mmco[0].long_index == -1)
4066 h->mmco[0].opcode= MMCO_LONG;
4070 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4071 for(i= 0; i<MAX_MMCO_COUNT; i++) {
// NOTE(review): stray double semicolon at the end of the next line.
4072 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4074 h->mmco[i].opcode= opcode;
4075 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped.
4076 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4077 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4078 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4082 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4083 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4084 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4085 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4090 if(opcode > MMCO_LONG){
4091 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4094 if(opcode == MMCO_END)
// Sliding window: when the DPB is full, drop the oldest short-term ref.
4099 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4101 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4102 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4103 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Derives the picture order count for the current picture per spec
 * clause 8.2.1, covering all three poc_type modes (0: explicit lsb/msb,
 * 1: cyclic offsets from the SPS, 2: derived from frame_num).
 */
4113 static int init_poc(H264Context *h){
4114 MpegEncContext * const s = &h->s;
4115 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4118 if(h->nal_unit_type == NAL_IDR_SLICE){
4119 h->frame_num_offset= 0;
// frame_num wrapped -> bump the offset by one full cycle.
4121 if(h->frame_num < h->prev_frame_num)
4122 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4124 h->frame_num_offset= h->prev_frame_num_offset;
// Type 0: poc_msb inferred from the lsb wrap direction (spec 8.2.1.1).
4127 if(h->sps.poc_type==0){
4128 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4130 if(h->nal_unit_type == NAL_IDR_SLICE){
4135 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4136 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4137 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4138 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4140 h->poc_msb = h->prev_poc_msb;
4141 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4143 field_poc[1] = h->poc_msb + h->poc_lsb;
4144 if(s->picture_structure == PICT_FRAME)
4145 field_poc[1] += h->delta_poc_bottom;
// Type 1: POC built from the SPS offset-for-ref-frame cycle (spec 8.2.1.2).
4146 }else if(h->sps.poc_type==1){
4147 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4150 if(h->sps.poc_cycle_length != 0)
4151 abs_frame_num = h->frame_num_offset + h->frame_num;
4155 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4158 expected_delta_per_poc_cycle = 0;
4159 for(i=0; i < h->sps.poc_cycle_length; i++)
4160 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4162 if(abs_frame_num > 0){
4163 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4164 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4166 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4167 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4168 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4172 if(h->nal_ref_idc == 0)
4173 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4175 field_poc[0] = expectedpoc + h->delta_poc[0];
4176 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4178 if(s->picture_structure == PICT_FRAME)
4179 field_poc[1] += h->delta_poc[1];
// Type 2: POC follows decoding order; non-ref pictures get 2n-1.
4182 if(h->nal_unit_type == NAL_IDR_SLICE){
4185 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4186 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4192 if(s->picture_structure != PICT_BOTTOM_FIELD)
4193 s->current_picture_ptr->field_poc[0]= field_poc[0];
4194 if(s->picture_structure != PICT_TOP_FIELD)
4195 s->current_picture_ptr->field_poc[1]= field_poc[1];
4196 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4197 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4203 * decodes a slice header.
4204 * this will allso call MPV_common_init() and frame_start() as needed
4206 static int decode_slice_header(H264Context *h){
4207 MpegEncContext * const s = &h->s;
4208 int first_mb_in_slice, pps_id;
4209 int num_ref_idx_active_override_flag;
4210 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4212 int default_ref_list_done = 0;
4214 s->current_picture.reference= h->nal_ref_idc != 0;
4215 s->dropable= h->nal_ref_idc == 0;
4217 first_mb_in_slice= get_ue_golomb(&s->gb);
4219 slice_type= get_ue_golomb(&s->gb);
4221 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4226 h->slice_type_fixed=1;
4228 h->slice_type_fixed=0;
4230 slice_type= slice_type_map[ slice_type ];
4231 if (slice_type == I_TYPE
4232 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4233 default_ref_list_done = 1;
4235 h->slice_type= slice_type;
4237 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4239 pps_id= get_ue_golomb(&s->gb);
4241 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4244 h->pps= h->pps_buffer[pps_id];
4245 if(h->pps.slice_group_count == 0){
4246 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4250 h->sps= h->sps_buffer[ h->pps.sps_id ];
4251 if(h->sps.log2_max_frame_num == 0){
4252 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4256 if(h->dequant_coeff_pps != pps_id){
4257 h->dequant_coeff_pps = pps_id;
4258 init_dequant_tables(h);
4261 s->mb_width= h->sps.mb_width;
4262 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4264 h->b_stride= s->mb_width*4 + 1;
4265 h->b8_stride= s->mb_width*2 + 1;
4267 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4268 if(h->sps.frame_mbs_only_flag)
4269 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4271 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4273 if (s->context_initialized
4274 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4278 if (!s->context_initialized) {
4279 if (MPV_common_init(s) < 0)
4282 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4283 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4284 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4287 for(i=0; i<16; i++){
4288 #define T(x) (x>>2) | ((x<<2) & 0xF)
4289 h->zigzag_scan[i] = T(zigzag_scan[i]);
4290 h-> field_scan[i] = T( field_scan[i]);
4293 if(h->sps.transform_bypass){ //FIXME same ugly
4294 h->zigzag_scan_q0 = zigzag_scan;
4295 h->field_scan_q0 = field_scan;
4297 h->zigzag_scan_q0 = h->zigzag_scan;
4298 h->field_scan_q0 = h->field_scan;
4303 s->avctx->width = s->width;
4304 s->avctx->height = s->height;
4305 s->avctx->sample_aspect_ratio= h->sps.sar;
4306 if(!s->avctx->sample_aspect_ratio.den)
4307 s->avctx->sample_aspect_ratio.den = 1;
4309 if(h->sps.timing_info_present_flag){
4310 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4314 if(h->slice_num == 0){
4315 if(frame_start(h) < 0)
4319 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4320 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4322 h->mb_aff_frame = 0;
4323 if(h->sps.frame_mbs_only_flag){
4324 s->picture_structure= PICT_FRAME;
4326 if(get_bits1(&s->gb)) { //field_pic_flag
4327 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4329 s->picture_structure= PICT_FRAME;
4330 first_mb_in_slice <<= h->sps.mb_aff;
4331 h->mb_aff_frame = h->sps.mb_aff;
4335 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4336 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4337 if(s->mb_y >= s->mb_height){
4341 if(s->picture_structure==PICT_FRAME){
4342 h->curr_pic_num= h->frame_num;
4343 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4345 h->curr_pic_num= 2*h->frame_num;
4346 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4349 if(h->nal_unit_type == NAL_IDR_SLICE){
4350 get_ue_golomb(&s->gb); /* idr_pic_id */
4353 if(h->sps.poc_type==0){
4354 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4356 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4357 h->delta_poc_bottom= get_se_golomb(&s->gb);
4361 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4362 h->delta_poc[0]= get_se_golomb(&s->gb);
4364 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4365 h->delta_poc[1]= get_se_golomb(&s->gb);
4370 if(h->pps.redundant_pic_cnt_present){
4371 h->redundant_pic_count= get_ue_golomb(&s->gb);
4374 //set defaults, might be overriden a few line later
4375 h->ref_count[0]= h->pps.ref_count[0];
4376 h->ref_count[1]= h->pps.ref_count[1];
4378 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4379 if(h->slice_type == B_TYPE){
4380 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4382 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4384 if(num_ref_idx_active_override_flag){
4385 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4386 if(h->slice_type==B_TYPE)
4387 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4389 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4390 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4396 if(!default_ref_list_done){
4397 fill_default_ref_list(h);
4400 if(decode_ref_pic_list_reordering(h) < 0)
4403 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4404 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4405 pred_weight_table(h);
4406 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4407 implicit_weight_table(h);
4411 if(s->current_picture.reference)
4412 decode_ref_pic_marking(h);
4414 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4415 h->cabac_init_idc = get_ue_golomb(&s->gb);
4417 h->last_qscale_diff = 0;
4418 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4419 if(s->qscale<0 || s->qscale>51){
4420 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4423 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4424 //FIXME qscale / qp ... stuff
4425 if(h->slice_type == SP_TYPE){
4426 get_bits1(&s->gb); /* sp_for_switch_flag */
4428 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4429 get_se_golomb(&s->gb); /* slice_qs_delta */
4432 h->deblocking_filter = 1;
4433 h->slice_alpha_c0_offset = 0;
4434 h->slice_beta_offset = 0;
4435 if( h->pps.deblocking_filter_parameters_present ) {
4436 h->deblocking_filter= get_ue_golomb(&s->gb);
4437 if(h->deblocking_filter < 2)
4438 h->deblocking_filter^= 1; // 1<->0
4440 if( h->deblocking_filter ) {
4441 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4442 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4445 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4446 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4447 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4448 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4449 h->deblocking_filter= 0;
4452 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4453 slice_group_change_cycle= get_bits(&s->gb, ?);
4458 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4459 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4461 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4463 av_get_pict_type_char(h->slice_type),
4464 pps_id, h->frame_num,
4465 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4466 h->ref_count[0], h->ref_count[1],
4468 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4470 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads a unary level_prefix code: the count of leading zero bits before the
 * terminating 1 bit. NOTE(review): the return statement is elided from this
 * excerpt; 'log' here is the number of bits consumed including the 1 bit.
 */
4480 static inline int get_level_prefix(GetBitContext *gb){
4484 OPEN_READER(re, gb);
4485 UPDATE_CACHE(re, gb);
4486 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB side of the 32-bit cache */
4488 log= 32 - av_log2(buf);
4490 print_bin(buf>>(32-log), log);
4491 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating 1 */
4494 LAST_SKIP_BITS(re, gb, log);
4495 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current MB's
 * sub-partitions: any sub-partition smaller than 8x8, or a direct one when
 * direct_8x8_inference_flag is unset, forbids it. NOTE(review): the loop
 * header and return are elided from this excerpt.
 */
4500 static inline int get_dct8x8_allowed(H264Context *h){
4503 if(!IS_SUB_8X8(h->sub_mb_type[i])
4504 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4511 * decodes a residual block.
4512 * @param n block index
4513 * @param scantable scantable
4514 * @param max_coeff number of coefficients in the block
4515 * @return <0 if an error occurred
/*
 * CAVLC residual decoding: coeff_token (trailing ones + total coeffs),
 * per-coefficient levels, total_zeros, then run_before for placement.
 * NOTE(review): several lines (branch bodies, closing braces) are elided.
 */
4517 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4518 MpegEncContext * const s = &h->s;
4519 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4521 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4523 //FIXME put trailing_onex into the context
/* pick the coeff_token VLC: fixed table for chroma DC, otherwise indexed by
 * the predicted non-zero count of neighbouring blocks */
4525 if(n == CHROMA_DC_BLOCK_INDEX){
4526 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4527 total_coeff= coeff_token>>2;
4529 if(n == LUMA_DC_BLOCK_INDEX){
4530 total_coeff= pred_non_zero_count(h, 0);
4531 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4532 total_coeff= coeff_token>>2;
4534 total_coeff= pred_non_zero_count(h, n);
4535 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4536 total_coeff= coeff_token>>2;
4537 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4541 //FIXME set last_non_zero?
/* low 2 bits of coeff_token hold the trailing-ones count (sign-only coeffs) */
4546 trailing_ones= coeff_token&3;
4547 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4548 assert(total_coeff<=16);
/* trailing ones: magnitude 1, only a sign bit each (1 -> negative) */
4550 for(i=0; i<trailing_ones; i++){
4551 level[i]= 1 - 2*get_bits1(gb);
4555 int level_code, mask;
4556 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4557 int prefix= get_level_prefix(gb);
4559 //first coefficient has suffix_length equal to 0 or 1
4560 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4562 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4564 level_code= (prefix<<suffix_length); //part
4565 }else if(prefix==14){
4567 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4569 level_code= prefix + get_bits(gb, 4); //part
4570 }else if(prefix==15){
4571 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4572 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4574 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* if <3 trailing ones, the first coded level cannot be +/-1, so skip them */
4578 if(trailing_ones < 3) level_code += 2;
/* map even level_code -> positive, odd -> negative (zigzag de-mapping) */
4583 mask= -(level_code&1);
4584 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4587 //remaining coefficients have suffix_length > 0
4588 for(;i<total_coeff;i++) {
4589 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4590 prefix = get_level_prefix(gb);
4592 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4593 }else if(prefix==15){
4594 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4596 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4599 mask= -(level_code&1);
4600 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* grow suffix_length adaptively once levels exceed the per-length limit */
4601 if(level_code > suffix_limit[suffix_length])
/* if all positions are occupied there can be no embedded zeros */
4606 if(total_coeff == max_coeff)
4609 if(n == CHROMA_DC_BLOCK_INDEX)
4610 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4612 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place coefficients from the highest scan position downwards;
 * the qmul==NULL path stores raw levels, otherwise dequantized ones */
4615 coeff_num = zeros_left + total_coeff - 1;
4616 j = scantable[coeff_num];
4618 block[j] = level[0];
4619 for(i=1;i<total_coeff;i++) {
4622 else if(zeros_left < 7){
4623 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4625 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4627 zeros_left -= run_before;
4628 coeff_num -= 1 + run_before;
4629 j= scantable[ coeff_num ];
/* dequantizing variant: (level * qmul + 32) >> 6 */
4634 block[j] = (level[0] * qmul[j] + 32)>>6;
4635 for(i=1;i<total_coeff;i++) {
4638 else if(zeros_left < 7){
4639 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4641 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4643 zeros_left -= run_before;
4644 coeff_num -= 1 + run_before;
4645 j= scantable[ coeff_num ];
4647 block[j]= (level[i] * qmul[j] + 32)>>6;
4652 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4660 * decodes a P_SKIP or B_SKIP macroblock
4662 static void decode_mb_skip(H264Context *h){
4663 MpegEncContext * const s = &h->s;
4664 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* skipped MBs carry no residual: clear all non-zero-count state */
4667 memset(h->non_zero_count[mb_xy], 0, 16);
4668 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
/* MBAFF: the field flag is signalled on the top MB of a pair */
4670 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4671 h->mb_field_decoding_flag= get_bits1(&s->gb);
4673 if(h->mb_field_decoding_flag)
4674 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: direct-predicted motion, zero mvd */
4676 if( h->slice_type == B_TYPE )
4678 // just for fill_caches. pred_direct_motion will set the real mb_type
4679 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4681 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4682 pred_direct_motion(h, &mb_type);
4684 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4685 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
/* P_SKIP: 16x16 list-0 block with the P-skip motion predictor, ref 0 */
4691 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4693 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4694 pred_pskip_motion(h, &mx, &my);
4695 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4696 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4698 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4701 write_back_motion(h, mb_type);
4702 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4703 s->current_picture.qscale_table[mb_xy]= s->qscale;
4704 h->slice_table[ mb_xy ]= h->slice_num;
4705 h->prev_mb_skipped= 1;
4709 * decodes a macroblock
4710 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * CAVLC macroblock layer: skip run, mb_type, (sub-)partition motion or intra
 * prediction modes, CBP, dquant, then residual blocks.
 * NOTE(review): many lines (error returns, else arms, braces) are elided.
 */
4712 static int decode_mb_cavlc(H264Context *h){
4713 MpegEncContext * const s = &h->s;
4714 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4715 int mb_type, partition_count, cbp;
4716 int dct8x8_allowed= h->pps.transform_8x8_mode;
4718 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4720 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4721 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* inter slices: mb_skip_run counts skipped MBs before this one */
4723 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4724 if(s->mb_skip_run==-1)
4725 s->mb_skip_run= get_ue_golomb(&s->gb);
4727 if (s->mb_skip_run--) {
/* MBAFF: field flag read on the top MB of a pair or after a skipped top MB */
4732 if(h->mb_aff_frame){
4733 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4734 h->mb_field_decoding_flag = get_bits1(&s->gb);
4736 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4738 h->prev_mb_skipped= 0;
/* mb_type is remapped through per-slice-type tables; values past the inter
 * range fall through to the intra decoding path */
4740 mb_type= get_ue_golomb(&s->gb);
4741 if(h->slice_type == B_TYPE){
4743 partition_count= b_mb_type_info[mb_type].partition_count;
4744 mb_type= b_mb_type_info[mb_type].type;
4747 goto decode_intra_mb;
4749 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4751 partition_count= p_mb_type_info[mb_type].partition_count;
4752 mb_type= p_mb_type_info[mb_type].type;
4755 goto decode_intra_mb;
4758 assert(h->slice_type == I_TYPE);
4761 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4765 cbp= i_mb_type_info[mb_type].cbp;
4766 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4767 mb_type= i_mb_type_info[mb_type].type;
4770 if(h->mb_field_decoding_flag)
4771 mb_type |= MB_TYPE_INTERLACED;
4773 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: raw 8-bit samples follow, byte-aligned */
4775 if(IS_INTRA_PCM(mb_type)){
4778 // we assume these blocks are very rare so we don't optimize it
4779 align_get_bits(&s->gb);
4781 // The pixels are stored in the same order as levels in h->mb array.
4782 for(y=0; y<16; y++){
4783 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4784 for(x=0; x<16; x++){
4785 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4786 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4790 const int index= 256 + 4*(y&3) + 32*(y>>2);
4792 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4793 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4797 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4799 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4800 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4804 // In deblocking, the quantizer is 0
4805 s->current_picture.qscale_table[mb_xy]= 0;
4806 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4807 // All coeffs are present
4808 memset(h->non_zero_count[mb_xy], 16, 16);
4810 s->current_picture.mb_type[mb_xy]= mb_type;
4814 fill_caches(h, mb_type, 0);
4817 if(IS_INTRA(mb_type)){
4818 // init_top_left_availability(h);
/* intra 4x4: optional 8x8 transform flag, then per-block prediction modes */
4819 if(IS_INTRA4x4(mb_type)){
4822 if(dct8x8_allowed && get_bits1(&s->gb)){
4823 mb_type |= MB_TYPE_8x8DCT;
4827 // fill_intra4x4_pred_table(h);
4828 for(i=0; i<16; i+=di){
4829 const int mode_coded= !get_bits1(&s->gb);
4830 const int predicted_mode= pred_intra_mode(h, i);
/* rem_mode skips the predicted mode, hence the < comparison */
4834 const int rem_mode= get_bits(&s->gb, 3);
4835 if(rem_mode<predicted_mode)
4840 mode= predicted_mode;
4844 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4846 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4848 write_back_intra_pred_mode(h);
4849 if( check_intra4x4_pred_mode(h) < 0)
4852 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4853 if(h->intra16x16_pred_mode < 0)
4856 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4858 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4859 if(h->chroma_pred_mode < 0)
/* 8x8 partitions: decode sub_mb_types, per-sub-block refs, then mvds */
4861 }else if(partition_count==4){
4862 int i, j, sub_partition_count[4], list, ref[2][4];
4864 if(h->slice_type == B_TYPE){
4866 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4867 if(h->sub_mb_type[i] >=13){
4868 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4871 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4872 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4874 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4875 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4876 pred_direct_motion(h, &mb_type);
4878 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4880 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4881 if(h->sub_mb_type[i] >=4){
4882 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4885 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4886 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 block (direct blocks carry none) */
4890 for(list=0; list<2; list++){
4891 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4892 if(ref_count == 0) continue;
4893 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4897 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4898 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4899 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4908 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition, stored into mv_cache with a
 * replication pattern matching the sub-partition shape */
4910 for(list=0; list<2; list++){
4911 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4912 if(ref_count == 0) continue;
4915 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4916 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4917 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4919 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4920 const int sub_mb_type= h->sub_mb_type[i];
4921 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4922 for(j=0; j<sub_partition_count[i]; j++){
4924 const int index= 4*i + block_width*j;
4925 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4926 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4927 mx += get_se_golomb(&s->gb);
4928 my += get_se_golomb(&s->gb);
4929 tprintf("final mv:%d %d\n", mx, my);
4931 if(IS_SUB_8X8(sub_mb_type)){
4932 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4933 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4934 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4935 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4936 }else if(IS_SUB_8X4(sub_mb_type)){
4937 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4938 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4939 }else if(IS_SUB_4X8(sub_mb_type)){
4940 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4941 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4943 assert(IS_SUB_4X4(sub_mb_type));
4944 mv_cache[ 0 ][0]= mx;
4945 mv_cache[ 0 ][1]= my;
4949 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* whole-MB partitions: 16x16, 16x8 or 8x16 */
4955 }else if(IS_DIRECT(mb_type)){
4956 pred_direct_motion(h, &mb_type);
4957 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4959 int list, mx, my, i;
4960 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4961 if(IS_16X16(mb_type)){
4962 for(list=0; list<2; list++){
4963 if(h->ref_count[list]>0){
4964 if(IS_DIR(mb_type, 0, list)){
4965 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4966 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4968 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4971 for(list=0; list<2; list++){
4972 if(IS_DIR(mb_type, 0, list)){
4973 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4974 mx += get_se_golomb(&s->gb);
4975 my += get_se_golomb(&s->gb);
4976 tprintf("final mv:%d %d\n", mx, my);
4978 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4980 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4983 else if(IS_16X8(mb_type)){
4984 for(list=0; list<2; list++){
4985 if(h->ref_count[list]>0){
4987 if(IS_DIR(mb_type, i, list)){
4988 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4989 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4991 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4995 for(list=0; list<2; list++){
4997 if(IS_DIR(mb_type, i, list)){
4998 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4999 mx += get_se_golomb(&s->gb);
5000 my += get_se_golomb(&s->gb);
5001 tprintf("final mv:%d %d\n", mx, my);
5003 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5005 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5009 assert(IS_8X16(mb_type));
5010 for(list=0; list<2; list++){
5011 if(h->ref_count[list]>0){
5013 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5014 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5015 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5017 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5021 for(list=0; list<2; list++){
5023 if(IS_DIR(mb_type, i, list)){
5024 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5025 mx += get_se_golomb(&s->gb);
5026 my += get_se_golomb(&s->gb);
5027 tprintf("final mv:%d %d\n", mx, my);
5029 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5031 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5037 if(IS_INTER(mb_type))
5038 write_back_motion(h, mb_type);
/* CBP: coded via ue(v) then mapped through intra4x4/inter tables */
5040 if(!IS_INTRA16x16(mb_type)){
5041 cbp= get_ue_golomb(&s->gb);
5043 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5047 if(IS_INTRA4x4(mb_type))
5048 cbp= golomb_to_intra4x4_cbp[cbp];
5050 cbp= golomb_to_inter_cbp[cbp];
/* inter MBs with luma residual may still select the 8x8 transform here */
5053 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5054 if(get_bits1(&s->gb))
5055 mb_type |= MB_TYPE_8x8DCT;
5057 s->current_picture.mb_type[mb_xy]= mb_type;
5059 if(cbp || IS_INTRA16x16(mb_type)){
5060 int i8x8, i4x4, chroma_idx;
5061 int chroma_qp, dquant;
5062 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5063 const uint8_t *scan, *dc_scan;
5065 // fill_non_zero_count_cache(h);
5067 if(IS_INTERLACED(mb_type)){
5068 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5069 dc_scan= luma_dc_field_scan;
5071 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5072 dc_scan= luma_dc_zigzag_scan;
5075 dquant= get_se_golomb(&s->gb);
5077 if( dquant > 25 || dquant < -26 ){
5078 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
5082 s->qscale += dquant;
5083 if(((unsigned)s->qscale) > 51){
5084 if(s->qscale<0) s->qscale+= 52;
5085 else s->qscale-= 52;
5088 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
/* Intra16x16: separate DC block, then 15-coeff AC blocks */
5089 if(IS_INTRA16x16(mb_type)){
5090 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5091 return -1; //FIXME continue if partitioned and other return -1 too
5094 assert((cbp&15) == 0 || (cbp&15) == 15);
5097 for(i8x8=0; i8x8<4; i8x8++){
5098 for(i4x4=0; i4x4<4; i4x4++){
5099 const int index= i4x4 + 4*i8x8;
5100 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5106 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-16x16: luma residual per coded 8x8 block, 4x4 or 8x8 transform */
5109 for(i8x8=0; i8x8<4; i8x8++){
5110 if(cbp & (1<<i8x8)){
5111 if(IS_8x8DCT(mb_type)){
5112 DCTELEM *buf = &h->mb[64*i8x8];
5114 for(i4x4=0; i4x4<4; i4x4++){
5115 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5116 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5119 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5120 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5122 for(i4x4=0; i4x4<4; i4x4++){
5123 const int index= i4x4 + 4*i8x8;
5125 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5131 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5132 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (no dequant here, qmul==NULL), then chroma AC per plane */
5138 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5139 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5145 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5146 for(i4x4=0; i4x4<4; i4x4++){
5147 const int index= 16 + 4*chroma_idx + i4x4;
5148 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5154 uint8_t * const nnz= &h->non_zero_count_cache[0];
5155 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5156 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5159 uint8_t * const nnz= &h->non_zero_count_cache[0];
5160 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5161 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5162 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5164 s->current_picture.qscale_table[mb_xy]= s->qscale;
5165 write_back_non_zero_count(h);
/**
 * Decodes mb_field_decoding_flag for an MBAFF macroblock pair.
 * The context increment counts neighbouring pairs (left, above) in the same
 * slice that are field-coded; CABAC states 70..72 are used.
 */
5170 static int decode_cabac_field_decoding_flag(H264Context *h) {
5171 MpegEncContext * const s = &h->s;
5172 const int mb_x = s->mb_x;
/* address of the top MB of the current pair */
5173 const int mb_y = s->mb_y & ~1;
5174 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5175 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5177 unsigned int ctx = 0;
5179 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5182 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5186 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC, relative to ctx_base.
 * Returns 0 for I4x4, 25 for I_PCM, otherwise an I16x16 type built from the
 * cbp_luma, cbp_chroma and prediction-mode bins. For intra slices the
 * first bin's context depends on the left/top neighbours.
 */
5189 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5190 uint8_t *state= &h->cabac_state[ctx_base];
5194 MpegEncContext * const s = &h->s;
5195 const int mba_xy = h->left_mb_xy[0];
5196 const int mbb_xy = h->top_mb_xy;
/* ctx counts same-slice neighbours that are NOT I4x4 */
5198 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5200 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5202 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5203 return 0; /* I4x4 */
5206 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5207 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM from I16x16 */
5210 if( get_cabac_terminate( &h->cabac ) )
5211 return 25; /* PCM */
5213 mb_type = 1; /* I16x16 */
5214 if( get_cabac( &h->cabac, &state[1] ) )
5215 mb_type += 12; /* cbp_luma != 0 */
5217 if( get_cabac( &h->cabac, &state[2] ) ) {
5218 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5219 mb_type += 4 * 2; /* cbp_chroma == 2 */
5221 mb_type += 4 * 1; /* cbp_chroma == 1 */
/* final two bins select the intra16x16 prediction mode (0..3) */
5223 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5225 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * I: delegates to decode_cabac_intra_mb_type; P: a small binary tree over
 * states 14..17 with intra types offset by 5; B: a prefix tree over
 * states 27..32 with intra types offset by 23.
 */
5230 static int decode_cabac_mb_type( H264Context *h ) {
5231 MpegEncContext * const s = &h->s;
5233 if( h->slice_type == I_TYPE ) {
5234 return decode_cabac_intra_mb_type(h, 3, 1);
5235 } else if( h->slice_type == P_TYPE ) {
/* first bin: inter (0) vs intra (1) */
5236 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5238 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5239 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5240 return 0; /* P_L0_D16x16; */
5242 return 3; /* P_8x8; */
5244 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5245 return 2; /* P_L0_D8x16; */
5247 return 1; /* P_L0_D16x8; */
/* intra types in a P slice start at value 5 */
5250 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5252 } else if( h->slice_type == B_TYPE ) {
5253 const int mba_xy = h->left_mb_xy[0];
5254 const int mbb_xy = h->top_mb_xy;
/* ctx counts same-slice neighbours that are neither skip nor direct */
5258 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5259 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5261 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5262 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5265 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5266 return 0; /* B_Direct_16x16 */
5268 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5269 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining B partition types */
5272 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5273 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5274 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5275 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5277 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5278 else if( bits == 13 ) {
/* intra types in a B slice start at value 23 */
5279 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5280 } else if( bits == 14 )
5281 return 11; /* B_L1_L0_8x16 */
5282 else if( bits == 15 )
5283 return 22; /* B_8x8 */
5285 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5286 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5288 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * The context increment counts same-slice left/top neighbours that are not
 * skipped; P/SP slices use states 11..13, B slices states 24..26.
 */
5293 static int decode_cabac_mb_skip( H264Context *h) {
5294 MpegEncContext * const s = &h->s;
5295 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5296 const int mba_xy = mb_xy - 1;
5297 const int mbb_xy = mb_xy - s->mb_stride;
5300 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5302 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5305 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5306 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5308 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 * First bin (state 68) signals "use the predicted mode"; otherwise a 3-bit
 * fixed-length rem_mode is read (state 69), skipping the predicted mode.
 * NOTE(review): the return statements are elided from this excerpt.
 */
5311 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5314 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5317 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5318 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5319 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* coded modes >= pred_mode are shifted up by one to skip the predicted one */
5321 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC as a truncated unary code:
 * first bin context (states 64..66) counts neighbours with a non-zero mode,
 * remaining bins use state 67 (64+3).
 */
5327 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5328 const int mba_xy = h->left_mb_xy[0];
5329 const int mbb_xy = h->top_mb_xy;
5333 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5334 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5337 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5340 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5343 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5345 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Map from 4x4 luma block decoding order (0..15) to the block's x/y position
 * (in 4x4 units) within the macroblock; block_idx_xy is the inverse map
 * (x,y) -> decoding index. The body of block_idx_xy is on elided lines. */
5351 static const uint8_t block_idx_x[16] = {
5352 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5354 static const uint8_t block_idx_y[16] = {
5355 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5357 static const uint8_t block_idx_xy[4][4] = {
/* Decode the 4-bit luma coded_block_pattern, one bin per 8x8 block.
 * For each 8x8 block the context (states 73..) depends on whether the
 * neighbouring 8x8 blocks to the left/top were coded; neighbours may lie in
 * the left/top macroblock (cbp_a/cbp_b) or inside the current one (branches
 * partially elided in this extract). */
5364 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5365 MpegEncContext * const s = &h->s;
5370 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* x/y position (in 4x4 units) of this 8x8 block's top-left 4x4 block */
5376 x = block_idx_x[4*i8x8];
5377 y = block_idx_y[4*i8x8];
5381 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5382 cbp_a = h->left_cbp;
5383 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5388 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5390 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5393 /* No need to test for skip as we put 0 for skip block */
5394 /* No need to test for IPCM as we put 1 for IPCM block */
/* derive context from coded status of left/top neighbouring 8x8 blocks */
5396 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5397 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5402 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5403 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5407 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC).
 * Contexts (states 77..) are derived from the neighbours' chroma cbp stored
 * in bits 4-5 of left_cbp/top_cbp. First bin: any chroma coeffs at all;
 * second bin (contexts offset by 4, on an elided line): DC-only vs DC+AC. */
5413 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5417 cbp_a = (h->left_cbp>>4)&0x03;
5418 cbp_b = (h-> top_cbp>>4)&0x03;
5421 if( cbp_a > 0 ) ctx++;
5422 if( cbp_b > 0 ) ctx += 2;
5423 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: neighbour had DC+AC (==2) strengthens the context */
5427 if( cbp_a == 2 ) ctx++;
5428 if( cbp_b == 2 ) ctx += 2;
5429 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta as a signed value (unary binarization, states 60..).
 * Context depends on whether the previously decoded MB changed qp and had
 * coefficients. Bails out once val exceeds 52 to avoid an unbounded loop on
 * corrupt streams (the error-return path is on an elided line).
 * Odd values map to positive deltas, even values to negative ones. */
5431 static int decode_cabac_mb_dqp( H264Context *h) {
5432 MpegEncContext * const s = &h->s;
/* mbn_xy: the previous MB in decoding order (may wrap to the prior row) */
5438 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5440 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5442 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5445 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5451 if(val > 52) //prevent infinite loop
5458 return -(val + 1)/2;
/* Decode sub_mb_type for a P macroblock (binary tree over states 21..23);
 * result indexes p_sub_mb_type_info. The return statements for each leaf of
 * the tree are on lines elided from this extract. */
5460 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5461 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5463 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5465 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode sub_mb_type for a B macroblock (prefix tree over states 36..39);
 * result indexes b_sub_mb_type_info. Some intermediate branches and the
 * final return are on elided lines. */
5469 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5471 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5472 return 0; /* B_Direct_8x8 */
5473 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5474 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5476 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5477 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5478 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: two more suffix bins folded into 'type' */
5481 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5482 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; context (states 399..) is the number of
 * neighbouring MBs already using the 8x8 transform. */
5486 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5487 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode a reference index (ref_idx_lX) for block n of the given list.
 * Unary binarization over states 54..; the initial context is built from
 * the left/top neighbours' ref indices (context build partially elided).
 * In B slices, neighbours coded as direct do not contribute. */
5490 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5491 int refa = h->ref_cache[list][scan8[n] - 1];
5492 int refb = h->ref_cache[list][scan8[n] - 8];
5496 if( h->slice_type == B_TYPE) {
/* ignore neighbours that used direct prediction */
5497 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5499 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5508 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decode one motion-vector-difference component (l==0: x, l==1: y) for
 * block n. Context base is 40 for x, 47 for y; the initial context depends
 * on the neighbours' |mvd| sum (thresholds partially elided). Binarization:
 * unary prefix up to 9, then an exp-Golomb-style bypass suffix, then a
 * bypass sign bit. Returns the signed mvd. */
5518 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5519 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5520 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5521 int ctxbase = (l == 0) ? 40 : 47;
5526 else if( amvd > 32 )
5531 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary part, contexts advance with magnitude */
5536 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-Golomb suffix in bypass mode */
5544 while( get_cabac_bypass( &h->cabac ) ) {
5549 if( get_cabac_bypass( &h->cabac ) )
5553 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Build the context index for the coded_block_flag of block idx in
 * category cat (0: luma DC, 1/2: luma AC/4x4, 3: chroma DC, 4: chroma AC).
 * nza/nzb are the left/top neighbours' "has coefficients" status; they are
 * combined into ctx (combination lines elided) and offset by 4 per cat. */
5557 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* cat 0 (luma DC): neighbour status lives in bit 8 of the stored cbp */
5562 nza = h->left_cbp&0x100;
5563 nzb = h-> top_cbp&0x100;
5564 } else if( cat == 1 || cat == 2 ) {
5565 nza = h->non_zero_count_cache[scan8[idx] - 1];
5566 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5567 } else if( cat == 3 ) {
/* chroma DC: per-component flags in bits 6/7 of the stored cbp */
5568 nza = (h->left_cbp>>(6+idx))&0x01;
5569 nzb = (h-> top_cbp>>(6+idx))&0x01;
5572 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5573 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5582 return ctx + 4 * cat;
/* Decode one residual block with CABAC: coded_block_flag, significance map,
 * then coefficient levels and signs (in reverse scan order), optionally
 * dequantizing with qmul (NULL for DC blocks, which are dequantized later).
 * cat selects the context sets (see table below); updates the
 * non_zero_count_cache / cbp_table bookkeeping for the decoded block.
 * Several statements (returns, some else-branches) are on elided lines. */
5585 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5586 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* per-field/frame and per-category bases into h->cabac_state */
5587 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5588 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5589 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5590 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5591 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* 8x8 blocks share significance contexts between positions per these maps */
5592 static const int significant_coeff_flag_offset_8x8[63] = {
5593 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5594 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5595 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5596 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5598 static const int last_coeff_flag_offset_8x8[63] = {
5599 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5600 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5601 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5602 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5608 int coeff_count = 0;
5611 int abslevelgt1 = 0;
5613 uint8_t *significant_coeff_ctx_base;
5614 uint8_t *last_coeff_ctx_base;
5615 uint8_t *abs_level_m1_ctx_base;
5617 /* cat: 0-> DC 16x16 n = 0
5618 * 1-> AC 16x16 n = luma4x4idx
5619 * 2-> Luma4x4 n = luma4x4idx
5620 * 3-> DC Chroma n = iCbCr
5621 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5622 * 5-> Luma8x8 n = 4 * luma8x8idx
5625 /* read coded block flag */
5627 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: record zero count and bail (return elided) */
5628 if( cat == 1 || cat == 2 )
5629 h->non_zero_count_cache[scan8[n]] = 0;
5631 h->non_zero_count_cache[scan8[16+n]] = 0;
5637 significant_coeff_ctx_base = h->cabac_state
5638 + significant_coeff_flag_offset[cat]
5639 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5640 last_coeff_ctx_base = h->cabac_state
5641 + last_significant_coeff_flag_offset[cat]
5642 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5643 abs_level_m1_ctx_base = h->cabac_state
5644 + coeff_abs_level_m1_offset[cat];
/* significance map: one significant/last flag pair per scan position */
5647 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5648 for(last= 0; last < coefs; last++) { \
5649 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5650 if( get_cabac( &h->cabac, sig_ctx )) { \
5651 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5652 index[coeff_count++] = last; \
5653 if( get_cabac( &h->cabac, last_ctx ) ) { \
5659 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5660 last_coeff_flag_offset_8x8[last] );
5662 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5664 if( last == max_coeff -1 ) {
5665 index[coeff_count++] = last;
5667 assert(coeff_count > 0);
/* record which block had coefficients (per category) */
5670 h->cbp_table[mb_xy] |= 0x100;
5671 else if( cat == 1 || cat == 2 )
5672 h->non_zero_count_cache[scan8[n]] = coeff_count;
5674 h->cbp_table[mb_xy] |= 0x40 << n;
5676 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5679 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* levels and signs, decoded in reverse scan order */
5682 for( i = coeff_count - 1; i >= 0; i-- ) {
5683 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5684 int j= scantable[index[i]];
5686 if( get_cabac( &h->cabac, ctx ) == 0 ) {
/* |level| == 1: only the bypass sign bit follows */
5688 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5691 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5692 else block[j] = ( qmul[j] + 32) >> 6;
/* |level| > 1: unary part, then exp-Golomb bypass escape at 15 */
5698 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5699 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5703 if( coeff_abs >= 15 ) {
5705 while( get_cabac_bypass( &h->cabac ) ) {
5706 coeff_abs += 1 << j;
5711 if( get_cabac_bypass( &h->cabac ) )
5712 coeff_abs += 1 << j ;
5717 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5718 else block[j] = coeff_abs;
5720 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5721 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Compute h->top_mb_xy / h->left_mb_xy[0] for the current macroblock.
 * The default frame-coded neighbours are mb_xy-stride and mb_xy-1; in an
 * MBAFF frame they are adjusted when the current MB pair and its neighbour
 * pairs differ in field/frame coding (some adjustment branches elided). */
5730 void inline compute_mb_neighboors(H264Context *h)
5732 MpegEncContext * const s = &h->s;
5733 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5734 h->top_mb_xy = mb_xy - s->mb_stride;
5735 h->left_mb_xy[0] = mb_xy - 1;
5736 if(h->mb_aff_frame){
/* pair_xy: address of the top MB of the current MB pair */
5737 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5738 const int top_pair_xy = pair_xy - s->mb_stride;
5739 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5740 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5741 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5742 const int bottom = (s->mb_y & 1);
5744 ? !curr_mb_frame_flag // bottom macroblock
5745 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5747 h->top_mb_xy -= s->mb_stride;
5749 if (left_mb_frame_flag != curr_mb_frame_flag) {
5750 h->left_mb_xy[0] = pair_xy - 1;
/* Main CABAC macroblock decode: skip flag, MBAFF field flag, mb_type,
 * then either intra prediction modes, sub-macroblock partitions, direct
 * prediction, or whole-partition motion (ref idx + mvd), followed by cbp,
 * mb_qp_delta and all residual blocks. Many statements (returns, else
 * branches, variable declarations) are on lines elided from this extract. */
5757 * decodes a macroblock
5758 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5760 static int decode_mb_cabac(H264Context *h) {
5761 MpegEncContext * const s = &h->s;
5762 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5763 int mb_type, partition_count, cbp = 0;
5764 int dct8x8_allowed= h->pps.transform_8x8_mode;
5766 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5768 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5769 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5770 /* read skip flags */
5771 if( decode_cabac_mb_skip( h ) ) {
/* skipped MB: clear per-MB decode state and return (return elided) */
5774 h->cbp_table[mb_xy] = 0;
5775 h->chroma_pred_mode_table[mb_xy] = 0;
5776 h->last_qscale_diff = 0;
/* MBAFF: field flag is decoded only on the top MB of a pair (or after skip) */
5782 if(h->mb_aff_frame){
5783 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5784 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5786 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5788 h->prev_mb_skipped = 0;
5790 compute_mb_neighboors(h);
5791 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5792 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the raw mb_type index via the per-slice-type info tables */
5796 if( h->slice_type == B_TYPE ) {
5798 partition_count= b_mb_type_info[mb_type].partition_count;
5799 mb_type= b_mb_type_info[mb_type].type;
5802 goto decode_intra_mb;
5804 } else if( h->slice_type == P_TYPE ) {
5806 partition_count= p_mb_type_info[mb_type].partition_count;
5807 mb_type= p_mb_type_info[mb_type].type;
5810 goto decode_intra_mb;
5813 assert(h->slice_type == I_TYPE);
5815 partition_count = 0;
5816 cbp= i_mb_type_info[mb_type].cbp;
5817 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5818 mb_type= i_mb_type_info[mb_type].type;
5820 if(h->mb_field_decoding_flag)
5821 mb_type |= MB_TYPE_INTERLACED;
5823 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: raw samples follow; copy them and restart the CABAC engine after */
5825 if(IS_INTRA_PCM(mb_type)) {
5829 // We assume these blocks are very rare so we dont optimize it.
5830 // FIXME The two following lines get the bitstream position in the cabac
5831 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5832 ptr= h->cabac.bytestream;
5833 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5835 // The pixels are stored in the same order as levels in h->mb array.
5836 for(y=0; y<16; y++){
5837 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5838 for(x=0; x<16; x++){
5839 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5840 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5844 const int index= 256 + 4*(y&3) + 32*(y>>2);
5846 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5847 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5851 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5853 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5854 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5858 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5860 // All blocks are present
5861 h->cbp_table[mb_xy] = 0x1ef;
5862 h->chroma_pred_mode_table[mb_xy] = 0;
5863 // In deblocking, the quantizer is 0
5864 s->current_picture.qscale_table[mb_xy]= 0;
5865 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5866 // All coeffs are present
5867 memset(h->non_zero_count[mb_xy], 16, 16);
5868 s->current_picture.mb_type[mb_xy]= mb_type;
5872 fill_caches(h, mb_type, 0);
/* ---- intra macroblocks: decode prediction modes ---- */
5874 if( IS_INTRA( mb_type ) ) {
5876 if( IS_INTRA4x4( mb_type ) ) {
5877 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5878 mb_type |= MB_TYPE_8x8DCT;
5879 for( i = 0; i < 16; i+=4 ) {
5880 int pred = pred_intra_mode( h, i );
5881 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5882 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5885 for( i = 0; i < 16; i++ ) {
5886 int pred = pred_intra_mode( h, i );
5887 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5889 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5892 write_back_intra_pred_mode(h);
5893 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5895 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5896 if( h->intra16x16_pred_mode < 0 ) return -1;
5898 h->chroma_pred_mode_table[mb_xy] =
5899 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5901 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5902 if( h->chroma_pred_mode < 0 ) return -1;
/* ---- 8x8 sub-partitioned inter macroblocks ---- */
5903 } else if( partition_count == 4 ) {
5904 int i, j, sub_partition_count[4], list, ref[2][4];
5906 if( h->slice_type == B_TYPE ) {
5907 for( i = 0; i < 4; i++ ) {
5908 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5909 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5910 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5912 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5913 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5914 pred_direct_motion(h, &mb_type);
5915 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5916 for( i = 0; i < 4; i++ )
5917 if( IS_DIRECT(h->sub_mb_type[i]) )
5918 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5922 for( i = 0; i < 4; i++ ) {
5923 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5924 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5925 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 partition */
5929 for( list = 0; list < 2; list++ ) {
5930 if( h->ref_count[list] > 0 ) {
5931 for( i = 0; i < 4; i++ ) {
5932 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5933 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5934 if( h->ref_count[list] > 1 )
5935 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5941 h->ref_cache[list][ scan8[4*i]+1 ]=
5942 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5948 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors for each sub-partition of each 8x8 block */
5950 for(list=0; list<2; list++){
5952 if(IS_DIRECT(h->sub_mb_type[i])){
5953 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5956 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5958 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5959 const int sub_mb_type= h->sub_mb_type[i];
5960 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5961 for(j=0; j<sub_partition_count[i]; j++){
5964 const int index= 4*i + block_width*j;
5965 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5966 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5967 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5969 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5970 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5971 tprintf("final mv:%d %d\n", mx, my);
/* replicate the mv/mvd into every 4x4 cell the sub-partition covers */
5973 if(IS_SUB_8X8(sub_mb_type)){
5974 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5975 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5976 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5977 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5979 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5980 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5981 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5982 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5983 }else if(IS_SUB_8X4(sub_mb_type)){
5984 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5985 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5987 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5988 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5989 }else if(IS_SUB_4X8(sub_mb_type)){
5990 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5991 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5993 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5994 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5996 assert(IS_SUB_4X4(sub_mb_type));
5997 mv_cache[ 0 ][0]= mx;
5998 mv_cache[ 0 ][1]= my;
6000 mvd_cache[ 0 ][0]= mx - mpx;
6001 mvd_cache[ 0 ][1]= my - mpy;
6005 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6006 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6007 p[0] = p[1] = p[8] = p[9] = 0;
6008 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- B direct macroblocks ---- */
6012 } else if( IS_DIRECT(mb_type) ) {
6013 pred_direct_motion(h, &mb_type);
6014 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6015 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6016 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter macroblocks ---- */
6018 int list, mx, my, i, mpx, mpy;
6019 if(IS_16X16(mb_type)){
6020 for(list=0; list<2; list++){
6021 if(IS_DIR(mb_type, 0, list)){
6022 if(h->ref_count[list] > 0 ){
6023 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6024 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6027 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6029 for(list=0; list<2; list++){
6030 if(IS_DIR(mb_type, 0, list)){
6031 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6033 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6034 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6035 tprintf("final mv:%d %d\n", mx, my);
6037 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6038 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6040 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6043 else if(IS_16X8(mb_type)){
6044 for(list=0; list<2; list++){
6045 if(h->ref_count[list]>0){
6047 if(IS_DIR(mb_type, i, list)){
6048 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6049 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6051 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6055 for(list=0; list<2; list++){
6057 if(IS_DIR(mb_type, i, list)){
6058 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6059 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6060 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6061 tprintf("final mv:%d %d\n", mx, my);
6063 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6064 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6066 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6067 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6072 assert(IS_8X16(mb_type));
6073 for(list=0; list<2; list++){
6074 if(h->ref_count[list]>0){
6076 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6077 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6078 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6080 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6084 for(list=0; list<2; list++){
6086 if(IS_DIR(mb_type, i, list)){
6087 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6088 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6089 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6091 tprintf("final mv:%d %d\n", mx, my);
6092 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6093 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6095 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6096 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6103 if( IS_INTER( mb_type ) ) {
6104 h->chroma_pred_mode_table[mb_xy] = 0;
6105 write_back_motion( h, mb_type );
/* ---- coded block pattern, transform size, qp delta, residuals ---- */
6108 if( !IS_INTRA16x16( mb_type ) ) {
6109 cbp = decode_cabac_mb_cbp_luma( h );
6110 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6113 h->cbp_table[mb_xy] = cbp;
6115 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6116 if( decode_cabac_mb_transform_size( h ) )
6117 mb_type |= MB_TYPE_8x8DCT;
6119 s->current_picture.mb_type[mb_xy]= mb_type;
6121 if( cbp || IS_INTRA16x16( mb_type ) ) {
6122 const uint8_t *scan, *dc_scan;
6125 if(IS_INTERLACED(mb_type)){
6126 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6127 dc_scan= luma_dc_field_scan;
6129 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6130 dc_scan= luma_dc_zigzag_scan;
6133 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6134 if( dqp == INT_MIN ){
6135 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into 0..51 after adding dqp (add is elided) */
6139 if(((unsigned)s->qscale) > 51){
6140 if(s->qscale<0) s->qscale+= 52;
6141 else s->qscale-= 52;
6143 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6145 if( IS_INTRA16x16( mb_type ) ) {
6147 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6148 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6151 for( i = 0; i < 16; i++ ) {
6152 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6153 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6157 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6161 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6162 if( cbp & (1<<i8x8) ) {
6163 if( IS_8x8DCT(mb_type) ) {
6164 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6165 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6168 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6169 const int index = 4*i8x8 + i4x4;
6170 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6171 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6175 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6176 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6183 for( c = 0; c < 2; c++ ) {
6184 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6185 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6192 for( c = 0; c < 2; c++ ) {
6193 for( i = 0; i < 4; i++ ) {
6194 const int index = 16 + 4 * c + i;
6195 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6196 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6201 uint8_t * const nnz= &h->non_zero_count_cache[0];
6202 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6203 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6206 uint8_t * const nnz= &h->non_zero_count_cache[0];
6207 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6208 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6209 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6212 s->current_picture.qscale_table[mb_xy]= s->qscale;
6213 write_back_non_zero_count(h);
/* Deblock one vertical luma edge (16 pixels tall).
 * bS < 4: delegated to the dsputil h264_h_loop_filter_luma (tc0-clipped
 * normal filtering). bS == 4 (intra edge): C fallback implementing the
 * strong filter of the spec; the alternative-branch structure is partially
 * elided in this extract. */
6219 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6221 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6222 const int alpha = alpha_table[index_a];
6223 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* tc[i] = -1 disables filtering for that 4-pixel segment */
6228 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6229 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6231 /* 16px edge length, because bS=4 is triggered by being at
6232 * the edge of an intra MB, so all 4 bS are the same */
6233 for( d = 0; d < 16; d++ ) {
6234 const int p0 = pix[-1];
6235 const int p1 = pix[-2];
6236 const int p2 = pix[-3];
6238 const int q0 = pix[0];
6239 const int q1 = pix[1];
6240 const int q2 = pix[2];
6242 if( ABS( p0 - q0 ) < alpha &&
6243 ABS( p1 - p0 ) < beta &&
6244 ABS( q1 - q0 ) < beta ) {
/* strong filtering: 3 samples each side when the gradient is small */
6246 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6247 if( ABS( p2 - p0 ) < beta)
6249 const int p3 = pix[-4];
6251 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6252 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6253 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6256 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6258 if( ABS( q2 - q0 ) < beta)
6260 const int q3 = pix[3];
6262 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6263 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6264 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6267 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant: only p0/q0 are replaced */
6271 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6272 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6274 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: build the tc array (offset by +1 as the
 * chroma dsputil filters expect, 0 meaning "skip") and dispatch to the
 * normal or intra chroma loop filter. */
6280 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6282 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6283 const int alpha = alpha_table[index_a];
6284 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6289 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6290 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6292 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock a vertical luma edge between MB pairs in MBAFF mode, one row at a
 * time (the two MBs of a pair may have different qp, hence qp[2] and the
 * per-row qp_index / bS_index selection). bS < 4: normal tc0-clipped
 * filtering; bS == 4: strong intra filtering, mirroring filter_mb_edgev. */
6296 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6298 for( i = 0; i < 16; i++, pix += stride) {
/* select bS entry: rows alternate between fields when field-coded */
6304 int bS_index = (i >> 1);
6305 if (h->mb_field_decoding_flag) {
6307 bS_index |= (i & 1);
6310 if( bS[bS_index] == 0 ) {
/* per-row thresholds from the owning MB's qp */
6314 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6315 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6316 alpha = alpha_table[index_a];
6317 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6320 if( bS[bS_index] < 4 ) {
6321 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6322 /* 4px edge length */
6323 const int p0 = pix[-1];
6324 const int p1 = pix[-2];
6325 const int p2 = pix[-3];
6326 const int q0 = pix[0];
6327 const int q1 = pix[1];
6328 const int q2 = pix[2];
6330 if( ABS( p0 - q0 ) < alpha &&
6331 ABS( p1 - p0 ) < beta &&
6332 ABS( q1 - q0 ) < beta ) {
/* p1/q1 adjustment extends the clipping range tc (tc update elided) */
6336 if( ABS( p2 - p0 ) < beta ) {
6337 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6340 if( ABS( q2 - q0 ) < beta ) {
6341 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6345 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6346 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6347 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6348 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6351 /* 4px edge length */
6352 const int p0 = pix[-1];
6353 const int p1 = pix[-2];
6354 const int p2 = pix[-3];
6356 const int q0 = pix[0];
6357 const int q1 = pix[1];
6358 const int q2 = pix[2];
6360 if( ABS( p0 - q0 ) < alpha &&
6361 ABS( p1 - p0 ) < beta &&
6362 ABS( q1 - q0 ) < beta ) {
/* strong (bS==4) filtering, same formulas as filter_mb_edgev */
6364 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6365 if( ABS( p2 - p0 ) < beta)
6367 const int p3 = pix[-4];
6369 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6370 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6371 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6374 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6376 if( ABS( q2 - q0 ) < beta)
6378 const int q3 = pix[3];
6380 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6381 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6382 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6385 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6389 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6390 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6392 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock a vertical chroma edge between MB pairs in MBAFF mode, one row at
 * a time (8 chroma rows); qp[2] holds the two MBs' qp values. bS < 4:
 * normal clipped filtering; bS == 4: the two-tap intra chroma filter.
 * NOTE(review): the bS_index computation for chroma is on elided lines. */
6397 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6399 for( i = 0; i < 8; i++, pix += stride) {
6407 if( bS[bS_index] == 0 ) {
6411 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6412 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6413 alpha = alpha_table[index_a];
6414 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6415 if( bS[bS_index] < 4 ) {
6416 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6417 /* 2px edge length (because we use same bS than the one for luma) */
6418 const int p0 = pix[-1];
6419 const int p1 = pix[-2];
6420 const int q0 = pix[0];
6421 const int q1 = pix[1];
6423 if( ABS( p0 - q0 ) < alpha &&
6424 ABS( p1 - p0 ) < beta &&
6425 ABS( q1 - q0 ) < beta ) {
6426 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6428 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6429 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6430 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6433 const int p0 = pix[-1];
6434 const int p1 = pix[-2];
6435 const int q0 = pix[0];
6436 const int q1 = pix[1];
6438 if( ABS( p0 - q0 ) < alpha &&
6439 ABS( p1 - p0 ) < beta &&
6440 ABS( q1 - q0 ) < beta ) {
6442 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6443 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6444 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge; identical logic to filter_mb_edgev but
 * samples are addressed across rows (pix_next == stride). bS < 4: dsputil
 * vertical luma filter; bS == 4: C strong-filter fallback. */
6450 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6452 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6453 const int alpha = alpha_table[index_a];
6454 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6455 const int pix_next = stride;
6460 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6461 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6463 /* 16px edge length, see filter_mb_edgev */
6464 for( d = 0; d < 16; d++ ) {
6465 const int p0 = pix[-1*pix_next];
6466 const int p1 = pix[-2*pix_next];
6467 const int p2 = pix[-3*pix_next];
6468 const int q0 = pix[0];
6469 const int q1 = pix[1*pix_next];
6470 const int q2 = pix[2*pix_next];
6472 if( ABS( p0 - q0 ) < alpha &&
6473 ABS( p1 - p0 ) < beta &&
6474 ABS( q1 - q0 ) < beta ) {
6476 const int p3 = pix[-4*pix_next];
6477 const int q3 = pix[ 3*pix_next];
/* strong (3-sample) vs weak (1-sample) filtering, as in edgev */
6479 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6480 if( ABS( p2 - p0 ) < beta) {
6482 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6483 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6484 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6487 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6489 if( ABS( q2 - q0 ) < beta) {
6491 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6492 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6493 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6496 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6500 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6501 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6503 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge.
 * Same thresholds as the luma variant, but chroma uses tc0+1 (per the
 * spec) with 0 meaning "group not filtered", and intra edges go through
 * the dedicated *_intra dsp routine. */
6510 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6512 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6513 const int alpha = alpha_table[index_a];
6514 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc = tc0 + 1; 0 disables filtering for that group */
6519 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6520 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* bS==4 (intra) path: strong chroma filter without tc clipping */
6522 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Apply the in-loop deblocking filter to one macroblock.
 * Computes boundary strengths (bS) for every internal and border edge,
 * derives the edge QP as the average of the two neighbouring MBs' QPs
 * (IPCM blocks make s->qscale unusable, hence qscale_table), and calls the
 * per-edge filter helpers for luma and both chroma planes.
 * The MBAFF left edge and the frame-MB-over-field-MB top edge are handled
 * as special cases. */
6526 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6527 MpegEncContext * const s = &h->s;
6528 const int mb_xy= mb_x + mb_y*s->mb_stride;
6529 int first_vertical_edge_done = 0;
6531 /* FIXME: A given frame may occupy more than one position in
6532 * the reference list. So ref2frm should be populated with
6533 * frame numbers, not indices. */
6534 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6537 // left mb is in picture
6538 && h->slice_table[mb_xy-1] != 255
6539 // and current and left pair do not have the same interlaced type
6540 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6541 // and left mb is in the same slice if deblocking_filter == 2
6542 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6543 /* First vertical edge is different in MBAFF frames
6544 * There are 8 different bS to compute and 2 different Qp
6551 first_vertical_edge_done = 1;
6552 for( i = 0; i < 8; i++ ) {
/* scan cache index of the current 4x4 block and its left neighbour */
6554 int b_idx= 8 + 4 + 8*y;
6555 int bn_idx= b_idx - 1;
/* neighbour MB of the pair depends on the field/frame decoding mode */
6557 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6559 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6560 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6562 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6563 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6564 h->non_zero_count_cache[bn_idx] != 0 ) {
/* no coefficients on either side: bS depends on ref/mv differences
 * (>= 4 means a full-pel or larger MV delta) */
6569 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6570 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6571 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6572 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6579 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6580 // Do not use s->qscale as luma quantizer because it has not the same
6581 // value in IPCM macroblocks.
6582 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6583 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6584 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6585 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6586 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6587 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6590 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6591 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6592 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6593 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6594 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6597 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6598 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbour across the MB border in this direction */
6601 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6602 const int mb_type = s->current_picture.mb_type[mb_xy];
6603 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 marks "not in a slice": skip the border edge */
6604 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* skipped 16x16 pairs have no internal edges to filter */
6606 const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
6607 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6608 // how often to recheck mv-based bS when iterating between edges
6609 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6610 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6611 // how often to recheck mv-based bS when iterating along each edge
6612 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6614 if (first_vertical_edge_done) {
6616 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
6619 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6623 for( edge = start; edge < edges; edge++ ) {
6624 /* mbn_xy: neighbor macroblock */
6625 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6626 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with the 8x8 transform, odd internal edges do not exist */
6630 if( (edge&1) && IS_8x8DCT(mb_type) )
6633 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6634 && !IS_INTERLACED(mb_type)
6635 && IS_INTERLACED(mbn_type)
6637 // This is a special case in the norm where the filtering must
6638 // be done twice (one each of the field) even if we are in a
6639 // frame macroblock.
6641 unsigned int tmp_linesize = 2 * linesize;
6642 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6643 int mbn_xy = mb_xy - 2 * s->mb_stride;
6647 if( IS_INTRA(mb_type) ||
6648 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6649 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6652 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6655 // Do not use s->qscale as luma quantizer because it has not the same
6656 // value in IPCM macroblocks.
6657 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6658 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6659 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6660 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6661 chroma_qp = ( h->chroma_qp +
6662 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6663 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6664 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field of the MB pair above */
6667 mbn_xy += s->mb_stride;
6668 if( IS_INTRA(mb_type) ||
6669 IS_INTRA(mbn_type) ) {
6670 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6673 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6676 // Do not use s->qscale as luma quantizer because it has not the same
6677 // value in IPCM macroblocks.
6678 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6679 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6680 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6681 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6682 chroma_qp = ( h->chroma_qp +
6683 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6684 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6685 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6688 if( IS_INTRA(mb_type) ||
6689 IS_INTRA(mbn_type) ) {
6692 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6693 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6702 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* internal edges that cannot differ in mv/ref need no recheck */
6707 if( edge & mask_edge ) {
6708 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6711 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6712 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6713 int bn_idx= b_idx - (dir ? 8:1);
6715 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6716 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6717 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6718 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6720 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: per-4x4 bS from nnz, then ref/mv differences */
6726 for( i = 0; i < 4; i++ ) {
6727 int x = dir == 0 ? edge : i;
6728 int y = dir == 0 ? i : edge;
6729 int b_idx= 8 + 4 + x + 8*y;
6730 int bn_idx= b_idx - (dir ? 8:1);
6732 if( h->non_zero_count_cache[b_idx] != 0 ||
6733 h->non_zero_count_cache[bn_idx] != 0 ) {
6739 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6740 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6741 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6742 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
/* all four groups unfiltered: skip the edge entirely */
6750 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6755 // Do not use s->qscale as luma quantizer because it has not the same
6756 // value in IPCM macroblocks.
6757 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6758 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6759 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6760 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* luma edges are every 4 pixels, chroma every 2 (even luma edges only) */
6762 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6763 if( (edge&1) == 0 ) {
6764 int chroma_qp = ( h->chroma_qp +
6765 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6766 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6767 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6770 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6771 if( (edge&1) == 0 ) {
6772 int chroma_qp = ( h->chroma_qp +
6773 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6774 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6775 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode all macroblocks of the current slice.
 * Three paths: CABAC, CAVLC, and (below them) a partitioned/legacy loop.
 * Each path decodes MBs, advances s->mb_x/mb_y, draws completed rows and
 * reports the decoded region to the error resilience code via
 * ff_er_add_slice(). Returns 0 on success / end of slice, -1 on error.
 * FIX(review): repaired garbled tokens in the legacy end-of-slice check
 * ("s->?gb" / "s->gb?.size_in_bits"), matching the identical test a few
 * lines above. */
6782 static int decode_slice(H264Context *h){
6783 MpegEncContext * const s = &h->s;
6784 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6788 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6792 align_get_bits( &s->gb );
6795 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6796 ff_init_cabac_decoder( &h->cabac,
6797 s->gb.buffer + get_bits_count(&s->gb)/8,
6798 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6799 /* calculate pre-state */
6800 for( i= 0; i < 460; i++ ) {
6802 if( h->slice_type == I_TYPE )
6803 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6805 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte as used by the CABAC engine */
6808 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6810 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6814 int ret = decode_mb_cabac(h);
6817 if(ret>=0) hl_decode_mb(h);
6819 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6820 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6823 if(ret>=0) ret = decode_mb_cabac(h);
6825 if(ret>=0) hl_decode_mb(h);
6828 eos = get_cabac_terminate( &h->cabac );
/* allow 1 byte of overread: the terminator may consume it */
6830 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6831 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6832 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6836 if( ++s->mb_x >= s->mb_width ) {
6838 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6840 if(h->mb_aff_frame) {
6845 if( eos || s->mb_y >= s->mb_height ) {
6846 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6847 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6854 int ret = decode_mb_cavlc(h);
6856 if(ret>=0) hl_decode_mb(h);
6858 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6860 ret = decode_mb_cavlc(h);
6862 if(ret>=0) hl_decode_mb(h);
6867 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6868 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6873 if(++s->mb_x >= s->mb_width){
6875 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6877 if(h->mb_aff_frame) {
6880 if(s->mb_y >= s->mb_height){
6881 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* landing exactly on the end of the bitstream means a clean slice end */
6883 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6884 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6888 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6895 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6896 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6897 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6898 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6902 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6911 for(;s->mb_y < s->mb_height; s->mb_y++){
6912 for(;s->mb_x < s->mb_width; s->mb_x++){
6913 int ret= decode_mb(h);
6918 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6919 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6924 if(++s->mb_x >= s->mb_width){
6926 if(++s->mb_y >= s->mb_height){
6927 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6928 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6932 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* repaired: was "s->?gb" / "s->gb?.size_in_bits" (corrupted tokens) */
6939 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6940 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6941 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6945 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6952 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6955 return -1; //not reached
/* Parse an SEI "unregistered user data" payload of `size` bytes.
 * The first 16 bytes are the UUID; the text after it is scanned for an
 * x264 version banner so known encoder bugs can be worked around
 * (h->x264_build). Remaining payload bytes are skipped. */
6958 static int decode_unregistered_user_data(H264Context *h, int size){
6959 MpegEncContext * const s = &h->s;
6960 uint8_t user_data[16+256];
/* read at most sizeof(user_data)-1 bytes so the buffer can be terminated */
6966 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6967 user_data[i]= get_bits(&s->gb, 8);
/* skip the 16-byte UUID, then look for the x264 core version number */
6971 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6972 if(e==1 && build>=0)
6973 h->x264_build= build;
6975 if(s->avctx->debug & FF_DEBUG_BUGS)
6976 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes that did not fit in the local buffer */
6979 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: iterate over (payload_type, payload_size)
 * pairs, each coded as a run of 0xFF bytes plus a final byte, dispatch
 * known payload types and skip the rest.
 * FIX(review): the unregistered-user-data error check ended in a stray
 * ';', silently discarding the helper's failure; it now propagates the
 * error as -1 like the rest of this decoder. */
6984 static int decode_sei(H264Context *h){
6985 MpegEncContext * const s = &h->s;
/* +16: need at least one more (type, size) byte pair to continue */
6987 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payload type: sum of 0xFF bytes plus the terminating byte */
6992 type+= show_bits(&s->gb, 8);
6993 }while(get_bits(&s->gb, 8) == 255);
/* payload size, same ff-escaped coding */
6997 size+= show_bits(&s->gb, 8);
6998 }while(get_bits(&s->gb, 8) == 255);
7002 if(decode_unregistered_user_data(h, size) < 0)
    return -1;
/* unknown payload type: skip it wholesale */
7006 skip_bits(&s->gb, 8*size);
7009 //FIXME check bits here
7010 align_get_bits(&s->gb);
/* Parse hrd_parameters() from the VUI (H.264 spec Annex E).
 * All fields are read to keep the bitstream position correct; none of the
 * values are stored in this build. */
7016 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7017 MpegEncContext * const s = &h->s;
7019 cpb_count = get_ue_golomb(&s->gb) + 1;
7020 get_bits(&s->gb, 4); /* bit_rate_scale */
7021 get_bits(&s->gb, 4); /* cpb_size_scale */
7022 for(i=0; i<cpb_count; i++){
7023 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7024 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7025 get_bits1(&s->gb); /* cbr_flag */
7027 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7028 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7029 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7030 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() (H.264 spec Annex E) into the SPS.
 * Only the fields this decoder uses are stored (sample aspect ratio,
 * timing info, bitstream restrictions); the rest are read and discarded
 * to keep the bit position in sync. */
7033 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7034 MpegEncContext * const s = &h->s;
7035 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7036 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7038 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7040 if( aspect_ratio_info_present_flag ) {
7041 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den, otherwise a table lookup */
7042 if( aspect_ratio_idc == EXTENDED_SAR ) {
7043 sps->sar.num= get_bits(&s->gb, 16);
7044 sps->sar.den= get_bits(&s->gb, 16);
7045 }else if(aspect_ratio_idc < 16){
7046 sps->sar= pixel_aspect[aspect_ratio_idc];
7048 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7055 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7057 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7058 get_bits1(&s->gb); /* overscan_appropriate_flag */
7061 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7062 get_bits(&s->gb, 3); /* video_format */
7063 get_bits1(&s->gb); /* video_full_range_flag */
7064 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7065 get_bits(&s->gb, 8); /* colour_primaries */
7066 get_bits(&s->gb, 8); /* transfer_characteristics */
7067 get_bits(&s->gb, 8); /* matrix_coefficients */
7071 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7072 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7073 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7076 sps->timing_info_present_flag = get_bits1(&s->gb);
7077 if(sps->timing_info_present_flag){
7078 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7079 sps->time_scale = get_bits_long(&s->gb, 32);
7080 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear once for NAL and once for VCL streams */
7083 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7084 if(nal_hrd_parameters_present_flag)
7085 decode_hrd_parameters(h, sps);
7086 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7087 if(vcl_hrd_parameters_present_flag)
7088 decode_hrd_parameters(h, sps);
7089 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7090 get_bits1(&s->gb); /* low_delay_hrd_flag */
7091 get_bits1(&s->gb); /* pic_struct_present_flag */
7093 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7094 if(sps->bitstream_restriction_flag){
7095 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7096 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7097 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7098 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7099 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames bounds the frame reordering delay */
7100 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7101 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parse one scaling list of 4x4 (size==16) or 8x8 (size==64) factors.
 * Entries are delta-coded in zigzag order; an absent matrix, or a first
 * delta that yields 0, selects default_list. A 0 after the first entry
 * means "repeat the previous value" for the rest of the list. */
7107 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, const uint8_t *default_list){
7108 MpegEncContext * const s = &h->s;
7109 int i, last = 8, next = 8;
7110 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7111 if(!get_bits1(&s->gb)) /* matrix not written, we use the default one */
7112 memcpy(factors, default_list, size*sizeof(uint8_t));
7114 for(i=0;i<size;i++){
/* delta_scale is signed Golomb; values wrap modulo 256 */
7116 next = (last + get_se_golomb(&s->gb)) & 0xff;
7117 if(!i && !next){ /* matrix not written, we use the default one */
7118 memcpy(factors, default_list, size*sizeof(uint8_t));
7121 last = factors[scan[i]] = next ? next : last;
/* Parse seq/pic scaling matrices (6 4x4 lists, optionally 2 8x8 lists).
 * Fallback rules per the spec: a PPS list that is absent falls back to
 * the SPS list (if the SPS carried matrices) or to the flat defaults;
 * within a run, intra/inter chroma lists fall back to the previous list. */
7125 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7126 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7127 MpegEncContext * const s = &h->s;
7128 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7129 const uint8_t *fallback[4] = {
7130 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7131 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7132 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7133 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7135 if(get_bits1(&s->gb)){
/* remember that the SPS carried matrices, for later PPS fallback */
7136 sps->scaling_matrix_present |= is_sps;
7137 decode_scaling_list(h,scaling_matrix4[0],16,fallback[0]); // Intra, Y
7138 decode_scaling_list(h,scaling_matrix4[1],16,scaling_matrix4[0]); // Intra, Cr
7139 decode_scaling_list(h,scaling_matrix4[2],16,scaling_matrix4[1]); // Intra, Cb
7140 decode_scaling_list(h,scaling_matrix4[3],16,fallback[1]); // Inter, Y
7141 decode_scaling_list(h,scaling_matrix4[4],16,scaling_matrix4[3]); // Inter, Cr
7142 decode_scaling_list(h,scaling_matrix4[5],16,scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 DCT */
7143 if(is_sps || pps->transform_8x8_mode){
7144 decode_scaling_list(h,scaling_matrix8[0],64,fallback[2]); // Intra, Y
7145 decode_scaling_list(h,scaling_matrix8[1],64,fallback[3]); // Inter, Y
7147 } else if(fallback_sps) {
/* no PPS matrices: inherit the SPS matrices wholesale */
7148 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7149 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parse a seq_parameter_set NAL into h->sps_buffer[sps_id].
 * Reads profile/level, POC configuration, reference frame count, frame
 * dimensions (in MBs), MBAFF/frame-only flags, cropping and VUI.
 * Returns 0 on success (error paths elsewhere return negative). */
7153 static inline int decode_seq_parameter_set(H264Context *h){
7154 MpegEncContext * const s = &h->s;
7155 int profile_idc, level_idc;
7159 profile_idc= get_bits(&s->gb, 8);
7160 get_bits1(&s->gb); //constraint_set0_flag
7161 get_bits1(&s->gb); //constraint_set1_flag
7162 get_bits1(&s->gb); //constraint_set2_flag
7163 get_bits1(&s->gb); //constraint_set3_flag
7164 get_bits(&s->gb, 4); // reserved
7165 level_idc= get_bits(&s->gb, 8);
7166 sps_id= get_ue_golomb(&s->gb);
7168 sps= &h->sps_buffer[ sps_id ];
7169 sps->profile_idc= profile_idc;
7170 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depth, transform bypass and
 * scaling matrices before the common fields */
7172 if(sps->profile_idc >= 100){ //high profile
7173 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7174 get_bits1(&s->gb); //residual_color_transform_flag
7175 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7176 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7177 sps->transform_bypass = get_bits1(&s->gb);
7178 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7180 sps->scaling_matrix_present = 0;
7182 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7183 sps->poc_type= get_ue_golomb(&s->gb);
7185 if(sps->poc_type == 0){ //FIXME #define
7186 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7187 } else if(sps->poc_type == 1){//FIXME #define
7188 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7189 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7190 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7191 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7193 for(i=0; i<sps->poc_cycle_length; i++)
7194 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
/* only POC types 0..2 are defined by the spec */
7196 if(sps->poc_type > 2){
7197 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7201 sps->ref_frame_count= get_ue_golomb(&s->gb);
7202 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7203 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7205 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7206 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7207 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb_width/height */
7208 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7209 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7212 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7213 if(!sps->frame_mbs_only_flag)
7214 sps->mb_aff= get_bits1(&s->gb);
7218 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7220 sps->crop= get_bits1(&s->gb);
7222 sps->crop_left = get_ue_golomb(&s->gb);
7223 sps->crop_right = get_ue_golomb(&s->gb);
7224 sps->crop_top = get_ue_golomb(&s->gb);
7225 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping would shift the picture origin — not implemented */
7226 if(sps->crop_left || sps->crop_top){
7227 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7233 sps->crop_bottom= 0;
7236 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7237 if( sps->vui_parameters_present_flag )
7238 decode_vui_parameters(h, sps);
7240 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7241 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7242 sps_id, sps->profile_idc, sps->level_idc,
7244 sps->ref_frame_count,
7245 sps->mb_width, sps->mb_height,
7246 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7247 sps->direct_8x8_inference_flag ? "8B8" : "",
7248 sps->crop_left, sps->crop_right,
7249 sps->crop_top, sps->crop_bottom,
7250 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a pic_parameter_set NAL into h->pps_buffer[pps_id].
 * Reads entropy-coding mode, slice groups (FMO — unsupported), default
 * reference counts, weighting mode, QP offsets, deblocking and 8x8-DCT
 * flags, and (when present) PPS scaling matrices. bit_length is the
 * payload size in bits, used to detect the optional trailing fields. */
7256 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7257 MpegEncContext * const s = &h->s;
7258 int pps_id= get_ue_golomb(&s->gb);
7259 PPS *pps= &h->pps_buffer[pps_id];
7261 pps->sps_id= get_ue_golomb(&s->gb);
7262 pps->cabac= get_bits1(&s->gb);
7263 pps->pic_order_present= get_bits1(&s->gb);
7264 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7265 if(pps->slice_group_count > 1 ){
7266 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7267 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* the spec's FMO syntax is quoted below for reference; none of it is
 * actually parsed */
7268 switch(pps->mb_slice_group_map_type){
7271 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7272 | run_length[ i ] |1 |ue(v) |
7277 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7279 | top_left_mb[ i ] |1 |ue(v) |
7280 | bottom_right_mb[ i ] |1 |ue(v) |
7288 | slice_group_change_direction_flag |1 |u(1) |
7289 | slice_group_change_rate_minus1 |1 |ue(v) |
7294 | slice_group_id_cnt_minus1 |1 |ue(v) |
7295 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7297 | slice_group_id[ i ] |1 |u(v) |
7302 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7303 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7304 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7305 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7309 pps->weighted_pred= get_bits1(&s->gb);
7310 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7311 pps->init_qp= get_se_golomb(&s->gb) + 26;
7312 pps->init_qs= get_se_golomb(&s->gb) + 26;
7313 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7314 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7315 pps->constrained_intra_pred= get_bits1(&s->gb);
7316 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* start from flat (all 16) matrices; overwritten if the PPS has its own */
7318 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7319 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing fields exist only if bits remain in the payload */
7321 if(get_bits_count(&s->gb) < bit_length){
7322 pps->transform_8x8_mode= get_bits1(&s->gb);
7323 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7324 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7327 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7328 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7329 pps_id, pps->sps_id,
7330 pps->cabac ? "CABAC" : "CAVLC",
7331 pps->slice_group_count,
7332 pps->ref_count[0], pps->ref_count[1],
7333 pps->weighted_pred ? "weighted" : "",
7334 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7335 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7336 pps->constrained_intra_pred ? "CONSTR" : "",
7337 pps->redundant_pic_cnt_present ? "REDU" : "",
7338 pps->transform_8x8_mode ? "8x8DCT" : ""
7346 * finds the end of the current frame in the bitstream.
7347 * @return the position of the first byte of the next frame, or -1
/* Byte-wise start-code scanner: `state` is a shift register of the last
 * bytes; masking with 0xFFFFFF1F isolates "00 00 01" + nal_unit_type.
 * Types 1/2/5 are slice NALs (a set MSB in the next byte means
 * first_mb_in_slice==0, i.e. a new frame); types 7/8/9 (SPS/PPS/AUD)
 * also terminate a found frame. */
7349 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7352 ParseContext *pc = &(h->s.parse_context);
7353 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7354 // mb_addr= pc->mb_addr - 1;
7356 for(i=0; i<=buf_size; i++){
7357 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7358 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7359 if(pc->frame_start_found){
7360 // If there isn't one more byte in the buffer
7361 // the test on first_mb_in_slice cannot be done yet
7362 // do it at next call.
7363 if (i >= buf_size) break;
7364 if (buf[i] & 0x80) {
7365 // first_mb_in_slice is 0, probably the first nal of a new
7367 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7369 pc->frame_start_found= 0;
7373 pc->frame_start_found = 1;
7375 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7376 if(pc->frame_start_found){
7378 pc->frame_start_found= 0;
7383 state= (state<<8) | buf[i];
/* no frame boundary in this buffer; caller should feed more data */
7387 return END_NOT_FOUND;
/* AVCodecParser callback: split the input stream into whole frames.
 * Uses find_frame_end() plus ff_combine_frame() to buffer partial input
 * across calls; on success *poutbuf/*poutbuf_size describe one complete
 * frame. */
7390 static int h264_parse(AVCodecParserContext *s,
7391 AVCodecContext *avctx,
7392 uint8_t **poutbuf, int *poutbuf_size,
7393 const uint8_t *buf, int buf_size)
7395 H264Context *h = s->priv_data;
7396 ParseContext *pc = &h->s.parse_context;
7399 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame() may swap buf to its internal buffer or ask for
 * more data (failure path elided here) */
7401 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7407 *poutbuf = (uint8_t *)buf;
7408 *poutbuf_size = buf_size;
/* AVCodecParser split callback: return the size of the stream header
 * (the bytes up to the first non-SPS/PPS/AUD NAL after an SPS was seen),
 * so the caller can extract extradata. */
7412 static int h264_split(AVCodecContext *avctx,
7413 const uint8_t *buf, int buf_size)
7416 uint32_t state = -1;
7419 for(i=0; i<=buf_size; i++){
/* 0x107 == start code + SPS NAL */
7420 if((state&0xFFFFFF1F) == 0x107)
7422 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* first NAL that is not SPS/PPS/AUD ends the header */
7424 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes preceding the start code */
7426 while(i>4 && buf[i-5]==0) i--;
7431 state= (state<<8) | buf[i];
7437 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7438 MpegEncContext * const s = &h->s;
7439 AVCodecContext * const avctx= s->avctx;
7443 for(i=0; i<50; i++){
7444 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7448 s->current_picture_ptr= NULL;
7457 if(buf_index >= buf_size) break;
7459 for(i = 0; i < h->nal_length_size; i++)
7460 nalsize = (nalsize << 8) | buf[buf_index++];
7462 // start code prefix search
7463 for(; buf_index + 3 < buf_size; buf_index++){
7464 // this should allways succeed in the first iteration
7465 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7469 if(buf_index+3 >= buf_size) break;
7474 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7475 if(ptr[dst_length - 1] == 0) dst_length--;
7476 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7478 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7479 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7482 if (h->is_avc && (nalsize != consumed))
7483 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7485 buf_index += consumed;
7487 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7488 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7491 switch(h->nal_unit_type){
7493 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7495 init_get_bits(&s->gb, ptr, bit_length);
7497 h->inter_gb_ptr= &s->gb;
7498 s->data_partitioning = 0;
7500 if(decode_slice_header(h) < 0){
7501 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7504 if(h->redundant_pic_count==0 && s->hurry_up < 5
7505 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7506 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7507 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7508 && avctx->skip_frame < AVDISCARD_ALL)
7512 init_get_bits(&s->gb, ptr, bit_length);
7514 h->inter_gb_ptr= NULL;
7515 s->data_partitioning = 1;
7517 if(decode_slice_header(h) < 0){
7518 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7522 init_get_bits(&h->intra_gb, ptr, bit_length);
7523 h->intra_gb_ptr= &h->intra_gb;
7526 init_get_bits(&h->inter_gb, ptr, bit_length);
7527 h->inter_gb_ptr= &h->inter_gb;
7529 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7531 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7532 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7533 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7534 && avctx->skip_frame < AVDISCARD_ALL)
7538 init_get_bits(&s->gb, ptr, bit_length);
7542 init_get_bits(&s->gb, ptr, bit_length);
7543 decode_seq_parameter_set(h);
7545 if(s->flags& CODEC_FLAG_LOW_DELAY)
7548 if(avctx->has_b_frames < 2)
7549 avctx->has_b_frames= !s->low_delay;
7552 init_get_bits(&s->gb, ptr, bit_length);
7554 decode_picture_parameter_set(h, bit_length);
7558 case NAL_END_SEQUENCE:
7559 case NAL_END_STREAM:
7560 case NAL_FILLER_DATA:
7562 case NAL_AUXILIARY_SLICE:
7565 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7569 if(!s->current_picture_ptr) return buf_index; //no frame
7571 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7572 s->current_picture_ptr->pict_type= s->pict_type;
7573 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7575 h->prev_frame_num_offset= h->frame_num_offset;
7576 h->prev_frame_num= h->frame_num;
7577 if(s->current_picture_ptr->reference){
7578 h->prev_poc_msb= h->poc_msb;
7579 h->prev_poc_lsb= h->poc_lsb;
7581 if(s->current_picture_ptr->reference)
7582 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7592 * returns the number of bytes consumed for building the current frame
7594 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
    /* when the parser combined several input chunks, the real consumed
     * count is relative to the last combine index */
7595 if(s->flags&CODEC_FLAG_TRUNCATED){
7596 pos -= s->parse_context.last_index;
7597 if(pos<0) pos=0; // FIXME remove (unneeded?)
    /* clamp to [1, buf_size] so the caller always makes forward progress
     * and never reports more than it was given */
7601 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7602 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * libavcodec decode entry point: decode one input packet into at most
 * one output picture.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned,
 *                  left untouched/zero otherwise
 * @param buf       input packet data
 * @param buf_size  input packet size in bytes; 0 means flush
 * @return number of bytes consumed, or negative on error
 */
7608 static int decode_frame(AVCodecContext *avctx,
7609 void *data, int *data_size,
7610 uint8_t *buf, int buf_size)
7612 H264Context *h = avctx->priv_data;
7613 MpegEncContext *s = &h->s;
7614 AVFrame *pict = data;
7617 s->flags= avctx->flags;
7618 s->flags2= avctx->flags2;
7620 /* no supplementary picture */
7621 if (buf_size == 0) {
    /* CODEC_FLAG_TRUNCATED: input may not be frame-aligned, so run the
     * internal parser and buffer data until a full frame is available */
7625 if(s->flags&CODEC_FLAG_TRUNCATED){
7626 int next= find_frame_end(h, buf, buf_size);
7628 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7630 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
    /* AVC (MP4-style) input: on the first call, parse the avcC extradata
     * (AVCDecoderConfigurationRecord) to get SPS/PPS and nal_length_size */
7633 if(h->is_avc && !h->got_avcC) {
7634 int i, cnt, nalsize;
7635 unsigned char *p = avctx->extradata;
7636 if(avctx->extradata_size < 7) {
7637 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7641 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7644 /* sps and pps in the avcC always have length coded with 2 bytes,
7645 so put a fake nal_length_size = 2 while parsing them */
7646 h->nal_length_size = 2;
7647 // Decode sps from avcC
7648 cnt = *(p+5) & 0x1f; // Number of sps
7650 for (i = 0; i < cnt; i++) {
7651 nalsize = BE_16(p) + 2;
    /* NOTE(review): this loop checks "< 0" while the pps loop below checks
     * "!= nalsize" — the two error checks are inconsistent; verify which
     * one is intended */
7652 if(decode_nal_units(h, p, nalsize) < 0) {
7653 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7658 // Decode pps from avcC
7659 cnt = *(p++); // Number of pps
7660 for (i = 0; i < cnt; i++) {
7661 nalsize = BE_16(p) + 2;
7662 if(decode_nal_units(h, p, nalsize) != nalsize) {
7663 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7668 // Now store right nal length size, that will be use to parse all other nals
7669 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7670 // Do not reparse avcC
    /* Annex-B extradata (e.g. out-of-band SPS/PPS) is decoded once, before
     * the first picture */
7674 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7675 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
    /* decode the actual packet payload */
7679 buf_index=decode_nal_units(h, buf, buf_size);
7683 //FIXME do something with unavailable reference frames
7685 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7686 if(!s->current_picture_ptr){
7687 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7692 Picture *out = s->current_picture_ptr;
7693 #if 0 //decode order
7694 *data_size = sizeof(AVFrame);
7696 /* Sort B-frames into display order */
7697 Picture *cur = s->current_picture_ptr;
7698 Picture *prev = h->delayed_output_pic;
7703 int dropped_frame = 0;
    /* grow the reorder delay to what the stream's VUI declares */
7706 if(h->sps.bitstream_restriction_flag
7707 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7708 s->avctx->has_b_frames = h->sps.num_reorder_frames;
    /* append the new picture to the reorder buffer */
7712 while(h->delayed_pic[pics]) pics++;
7713 h->delayed_pic[pics++] = cur;
7714 if(cur->reference == 0)
7717 for(i=0; h->delayed_pic[i]; i++)
7718 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
    /* pick the delayed picture with the smallest POC (stopping at a
     * keyframe boundary) as the next one to output */
7721 out = h->delayed_pic[0];
7722 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7723 if(h->delayed_pic[i]->poc < out->poc){
7724 out = h->delayed_pic[i];
    /* detect misdeclared reorder depth and adapt has_b_frames on the fly */
7728 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7729 if(prev && pics <= s->avctx->has_b_frames)
7731 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7733 ((!cross_idr && prev && out->poc > prev->poc + 2)
7734 || cur->pict_type == B_TYPE)))
7737 s->avctx->has_b_frames++;
7740 else if(out_of_order)
    /* remove the output picture from the reorder buffer */
7743 if(out_of_order || pics > s->avctx->has_b_frames){
7744 dropped_frame = (out != h->delayed_pic[out_idx]);
7745 for(i=out_idx; h->delayed_pic[i]; i++)
7746 h->delayed_pic[i] = h->delayed_pic[i+1];
7749 if(prev == out && !dropped_frame)
    /* release the previously output picture if it is no longer needed */
7752 *data_size = sizeof(AVFrame);
7753 if(prev && prev != out && prev->reference == 1)
7754 prev->reference = 0;
7755 h->delayed_output_pic = out;
7759 *pict= *(AVFrame*)out;
7761 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7764 assert(pict->data[0] || !*data_size);
7765 ff_print_debug_info(s, pict);
7766 //printf("out %d\n", (int)pict->data[0]);
7769 /* Return the Picture timestamp as the frame number */
7770 /* we subtract 1 because it is added on utils.c */
7771 avctx->frame_number = s->picture_number - 1;
7773 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the neighbouring
 * macroblocks of the current one: a neighbour is available only if it
 * lies inside the picture AND belongs to the same slice (slice_table
 * entry equals h->slice_num).
 *
 * Index layout: 0 = top-left, 1 = top, 2 = top-right, 3 = left;
 * 4 and 5 are constants (see FIXMEs).
 */
7776 static inline void fill_mb_avail(H264Context *h){
7777 MpegEncContext * const s = &h->s;
7778 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    /* NOTE(review): the top-row accesses below have no visible s->mb_y
     * guard — presumably the elided lines above handle the first row, or
     * slice_table has padding before row 0; confirm against the full file */
7781 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7782 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7783 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    /* left neighbour: only valid when not in the first column */
7789 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7790 h->mb_avail[4]= 1; //FIXME move out
7791 h->mb_avail[5]= 0; //FIXME move out
7797 #define SIZE (COUNT*40)
/* Built-in self test (the enclosing function header — presumably main()
 * under a TESTING guard — is elided from this view). It exercises, in
 * order: the Exp-Golomb coder, the 4x4 (I)DCT, the quantizer and the
 * NAL escaping/unescaping round trip. */
7803 // int int_temp[10000];
7805 AVCodecContext avctx;
7807 dsputil_init(&dsp, &avctx);
    /* --- unsigned Exp-Golomb: write COUNT codes, then read them back --- */
7809 init_put_bits(&pb, temp, SIZE);
7810 printf("testing unsigned exp golomb\n");
7811 for(i=0; i<COUNT; i++){
7813 set_ue_golomb(&pb, i);
7814 STOP_TIMER("set_ue_golomb");
7816 flush_put_bits(&pb);
7818 init_get_bits(&gb, temp, 8*SIZE);
7819 for(i=0; i<COUNT; i++){
7822 s= show_bits(&gb, 24);
7825 j= get_ue_golomb(&gb);
    /* decoded value must equal the value that was written */
7827 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7830 STOP_TIMER("get_ue_golomb");
    /* --- signed Exp-Golomb: same round trip, values centred on 0 --- */
7834 init_put_bits(&pb, temp, SIZE);
7835 printf("testing signed exp golomb\n");
7836 for(i=0; i<COUNT; i++){
7838 set_se_golomb(&pb, i - COUNT/2);
7839 STOP_TIMER("set_se_golomb");
7841 flush_put_bits(&pb);
7843 init_get_bits(&gb, temp, 8*SIZE);
7844 for(i=0; i<COUNT; i++){
7847 s= show_bits(&gb, 24);
7850 j= get_se_golomb(&gb);
7851 if(j != i - COUNT/2){
7852 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7855 STOP_TIMER("get_se_golomb");
    /* --- 4x4 DCT/IDCT: random blocks through forward DCT, scaling and
     * IDCT, accumulating the reconstruction error --- */
7858 printf("testing 4x4 (I)DCT\n");
7861 uint8_t src[16], ref[16];
7862 uint64_t error= 0, max_error=0;
7864 for(i=0; i<COUNT; i++){
7866 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7867 for(j=0; j<16; j++){
7868 ref[j]= random()%255;
7869 src[j]= random()%255;
7872 h264_diff_dct_c(block, src, ref, 4);
    /* crude scaling of the coefficients (elided steps in between) */
7875 for(j=0; j<16; j++){
7876 // printf("%d ", block[j]);
7877 block[j]= block[j]*4;
7878 if(j&1) block[j]= (block[j]*4 + 2)/5;
7879 if(j&4) block[j]= (block[j]*4 + 2)/5;
7883 s->dsp.h264_idct_add(ref, block, 4);
7884 /* for(j=0; j<16; j++){
7885 printf("%d ", ref[j]);
    /* compare reconstruction against the source */
7889 for(j=0; j<16; j++){
7890 int diff= ABS(src[j] - ref[j]);
7893 max_error= FFMAX(max_error, diff);
7896 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    /* --- quantizer: sweep all 52 QP values over random blocks --- */
7898 printf("testing quantizer\n");
7899 for(qp=0; qp<52; qp++){
7901 src1_block[i]= src2_block[i]= random()%255;
    /* --- NAL layer: escape a random bitstream with forced zero bytes,
     * unescape it and verify a bit-exact round trip --- */
7905 printf("Testing NAL layer\n");
7907 uint8_t bitstream[COUNT];
7908 uint8_t nal[COUNT*2];
7910 memset(&h, 0, sizeof(H264Context));
7912 for(i=0; i<COUNT; i++){
    /* fill with non-zero bytes, then inject a controlled number of zeros
     * so that emulation-prevention escaping is actually exercised */
7920 for(j=0; j<COUNT; j++){
7921 bitstream[j]= (random() % 255) + 1;
7924 for(j=0; j<zeros; j++){
7925 int pos= random() % COUNT;
7926 while(bitstream[pos] == 0){
7935 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7937 printf("encoding failed\n");
7941 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7945 if(out_length != COUNT){
7946 printf("incorrect length %d %d\n", out_length, COUNT);
7950 if(consumed != nal_length){
7951 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7955 if(memcmp(bitstream, out, COUNT)){
7956 printf("missmatch\n");
7961 printf("Testing RBSP\n");
/**
 * Codec close callback: release the per-context buffers allocated during
 * decoding.
 *
 * @param avctx codec context whose priv_data is the H264Context
 * @return 0 (NOTE(review): the return statement is elided in this view)
 */
7969 static int decode_end(AVCodecContext *avctx)
7971 H264Context *h = avctx->priv_data;
7972 MpegEncContext *s = &h->s;
    /* scratch buffer used by decode_nal for unescaped NAL data */
7974 av_freep(&h->rbsp_buffer);
7975 free_tables(h); //FIXME cleanup init stuff perhaps
7978 // memset(h, 0, sizeof(H264Context));
/* Public decoder registration entry (positional AVCodec initializer;
 * most fields — name, type, id, init/close/decode callbacks — are elided
 * in this view). */
7984 AVCodec h264_decoder = {
7988 sizeof(H264Context),
    /* capabilities: direct rendering, truncated input and output delay
     * (for B-frame reordering); draw_horiz_band is intentionally disabled */
7993 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7997 AVCodecParser h264_parser = {
7999 sizeof(H264Context),