2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
99 * Picture parameter set
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
122 * Memory management control operation opcode.
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
146 typedef struct H264Context{
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
164 uint8_t *rbsp_buffer;
165 int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 uint8_t non_zero_count_cache[6*8] __align8;
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
208 int16_t mv_cache[2][5*8][2] __align8;
209 int8_t ref_cache[2][5*8] __align8;
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DCTELEM mb[16*24] __align8;
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
354 int16_t (*mvd_table[2])[2];
355 int16_t mvd_cache[2][5*8][2] __align8;
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 const uint8_t *zigzag_scan_q0;
362 const uint8_t *field_scan_q0;
367 static VLC coeff_token_vlc[4];
368 static VLC chroma_dc_coeff_token_vlc;
370 static VLC total_zeros_vlc[15];
371 static VLC chroma_dc_total_zeros_vlc[3];
373 static VLC run_vlc[6];
376 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
377 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
378 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into a single 32-bit word, with byte order
 * selected at compile time via WORDS_BIGENDIAN.
 * NOTE(review): this excerpt is missing the #else/#endif lines and the
 * closing brace of this function (gap in the numbered listing); the two
 * return statements below belong to the big-endian and little-endian
 * arms respectively.
 */
380 static inline uint32_t pack16to32(int a, int b){
381 #ifdef WORDS_BIGENDIAN
/* big-endian arm: b in the low 16 bits, a shifted into the high 16 bits */
382 return (b&0xFFFF) + (a<<16);
/* little-endian arm (its #else line is not visible in this excerpt) */
384 return (a&0xFFFF) + (b<<16);
390 * @param h height of the rectangle, should be a constant
391 * @param w width of the rectangle, should be a constant
392 * @param size the size of val (1 or 4), should be a constant
/**
 * Fills a w x h rectangle at *vp with the value val, using the widest
 * stores available for each supported (w,h) shape.
 * @param vp     top-left of the rectangle (alignment asserted below)
 * @param w      width in elements, must be a constant (1/2/4/8/16)
 * @param h      height in rows, must be a constant
 * @param stride distance in bytes between rows
 * @param val    fill value; when size==1 it is replicated into every byte
 *               via the 0x0101.. multiplications, when size==4 it is
 *               stored as-is per 32-bit element
 * @param size   element size in bytes (1 or 4), must be a constant
 * NOTE(review): several branch-opening lines (e.g. the w==1 and the
 * w==2 && h==2 cases) and the trailing else/closing braces are missing
 * from this numbered excerpt; code lines are kept byte-identical.
 */
394 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
395 uint8_t *p= (uint8_t*)vp;
396 assert(size==1 || size==4);
/* destination must be aligned to min(w, STRIDE_ALIGN) and stride must be a multiple of w */
401 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
402 assert((stride&(w-1))==0);
403 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
/* NOTE(review): the if(...) opener for this 16-bit store is missing from the excerpt */
406 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
407 }else if(w==2 && h==4){
408 *(uint16_t*)(p + 0*stride)=
409 *(uint16_t*)(p + 1*stride)=
410 *(uint16_t*)(p + 2*stride)=
411 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
412 }else if(w==4 && h==1){
413 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
414 }else if(w==4 && h==2){
415 *(uint32_t*)(p + 0*stride)=
416 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
417 }else if(w==4 && h==4){
418 *(uint32_t*)(p + 0*stride)=
419 *(uint32_t*)(p + 1*stride)=
420 *(uint32_t*)(p + 2*stride)=
421 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
422 }else if(w==8 && h==1){
/* NOTE(review): the store to (p + 0) for this branch is missing from the excerpt */
424 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
425 }else if(w==8 && h==2){
426 *(uint32_t*)(p + 0 + 0*stride)=
427 *(uint32_t*)(p + 4 + 0*stride)=
428 *(uint32_t*)(p + 0 + 1*stride)=
429 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
430 }else if(w==8 && h==4){
/* 64-bit stores: for size==4 replicate the 32-bit val twice, else splat the byte */
431 *(uint64_t*)(p + 0*stride)=
432 *(uint64_t*)(p + 1*stride)=
433 *(uint64_t*)(p + 2*stride)=
434 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 }else if(w==16 && h==2){
436 *(uint64_t*)(p + 0+0*stride)=
437 *(uint64_t*)(p + 8+0*stride)=
438 *(uint64_t*)(p + 0+1*stride)=
439 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
440 }else if(w==16 && h==4){
441 *(uint64_t*)(p + 0+0*stride)=
442 *(uint64_t*)(p + 8+0*stride)=
443 *(uint64_t*)(p + 0+1*stride)=
444 *(uint64_t*)(p + 8+1*stride)=
445 *(uint64_t*)(p + 0+2*stride)=
446 *(uint64_t*)(p + 8+2*stride)=
447 *(uint64_t*)(p + 0+3*stride)=
448 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * coefficient counts, motion vectors / reference indices, mvd, direct
 * flags) from the frame-wide arrays, for the macroblock at (s->mb_x,
 * s->mb_y). Used both by the decoder proper and, with for_deblock set,
 * by the deblocking filter.
 * @param mb_type     macroblock type flags of the current MB
 * @param for_deblock nonzero when filling caches for the loop filter
 * NOTE(review): this numbered excerpt has many interior lines missing
 * (dangling ternary continuations, missing else-branches and closing
 * braces); all visible code lines are kept byte-identical and only
 * comments are added at statement boundaries.
 */
453 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
454 MpegEncContext * const s = &h->s;
455 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
456 int topleft_xy, top_xy, topright_xy, left_xy[2];
457 int topleft_type, top_type, topright_type, left_type[2];
461 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
462 // the actual condition is whether we're on the edge of a slice,
463 // and even then the intra and nnz parts are unnecessary.
464 if(for_deblock && h->slice_num == 1)
467 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive) neighbour MB addresses */
469 top_xy = mb_xy - s->mb_stride;
470 topleft_xy = top_xy - 1;
471 topright_xy= top_xy + 1;
472 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF case: neighbour derivation works on macroblock pairs; frame/field
 * flags of the surrounding pairs decide which member of each pair is the
 * actual neighbour (NOTE(review): the enclosing if(...) opener for this
 * scope is missing from the excerpt) */
482 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
483 const int top_pair_xy = pair_xy - s->mb_stride;
484 const int topleft_pair_xy = top_pair_xy - 1;
485 const int topright_pair_xy = top_pair_xy + 1;
486 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
487 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
488 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
489 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
490 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
491 const int bottom = (s->mb_y & 1);
492 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
494 ? !curr_mb_frame_flag // bottom macroblock
495 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
497 top_xy -= s->mb_stride;
500 ? !curr_mb_frame_flag // bottom macroblock
501 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
503 topleft_xy -= s->mb_stride;
506 ? !curr_mb_frame_flag // bottom macroblock
507 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
509 topright_xy -= s->mb_stride;
511 if (left_mb_frame_flag != curr_mb_frame_flag) {
512 left_xy[1] = left_xy[0] = pair_xy - 1;
513 if (curr_mb_frame_flag) {
534 left_xy[1] += s->mb_stride;
/* remember neighbour addresses for later use (e.g. by the loop filter) */
547 h->top_mb_xy = top_xy;
548 h->left_mb_xy[0] = left_xy[0];
549 h->left_mb_xy[1] = left_xy[1];
/* neighbour mb_type fetch; a neighbour only counts if its slice_table
 * entry says it exists (< 255) — presumably the deblocking variant —
 * or belongs to the same slice as the current MB (second group below) */
551 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
552 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
553 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
554 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
555 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
557 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
558 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
559 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
560 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
561 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra prediction: compute per-4x4-block sample availability bitmasks,
 * masking out bits for neighbours that are absent or (with constrained
 * intra pred) not intra coded */
564 if(IS_INTRA(mb_type)){
565 h->topleft_samples_available=
566 h->top_samples_available=
567 h->left_samples_available= 0xFFFF;
568 h->topright_samples_available= 0xEEEA;
570 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
571 h->topleft_samples_available= 0xB3FF;
572 h->top_samples_available= 0x33FF;
573 h->topright_samples_available= 0x26EA;
576 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
577 h->topleft_samples_available&= 0xDF5F;
578 h->left_samples_available&= 0x5F5F;
582 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
583 h->topleft_samples_available&= 0x7FFF;
585 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
586 h->topright_samples_available&= 0xFBFF;
/* intra4x4 pred mode cache: copy the bottom row of the top neighbour and
 * the right column of the left neighbours; `pred` (set on missing lines)
 * is used where the neighbour is unavailable or not intra4x4 */
588 if(IS_INTRA4x4(mb_type)){
589 if(IS_INTRA4x4(top_type)){
590 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
591 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
592 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
593 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
596 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
601 h->intra4x4_pred_mode_cache[4+8*0]=
602 h->intra4x4_pred_mode_cache[5+8*0]=
603 h->intra4x4_pred_mode_cache[6+8*0]=
604 h->intra4x4_pred_mode_cache[7+8*0]= pred;
607 if(IS_INTRA4x4(left_type[i])){
608 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
609 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
612 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
617 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
618 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-count cache fill from the top and left neighbours; when the
 * neighbour is missing, CABAC uses 0 while CAVLC uses the sentinel 64 */
633 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
635 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
636 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
637 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
638 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
640 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
641 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
643 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
644 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
647 h->non_zero_count_cache[4+8*0]=
648 h->non_zero_count_cache[5+8*0]=
649 h->non_zero_count_cache[6+8*0]=
650 h->non_zero_count_cache[7+8*0]=
652 h->non_zero_count_cache[1+8*0]=
653 h->non_zero_count_cache[2+8*0]=
655 h->non_zero_count_cache[1+8*3]=
656 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
660 for (i=0; i<2; i++) {
662 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
663 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
664 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
665 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
667 h->non_zero_count_cache[3+8*1 + 2*8*i]=
668 h->non_zero_count_cache[3+8*2 + 2*8*i]=
669 h->non_zero_count_cache[0+8*1 + 8*i]=
670 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* coded-block-pattern of the top/left neighbours (CABAC context data) */
677 h->top_cbp = h->cbp_table[top_xy];
678 } else if(IS_INTRA(mb_type)) {
685 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
686 } else if(IS_INTRA(mb_type)) {
692 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
695 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* motion vector and reference index caches for inter/direct MBs,
 * filled per reference list from the frame-wide motion_val/ref_index */
700 //FIXME direct mb can skip much of this
701 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
703 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
704 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
705 /*if(!h->mv_cache_clean[list]){
706 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
707 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
708 h->mv_cache_clean[list]= 1;
712 h->mv_cache_clean[list]= 0;
714 if(IS_INTER(top_type)){
715 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
716 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
717 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
718 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
719 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
720 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
721 h->ref_cache[list][scan8[0] + 0 - 1*8]=
722 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
723 h->ref_cache[list][scan8[0] + 2 - 1*8]=
724 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
726 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
727 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
728 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
729 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
730 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
733 //FIXME unify cleanup or sth
734 if(IS_INTER(left_type[0])){
735 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
736 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
737 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
739 h->ref_cache[list][scan8[0] - 1 + 0*8]=
740 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
742 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
743 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
744 h->ref_cache[list][scan8[0] - 1 + 0*8]=
745 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
748 if(IS_INTER(left_type[1])){
749 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
750 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
751 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
752 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
753 h->ref_cache[list][scan8[0] - 1 + 2*8]=
754 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
756 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
757 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
758 h->ref_cache[list][scan8[0] - 1 + 2*8]=
759 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
760 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright neighbours are only needed for spatial prediction,
 * not for deblocking or temporal direct */
763 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
766 if(IS_INTER(topleft_type)){
767 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
768 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
769 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
770 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
772 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
773 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
776 if(IS_INTER(topright_type)){
777 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
778 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
779 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
780 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
782 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
783 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* mark interior cache cells that have no real neighbour as unavailable */
787 h->ref_cache[list][scan8[5 ]+1] =
788 h->ref_cache[list][scan8[7 ]+1] =
789 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
790 h->ref_cache[list][scan8[4 ]] =
791 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
792 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
793 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
794 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
795 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
796 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
799 /* XXX beurk, Load mvd */
800 if(IS_INTER(topleft_type)){
801 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
802 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
804 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
807 if(IS_INTER(top_type)){
808 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
809 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
810 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
811 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
812 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
814 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
815 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
816 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
817 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
819 if(IS_INTER(left_type[0])){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
824 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
825 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
827 if(IS_INTER(left_type[1])){
828 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
829 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
830 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
832 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
833 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
835 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
836 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
837 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
838 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
839 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the direct-mode flags of the top/left neighbours */
841 if(h->slice_type == B_TYPE){
842 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
844 if(IS_DIRECT(top_type)){
845 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
846 }else if(IS_8X8(top_type)){
847 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
848 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
849 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
851 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
855 if(IS_DIRECT(left_type[0])){
856 h->direct_cache[scan8[0] - 1 + 0*8]=
857 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
858 }else if(IS_8X8(left_type[0])){
859 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
860 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
861 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
863 h->direct_cache[scan8[0] - 1 + 0*8]=
864 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* count of neighbours (top, left) that used the 8x8 transform */
872 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the bottom row and right column of the intra4x4 pred mode cache
 * back into the frame-wide h->intra4x4_pred_mode array for the current MB,
 * so that following macroblocks can read them as neighbour data.
 * NOTE(review): the store to entry [7] and the closing brace are missing
 * from this numbered excerpt.
 */
875 static inline void write_back_intra_pred_mode(H264Context *h){
876 MpegEncContext * const s = &h->s;
877 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* entries [0..3]: bottom row of the cache; [4..6]: right column */
879 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
880 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
881 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
882 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
883 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
884 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
885 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
889 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks whether the top and left neighbour samples required by the cached
 * intra4x4 prediction modes are available, remapping directional DC modes
 * to the available-neighbour variants via the `top`/`left` tables.
 * A negative table entry marks a mode that cannot be salvaged; the error
 * return path for that case is on lines missing from this excerpt.
 * @return presumably 0 on success, negative on error — the visible code
 *         does not show the return statements (TODO confirm)
 */
891 static inline int check_intra4x4_pred_mode(H264Context *h){
892 MpegEncContext * const s = &h->s;
/* remap tables indexed by pred mode: -1 = invalid without that neighbour,
 * otherwise the substitute mode to use */
893 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
894 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* bit 0x8000 = availability of the samples above the first 4x4 row */
897 if(!(h->top_samples_available&0x8000)){
899 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
904 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* same check for the left neighbour column */
909 if(!(h->left_samples_available&0x8000)){
911 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
913 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
916 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
922 } //FIXME cleanup like next
925 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks whether the top and left neighbour samples required by a 16x16
 * luma / 8x8 chroma intra prediction mode are available, and validates the
 * mode range. The remap assignments and return statements are on lines
 * missing from this excerpt; presumably it returns the (possibly remapped)
 * mode on success and a negative value on error — TODO confirm.
 * @param mode the requested prediction mode (0..6 accepted below)
 */
927 static inline int check_intra_pred_mode(H264Context *h, int mode){
928 MpegEncContext * const s = &h->s;
/* remap tables: -1 = invalid without that neighbour, otherwise substitute mode */
929 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
930 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
932 if(mode < 0 || mode > 6) {
933 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
937 if(!(h->top_samples_available&0x8000)){
940 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 if(!(h->left_samples_available&0x8000)){
948 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
957 * gets the predicted intra4x4 prediction mode.
/**
 * Gets the predicted intra4x4 prediction mode for block n: the minimum of
 * the left and top neighbour modes, falling back to DC_PRED when either
 * neighbour is unavailable (negative cache value). The final `return min;`
 * path is on a line missing from this excerpt.
 * @param n 4x4 block index within the macroblock
 */
959 static inline int pred_intra_mode(H264Context *h, int n){
960 const int index8= scan8[n];
961 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
962 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
963 const int min= FFMIN(left, top);
965 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* a negative neighbour mode means "unavailable" -> predict DC */
967 if(min<0) return DC_PRED;
/**
 * Writes the edge entries of the non-zero coefficient count cache back
 * into the frame-wide h->non_zero_count array for the current MB, so that
 * following macroblocks can read them as neighbour data.
 * NOTE(review): lines for the remaining luma/chroma entries (embedded
 * numbers 974, 982, 986, 990+) and the closing brace are missing from this
 * excerpt; visible code is kept byte-identical.
 */
971 static inline void write_back_non_zero_count(H264Context *h){
972 MpegEncContext * const s = &h->s;
973 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: bottom row [0..3] and right column [4..6] of the cache */
975 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
976 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
977 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
978 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
979 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
980 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
981 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma entries [7..12], one plane per group */
983 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
984 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
985 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
987 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
988 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
989 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
993 * gets the predicted number of non zero coefficients.
994 * @param n block index
/**
 * Gets the predicted number of non-zero coefficients for block n from the
 * left and top neighbour counts. The line computing `i` from `left`/`top`
 * and the return statement are missing from this excerpt; the visible
 * halving step presumably averages the two neighbours when both are
 * available (the sentinel 64 marks "not available") — TODO confirm.
 * @param n block index
 */
996 static inline int pred_non_zero_count(H264Context *h, int n){
997 const int index8= scan8[n];
998 const int left= h->non_zero_count_cache[index8 - 1];
999 const int top = h->non_zero_count_cache[index8 - 8];
1002 if(i<64) i= (i+1)>>1;
1004 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal (top-right) motion vector for MV prediction.
 * If the top-right neighbour is unavailable, falls back to the top-left
 * neighbour of the block, per the H.264 MV prediction rules.
 * @param C           out: pointer to the chosen neighbour MV
 * @param i           cache index of the current block
 * @param part_width  partition width in 4x4 units (1, 2 or 4)
 * @return the reference index of the chosen neighbour
 * NOTE(review): the guard/brace lines around the fallback and the closing
 * brace are missing from this numbered excerpt.
 */
1009 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1010 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1012 if(topright_ref != PART_NOT_AVAILABLE){
1013 *C= h->mv_cache[list][ i - 8 + part_width ];
1014 return topright_ref;
1016 tprintf("topright MV not available\n");
/* fallback: use the top-left neighbour instead */
1018 *C= h->mv_cache[list][ i - 8 - 1 ];
1019 return h->ref_cache[list][ i - 8 - 1 ];
1024 * gets the predicted MV.
1025 * @param n the block index
1026 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1027 * @param mx the x component of the predicted motion vector
1028 * @param my the y component of the predicted motion vector
/**
 * Gets the predicted MV for block n as the component-wise median of the
 * left (A), top (B) and diagonal (C) neighbour MVs, with the standard
 * H.264 special cases when exactly one neighbour shares the target
 * reference or when only the left neighbour is available.
 * @param n          the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param list       reference list (0 or 1)
 * @param ref        reference index the prediction is for
 * @param mx         out: x component of the predicted motion vector
 * @param my         out: y component of the predicted motion vector
 * NOTE(review): the single-match branches (left_ref==ref / diagonal case)
 * and several closing braces are on lines missing from this excerpt.
 */
1030 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1031 const int index8= scan8[n];
1032 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1033 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1034 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1035 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1037 int diagonal_ref, match_count;
1039 assert(part_width==1 || part_width==2 || part_width==4);
1049 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference as this partition */
1050 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1051 tprintf("pred_motion match_count=%d\n", match_count);
1052 if(match_count > 1){ //most common
/* two or more matches: component-wise median of all three neighbours */
1053 *mx= mid_pred(A[0], B[0], C[0]);
1054 *my= mid_pred(A[1], B[1], C[1]);
1055 }else if(match_count==1){
/* exactly one neighbour matches: use that neighbour's MV directly
 * (the left_ref==ref branch is on lines missing from this excerpt) */
1059 }else if(top_ref==ref){
/* no match: if only the left neighbour exists, use A; otherwise median */
1067 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1071 *mx= mid_pred(A[0], B[0], C[0]);
1072 *my= mid_pred(A[1], B[1], C[1]);
1076 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1080 * gets the directionally predicted 16x8 MV.
1081 * @param n the block index
1082 * @param mx the x component of the predicted motion vector
1083 * @param my the y component of the predicted motion vector
// NOTE(review): interior lines are elided in this excerpt (jumps in the
// embedded numbering); comments describe only the visible statements.
1085 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// top half of the 16x8 partition: try the neighbour above first
1087 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1088 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1090 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// bottom half: try the neighbour to the left
1098 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1099 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1101 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1103 if(left_ref == ref){
// fall back to the generic median prediction when no directional
// neighbour used the same reference
1111 pred_motion(h, n, 4, list, ref, mx, my);
1115 * gets the directionally predicted 8x16 MV.
1116 * @param n the block index
1117 * @param mx the x component of the predicted motion vector
1118 * @param my the y component of the predicted motion vector
1120 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// left half of the 8x16 partition: try the neighbour to the left first
1122 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1123 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1125 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1127 if(left_ref == ref){
// right half: try the diagonal candidate
1136 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1138 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1140 if(diagonal_ref == ref){
1148 pred_motion(h, n, 2, list, ref, mx, my);
// MV prediction for P-skip macroblocks: zero MV when the top/left
// neighbour is unavailable or is a zero-MV ref-0 block, else median.
1151 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1152 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1153 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1155 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1157 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1158 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1159 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
// (the zero-MV early return is elided here)
1165 pred_motion(h, 0, 4, 0, 0, mx, my);
// Precomputes the temporal-direct distance scale factor per list-0
// reference, from POC distances (clipped to signed 8 bit as per spec).
1170 static inline void direct_dist_scale_factor(H264Context * const h){
1171 const int poc = h->s.current_picture_ptr->poc;
1172 const int poc1 = h->ref_list[1][0].poc;
1174 for(i=0; i<h->ref_count[0]; i++){
1175 int poc0 = h->ref_list[0][i].poc;
1176 int td = clip(poc1 - poc0, -128, 127);
1177 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1178 h->dist_scale_factor[i] = 256;
1180 int tb = clip(poc - poc0, -128, 127);
// tx = round(16384/td); combined with tb this yields tb/td in Q8
1181 int tx = (16384 + (ABS(td) >> 1)) / td;
1182 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
// Records the current ref lists into the picture and builds the
// colocated-ref -> list0 mapping used by temporal direct prediction.
1186 static inline void direct_ref_list_init(H264Context * const h){
1187 MpegEncContext * const s = &h->s;
1188 Picture * const ref1 = &h->ref_list[1][0];
1189 Picture * const cur = s->current_picture_ptr;
1191 if(cur->pict_type == I_TYPE)
1192 cur->ref_count[0] = 0;
1193 if(cur->pict_type != B_TYPE)
1194 cur->ref_count[1] = 0;
1195 for(list=0; list<2; list++){
1196 cur->ref_count[list] = h->ref_count[list];
1197 for(j=0; j<h->ref_count[list]; j++)
1198 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// the mapping below is only needed for temporal direct B prediction
1200 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1202 for(list=0; list<2; list++){
1203 for(i=0; i<ref1->ref_count[list]; i++){
1204 const int poc = ref1->ref_poc[list][i];
1205 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1206 for(j=0; j<h->ref_count[list]; j++)
1207 if(h->ref_list[list][j].poc == poc){
1208 h->map_col_to_list0[list][i] = j;
// Derives MVs and references for B-direct macroblocks/partitions, using
// either spatial or temporal direct prediction (h->direct_spatial_mv_pred).
// NOTE(review): interior lines are elided in this excerpt (jumps in the
// embedded numbering); comments describe only the visible statements.
1215 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1216 MpegEncContext * const s = &h->s;
1217 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1218 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1219 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// colocated MB data from the first list-1 reference picture
1220 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1221 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1222 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1223 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1224 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1225 const int is_b8x8 = IS_8X8(*mb_type);
// choose the partition granularity based on the colocated MB's type
1229 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1230 /* FIXME save sub mb types from previous frames (or derive from MVs)
1231 * so we know exactly what block size to use */
1232 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1233 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1234 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1235 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1236 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1238 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1239 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1242 *mb_type |= MB_TYPE_DIRECT2;
1244 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- spatial direct: refs come from spatial neighbours, MVs from median ----
1246 if(h->direct_spatial_mv_pred){
1251 /* ref = min(neighbors) */
1252 for(list=0; list<2; list++){
1253 int refa = h->ref_cache[list][scan8[0] - 1];
1254 int refb = h->ref_cache[list][scan8[0] - 8];
1255 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
// top-right unavailable: fall back to the top-left neighbour
1257 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1259 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1261 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// no usable neighbour in either list: direct-zero prediction
1267 if(ref[0] < 0 && ref[1] < 0){
1268 ref[0] = ref[1] = 0;
1269 mv[0][0] = mv[0][1] =
1270 mv[1][0] = mv[1][1] = 0;
1272 for(list=0; list<2; list++){
1274 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1276 mv[list][0] = mv[list][1] = 0;
// drop the unused prediction direction from the mb/sub-mb types
1281 *mb_type &= ~MB_TYPE_P0L1;
1282 sub_mb_type &= ~MB_TYPE_P0L1;
1283 }else if(ref[0] < 0){
1284 *mb_type &= ~MB_TYPE_P0L0;
1285 sub_mb_type &= ~MB_TYPE_P0L0;
1288 if(IS_16X16(*mb_type)){
1289 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1290 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
// colocated block is ref-0 with a (near-)zero MV: force zero MV here too
// (the x264_build check works around a known encoder deviation)
1291 if(!IS_INTRA(mb_type_col)
1292 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1293 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1294 && (h->x264_build>33 || !h->x264_build)))){
1296 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1298 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1302 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
// per-8x8-block path of spatial direct
1308 for(i8=0; i8<4; i8++){
1309 const int x8 = i8&1;
1310 const int y8 = i8>>1;
1312 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1314 h->sub_mb_type[i8] = sub_mb_type;
1316 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1317 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1318 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1319 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
// zero out individual 4x4 MVs whose colocated MV is (near-)zero ref-0
1322 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1323 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1324 && (h->x264_build>33 || !h->x264_build)))){
1325 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1326 for(i4=0; i4<4; i4++){
1327 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1328 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1330 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1332 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1338 }else{ /* direct temporal mv pred */
// ---- temporal direct: scale the colocated MV by POC distance ----
1339 if(IS_16X16(*mb_type)){
1340 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1341 if(IS_INTRA(mb_type_col)){
// colocated MB is intra: predict zero MVs with ref 0
1342 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1343 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1344 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1346 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1347 : h->map_col_to_list0[1][l1ref1[0]];
1348 const int dist_scale_factor = h->dist_scale_factor[ref0];
1349 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
// list0 MV = scaled colocated MV; list1 MV = list0 MV - colocated MV
1351 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1352 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1353 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1354 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1355 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1358 for(i8=0; i8<4; i8++){
1359 const int x8 = i8&1;
1360 const int y8 = i8>>1;
1361 int ref0, dist_scale_factor;
1362 const int16_t (*l1mv)[2]= l1mv0;
1364 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1366 h->sub_mb_type[i8] = sub_mb_type;
1367 if(IS_INTRA(mb_type_col)){
1368 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1369 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1370 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1371 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// map the colocated reference into the current list 0
1375 ref0 = l1ref0[x8 + y8*h->b8_stride];
1377 ref0 = h->map_col_to_list0[0][ref0];
1379 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1382 dist_scale_factor = h->dist_scale_factor[ref0];
1384 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1385 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1386 for(i4=0; i4<4; i4++){
1387 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1388 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1389 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1390 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1391 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1392 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
// Copies the per-MB motion caches (MVs, MVDs, ref indices, direct flags)
// back into the frame-wide arrays of the current picture.
// NOTE(review): interior lines are elided in this excerpt; comments
// describe only the visible statements.
1399 static inline void write_back_motion(H264Context *h, int mb_type){
1400 MpegEncContext * const s = &h->s;
1401 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1402 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1405 for(list=0; list<2; list++){
1407 if(!USES_LIST(mb_type, list)){
// list unused by this MB: store zero MVs and LIST_NOT_USED refs
1408 if(1){ //FIXME skip or never read if mb_type doesn't use it
// 4 MVs (8 bytes) per row written as two 64-bit stores
1410 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1411 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1413 if( h->pps.cabac ) {
1414 /* FIXME needed ? */
1416 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1417 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1421 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1422 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
// list used: copy the cached MVs/MVDs/refs out to the picture arrays
1429 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1430 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1432 if( h->pps.cabac ) {
1434 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1435 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1439 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1440 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
// direct flags are only needed for CABAC-coded B slices
1444 if(h->slice_type == B_TYPE && h->pps.cabac){
1445 if(IS_8X8(mb_type)){
1446 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1447 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1448 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1454 * Decodes a network abstraction layer unit.
1455 * @param consumed is the number of bytes used as input
1456 * @param length is the length of the array
1457 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1458 * @returns decoded bytes, might be src+1 if no escapes
// Strips the NAL header byte and removes 00 00 03 emulation-prevention
// sequences, writing into h->rbsp_buffer when escapes are present.
// NOTE(review): interior lines are elided in this excerpt; comments
// describe only the visible statements.
1460 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1464 // src[0]&0x80; //forbidden bit
1465 h->nal_ref_idc= src[0]>>5;
1466 h->nal_unit_type= src[0]&0x1F;
1470 for(i=0; i<length; i++)
1471 printf("%2X ", src[i]);
// scan for the first 00 00 {00,01,02,03} pattern (escape or start code)
1473 for(i=0; i+1<length; i+=2){
1474 if(src[i]) continue;
1475 if(i>0 && src[i-1]==0) i--;
1476 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1478 /* startcode, so we must be past the end */
1485 if(i>=length-1){ //no escaped 0
// fast path: no escape sequences, return the input in place
1486 *dst_length= length;
1487 *consumed= length+1; //+1 for the header
1491 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1492 dst= h->rbsp_buffer;
1494 //printf("decoding esc\n");
1497 //remove escapes (very rare 1:2^22)
1498 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1499 if(src[si+2]==3){ //escape
1504 }else //next start code
1508 dst[di++]= src[si++];
1512 *consumed= si + 1;//+1 for the header
1513 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1519 * @param src the data which should be escaped
1520 * @param dst the target buffer, dst+1 == src is allowed as a special case
1521 * @param length the length of the src data
1522 * @param dst_length the length of the dst array
1523 * @returns length of escaped data in bytes or -1 if an error occured
// Inverse of decode_nal(): writes the NAL header byte, then inserts
// 00 00 03 emulation-prevention bytes where the payload would otherwise
// contain a start-code-like pattern.
// NOTE(review): interior lines are elided in this excerpt; comments
// describe only the visible statements.
1525 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1526 int i, escape_count, si, di;
1530 assert(dst_length>0);
// NAL header: nal_ref_idc (2 bits) + nal_unit_type (5 bits)
1532 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1534 if(length==0) return 1;
// first pass: count required escapes so we can size the temp buffer
1537 for(i=0; i<length; i+=2){
1538 if(src[i]) continue;
1539 if(i>0 && src[i-1]==0)
1541 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1547 if(escape_count==0){
// fast path: nothing to escape, copy payload after the header byte
1549 memcpy(dst+1, src, length);
1553 if(length + escape_count + 1> dst_length)
1556 //this should be damn rare (hopefully)
1558 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1559 temp= h->rbsp_buffer;
1560 //printf("encoding esc\n");
1565 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
// emit 00 00 03 in place of the raw 00 00 xx pattern
1566 temp[di++]= 0; si++;
1567 temp[di++]= 0; si++;
1569 temp[di++]= src[si++];
1572 temp[di++]= src[si++];
1574 memcpy(dst+1, temp, length+escape_count);
1576 assert(di == length+escape_count);
1582 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
// NOTE(review): interior lines are elided here (the stop-bit write before
// the padding is not visible); only the byte-alignment padding is shown.
1584 static void encode_rbsp_trailing(PutBitContext *pb){
1587 length= (-put_bits_count(pb))&7;  // bits needed to reach byte alignment
1588 if(length) put_bits(pb, length, 0);
// Identifies the exact end of the bitstream.
1593 * identifies the exact end of the bitstream
1594 * @return the length of the trailing, or 0 if damaged
1596 static int decode_rbsp_trailing(uint8_t *src){
// (the body computing and checking the trailing-bit pattern is elided)
1600 tprintf("rbsp trailing %X\n", v);
1610 * idct tranforms the 16 dc values and dequantize them.
1611 * @param qp quantization parameter
// 4x4 Hadamard-style inverse transform over the 16 luma DC coefficients,
// followed by dequantization with 'qmul'. The DC values live at sparse
// positions inside 'block' (see the x/y offset tables below).
// NOTE(review): interior lines are elided in this excerpt (the writes into
// temp[] and parts of the loops are not visible).
1613 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1616 int temp[16]; //FIXME check if this is a good idea
1617 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1618 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1620 //memset(block, 64, 2*256);
// vertical butterflies into temp[]
1623 const int offset= y_offset[i];
1624 const int z0= block[offset+stride*0] + block[offset+stride*4];
1625 const int z1= block[offset+stride*0] - block[offset+stride*4];
1626 const int z2= block[offset+stride*1] - block[offset+stride*5];
1627 const int z3= block[offset+stride*1] + block[offset+stride*5];
// horizontal butterflies + dequant back into block[]
1636 const int offset= x_offset[i];
1637 const int z0= temp[4*0+i] + temp[4*2+i];
1638 const int z1= temp[4*0+i] - temp[4*2+i];
1639 const int z2= temp[4*1+i] - temp[4*3+i];
1640 const int z3= temp[4*1+i] + temp[4*3+i];
1642 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1643 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1644 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1645 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1651 * dct tranforms the 16 dc values.
1652 * @param qp quantization parameter ??? FIXME
// Forward counterpart of the function above (encoder side): same butterfly
// structure, with a final >>1 instead of dequantization.
1654 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1655 // const int qmul= dequant_coeff[qp][0];
1657 int temp[16]; //FIXME check if this is a good idea
1658 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1659 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1662 const int offset= y_offset[i];
1663 const int z0= block[offset+stride*0] + block[offset+stride*4];
1664 const int z1= block[offset+stride*0] - block[offset+stride*4];
1665 const int z2= block[offset+stride*1] - block[offset+stride*5];
1666 const int z3= block[offset+stride*1] + block[offset+stride*5];
1675 const int offset= x_offset[i];
1676 const int z0= temp[4*0+i] + temp[4*2+i];
1677 const int z1= temp[4*0+i] - temp[4*2+i];
1678 const int z2= temp[4*1+i] - temp[4*3+i];
1679 const int z3= temp[4*1+i] + temp[4*3+i];
1681 block[stride*0 +offset]= (z0 + z3)>>1;
1682 block[stride*2 +offset]= (z1 + z2)>>1;
1683 block[stride*8 +offset]= (z1 - z2)>>1;
1684 block[stride*10+offset]= (z0 - z3)>>1;
// 2x2 inverse transform + dequantization of the chroma DC coefficients.
// NOTE(review): the declaration of a..d/e and the middle butterfly step
// (computing 'e') are elided in this excerpt.
1692 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1693 const int stride= 16*2;
1694 const int xStride= 16;
1697 a= block[stride*0 + xStride*0];
1698 b= block[stride*0 + xStride*1];
1699 c= block[stride*1 + xStride*0];
1700 d= block[stride*1 + xStride*1];
// ('e' is derived from a..d in the elided lines above)
1707 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1708 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1709 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1710 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 chroma DC transform (encoder side), same layout as above.
1714 static void chroma_dc_dct_c(DCTELEM *block){
1715 const int stride= 16*2;
1716 const int xStride= 16;
1719 a= block[stride*0 + xStride*0];
1720 b= block[stride*0 + xStride*1];
1721 c= block[stride*1 + xStride*0];
1722 d= block[stride*1 + xStride*1];
1729 block[stride*0 + xStride*0]= (a+c);
1730 block[stride*0 + xStride*1]= (e+b);
1731 block[stride*1 + xStride*0]= (a-c);
1732 block[stride*1 + xStride*1]= (e-b);
1737 * gets the chroma qp.
// Maps a luma QP (plus the PPS chroma offset) to the chroma QP via the
// chroma_qp[] lookup table, clamping the index to the valid 0..51 range.
1739 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1741 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
// Forward 4x4 transform of the difference src1 - src2 (encoder side):
// row pass into block[], then column pass in place.
// NOTE(review): loop headers are elided in this excerpt; only the butterfly
// bodies are visible.
1746 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1748 //FIXME try int temp instead of block
// row pass: 4 pixel differences per row
1751 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1752 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1753 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1754 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1755 const int z0= d0 + d3;
1756 const int z3= d0 - d3;
1757 const int z1= d1 + d2;
1758 const int z2= d1 - d2;
1760 block[0 + 4*i]= z0 + z1;
1761 block[1 + 4*i]= 2*z3 + z2;
1762 block[2 + 4*i]= z0 - z1;
1763 block[3 + 4*i]= z3 - 2*z2;
// column pass, in place
1767 const int z0= block[0*4 + i] + block[3*4 + i];
1768 const int z3= block[0*4 + i] - block[3*4 + i];
1769 const int z1= block[1*4 + i] + block[2*4 + i];
1770 const int z2= block[1*4 + i] - block[2*4 + i];
1772 block[0*4 + i]= z0 + z1;
1773 block[1*4 + i]= 2*z3 + z2;
1774 block[2*4 + i]= z0 - z1;
1775 block[3*4 + i]= z3 - 2*z2;
1780 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1781 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
// Quantizes a 4x4 block with a deadzone (larger bias for intra), with
// special DC handling when seperate_dc is set. Returns the index of the
// last non-zero coefficient (per the final 'return last_non_zero').
// NOTE(review): interior lines are elided in this excerpt (loop headers,
// negative-level branches, block[] writebacks are not visible).
1782 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1784 const int * const quant_table= quant_coeff[qscale];
// deadzone bias: 1/3 for intra, 1/6 for inter (in QUANT_SHIFT fixed point)
1785 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1786 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1787 const unsigned int threshold2= (threshold1<<1);
// --- DC coefficient with its own (coarser) shift ---
1793 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1794 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1795 const unsigned int dc_threshold2= (dc_threshold1<<1);
1797 int level= block[0]*quant_coeff[qscale+18][0];
// unsigned-range trick: true iff |level| exceeds the deadzone
1798 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1800 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1803 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1806 // last_non_zero = i;
// --- alternative DC path with a finer shift ---
1811 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1812 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1813 const unsigned int dc_threshold2= (dc_threshold1<<1);
1815 int level= block[0]*quant_table[0];
1816 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1818 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1821 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1824 // last_non_zero = i;
// --- AC coefficients, visited in scan order ---
1837 const int j= scantable[i];
1838 int level= block[j]*quant_table[j];
1840 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1841 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1842 if(((unsigned)(level+threshold1))>threshold2){
1844 level= (bias + level)>>QUANT_SHIFT;
1847 level= (bias - level)>>QUANT_SHIFT;
1856 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicate the 4 pixels directly above
 * the block into each of its 4 rows.
 * 'topright' is unused for this mode (kept for the common predictor signature).
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top= ((const uint32_t*)(src-stride))[0]; /* 4 packed top pixels */
    int y;

    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= top;
}
/**
 * 4x4 horizontal intra prediction: fill each row with the pixel to its left.
 * 'topright' is unused for this mode.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++){
        /* splat the left neighbour across the 4 bytes of the row */
        ((uint32_t*)(src+y*stride))[0]= src[-1+y*stride]*0x01010101U;
    }
}
/**
 * 4x4 DC intra prediction: every pixel becomes the rounded mean of the
 * 4 top and 4 left neighbours. 'topright' is unused.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i, sum= 4; /* +4 for rounding of the >>3 below */

    for(i=0; i<4; i++)
        sum += src[i-stride] + src[-1+i*stride];

    {
        const uint32_t fill= (uint32_t)(sum>>3) * 0x01010101U;
        for(i=0; i<4; i++)
            ((uint32_t*)(src+i*stride))[0]= fill;
    }
}
/**
 * 4x4 DC intra prediction using only the 4 left neighbours (top row is
 * unavailable). 'topright' is unused.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i, sum= 2; /* +2 for rounding of the >>2 below */
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[-1+i*stride];

    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction using only the 4 top neighbours (left column is
 * unavailable). 'topright' is unused.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i, sum= 2; /* +2 for rounding of the >>2 below */
    uint32_t fill;

    for(i=0; i<4; i++)
        sum += src[i-stride];

    fill= (uint32_t)(sum>>2) * 0x01010101U;
    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction with no available neighbours: fill the block
 * with the mid-grey value 128. 'topright' is unused.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i;

    for(i=0; i<4; i++)
        ((uint32_t*)(src+i*stride))[0]= 128U*0x01010101U;
}
// Helper macros for the directional 4x4 predictors below: they load the
// four top-right (t4..t7), left (l0..l3) and top (t0..t3) neighbour pixels
// into local const ints of the expanding function.
1910 #define LOAD_TOP_RIGHT_EDGE\
1911 const int t4= topright[0];\
1912 const int t5= topright[1];\
1913 const int t6= topright[2];\
1914 const int t7= topright[3];\
1916 #define LOAD_LEFT_EDGE\
1917 const int l0= src[-1+0*stride];\
1918 const int l1= src[-1+1*stride];\
1919 const int l2= src[-1+2*stride];\
1920 const int l3= src[-1+3*stride];\
1922 #define LOAD_TOP_EDGE\
1923 const int t0= src[ 0-1*stride];\
1924 const int t1= src[ 1-1*stride];\
1925 const int t2= src[ 2-1*stride];\
1926 const int t3= src[ 3-1*stride];\
// Directional 4x4 intra predictors. Each one filters the neighbour pixels
// loaded by the LOAD_*_EDGE macros with 2- or 3-tap rounded averages along
// its prediction direction.
// NOTE(review): interior lines (macro invocations and many of the pixel
// stores) are elided in this excerpt; comments describe only what is shown.
1928 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1929 const int lt= src[-1-1*stride];
// anti-diagonal fill from bottom-left to top-right
1933 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1935 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1938 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1942 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1945 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1947 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1948 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1951 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
// uses top (t0..t3) and top-right (t4..t7) neighbours
1956 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1958 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1961 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1965 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1968 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1970 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1971 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
1974 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1975 const int lt= src[-1-1*stride];
1978 const __attribute__((unused)) int unu= l3;
// 2-tap (half-pel) averages on even rows, 3-tap on odd rows
1981 src[1+2*stride]=(lt + t0 + 1)>>1;
1983 src[2+2*stride]=(t0 + t1 + 1)>>1;
1985 src[3+2*stride]=(t1 + t2 + 1)>>1;
1986 src[3+0*stride]=(t2 + t3 + 1)>>1;
1988 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1990 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1992 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1993 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1994 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1995 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1998 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2001 const __attribute__((unused)) int unu= t7;
2003 src[0+0*stride]=(t0 + t1 + 1)>>1;
2005 src[0+2*stride]=(t1 + t2 + 1)>>1;
2007 src[1+2*stride]=(t2 + t3 + 1)>>1;
2009 src[2+2*stride]=(t3 + t4+ 1)>>1;
2010 src[3+2*stride]=(t4 + t5+ 1)>>1;
2011 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2013 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2015 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2017 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2018 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
2021 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
// uses only left neighbours l0..l3; the bottom-right pixels (all l3)
// are written in lines elided from this excerpt
2024 src[0+0*stride]=(l0 + l1 + 1)>>1;
2025 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2027 src[0+1*stride]=(l1 + l2 + 1)>>1;
2029 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2031 src[0+2*stride]=(l2 + l3 + 1)>>1;
2033 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
2042 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2043 const int lt= src[-1-1*stride];
2046 const __attribute__((unused)) int unu= t3;
2049 src[2+1*stride]=(lt + l0 + 1)>>1;
2051 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2052 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2053 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2055 src[2+2*stride]=(l0 + l1 + 1)>>1;
2057 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2059 src[2+3*stride]=(l1 + l2+ 1)>>1;
2061 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2062 src[0+3*stride]=(l2 + l3 + 1)>>1;
2063 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: replicate the 16 pixels directly above
 * the macroblock into all 16 rows.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int i, j;
    const uint32_t *top= (const uint32_t*)(src-stride); /* 16 top pixels as 4 words */

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        for(j=0; j<4; j++)
            row[j]= top[j];
    }
}
/**
 * 16x16 horizontal intra prediction: fill each row with the pixel to its left.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i, j;

    for(i=0; i<16; i++){
        const uint32_t v= src[-1+i*stride]*0x01010101U; /* splat the left pixel */
        uint32_t *row= (uint32_t*)(src+i*stride);
        for(j=0; j<4; j++)
            row[j]= v;
    }
}
// 16x16 DC intra predictors. The DC value is splatted to all 4 bytes of a
// word (dc * 0x01010101) before being stored row by row.
// NOTE(review): interior lines (declarations, loop headers, part of the
// accumulation) are elided in this excerpt; comments describe only the
// visible statements.
2092 static void pred16x16_dc_c(uint8_t *src, int stride){
// accumulate the 16 left neighbours (top-row accumulation is elided)
2096 dc+= src[-1+i*stride];
// mean of 32 neighbours, rounded
2103 dc= 0x01010101*((dc + 16)>>5);
2105 for(i=0; i<16; i++){
2106 ((uint32_t*)(src+i*stride))[0]=
2107 ((uint32_t*)(src+i*stride))[1]=
2108 ((uint32_t*)(src+i*stride))[2]=
2109 ((uint32_t*)(src+i*stride))[3]= dc;
// DC from the 16 left neighbours only (top row unavailable)
2113 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2117 dc+= src[-1+i*stride];
2120 dc= 0x01010101*((dc + 8)>>4);
2122 for(i=0; i<16; i++){
2123 ((uint32_t*)(src+i*stride))[0]=
2124 ((uint32_t*)(src+i*stride))[1]=
2125 ((uint32_t*)(src+i*stride))[2]=
2126 ((uint32_t*)(src+i*stride))[3]= dc;
// DC from the 16 top neighbours only (left column unavailable);
// the accumulation loop is elided in this excerpt
2130 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2136 dc= 0x01010101*((dc + 8)>>4);
2138 for(i=0; i<16; i++){
2139 ((uint32_t*)(src+i*stride))[0]=
2140 ((uint32_t*)(src+i*stride))[1]=
2141 ((uint32_t*)(src+i*stride))[2]=
2142 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 DC intra prediction with no available neighbours: fill the whole
 * macroblock with the mid-grey value 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i, j;

    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        for(j=0; j<4; j++)
            row[j]= 0x01010101U*128U;
    }
}
// 16x16 plane (gradient) intra prediction, shared between H.264 and SVQ3
// ('svq3' selects the SVQ3 variant of the gradient scaling/rounding).
// NOTE(review): interior lines (declarations, the svq3 branch structure and
// the inner write of 'b') are elided in this excerpt; comments describe only
// the visible statements.
2157 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2160 uint8_t *cm = cropTbl + MAX_NEG_CROP;  // clamp-to-[0,255] lookup table
2161 const uint8_t * const src0 = src+7-stride;
2162 const uint8_t *src1 = src+8*stride-1;
2163 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
// H and V are weighted sums of neighbour differences (the plane gradients)
2164 int H = src0[1] - src0[-1];
2165 int V = src1[0] - src2[ 0];
2166 for(k=2; k<=8; ++k) {
2167 src1 += stride; src2 -= stride;
2168 H += k*(src0[k] - src0[-k]);
2169 V += k*(src1[0] - src2[ 0]);
// SVQ3 variant of the gradient scaling:
2172 H = ( 5*(H/4) ) / 16;
2173 V = ( 5*(V/4) ) / 16;
2175 /* required for 100% accuracy */
2176 i = H; H = V; V = i;
// H.264 variant of the gradient scaling:
2178 H = ( 5*H+32 ) >> 6;
2179 V = ( 5*V+32 ) >> 6;
2182 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2183 for(j=16; j>0; --j) {
2186 for(i=-16; i<0; i+=4) {
2187 src[16+i] = cm[ (b ) >> 5 ];
2188 src[17+i] = cm[ (b+ H) >> 5 ];
2189 src[18+i] = cm[ (b+2*H) >> 5 ];
2190 src[19+i] = cm[ (b+3*H) >> 5 ];
// H.264 entry point: plane prediction without the SVQ3 tweaks
2197 static void pred16x16_plane_c(uint8_t *src, int stride){
2198 pred16x16_plane_compat_c(src, stride, 0);
2201 static void pred8x8_vertical_c(uint8_t *src, int stride){
2203 const uint32_t a= ((uint32_t*)(src-stride))[0];
2204 const uint32_t b= ((uint32_t*)(src-stride))[1];
2207 ((uint32_t*)(src+i*stride))[0]= a;
2208 ((uint32_t*)(src+i*stride))[1]= b;
2212 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2216 ((uint32_t*)(src+i*stride))[0]=
2217 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
2221 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2225 ((uint32_t*)(src+i*stride))[0]=
2226 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
2230 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2236 dc0+= src[-1+i*stride];
2237 dc2+= src[-1+(i+4)*stride];
2239 dc0= 0x01010101*((dc0 + 2)>>2);
2240 dc2= 0x01010101*((dc2 + 2)>>2);
2243 ((uint32_t*)(src+i*stride))[0]=
2244 ((uint32_t*)(src+i*stride))[1]= dc0;
2247 ((uint32_t*)(src+i*stride))[0]=
2248 ((uint32_t*)(src+i*stride))[1]= dc2;
/**
 * 8x8 chroma DC prediction from the top neighbours only.
 * The left 4 columns get the average of the first 4 top pixels, the right
 * 4 columns the average of the last 4 (per H.264 8.3.4); all 8 rows are
 * identical.  Unsigned replication avoids signed overflow for DC >= 128.
 * @param src    top-left pixel of the 8x8 block; src[i - stride] valid for
 *               i in [0,8); src must be 32-bit aligned
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1;
    uint32_t pat0, pat1;

    dc0=dc1=0;
    for(i=0; i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    pat0= 0x01010101U*(uint32_t)((dc0 + 2)>>2);
    pat1= 0x01010101U*(uint32_t)((dc1 + 2)>>2);

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= pat0;
        ((uint32_t*)(src+i*stride))[1]= pat1;
    }
}
/**
 * 8x8 chroma DC prediction from both top and left neighbours.
 * The block is split into four 4x4 quadrants (per H.264 8.3.4):
 *   top-left     = avg(left[0..3], top[0..3])  (8 samples)
 *   top-right    = avg(top[4..7])              (4 samples)
 *   bottom-left  = avg(left[4..7])             (4 samples)
 *   bottom-right = avg(top[4..7], left[4..7])  (8 samples)
 * Unsigned replication avoids signed overflow for DC >= 128.
 * @param src    top-left pixel of the 8x8 block; needs valid top row and
 *               left column neighbours; src must be 32-bit aligned
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2;
    uint32_t pat0, pat1, pat2, pat3;

    dc0=dc1=dc2=0;
    for(i=0; i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    /* dc3 must be computed from the raw sums, before dc1/dc2 are scaled */
    pat3= 0x01010101U*(uint32_t)((dc1 + dc2 + 4)>>3);
    pat0= 0x01010101U*(uint32_t)((dc0 + 4)>>3);
    pat1= 0x01010101U*(uint32_t)((dc1 + 2)>>2);
    pat2= 0x01010101U*(uint32_t)((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= pat0;
        ((uint32_t*)(src+i*stride))[1]= pat1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= pat2;
        ((uint32_t*)(src+i*stride))[1]= pat3;
    }
}
/* 8x8 chroma plane prediction: fits a linear gradient (H, V slopes) from
 * the edge pixels above and to the left, then fills the block with the
 * clipped plane through cropTbl.
 * NOTE(review): declarations of j/k/a/b and the per-row src/b advance are
 * not visible in this chunk — presumably elided; confirm against the full
 * file. */
2300 static void pred8x8_plane_c(uint8_t *src, int stride){
2303 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2304 const uint8_t * const src0 = src+3-stride;
2305 const uint8_t *src1 = src+4*stride-1;
2306 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2307 int H = src0[1] - src0[-1];
2308 int V = src1[0] - src2[ 0];
/* weighted sum of symmetric edge differences, weight k for offset +-k */
2309 for(k=2; k<=4; ++k) {
2310 src1 += stride; src2 -= stride;
2311 H += k*(src0[k] - src0[-k]);
2312 V += k*(src1[0] - src2[ 0]);
2314 H = ( 17*H+16 ) >> 5;
2315 V = ( 17*V+16 ) >> 5;
/* a = 16 * plane value anchored at the corner pair */
2317 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2318 for(j=8; j>0; --j) {
/* b accumulates a + x*H along the row; >>5 undoes the 16x/2x scaling */
2321 src[0] = cm[ (b ) >> 5 ];
2322 src[1] = cm[ (b+ H) >> 5 ];
2323 src[2] = cm[ (b+2*H) >> 5 ];
2324 src[3] = cm[ (b+3*H) >> 5 ];
2325 src[4] = cm[ (b+4*H) >> 5 ];
2326 src[5] = cm[ (b+5*H) >> 5 ];
2327 src[6] = cm[ (b+6*H) >> 5 ];
2328 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) prediction functions below.
 * They declare filtered neighbour samples as const locals in the calling
 * function's scope: l0..l7 = left column, t0..t15 = top row (+ top-right),
 * lt = top-left corner.  Each sample is the standard (a + 2b + c + 2) >> 2
 * 3-tap filter of adjacent edge pixels.
 * NOTE(review): the '#define PL(y) \', '#define PT(x) \' and
 * '#define PTR(x) \' header lines are not visible in this chunk —
 * presumably elided; confirm against the full file. */
2333 #define SRC(x,y) src[(x)+(y)*stride]
2335 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* left column: l0 substitutes the top-left pixel with SRC(-1,0) when the
 * top-left neighbour is unavailable; l7 uses a 3*edge tap at the bottom */
2336 #define PREDICT_8x8_LOAD_LEFT \
2337 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2338 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2339 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2340 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2343 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* top row t0..t7, with availability substitution at both ends */
2344 #define PREDICT_8x8_LOAD_TOP \
2345 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2346 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2347 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2348 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2349 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2352 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* top-right t8..t15; when unavailable, all are replicated from SRC(7,-1) */
2353 #define PREDICT_8x8_LOAD_TOPRIGHT \
2354 int t8, t9, t10, t11, t12, t13, t14, t15; \
2355 if(has_topright) { \
2356 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2357 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2358 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* filtered top-left corner sample */
2360 #define PREDICT_8x8_LOAD_TOPLEFT \
2361 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* fill all 8 rows with a pre-replicated 32-bit DC pattern v */
2363 #define PREDICT_8x8_DC(v) \
2365 for( y = 0; y < 8; y++ ) { \
2366 ((uint32_t*)src)[0] = \
2367 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no available neighbours: fill with 128. */
2371 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2373 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left column (l0..l7):
 * rounded average replicated into each byte of a 32-bit word. */
2375 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2377 PREDICT_8x8_LOAD_LEFT;
2378 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top row (t0..t7). */
2381 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2383 PREDICT_8x8_LOAD_TOP;
2384 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both filtered left column and top row
 * (16 samples, hence the >>4). */
2387 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2389 PREDICT_8x8_LOAD_LEFT;
2390 PREDICT_8x8_LOAD_TOP;
2391 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2392 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: replicate each filtered left sample
 * l0..l7 across its row (two 32-bit stores per row). */
2395 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2397 PREDICT_8x8_LOAD_LEFT;
2398 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2399 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2400 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: write the filtered top row t0..t7 into
 * row 0, then copy row 0 into the remaining rows as one 64-bit word.
 * NOTE(review): the statements storing t0..t7 into row 0 are not visible
 * in this chunk — presumably elided. */
2403 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2406 PREDICT_8x8_LOAD_TOP;
2415 for( y = 1; y < 8; y++ )
2416 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left prediction: each anti-diagonal x+y = const
 * is filled with one 3-tap filtered value from the top / top-right
 * samples t0..t15 (H.264 8.3.2.2.4). */
2418 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2420 PREDICT_8x8_LOAD_TOP;
2421 PREDICT_8x8_LOAD_TOPRIGHT;
2422 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2423 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2424 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2425 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2426 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2427 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2428 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2429 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2430 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2431 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2432 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2433 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2434 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2435 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
/* bottom-right corner uses the 3*edge tap since there is no t16 */
2436 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right prediction: each diagonal x-y = const is
 * filled with one 3-tap filtered value, sweeping from the bottom of the
 * left column through the top-left corner (lt) to the end of the top row
 * (H.264 8.3.2.2.5). */
2438 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2440 PREDICT_8x8_LOAD_TOP;
2441 PREDICT_8x8_LOAD_LEFT;
2442 PREDICT_8x8_LOAD_TOPLEFT;
2443 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2444 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2445 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2446 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2447 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2448 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2449 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
/* main diagonal pivots on the top-left corner sample */
2450 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2451 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2452 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2453 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2454 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2455 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2456 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2457 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternating 2-tap ((a+b+1)>>1) and
 * 3-tap ((a+2b+c+2)>>2) filtered values along half-pel diagonals, using
 * top row, left column and top-left corner (H.264 8.3.2.2.6). */
2460 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2462 PREDICT_8x8_LOAD_TOP;
2463 PREDICT_8x8_LOAD_LEFT;
2464 PREDICT_8x8_LOAD_TOPLEFT;
2465 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2466 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2467 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2468 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2469 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2470 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2471 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2472 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2473 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2474 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2475 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2476 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2477 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2478 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2479 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2480 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2481 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2482 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2483 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2484 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2485 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2486 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: alternating 2-tap and 3-tap
 * filtered values from left column, top-left corner and top row
 * (H.264 8.3.2.2.7). */
2488 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2490 PREDICT_8x8_LOAD_TOP;
2491 PREDICT_8x8_LOAD_LEFT;
2492 PREDICT_8x8_LOAD_TOPLEFT;
2493 SRC(0,7)= (l6 + l7 + 1) >> 1;
2494 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2495 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2496 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2497 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2498 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2499 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2500 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2501 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2502 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2503 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2504 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2505 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2506 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2507 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2508 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2509 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2510 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2511 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2512 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2513 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2514 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: alternating 2-tap and 3-tap filtered
 * values from the top and top-right samples t0..t12 (H.264 8.3.2.2.8). */
2516 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2518 PREDICT_8x8_LOAD_TOP;
2519 PREDICT_8x8_LOAD_TOPRIGHT;
2520 SRC(0,0)= (t0 + t1 + 1) >> 1;
2521 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2522 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2523 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2524 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2525 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2526 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2527 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2528 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2529 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2530 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2531 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2532 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2533 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2534 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2535 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2536 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2537 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2538 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2539 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2540 SRC(7,6)= (t10 + t11 + 1) >> 1;
2541 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates down the filtered left
 * column l0..l7; positions past the last left sample are flat-filled with
 * l7 (H.264 8.3.2.2.9). */
2543 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2545 PREDICT_8x8_LOAD_LEFT;
2546 SRC(0,0)= (l0 + l1 + 1) >> 1;
2547 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2548 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2549 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2550 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2551 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2552 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2553 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2554 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2555 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2556 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2557 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2558 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2559 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* remaining lower-right wedge: no samples below l7, replicate it */
2560 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2561 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2562 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2563 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2565 #undef PREDICT_8x8_LOAD_LEFT
2566 #undef PREDICT_8x8_LOAD_TOP
2567 #undef PREDICT_8x8_LOAD_TOPLEFT
2568 #undef PREDICT_8x8_LOAD_TOPRIGHT
2569 #undef PREDICT_8x8_DC
/* Motion compensation for one prediction direction (list) of one partition:
 * quarter-pel luma via qpix_op and eighth-pel chroma via chroma_op, with
 * edge emulation when the motion vector points (partly) outside the
 * picture.
 * NOTE(review): this chunk is elided — the braces/conditions around the
 * emulated-edge and square/second-call paths are not all visible; confirm
 * against the full file. */
2575 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2576 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2577 int src_x_offset, int src_y_offset,
2578 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2579 MpegEncContext * const s = &h->s;
/* mv_cache holds quarter-pel vectors; offsets are in quarter-pel too */
2580 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2581 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* fractional part selects one of 16 qpel interpolation functions */
2582 const int luma_xy= (mx&3) + ((my&3)<<2);
2583 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2584 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2585 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2586 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2587 int extra_height= extra_width;
2589 const int full_mx= mx>>2;
2590 const int full_my= my>>2;
2591 const int pic_width = 16*s->mb_width;
2592 const int pic_height = 16*s->mb_height;
2594 assert(pic->data[0]);
/* sub-pel interpolation reads 3 extra pixels outside the block */
2596 if(mx&7) extra_width -= 3;
2597 if(my&7) extra_height -= 3;
2599 if( full_mx < 0-extra_width
2600 || full_my < 0-extra_height
2601 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2602 || full_my + 16/*FIXME*/ > pic_height + extra_height){
/* source (partly) outside the padded picture: build a clamped copy */
2603 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2604 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2608 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
/* non-square partitions need a second call shifted by delta */
2610 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2613 if(s->flags&CODEC_FLAG_GRAY) return;
2616 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2617 src_cb= s->edge_emu_buffer;
2619 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2622 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2623 src_cr= s->edge_emu_buffer;
2625 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/* Standard (unweighted) motion compensation for one partition: predicts
 * from list 0 and/or list 1; for bi-prediction the second call uses the
 * averaging operators so the two predictions are blended.
 * NOTE(review): the if(list0)/if(list1) guards and the qpix_op=qpix_avg
 * switch are partly elided in this chunk. */
2628 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2629 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2630 int x_offset, int y_offset,
2631 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2632 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2633 int list0, int list1){
2634 MpegEncContext * const s = &h->s;
2635 qpel_mc_func *qpix_op= qpix_put;
2636 h264_chroma_mc_func chroma_op= chroma_put;
/* offsets are in chroma (8x8) units: doubled for luma */
2638 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2639 dest_cb += x_offset + y_offset*s->uvlinesize;
2640 dest_cr += x_offset + y_offset*s->uvlinesize;
2641 x_offset += 8*s->mb_x;
2642 y_offset += 8*s->mb_y;
2645 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2646 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2647 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2648 qpix_op, chroma_op);
/* after the first prediction, switch to averaging operators */
2651 chroma_op= chroma_avg;
2655 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2656 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2657 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2658 qpix_op, chroma_op);
/* Weighted motion compensation for one partition.  Bi-directional case:
 * predict both lists (list 1 into a scratch buffer), then combine with
 * implicit (use_weight==2) or explicit bi-weights.  Uni-directional case:
 * predict one list in place, then apply explicit weight/offset.
 * NOTE(review): the if(list0 && list1){...}else{...} braces are elided in
 * this chunk. */
2662 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2663 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2664 int x_offset, int y_offset,
2665 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2666 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2667 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2668 int list0, int list1){
2669 MpegEncContext * const s = &h->s;
2671 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2672 dest_cb += x_offset + y_offset*s->uvlinesize;
2673 dest_cr += x_offset + y_offset*s->uvlinesize;
2674 x_offset += 8*s->mb_x;
2675 y_offset += 8*s->mb_y;
2678 /* don't optimize for luma-only case, since B-frames usually
2679 * use implicit weights => chroma too. */
/* carve the scratchpad into cb / cr / y areas for the list-1 prediction */
2680 uint8_t *tmp_cb = s->obmc_scratchpad;
2681 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2682 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2683 int refn0 = h->ref_cache[0][ scan8[n] ];
2684 int refn1 = h->ref_cache[1][ scan8[n] ];
2686 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2687 dest_y, dest_cb, dest_cr,
2688 x_offset, y_offset, qpix_put, chroma_put);
2689 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2690 tmp_y, tmp_cb, tmp_cr,
2691 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: table weight + complement, denom fixed at 5 (64) */
2693 if(h->use_weight == 2){
2694 int weight0 = h->implicit_weight[refn0][refn1];
2695 int weight1 = 64 - weight0;
2696 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2697 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2698 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
/* explicit weighting: per-reference weights and offsets from the slice */
2700 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2701 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2702 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2703 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2704 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2705 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2706 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2707 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2708 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
/* uni-directional: single prediction, weighted in place */
2711 int list = list1 ? 1 : 0;
2712 int refn = h->ref_cache[list][ scan8[n] ];
2713 Picture *ref= &h->ref_list[list][refn];
2714 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2715 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2716 qpix_put, chroma_put);
2718 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2719 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2720 if(h->use_weight_chroma){
2721 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2722 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2723 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2724 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch for one partition: use the weighted path for explicit
 * weighting (use_weight==1) or for implicit bi-prediction whose table
 * weight differs from the plain 32/32 average; otherwise use the cheaper
 * standard (put/avg) path. */
2729 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2730 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2731 int x_offset, int y_offset,
2732 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2733 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2734 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2735 int list0, int list1){
/* implicit weight 32 means a plain average — the std path is identical */
2736 if((h->use_weight==2 && list0 && list1
2737 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2738 || h->use_weight==1)
2739 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2740 x_offset, y_offset, qpix_put, chroma_put,
2741 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2743 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2744 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Inter motion compensation for one macroblock: decodes the partitioning
 * from mb_type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and issues
 * one mc_part() call per partition with the matching block-size operators
 * and per-list direction flags.
 * NOTE(review): the 8x8 sub-partition loop header (for over i / n=4*i) is
 * elided in this chunk. */
2747 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2748 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2749 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2750 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2751 MpegEncContext * const s = &h->s;
2752 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2753 const int mb_type= s->current_picture.mb_type[mb_xy];
2755 assert(IS_INTER(mb_type));
/* one 16x16 partition */
2757 if(IS_16X16(mb_type)){
2758 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2759 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2760 &weight_op[0], &weight_avg[0],
2761 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 partitions, stacked vertically */
2762 }else if(IS_16X8(mb_type)){
2763 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2764 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2765 &weight_op[1], &weight_avg[1],
2766 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2767 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2768 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2769 &weight_op[1], &weight_avg[1],
2770 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 partitions, side by side; delta is a line offset for the
 * second half of each non-square call */
2771 }else if(IS_8X16(mb_type)){
2772 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2773 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2774 &weight_op[2], &weight_avg[2],
2775 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2776 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2777 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2778 &weight_op[2], &weight_avg[2],
2779 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2783 assert(IS_8X8(mb_type));
/* per-8x8: each quadrant has its own sub_mb_type and sub-partitioning */
2786 const int sub_mb_type= h->sub_mb_type[i];
2788 int x_offset= (i&1)<<2;
2789 int y_offset= (i&2)<<1;
2791 if(IS_SUB_8X8(sub_mb_type)){
2792 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2793 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2794 &weight_op[3], &weight_avg[3],
2795 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2796 }else if(IS_SUB_8X4(sub_mb_type)){
2797 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2798 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2799 &weight_op[4], &weight_avg[4],
2800 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2801 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2802 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2803 &weight_op[4], &weight_avg[4],
2804 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2805 }else if(IS_SUB_4X8(sub_mb_type)){
2806 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2807 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2808 &weight_op[5], &weight_avg[5],
2809 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2810 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2811 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2812 &weight_op[5], &weight_avg[5],
2813 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2816 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks inside this 8x8 */
2818 int sub_x_offset= x_offset + 2*(j&1);
2819 int sub_y_offset= y_offset + (j&2);
2820 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2821 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2822 &weight_op[6], &weight_avg[6],
2823 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Builds the static CAVLC tables (coeff_token, total_zeros, run_before),
 * once per process — guarded by the 'done' flag.
 * NOTE(review): the if(!done) guard body, the loop headers over i and the
 * closing braces are elided in this chunk. */
2830 static void decode_init_vlc(H264Context *h){
2831 static int done = 0;
2837 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2838 &chroma_dc_coeff_token_len [0], 1, 1,
2839 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2842 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2843 &coeff_token_len [i][0], 1, 1,
2844 &coeff_token_bits[i][0], 1, 1, 1);
2848 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2849 &chroma_dc_total_zeros_len [i][0], 1, 1,
2850 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
/* total_zeros has one table per total_coeff value 1..15 */
2852 for(i=0; i<15; i++){
2853 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2854 &total_zeros_len [i][0], 1, 1,
2855 &total_zeros_bits[i][0], 1, 1, 1);
2859 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2860 &run_len [i][0], 1, 1,
2861 &run_bits[i][0], 1, 1, 1);
/* run_before for zeros_left > 6 uses the dedicated run7 table */
2863 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2864 &run_len [6][0], 1, 1,
2865 &run_bits[6][0], 1, 1, 1);
2870 * Sets the intra prediction function pointers.
2872 static void init_pred_ptrs(H264Context *h){
2873 // MpegEncContext * const s = &h->s;
/* 4x4 luma intra modes */
2875 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2876 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2877 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2878 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2879 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2880 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2881 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2882 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2883 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2884 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2885 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2886 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
/* 8x8 luma intra modes (High profile transform_8x8) */
2888 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2889 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2890 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2891 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2892 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2893 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2894 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2895 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2896 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2897 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2898 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2899 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
/* 8x8 chroma intra modes */
2901 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2902 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2903 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2904 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2905 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2906 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2907 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 luma intra modes (share the 8x8-chroma mode numbering) */
2909 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2910 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2911 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2912 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2913 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2914 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2915 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2918 static void free_tables(H264Context *h){
2919 av_freep(&h->intra4x4_pred_mode);
2920 av_freep(&h->chroma_pred_mode_table);
2921 av_freep(&h->cbp_table);
2922 av_freep(&h->mvd_table[0]);
2923 av_freep(&h->mvd_table[1]);
2924 av_freep(&h->direct_table);
2925 av_freep(&h->non_zero_count);
2926 av_freep(&h->slice_table_base);
2927 av_freep(&h->top_borders[1]);
2928 av_freep(&h->top_borders[0]);
2929 h->slice_table= NULL;
2931 av_freep(&h->mb2b_xy);
2932 av_freep(&h->mb2b8_xy);
2934 av_freep(&h->s.obmc_scratchpad);
/* Precomputes the 8x8 dequant table for all 52 QP values, scaled by the
 * PPS 8x8 scaling matrices.  If both matrices are identical, table 1
 * aliases table 0 to save the recomputation.
 * NOTE(review): the declarations of i/q/x/idx and the inner x loop header
 * are elided in this chunk. */
2937 static void init_dequant8_coeff_table(H264Context *h){
2939 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2940 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2942 for(i=0; i<2; i++ ){
/* identical scaling matrices: share the already-built table */
2943 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2944 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2948 for(q=0; q<52; q++){
2949 int shift = div6[q];
2952 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2953 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
/* Precomputes the 4x4 dequant tables (6 matrix slots) for all 52 QP
 * values, scaled by the PPS 4x4 scaling matrices; identical matrices
 * share one table via aliasing.
 * NOTE(review): the declarations of i/j/q/x/idx and two loop headers are
 * elided in this chunk. */
2958 static void init_dequant4_coeff_table(H264Context *h){
2960 for(i=0; i<6; i++ ){
2961 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier slot whose scaling matrix matches this one */
2963 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2964 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2971 for(q=0; q<52; q++){
2972 int shift = div6[q] + 2;
2975 h->dequant4_coeff[i][q][x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2976 h->pps.scaling_matrix4[i][x]) << shift;
/* Builds all dequant tables: 4x4 always, 8x8 only when the PPS enables
 * the 8x8 transform.  With transform bypass (lossless), QP 0 entries are
 * forced to the neutral scale 1<<6.
 * NOTE(review): the loop headers over i/x are elided in this chunk. */
2981 static void init_dequant_tables(H264Context *h){
2983 init_dequant4_coeff_table(h);
2984 if(h->pps.transform_8x8_mode)
2985 init_dequant8_coeff_table(h);
2986 if(h->sps.transform_bypass){
2989 h->dequant4_coeff[i][0][x] = 1<<6;
2990 if(h->pps.transform_8x8_mode)
2993 h->dequant8_coeff[i][0][x] = 1<<6;
3000 * needs width/height
/* Allocates all per-context tables sized from the macroblock geometry
 * (requires s->mb_width/mb_height/mb_stride to be set) and builds the
 * mb_xy -> b_xy / b8_xy index maps.  Returns 0 on success; CHECKED_ALLOCZ
 * presumably jumps to a fail label that frees everything — the label and
 * return are elided in this chunk.  Also triggers a one-time dequant
 * table init. */
3002 static int alloc_tables(H264Context *h){
3003 MpegEncContext * const s = &h->s;
/* one extra macroblock row of margin */
3004 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3007 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3009 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3010 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3011 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3012 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3013 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables */
3015 if( h->pps.cabac ) {
3016 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3017 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3018 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3019 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table skips the margin row/column */
3022 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3023 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3025 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3026 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3027 for(y=0; y<s->mb_height; y++){
3028 for(x=0; x<s->mb_width; x++){
3029 const int mb_xy= x + y*s->mb_stride;
3030 const int b_xy = 4*x + 4*y*h->b_stride;
3031 const int b8_xy= 2*x + 2*y*h->b8_stride;
3033 h->mb2b_xy [mb_xy]= b_xy;
3034 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() — needs linesize */
3038 s->obmc_scratchpad = NULL;
3040 if(!h->dequant4_coeff[0])
3041 init_dequant_tables(h);
/* Shared decoder/encoder context setup: copies geometry/codec id from the
 * AVCodecContext and initializes defaults, including flat (16) scaling
 * matrices in case the bitstream carries none. */
3049 static void common_init(H264Context *h){
3050 MpegEncContext * const s = &h->s;
3052 s->width = s->avctx->width;
3053 s->height = s->avctx->height;
3054 s->codec_id= s->avctx->codec->id;
/* -1 = dequant tables not yet built for any PPS */
3058 h->dequant_coeff_pps= -1;
3059 s->unrestricted_mv=1;
3060 s->decode=1; //FIXME
/* neutral scaling matrices (all 16) until SPS/PPS provide real ones */
3062 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3063 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback for the H.264 decoder: sets MPV defaults, output
 * format, and detects AVC ("extradata starts with 1") vs Annex-B input.
 * NOTE(review): the common_init/decode_init_vlc calls, the AVC extradata
 * handling body and the return are elided in this chunk. */
3066 static int decode_init(AVCodecContext *avctx){
3067 H264Context *h= avctx->priv_data;
3068 MpegEncContext * const s = &h->s;
3070 MPV_decode_defaults(s);
3075 s->out_format = FMT_H264;
3076 s->workaround_bugs= avctx->workaround_bugs;
3079 // s->decode_mb= ff_h263_decode_mb;
3081 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with a 1 byte indicates avcC (MP4-style) layout */
3085 if(avctx->extradata_size > 0 && avctx->extradata &&
3086 *(char *)avctx->extradata == 1){
/* Per-frame setup: starts the MPV frame and error resilience, then
 * precomputes block_offset[] (pixel offset of each 4x4 block inside the
 * macroblock, frame [0..23] and field [24..47] variants) and lazily
 * allocates the bi-prediction scratchpad (needs linesize, so it cannot
 * live in alloc_tables).
 * NOTE(review): the error return and the chroma loop header over i are
 * elided in this chunk. */
3096 static int frame_start(H264Context *h){
3097 MpegEncContext * const s = &h->s;
3100 if(MPV_frame_start(s, s->avctx) < 0)
3102 ff_er_frame_start(s);
3104 assert(s->linesize && s->uvlinesize);
3106 for(i=0; i<16; i++){
/* luma: scan8 encodes the 4x4 raster position; <<3 rows for field mode */
3107 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3108 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma cb/cr share offsets */
3111 h->block_offset[16+i]=
3112 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3113 h->block_offset[24+16+i]=
3114 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3117 /* can't be in alloc_tables because linesize isn't known there.
3118 * FIXME: redo bipred weight to not require extra buffer? */
3119 if(!s->obmc_scratchpad)
3120 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3122 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves the bottom row and right column of the just-decoded macroblock
 * into top_borders[] / left_border[] so the deblocking/prediction of the
 * neighbouring macroblocks can still read the unfiltered pixels.
 * NOTE(review): the src_y adjustment and a loop header are elided in this
 * chunk. */
3126 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3127 MpegEncContext * const s = &h->s;
3131 src_cb -= uvlinesize;
3132 src_cr -= uvlinesize;
3134 // There are two lines saved, the line above the top macroblock of a pair,
3135 // and the line above the bottom macroblock
/* left_border[0] carries the corner pixel from the saved top row */
3136 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3137 for(i=1; i<17; i++){
3138 h->left_border[i]= src_y[15+i* linesize];
/* save the luma bottom row as two 64-bit words */
3141 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3142 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3144 if(!(s->flags&CODEC_FLAG_GRAY)){
3145 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3146 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3148 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3149 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3151 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3152 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
3156 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3157 MpegEncContext * const s = &h->s;
3160 int deblock_left = (s->mb_x > 0);
3161 int deblock_top = (s->mb_y > 0);
3163 src_y -= linesize + 1;
3164 src_cb -= uvlinesize + 1;
3165 src_cr -= uvlinesize + 1;
3167 #define XCHG(a,b,t,xchg)\
3174 for(i = !deblock_top; i<17; i++){
3175 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3180 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3181 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3182 if(s->mb_x+1 < s->mb_width){
3183 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3187 if(!(s->flags&CODEC_FLAG_GRAY)){
3189 for(i = !deblock_top; i<9; i++){
3190 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3191 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3195 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3196 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3201 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3202 MpegEncContext * const s = &h->s;
3205 src_y -= 2 * linesize;
3206 src_cb -= 2 * uvlinesize;
3207 src_cr -= 2 * uvlinesize;
3209 // There are two lines saved, the line above the the top macroblock of a pair,
3210 // and the line above the bottom macroblock
3211 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3212 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3213 for(i=2; i<34; i++){
3214 h->left_border[i]= src_y[15+i* linesize];
3217 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3218 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3219 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3220 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3222 if(!(s->flags&CODEC_FLAG_GRAY)){
3223 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3224 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3225 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3226 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3227 for(i=2; i<18; i++){
3228 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3229 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3231 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3232 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3233 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3234 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
3238 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3239 MpegEncContext * const s = &h->s;
3242 int deblock_left = (s->mb_x > 0);
3243 int deblock_top = (s->mb_y > 0);
3245 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3247 src_y -= 2 * linesize + 1;
3248 src_cb -= 2 * uvlinesize + 1;
3249 src_cr -= 2 * uvlinesize + 1;
3251 #define XCHG(a,b,t,xchg)\
3258 for(i = (!deblock_top)<<1; i<34; i++){
3259 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3264 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3265 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3266 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3267 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3270 if(!(s->flags&CODEC_FLAG_GRAY)){
3272 for(i = (!deblock_top) << 1; i<18; i++){
3273 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3274 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3278 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3279 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3280 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3281 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level decode of one macroblock at (s->mb_x, s->mb_y):
//  1. compute destination pointers into the current picture (with MBAFF
//     field addressing: doubled strides, shifted base for bottom fields);
//  2. pick the inverse transform (4x4 vs 8x8 DCT, or plain pixel add when
//     lossless transform-bypass is active);
//  3. either copy I_PCM samples verbatim, run intra prediction + residual
//     add (with border exchange around the prediction so unfiltered
//     neighbors are used), or run inter motion compensation;
//  4. add the luma/chroma residuals;
//  5. run the in-loop deblocking filter — pairwise and one row late for
//     MBAFF, immediately otherwise.
3286 static void hl_decode_mb(H264Context *h){
3287 MpegEncContext * const s = &h->s;
3288 const int mb_x= s->mb_x;
3289 const int mb_y= s->mb_y;
3290 const int mb_xy= mb_x + mb_y*s->mb_stride;
3291 const int mb_type= s->current_picture.mb_type[mb_xy];
3292 uint8_t *dest_y, *dest_cb, *dest_cr;
3293 int linesize, uvlinesize /*dct_offset*/;
3295 int *block_offset = &h->block_offset[0];
3296 const unsigned int bottom = mb_y & 1;
3297 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3298 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3303 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3304 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3305 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Field macroblocks interleave the two fields: double the strides and use
// the field block-offset table; the bottom field starts one picture line
// down (hence the -15/-7 rebasing).
3307 if (h->mb_field_decoding_flag) {
3308 linesize = s->linesize * 2;
3309 uvlinesize = s->uvlinesize * 2;
3310 block_offset = &h->block_offset[24];
3311 if(mb_y&1){ //FIXME move out of this func?
3312 dest_y -= s->linesize*15;
3313 dest_cb-= s->uvlinesize*7;
3314 dest_cr-= s->uvlinesize*7;
3317 linesize = s->linesize;
3318 uvlinesize = s->uvlinesize;
3319 // dct_offset = s->linesize * 16;
// In transform-bypass (lossless) mode the residual is added directly with
// no inverse transform.
3322 idct_add = transform_bypass
3323 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3324 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
3326 if (IS_INTRA_PCM(mb_type)) {
3329 // The pixels are stored in h->mb array in the same order as levels,
3330 // copy them in output in the correct order.
3331 for(i=0; i<16; i++) {
3332 for (y=0; y<4; y++) {
3333 for (x=0; x<4; x++) {
3334 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3338 for(i=16; i<16+4; i++) {
3339 for (y=0; y<4; y++) {
3340 for (x=0; x<4; x++) {
3341 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3345 for(i=20; i<20+4; i++) {
3346 for (y=0; y<4; y++) {
3347 for (x=0; x<4; x++) {
3348 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3353 if(IS_INTRA(mb_type)){
// Expose the unfiltered neighbor samples before intra prediction.
3354 if(h->deblocking_filter) {
3355 if (h->mb_aff_frame) {
3357 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3359 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3363 if(!(s->flags&CODEC_FLAG_GRAY)){
3364 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3365 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3368 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: four 8x8 predictions + residuals.
3370 if(IS_8x8DCT(mb_type)){
3371 for(i=0; i<16; i+=4){
3372 uint8_t * const ptr= dest_y + block_offset[i];
3373 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3374 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3375 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3376 if(h->non_zero_count_cache[ scan8[i] ])
3377 idct_add(ptr, h->mb + i*16, linesize);
3380 for(i=0; i<16; i++){
3381 uint8_t * const ptr= dest_y + block_offset[i];
3383 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// When the top-right neighbor is unavailable, replicate the last
// available top sample (ptr[3 - linesize]) across a 4-byte buffer.
3386 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3387 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3388 assert(mb_y || linesize <= block_offset[i]);
3389 if(!topright_avail){
3390 tr= ptr[3 - linesize]*0x01010101;
3391 topright= (uint8_t*) &tr;
3393 topright= ptr + 4 - linesize;
3397 h->pred4x4[ dir ](ptr, topright, linesize);
3398 if(h->non_zero_count_cache[ scan8[i] ]){
3399 if(s->codec_id == CODEC_ID_H264)
3400 idct_add(ptr, h->mb + i*16, linesize);
3402 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: whole-MB prediction followed by the luma DC transform.
3407 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3408 if(s->codec_id == CODEC_ID_H264){
3409 if(!transform_bypass)
3410 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3412 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Restore the filtered borders that were swapped in above.
3414 if(h->deblocking_filter) {
3415 if (h->mb_aff_frame) {
3417 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3418 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3419 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3421 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3425 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock: motion compensation (H.264 only; SVQ3 MC elsewhere).
3428 }else if(s->codec_id == CODEC_ID_H264){
3429 hl_motion(h, dest_y, dest_cb, dest_cr,
3430 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3431 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3432 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Add the luma residual for non-intra4x4 macroblocks (intra4x4 already
// added it per block above).
3436 if(!IS_INTRA4x4(mb_type)){
3437 if(s->codec_id == CODEC_ID_H264){
3438 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3439 for(i=0; i<16; i+=di){
3440 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3441 uint8_t * const ptr= dest_y + block_offset[i];
3442 idct_add(ptr, h->mb + i*16, linesize);
3446 for(i=0; i<16; i++){
3447 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3448 uint8_t * const ptr= dest_y + block_offset[i];
3449 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: DC dequant + per-4x4 IDCT-add (always 4x4 for chroma).
3455 if(!(s->flags&CODEC_FLAG_GRAY)){
3456 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3457 if(!transform_bypass){
3458 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3459 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3461 if(s->codec_id == CODEC_ID_H264){
3462 for(i=16; i<16+4; i++){
3463 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3464 uint8_t * const ptr= dest_cb + block_offset[i];
3465 idct_add(ptr, h->mb + i*16, uvlinesize);
3468 for(i=20; i<20+4; i++){
3469 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3470 uint8_t * const ptr= dest_cr + block_offset[i];
3471 idct_add(ptr, h->mb + i*16, uvlinesize);
3475 for(i=16; i<16+4; i++){
3476 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3477 uint8_t * const ptr= dest_cb + block_offset[i];
3478 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3481 for(i=20; i<20+4; i++){
3482 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3483 uint8_t * const ptr= dest_cr + block_offset[i];
3484 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking: for MBAFF wait until the bottom MB of the pair has decoded,
// then back up and filter both macroblocks of the pair; otherwise filter
// the current macroblock directly.  The tmp pixel-watch is debug-only.
3490 if(h->deblocking_filter) {
3491 if (h->mb_aff_frame) {
3492 const int mb_y = s->mb_y - 1;
3493 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3494 const int mb_xy= mb_x + mb_y*s->mb_stride;
3495 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3496 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3497 uint8_t tmp = s->current_picture.data[1][384];
3498 if (!bottom) return;
3499 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3500 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3501 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3503 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3504 // TODO deblock a pair
3507 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3508 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3509 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3510 if (tmp != s->current_picture.data[1][384]) {
3511 tprintf("modified pixel 8,1 (1)\n");
3515 tprintf("call mbaff filter_mb\n");
3516 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3517 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3518 if (tmp != s->current_picture.data[1][384]) {
3519 tprintf("modified pixel 8,1 (2)\n");
3522 tprintf("call filter_mb\n");
3523 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3524 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3525 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3531 * fills the default_ref_list.
// Builds the default reference picture lists (ref_list L0, and L1 for B
// slices) from the short-term and long-term reference pools:
//  - B slices: short-term refs sorted by POC, split around the current
//    picture's POC (L0 walks backwards in POC, L1 forwards), then
//    long-term refs appended; L1's first two entries are swapped when the
//    lists would otherwise be identical.
//  - P slices: short-term refs in buffer order, then long-term refs.
// Returns 0.  Unused tail entries of each list are zeroed.
3533 static int fill_default_ref_list(H264Context *h){
3534 MpegEncContext * const s = &h->s;
3536 int smallest_poc_greater_than_current = -1;
3537 Picture sorted_short_ref[32];
3539 if(h->slice_type==B_TYPE){
3543 /* sort frame according to poc in B slice */
// Selection sort: repeatedly pick the smallest POC above the last limit.
3544 for(out_i=0; out_i<h->short_ref_count; out_i++){
3546 int best_poc=INT_MAX;
3548 for(i=0; i<h->short_ref_count; i++){
3549 const int poc= h->short_ref[i]->poc;
3550 if(poc > limit && poc < best_poc){
3556 assert(best_i != INT_MIN);
3559 sorted_short_ref[out_i]= *h->short_ref[best_i];
3560 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember where refs with POC >= current start — the L0/L1 split point.
3561 if (-1 == smallest_poc_greater_than_current) {
3562 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3563 smallest_poc_greater_than_current = out_i;
3569 if(s->picture_structure == PICT_FRAME){
3570 if(h->slice_type==B_TYPE){
3572 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3574 // find the largest poc
3575 for(list=0; list<2; list++){
3578 int step= list ? -1 : 1;
// Walk the sorted array away from the split point; when running off either
// end, restart from the split point in the opposite direction.
3580 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3581 while(j<0 || j>= h->short_ref_count){
3582 if(j != -99 && step == (list ? -1 : 1))
3585 j= smallest_poc_greater_than_current + (step>>1);
3587 if(sorted_short_ref[j].reference != 3) continue;
3588 h->default_ref_list[list][index ]= sorted_short_ref[j];
3589 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term refs go after all short-term refs, in index order.
3592 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3593 if(h->long_ref[i] == NULL) continue;
3594 if(h->long_ref[i]->reference != 3) continue;
3596 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3597 h->default_ref_list[ list ][index++].pic_id= i;
3600 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3601 // swap the two first elements of L1 when
3602 // L0 and L1 are identical
3603 Picture temp= h->default_ref_list[1][0];
3604 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3605 h->default_ref_list[1][1] = temp;
3608 if(index < h->ref_count[ list ])
3609 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P slice (frame): short-term refs in buffer order, then long-term refs.
3613 for(i=0; i<h->short_ref_count; i++){
3614 if(h->short_ref[i]->reference != 3) continue; //FIXME reference-field handling
3615 h->default_ref_list[0][index ]= *h->short_ref[i];
3616 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3618 for(i = 0; i < 16; i++){
3619 if(h->long_ref[i] == NULL) continue;
3620 if(h->long_ref[i]->reference != 3) continue;
3621 h->default_ref_list[0][index ]= *h->long_ref[i];
3622 h->default_ref_list[0][index++].pic_id= i;
3624 if(index < h->ref_count[0])
3625 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3628 if(h->slice_type==B_TYPE){
3630 //FIXME second field balh
3634 for (i=0; i<h->ref_count[0]; i++) {
3635 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3637 if(h->slice_type==B_TYPE){
3638 for (i=0; i<h->ref_count[1]; i++) {
3639 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3646 static void print_short_term(H264Context *h);
3647 static void print_long_term(H264Context *h);
// Parses ref_pic_list_reordering() from the slice header and applies it:
// starts from the default lists, then for each reordering command moves the
// addressed short-term (idc 0/1, by frame_num difference) or long-term
// (idc 2, by long_term_pic_idx) picture to the front of the remaining list.
// Missing pictures are zero-filled slots; overlong lists are padded with
// the current picture.  Returns 0 on success, -1 on bitstream errors.
3649 static int decode_ref_pic_list_reordering(H264Context *h){
3650 MpegEncContext * const s = &h->s;
3653 print_short_term(h);
3655 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3657 for(list=0; list<2; list++){
3658 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3660 if(get_bits1(&s->gb)){
// pred tracks picNumPred; modular arithmetic below wraps it in
// [0, max_pic_num).
3661 int pred= h->curr_pic_num;
3663 for(index=0; ; index++){
3664 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3667 Picture *ref = NULL;
3669 if(reordering_of_pic_nums_idc==3)
3672 if(index >= h->ref_count[list]){
3673 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3677 if(reordering_of_pic_nums_idc<3){
3678 if(reordering_of_pic_nums_idc<2){
3679 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3681 if(abs_diff_pic_num >= h->max_pic_num){
3682 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds, modulo max_pic_num.
3686 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3687 else pred+= abs_diff_pic_num;
3688 pred &= h->max_pic_num - 1;
3690 for(i= h->short_ref_count-1; i>=0; i--){
3691 ref = h->short_ref[i];
3692 assert(ref->reference == 3);
3693 assert(!ref->long_ref);
3694 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3698 ref->pic_id= ref->frame_num;
3700 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3701 ref = h->long_ref[pic_id];
3702 ref->pic_id= pic_id;
3703 assert(ref->reference == 3);
3704 assert(ref->long_ref);
3709 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3710 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the tail of the list down by one and insert ref at `index`.
3712 for(i=index; i+1<h->ref_count[list]; i++){
3713 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3716 for(; i > index; i--){
3717 h->ref_list[list][i]= h->ref_list[list][i-1];
3719 h->ref_list[list][index]= *ref;
3722 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3728 if(h->slice_type!=B_TYPE) break;
// Fill any empty slots (missing references) with the current picture so
// later code always has valid data pointers.
3730 for(list=0; list<2; list++){
3731 for(index= 0; index < h->ref_count[list]; index++){
3732 if(!h->ref_list[list][index].data[0])
3733 h->ref_list[list][index]= s->current_picture;
3735 if(h->slice_type!=B_TYPE) break;
3738 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3739 direct_dist_scale_factor(h);
3740 direct_ref_list_init(h);
// Parses pred_weight_table() (explicit weighted prediction) from the slice
// header: luma/chroma log2 weight denominators, then per reference and per
// list an optional weight/offset pair; absent entries get the default
// weight (1 << denom) and offset 0.  Sets use_weight/use_weight_chroma when
// any entry differs from the defaults.  L1 is parsed only for B slices.
3744 static int pred_weight_table(H264Context *h){
3745 MpegEncContext * const s = &h->s;
3747 int luma_def, chroma_def;
3750 h->use_weight_chroma= 0;
3751 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3752 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3753 luma_def = 1<<h->luma_log2_weight_denom;
3754 chroma_def = 1<<h->chroma_log2_weight_denom;
3756 for(list=0; list<2; list++){
3757 for(i=0; i<h->ref_count[list]; i++){
3758 int luma_weight_flag, chroma_weight_flag;
3760 luma_weight_flag= get_bits1(&s->gb);
3761 if(luma_weight_flag){
3762 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3763 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3764 if( h->luma_weight[list][i] != luma_def
3765 || h->luma_offset[list][i] != 0)
3768 h->luma_weight[list][i]= luma_def;
3769 h->luma_offset[list][i]= 0;
3772 chroma_weight_flag= get_bits1(&s->gb);
3773 if(chroma_weight_flag){
// j iterates over the two chroma components (Cb, Cr); its declaration is
// in the elided loop header above this line in the full file.
3776 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3777 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3778 if( h->chroma_weight[list][i][j] != chroma_def
3779 || h->chroma_offset[list][i][j] != 0)
3780 h->use_weight_chroma= 1;
3785 h->chroma_weight[list][i][j]= chroma_def;
3786 h->chroma_offset[list][i][j]= 0;
3790 if(h->slice_type != B_TYPE) break;
3792 h->use_weight= h->use_weight || h->use_weight_chroma;
// Computes the implicit weighted-prediction table for B slices: for each
// (ref0, ref1) pair the weight is derived from the temporal distances of
// the two references to the current picture (td/tb, clipped to [-128,127]),
// falling back to the equal weight 32 when the scale factor is out of the
// [-64,128] range.  The symmetric single-ref case disables weighting.
3796 static void implicit_weight_table(H264Context *h){
3797 MpegEncContext * const s = &h->s;
3799 int cur_poc = s->current_picture_ptr->poc;
// Exactly one ref per list and the current POC midway between them:
// implicit weighting is a no-op, so turn it off entirely.
3801 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3802 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3804 h->use_weight_chroma= 0;
3809 h->use_weight_chroma= 2;
3810 h->luma_log2_weight_denom= 5;
3811 h->chroma_log2_weight_denom= 5;
3814 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3815 int poc0 = h->ref_list[0][ref0].poc;
3816 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3817 int poc1 = h->ref_list[1][ref1].poc;
3818 int td = clip(poc1 - poc0, -128, 127);
3820 int tb = clip(cur_poc - poc0, -128, 127);
3821 int tx = (16384 + (ABS(td) >> 1)) / td;
3822 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3823 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3824 h->implicit_weight[ref0][ref1] = 32;
3826 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3828 h->implicit_weight[ref0][ref1] = 32;
// Drops the decoder's reference to a picture; pictures still queued for
// output (delayed_output_pic / delayed_pic[]) are handled specially so
// they are not released while display still needs them.
3833 static inline void unreference_pic(H264Context *h, Picture *pic){
3836 if(pic == h->delayed_output_pic)
3839 for(i = 0; h->delayed_pic[i]; i++)
3840 if(pic == h->delayed_pic[i]){
3848 * instantaneous decoder refresh.
// Empties both reference pools, as required when an IDR slice is decoded:
// every long-term and short-term reference is unreferenced and its slot
// cleared, and both counters are reset to zero.
3850 static void idr(H264Context *h){
3853 for(i=0; i<16; i++){
3854 if (h->long_ref[i] != NULL) {
3855 unreference_pic(h, h->long_ref[i]);
3856 h->long_ref[i]= NULL;
3859 h->long_ref_count=0;
3861 for(i=0; i<h->short_ref_count; i++){
3862 unreference_pic(h, h->short_ref[i]);
3863 h->short_ref[i]= NULL;
3865 h->short_ref_count=0;
3868 /* forget old pics after a seek */
// Flush callback: discards the delayed-output queue and un-references the
// picture currently being decoded so that decoding can restart cleanly
// after a seek.
3869 static void flush_dpb(AVCodecContext *avctx){
3870 H264Context *h= avctx->priv_data;
3873 h->delayed_pic[i]= NULL;
3874 h->delayed_output_pic= NULL;
3876 if(h->s.current_picture_ptr)
3877 h->s.current_picture_ptr->reference= 0;
3882 * @return the removed picture or NULL if an error occurs
// Removes the short-term reference with the given frame_num from
// h->short_ref[] (compacting the array) without unreferencing it;
// the caller owns the returned picture.
3884 static Picture * remove_short(H264Context *h, int frame_num){
3885 MpegEncContext * const s = &h->s;
3888 if(s->avctx->debug&FF_DEBUG_MMCO)
3889 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3891 for(i=0; i<h->short_ref_count; i++){
3892 Picture *pic= h->short_ref[i];
3893 if(s->avctx->debug&FF_DEBUG_MMCO)
3894 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3895 if(pic->frame_num == frame_num){
3896 h->short_ref[i]= NULL;
// Close the gap left by the removed entry.
3897 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3898 h->short_ref_count--;
3907 * @return the removed picture or NULL if an error occurs
// Clears long-term reference slot i and returns the picture that occupied
// it (NULL if the slot was empty); does not unreference the picture.
3909 static Picture * remove_long(H264Context *h, int i){
3912 pic= h->long_ref[i];
3913 h->long_ref[i]= NULL;
3914 if(pic) h->long_ref_count--;
3920 * print short term list
// Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
// debugging is enabled; otherwise does nothing.
3922 static void print_short_term(H264Context *h) {
3924 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3925 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3926 for(i=0; i<h->short_ref_count; i++){
3927 Picture *pic= h->short_ref[i];
3928 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3934 * print long term list
// Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
// debugging is enabled; otherwise does nothing.
3936 static void print_long_term(H264Context *h) {
3938 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3939 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3940 for(i = 0; i < 16; i++){
3941 Picture *pic= h->long_ref[i];
3943 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3950 * Executes the reference picture marking (memory management control operations).
// Applies the parsed MMCO commands to the reference pools, then (unless the
// current picture became a long-term ref) inserts the current picture at the
// head of the short-term list.  Returns 0 (per the visible paths).
3952 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3953 MpegEncContext * const s = &h->s;
3955 int current_is_long=0;
3958 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3959 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3961 for(i=0; i<mmco_count; i++){
3962 if(s->avctx->debug&FF_DEBUG_MMCO)
3963 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3965 switch(mmco[i].opcode){
// Mark a short-term picture as unused for reference.
3966 case MMCO_SHORT2UNUSED:
3967 pic= remove_short(h, mmco[i].short_frame_num);
3969 unreference_pic(h, pic);
3970 else if(s->avctx->debug&FF_DEBUG_MMCO)
3971 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// Move a short-term picture into a long-term slot (evicting any previous
// occupant of that slot).
3973 case MMCO_SHORT2LONG:
3974 pic= remove_long(h, mmco[i].long_index);
3975 if(pic) unreference_pic(h, pic);
3977 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3978 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3979 h->long_ref_count++;
// Mark a long-term slot as unused for reference.
3981 case MMCO_LONG2UNUSED:
3982 pic= remove_long(h, mmco[i].long_index);
3984 unreference_pic(h, pic);
3985 else if(s->avctx->debug&FF_DEBUG_MMCO)
3986 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (MMCO_LONG case, label elided) Store the current picture as a long-term
// reference in the given slot.
3989 pic= remove_long(h, mmco[i].long_index);
3990 if(pic) unreference_pic(h, pic);
3992 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3993 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3994 h->long_ref_count++;
// Trim long-term slots at or above the new maximum index.
3998 case MMCO_SET_MAX_LONG:
3999 assert(mmco[i].long_index <= 16);
4000 // just remove the long term which index is greater than new max
4001 for(j = mmco[i].long_index; j<16; j++){
4002 pic = remove_long(h, j);
4003 if (pic) unreference_pic(h, pic);
// (MMCO_RESET case, label elided) Drop every reference, short and long.
4007 while(h->short_ref_count){
4008 pic= remove_short(h, h->short_ref[0]->frame_num);
4009 unreference_pic(h, pic);
4011 for(j = 0; j < 16; j++) {
4012 pic= remove_long(h, j);
4013 if(pic) unreference_pic(h, pic);
// Default marking: prepend the current picture to the short-term list
// (unless an MMCO already made it a long-term reference).
4020 if(!current_is_long){
4021 pic= remove_short(h, s->current_picture_ptr->frame_num);
4023 unreference_pic(h, pic);
4024 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4027 if(h->short_ref_count)
4028 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4030 h->short_ref[0]= s->current_picture_ptr;
4031 h->short_ref[0]->long_ref=0;
4032 h->short_ref_count++;
4035 print_short_term(h);
// Parses dec_ref_pic_marking() from the slice header into h->mmco[]:
//  - IDR slices: reads no_output_of_prior_pics_flag (stored in
//    s->broken_link as flag-1) and long_term_reference_flag, emitting a
//    single MMCO_LONG when the current picture is to be long-term.
//  - Otherwise, with adaptive marking, reads up to MAX_MMCO_COUNT MMCO
//    opcodes with their short frame_num / long index operands, validating
//    long indices (< 16) and opcode range; without adaptive marking,
//    synthesizes a sliding-window MMCO_SHORT2UNUSED when the reference
//    buffer is full.
// Returns 0 on success, -1 on invalid bitstream values (visible paths).
4040 static int decode_ref_pic_marking(H264Context *h){
4041 MpegEncContext * const s = &h->s;
4044 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4045 s->broken_link= get_bits1(&s->gb) -1;
4046 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4047 if(h->mmco[0].long_index == -1)
4050 h->mmco[0].opcode= MMCO_LONG;
4054 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4055 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4056 MMCOOpcode opcode= get_ue_golomb(&s->gb);
4058 h->mmco[i].opcode= opcode;
4059 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 → absolute frame_num, wrapped to the
// frame_num range.
4060 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4061 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4062 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4066 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4067 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4068 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4069 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4074 if(opcode > MMCO_LONG){
4075 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4078 if(opcode == MMCO_END)
// Sliding window: once the DPB holds ref_frame_count references, the
// oldest short-term reference is marked unused.
4083 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4085 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4086 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4087 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Computes the picture order count (POC) of the current picture per the
// SPS pic_order_cnt_type:
//   type 0: poc_msb/poc_lsb with wrap detection against prev_poc_lsb;
//   type 1: expected POC from the offset_for_ref_frame cycle plus the
//           signalled deltas;
//   type 2 (fallthrough): POC derived directly from frame_num (non-ref
//           pictures get odd POCs one less than the following ref).
// Also maintains frame_num_offset across frame_num wraps and stores the
// per-field and frame POCs in the current picture.
4097 static int init_poc(H264Context *h){
4098 MpegEncContext * const s = &h->s;
4099 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4102 if(h->nal_unit_type == NAL_IDR_SLICE){
4103 h->frame_num_offset= 0;
// frame_num wrapped since the previous picture: advance the offset.
4105 if(h->frame_num < h->prev_frame_num)
4106 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4108 h->frame_num_offset= h->prev_frame_num_offset;
4111 if(h->sps.poc_type==0){
4112 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4114 if(h->nal_unit_type == NAL_IDR_SLICE){
// Detect poc_lsb wrap-around in either direction and adjust poc_msb.
4119 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4120 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4121 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4122 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4124 h->poc_msb = h->prev_poc_msb;
4125 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4127 field_poc[1] = h->poc_msb + h->poc_lsb;
4128 if(s->picture_structure == PICT_FRAME)
4129 field_poc[1] += h->delta_poc_bottom;
4130 }else if(h->sps.poc_type==1){
4131 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4134 if(h->sps.poc_cycle_length != 0)
4135 abs_frame_num = h->frame_num_offset + h->frame_num;
4139 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4142 expected_delta_per_poc_cycle = 0;
4143 for(i=0; i < h->sps.poc_cycle_length; i++)
4144 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Whole cycles contribute the full per-cycle delta; the partial cycle
// contributes its prefix sum.
4146 if(abs_frame_num > 0){
4147 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4148 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4150 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4151 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4152 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4156 if(h->nal_ref_idc == 0)
4157 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4159 field_poc[0] = expectedpoc + h->delta_poc[0];
4160 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4162 if(s->picture_structure == PICT_FRAME)
4163 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decoding order directly.
4166 if(h->nal_unit_type == NAL_IDR_SLICE){
4169 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4170 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4176 if(s->picture_structure != PICT_BOTTOM_FIELD)
4177 s->current_picture_ptr->field_poc[0]= field_poc[0];
4178 if(s->picture_structure != PICT_TOP_FIELD)
4179 s->current_picture_ptr->field_poc[1]= field_poc[1];
4180 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4181 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4187 * decodes a slice header.
4188 * this will also call MPV_common_init() and frame_start() as needed
// NOTE(review): this extract has elided source lines throughout (the embedded
// numbering is non-contiguous); comments below describe only what is visible.
4190 static int decode_slice_header(H264Context *h){
4191 MpegEncContext * const s = &h->s;
4192 int first_mb_in_slice, pps_id;
4193 int num_ref_idx_active_override_flag;
4194 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4196 int default_ref_list_done = 0;
// A slice is a reference iff nal_ref_idc != 0; dropable is the complement.
4198 s->current_picture.reference= h->nal_ref_idc != 0;
4199 s->dropable= h->nal_ref_idc == 0;
4201 first_mb_in_slice= get_ue_golomb(&s->gb);
4203 slice_type= get_ue_golomb(&s->gb);
4205 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type values 5..9 mean "fixed for the whole picture" (spec maps them to 0..4).
4210 h->slice_type_fixed=1;
4212 h->slice_type_fixed=0;
// Remap the bitstream slice_type ordinal (P,B,I,SP,SI) to the internal *_TYPE enum.
4214 slice_type= slice_type_map[ slice_type ];
// Default reference list can be built early for I slices, or when the slice
// type matches the previous slice of this picture.
4215 if (slice_type == I_TYPE
4216 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4217 default_ref_list_done = 1;
4219 h->slice_type= slice_type;
4221 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// Activate the referenced PPS and, through it, the SPS; reject dangling ids.
4223 pps_id= get_ue_golomb(&s->gb);
4225 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4228 h->pps= h->pps_buffer[pps_id];
4229 if(h->pps.slice_group_count == 0){
4230 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4234 h->sps= h->sps_buffer[ h->pps.sps_id ];
4235 if(h->sps.log2_max_frame_num == 0){
4236 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// Rebuild dequant tables only when the active PPS actually changed.
4240 if(h->dequant_coeff_pps != pps_id){
4241 h->dequant_coeff_pps = pps_id;
4242 init_dequant_tables(h);
// Derive picture geometry from the SPS (mb_height doubles for field coding).
4245 s->mb_width= h->sps.mb_width;
4246 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4248 h->b_stride= s->mb_width*4 + 1;
4249 h->b8_stride= s->mb_width*2 + 1;
// Cropping: 2 luma samples per crop unit horizontally; vertically 2 for
// frame-only streams, 4 when field coding is possible.
4251 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4252 if(h->sps.frame_mbs_only_flag)
4253 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4255 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4257 if (s->context_initialized
4258 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4262 if (!s->context_initialized) {
4263 if (MPV_common_init(s) < 0)
// Scan tables: the plain C IDCT uses the standard zigzag/field scans; other
// (presumably permuted-coefficient) IDCTs get a transposed variant via T().
4266 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4267 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4268 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4271 for(i=0; i<16; i++){
4272 #define T(x) (x>>2) | ((x<<2) & 0xF)
4273 h->zigzag_scan[i] = T(zigzag_scan[i]);
4274 h-> field_scan[i] = T( field_scan[i]);
// qp==0 lossless bypass uses unpermuted scans.
4277 if(h->sps.transform_bypass){ //FIXME same ugly
4278 h->zigzag_scan_q0 = zigzag_scan;
4279 h->field_scan_q0 = field_scan;
4281 h->zigzag_scan_q0 = h->zigzag_scan;
4282 h->field_scan_q0 = h->field_scan;
4287 s->avctx->width = s->width;
4288 s->avctx->height = s->height;
4289 s->avctx->sample_aspect_ratio= h->sps.sar;
4290 if(!s->avctx->sample_aspect_ratio.den)
4291 s->avctx->sample_aspect_ratio.den = 1;
4293 if(h->sps.timing_info_present_flag){
4294 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
// First slice of the picture starts a new frame.
4298 if(h->slice_num == 0){
4299 if(frame_start(h) < 0)
4303 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4304 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Determine picture structure: frame, field, or MBAFF frame.
4306 h->mb_aff_frame = 0;
4307 if(h->sps.frame_mbs_only_flag){
4308 s->picture_structure= PICT_FRAME;
4310 if(get_bits1(&s->gb)) { //field_pic_flag
4311 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4313 s->picture_structure= PICT_FRAME;
// In MBAFF, first_mb_in_slice addresses MB pairs, hence the shift.
4314 first_mb_in_slice <<= h->sps.mb_aff;
4315 h->mb_aff_frame = h->sps.mb_aff;
4319 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4320 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4321 if(s->mb_y >= s->mb_height){
// curr_pic_num/max_pic_num: frame numbers for frames, doubled for fields.
4325 if(s->picture_structure==PICT_FRAME){
4326 h->curr_pic_num= h->frame_num;
4327 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4329 h->curr_pic_num= 2*h->frame_num;
4330 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4333 if(h->nal_unit_type == NAL_IDR_SLICE){
4334 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax depends on sps.poc_type (0: explicit lsb, 1: delta based).
4337 if(h->sps.poc_type==0){
4338 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4340 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4341 h->delta_poc_bottom= get_se_golomb(&s->gb);
4345 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4346 h->delta_poc[0]= get_se_golomb(&s->gb);
4348 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4349 h->delta_poc[1]= get_se_golomb(&s->gb);
4354 if(h->pps.redundant_pic_cnt_present){
4355 h->redundant_pic_count= get_ue_golomb(&s->gb);
4358 //set defaults, might be overridden a few lines later
4359 h->ref_count[0]= h->pps.ref_count[0];
4360 h->ref_count[1]= h->pps.ref_count[1];
4362 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4363 if(h->slice_type == B_TYPE){
4364 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4366 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4368 if(num_ref_idx_active_override_flag){
4369 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4370 if(h->slice_type==B_TYPE)
4371 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Sanity bound before the counts are used to index reference arrays.
4373 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4374 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4380 if(!default_ref_list_done){
4381 fill_default_ref_list(h);
4384 if(decode_ref_pic_list_reordering(h) < 0)
// Explicit weighted prediction (P/SP, or B with bipred_idc==1),
// implicit weights for B with bipred_idc==2.
4387 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4388 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4389 pred_weight_table(h);
4390 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4391 implicit_weight_table(h);
4395 if(s->current_picture.reference)
4396 decode_ref_pic_marking(h);
4398 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4399 h->cabac_init_idc = get_ue_golomb(&s->gb);
4401 h->last_qscale_diff = 0;
4402 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4403 if(s->qscale<0 || s->qscale>51){
4404 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4407 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4408 //FIXME qscale / qp ... stuff
4409 if(h->slice_type == SP_TYPE){
4410 get_bits1(&s->gb); /* sp_for_switch_flag */
4412 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4413 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking filter parameters; disable_deblocking_filter_idc semantics
// are inverted for values < 2 (1<->0) to make 1 mean "filter on".
4416 h->deblocking_filter = 1;
4417 h->slice_alpha_c0_offset = 0;
4418 h->slice_beta_offset = 0;
4419 if( h->pps.deblocking_filter_parameters_present ) {
4420 h->deblocking_filter= get_ue_golomb(&s->gb);
4421 if(h->deblocking_filter < 2)
4422 h->deblocking_filter^= 1; // 1<->0
4424 if( h->deblocking_filter ) {
4425 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4426 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// Honor the user's skip_loop_filter discard policy.
4429 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4430 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4431 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4432 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4433 h->deblocking_filter= 0;
// NOTE(review): the "?" bit count below is an unimplemented FMO stub;
// presumably this sits inside a disabled (#if 0) region elided from this
// extract — confirm against the full file before enabling.
4436 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4437 slice_group_change_cycle= get_bits(&s->gb, ?);
4442 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4443 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4445 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4447 av_get_pict_type_char(h->slice_type),
4448 pps_id, h->frame_num,
4449 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4450 h->ref_count[0], h->ref_count[1],
4452 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4454 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads a CAVLC level_prefix: counts leading zero bits before the first 1 bit
// using the raw bitstream-reader macros (OPEN/UPDATE/GET_CACHE).
// NOTE(review): some lines (declarations, return) are elided in this extract.
4464 static inline int get_level_prefix(GetBitContext *gb){
4468 OPEN_READER(re, gb);
4469 UPDATE_CACHE(re, gb);
4470 buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB: log = 32 - floor(log2(buf)).
4472 log= 32 - av_log2(buf);
4474 print_bin(buf>>(32-log), log);
4475 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the zeros plus the terminating 1 bit.
4478 LAST_SKIP_BITS(re, gb, log);
4479 CLOSE_READER(re, gb);
// Decides whether the 8x8 transform may be used for the current macroblock:
// a sub-partition smaller than 8x8, or a direct-coded 8x8 sub-block without
// direct_8x8_inference, forbids it. (Loop/return lines elided in this extract.)
4484 static inline int get_dct8x8_allowed(H264Context *h){
4487 if(!IS_SUB_8X8(h->sub_mb_type[i])
4488 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4495 * decodes a residual block (CAVLC entropy coding).
4496 * @param n block index
4497 * @param scantable scantable
4498 * @param max_coeff number of coefficients in the block
4499 * @return <0 if an error occurred
// NOTE(review): intermediate lines are elided in this extract; the structure
// follows the CAVLC residual syntax: coeff_token, trailing-one signs, levels,
// total_zeros, then run_before for coefficient placement.
4501 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4502 MpegEncContext * const s = &h->s;
// Maps total_coeff of the neighbors to one of 4 coeff_token VLC tables.
4503 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4505 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4507 //FIXME put trailing_ones into the context
// coeff_token packs (total_coeff<<2)|trailing_ones; the VLC table used
// depends on the block type (chroma DC, luma DC, or regular 4x4).
4509 if(n == CHROMA_DC_BLOCK_INDEX){
4510 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4511 total_coeff= coeff_token>>2;
4513 if(n == LUMA_DC_BLOCK_INDEX){
4514 total_coeff= pred_non_zero_count(h, 0);
4515 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4516 total_coeff= coeff_token>>2;
4518 total_coeff= pred_non_zero_count(h, n);
4519 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4520 total_coeff= coeff_token>>2;
4521 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4525 //FIXME set last_non_zero?
// Trailing ones are coded as sign bits only (+1 / -1).
4530 trailing_ones= coeff_token&3;
4531 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4532 assert(total_coeff<=16);
4534 for(i=0; i<trailing_ones; i++){
4535 level[i]= 1 - 2*get_bits1(gb);
// First non-trailing-one level: suffix_length starts at 0, or 1 when
// total_coeff > 10 and fewer than 3 trailing ones.
4539 int level_code, mask;
4540 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4541 int prefix= get_level_prefix(gb);
4543 //first coefficient has suffix_length equal to 0 or 1
4544 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4546 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4548 level_code= (prefix<<suffix_length); //part
4549 }else if(prefix==14){
4551 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4553 level_code= prefix + get_bits(gb, 4); //part
4554 }else if(prefix==15){
4555 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4556 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4558 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// When fewer than 3 trailing ones, levels +/-1 are impossible here, so
// the code space is shifted by 2.
4562 if(trailing_ones < 3) level_code += 2;
// Branchless sign decode: even level_code -> positive, odd -> negative.
4567 mask= -(level_code&1);
4568 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4571 //remaining coefficients have suffix_length > 0
4572 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented for the next level.
4573 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4574 prefix = get_level_prefix(gb);
4576 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4577 }else if(prefix==15){
4578 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4580 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4583 mask= -(level_code&1);
4584 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4585 if(level_code > suffix_limit[suffix_length])
// total_zeros is only present when the block is not completely full.
4590 if(total_coeff == max_coeff)
4593 if(n == CHROMA_DC_BLOCK_INDEX)
4594 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4596 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place coefficients from highest scan position downward, reading a
// run_before between each pair. Two copies of the loop: without (DC)
// and with dequantization (qmul).
4599 coeff_num = zeros_left + total_coeff - 1;
4600 j = scantable[coeff_num];
4602 block[j] = level[0];
4603 for(i=1;i<total_coeff;i++) {
4606 else if(zeros_left < 7){
4607 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4609 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4611 zeros_left -= run_before;
4612 coeff_num -= 1 + run_before;
4613 j= scantable[ coeff_num ];
// Dequantizing variant: scale by qmul with rounding ((x*q + 32) >> 6).
4618 block[j] = (level[0] * qmul[j] + 32)>>6;
4619 for(i=1;i<total_coeff;i++) {
4622 else if(zeros_left < 7){
4623 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4625 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4627 zeros_left -= run_before;
4628 coeff_num -= 1 + run_before;
4629 j= scantable[ coeff_num ];
4631 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the runs overran the block: corrupt stream.
4636 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4644 * decodes a P_SKIP or B_SKIP macroblock
// NOTE(review): some lines are elided in this extract (mb_type init, braces).
4646 static void decode_mb_skip(H264Context *h){
4647 MpegEncContext * const s = &h->s;
4648 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Skipped MBs have no residual: clear all non-zero-count state.
4651 memset(h->non_zero_count[mb_xy], 0, 16);
4652 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// In MBAFF, the field flag is signalled on the top MB of a pair.
4654 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4655 h->mb_field_decoding_flag= get_bits1(&s->gb);
4657 if(h->mb_field_decoding_flag)
4658 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct prediction.
4660 if( h->slice_type == B_TYPE )
4662 // just for fill_caches. pred_direct_motion will set the real mb_type
4663 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4665 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4666 pred_direct_motion(h, &mb_type);
4668 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4669 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP: a single 16x16 partition with the P-skip predicted MV and ref 0.
4675 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4677 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4678 pred_pskip_motion(h, &mx, &my);
4679 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4680 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4682 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// Commit per-MB state for later prediction/deblocking.
4685 write_back_motion(h, mb_type);
4686 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4687 s->current_picture.qscale_table[mb_xy]= s->qscale;
4688 h->slice_table[ mb_xy ]= h->slice_num;
4689 h->prev_mb_skipped= 1;
4693 * decodes a macroblock (CAVLC entropy coding path)
4694 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// NOTE(review): this extract has elided lines throughout (returns, braces,
// loop headers); comments describe only the visible control flow.
4696 static int decode_mb_cavlc(H264Context *h){
4697 MpegEncContext * const s = &h->s;
4698 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4699 int mb_type, partition_count, cbp;
4700 int dct8x8_allowed= h->pps.transform_8x8_mode;
4702 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4704 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4705 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling for P/B slices ---
4707 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4708 if(s->mb_skip_run==-1)
4709 s->mb_skip_run= get_ue_golomb(&s->gb);
4711 if (s->mb_skip_run--) {
// MBAFF: field flag is read on the top MB of a pair (or after a skip).
4716 if(h->mb_aff_frame){
4717 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4718 h->mb_field_decoding_flag = get_bits1(&s->gb);
4720 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4722 h->prev_mb_skipped= 0;
// --- mb_type: slice-type specific remap through the *_mb_type_info tables;
// intra types within P/B slices fall through to decode_intra_mb.
4724 mb_type= get_ue_golomb(&s->gb);
4725 if(h->slice_type == B_TYPE){
4727 partition_count= b_mb_type_info[mb_type].partition_count;
4728 mb_type= b_mb_type_info[mb_type].type;
4731 goto decode_intra_mb;
4733 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4735 partition_count= p_mb_type_info[mb_type].partition_count;
4736 mb_type= p_mb_type_info[mb_type].type;
4739 goto decode_intra_mb;
4742 assert(h->slice_type == I_TYPE);
4745 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4749 cbp= i_mb_type_info[mb_type].cbp;
4750 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4751 mb_type= i_mb_type_info[mb_type].type;
4754 if(h->mb_field_decoding_flag)
4755 mb_type |= MB_TYPE_INTERLACED;
4757 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw 8-bit samples, byte-aligned, copied straight into h->mb ---
4759 if(IS_INTRA_PCM(mb_type)){
4762 // we assume these blocks are very rare so we don't optimize it
4763 align_get_bits(&s->gb);
4765 // The pixels are stored in the same order as levels in h->mb array.
4766 for(y=0; y<16; y++){
4767 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4768 for(x=0; x<16; x++){
4769 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4770 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4774 const int index= 256 + 4*(y&3) + 32*(y>>2);
4776 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4777 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4781 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4783 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4784 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4788 // In deblocking, the quantizer is 0
4789 s->current_picture.qscale_table[mb_xy]= 0;
4790 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4791 // All coeffs are present
4792 memset(h->non_zero_count[mb_xy], 16, 16);
4794 s->current_picture.mb_type[mb_xy]= mb_type;
4798 fill_caches(h, mb_type, 0);
// --- intra prediction modes ---
4801 if(IS_INTRA(mb_type)){
4802 // init_top_left_availability(h);
4803 if(IS_INTRA4x4(mb_type)){
// transform_size_8x8_flag (when 8x8 transform is allowed for I4x4).
4806 if(dct8x8_allowed && get_bits1(&s->gb)){
4807 mb_type |= MB_TYPE_8x8DCT;
4811 // fill_intra4x4_pred_table(h);
// For each 4x4 (or 8x8) luma block: either the predicted mode is used
// (prev_intra4x4_pred_mode_flag) or a 3-bit remainder selects another.
4812 for(i=0; i<16; i+=di){
4813 const int mode_coded= !get_bits1(&s->gb);
4814 const int predicted_mode= pred_intra_mode(h, i);
4818 const int rem_mode= get_bits(&s->gb, 3);
4819 if(rem_mode<predicted_mode)
4824 mode= predicted_mode;
4828 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4830 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4832 write_back_intra_pred_mode(h);
4833 if( check_intra4x4_pred_mode(h) < 0)
4836 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4837 if(h->intra16x16_pred_mode < 0)
4840 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4842 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4843 if(h->chroma_pred_mode < 0)
// --- 8x8 sub-macroblock partitions (P_8x8 / B_8x8) ---
4845 }else if(partition_count==4){
4846 int i, j, sub_partition_count[4], list, ref[2][4];
4848 if(h->slice_type == B_TYPE){
4850 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4851 if(h->sub_mb_type[i] >=13){
4852 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4855 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4856 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4858 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4859 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4860 pred_direct_motion(h, &mb_type);
4862 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4864 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4865 if(h->sub_mb_type[i] >=4){
4866 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4869 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4870 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// First pass: reference indices per 8x8 block (skipped for direct blocks).
4874 for(list=0; list<2; list++){
4875 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4876 if(ref_count == 0) continue;
4877 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4881 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4882 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4883 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4892 dct8x8_allowed = get_dct8x8_allowed(h);
// Second pass: motion vector differentials per sub-partition, filling the
// mv_cache at 8x8/8x4/4x8/4x4 granularity.
4894 for(list=0; list<2; list++){
4895 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4896 if(ref_count == 0) continue;
4899 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4900 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4901 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4903 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4904 const int sub_mb_type= h->sub_mb_type[i];
4905 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4906 for(j=0; j<sub_partition_count[i]; j++){
4908 const int index= 4*i + block_width*j;
4909 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4910 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4911 mx += get_se_golomb(&s->gb);
4912 my += get_se_golomb(&s->gb);
4913 tprintf("final mv:%d %d\n", mx, my);
4915 if(IS_SUB_8X8(sub_mb_type)){
4916 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4917 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4918 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4919 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4920 }else if(IS_SUB_8X4(sub_mb_type)){
4921 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4922 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4923 }else if(IS_SUB_4X8(sub_mb_type)){
4924 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4925 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4927 assert(IS_SUB_4X4(sub_mb_type));
4928 mv_cache[ 0 ][0]= mx;
4929 mv_cache[ 0 ][1]= my;
4933 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4939 }else if(IS_DIRECT(mb_type)){
4940 pred_direct_motion(h, &mb_type);
4941 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- whole-MB inter partitions: 16x16, 16x8, 8x16 ---
4943 int list, mx, my, i;
4944 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4945 if(IS_16X16(mb_type)){
4946 for(list=0; list<2; list++){
4947 if(h->ref_count[list]>0){
4948 if(IS_DIR(mb_type, 0, list)){
4949 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4950 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4952 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4955 for(list=0; list<2; list++){
4956 if(IS_DIR(mb_type, 0, list)){
4957 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4958 mx += get_se_golomb(&s->gb);
4959 my += get_se_golomb(&s->gb);
4960 tprintf("final mv:%d %d\n", mx, my);
4962 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4964 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4967 else if(IS_16X8(mb_type)){
4968 for(list=0; list<2; list++){
4969 if(h->ref_count[list]>0){
4971 if(IS_DIR(mb_type, i, list)){
4972 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4973 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4975 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4979 for(list=0; list<2; list++){
4981 if(IS_DIR(mb_type, i, list)){
4982 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4983 mx += get_se_golomb(&s->gb);
4984 my += get_se_golomb(&s->gb);
4985 tprintf("final mv:%d %d\n", mx, my);
4987 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4989 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
4993 assert(IS_8X16(mb_type));
4994 for(list=0; list<2; list++){
4995 if(h->ref_count[list]>0){
4997 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4998 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4999 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5001 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5005 for(list=0; list<2; list++){
5007 if(IS_DIR(mb_type, i, list)){
5008 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5009 mx += get_se_golomb(&s->gb);
5010 my += get_se_golomb(&s->gb);
5011 tprintf("final mv:%d %d\n", mx, my);
5013 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5015 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5021 if(IS_INTER(mb_type))
5022 write_back_motion(h, mb_type);
// --- coded_block_pattern (not present for I16x16, it's implied by mb_type) ---
5024 if(!IS_INTRA16x16(mb_type)){
5025 cbp= get_ue_golomb(&s->gb);
5027 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5031 if(IS_INTRA4x4(mb_type))
5032 cbp= golomb_to_intra4x4_cbp[cbp];
5034 cbp= golomb_to_inter_cbp[cbp];
// transform_size_8x8_flag for inter MBs with luma residual.
5037 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5038 if(get_bits1(&s->gb))
5039 mb_type |= MB_TYPE_8x8DCT;
5041 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual decoding ---
5043 if(cbp || IS_INTRA16x16(mb_type)){
5044 int i8x8, i4x4, chroma_idx;
5045 int chroma_qp, dquant;
5046 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5047 const uint8_t *scan, *dc_scan;
5049 // fill_non_zero_count_cache(h);
// Pick field vs frame scan; qp==0 uses the bypass (_q0) scan variants.
5051 if(IS_INTERLACED(mb_type)){
5052 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5053 dc_scan= luma_dc_field_scan;
5055 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5056 dc_scan= luma_dc_zigzag_scan;
5059 dquant= get_se_golomb(&s->gb);
5061 if( dquant > 25 || dquant < -26 ){
5062 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec's mb_qp_delta arithmetic.
5066 s->qscale += dquant;
5067 if(((unsigned)s->qscale) > 51){
5068 if(s->qscale<0) s->qscale+= 52;
5069 else s->qscale-= 52;
5072 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
// I16x16: separate luma DC block, then 15-coefficient AC blocks.
5073 if(IS_INTRA16x16(mb_type)){
5074 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5075 return -1; //FIXME continue if partitioned and other return -1 too
5078 assert((cbp&15) == 0 || (cbp&15) == 15);
5081 for(i8x8=0; i8x8<4; i8x8++){
5082 for(i4x4=0; i4x4<4; i4x4++){
5083 const int index= i4x4 + 4*i8x8;
5084 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5090 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-I16x16 luma: per-8x8-block residual, with 8x8-transform blocks
// decoded as 4 interleaved CAVLC scans.
5093 for(i8x8=0; i8x8<4; i8x8++){
5094 if(cbp & (1<<i8x8)){
5095 if(IS_8x8DCT(mb_type)){
5096 DCTELEM *buf = &h->mb[64*i8x8];
5098 for(i4x4=0; i4x4<4; i4x4++){
5099 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5100 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5103 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5104 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5106 for(i4x4=0; i4x4<4; i4x4++){
5107 const int index= i4x4 + 4*i8x8;
5109 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5115 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5116 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (undequantized here, NULL qmul) then chroma AC blocks.
5122 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5123 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5129 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5130 for(i4x4=0; i4x4<4; i4x4++){
5131 const int index= 16 + 4*chroma_idx + i4x4;
5132 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5138 uint8_t * const nnz= &h->non_zero_count_cache[0];
5139 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5140 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: zero the whole non-zero-count cache.
5143 uint8_t * const nnz= &h->non_zero_count_cache[0];
5144 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5145 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5146 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5148 s->current_picture.qscale_table[mb_xy]= s->qscale;
5149 write_back_non_zero_count(h);
// Decodes the MBAFF mb_field_decoding_flag with CABAC. Context is the count
// of neighboring MB pairs (left, above) that are field-coded in this slice.
5154 static int decode_cabac_field_decoding_flag(H264Context *h) {
5155 MpegEncContext * const s = &h->s;
5156 const int mb_x = s->mb_x;
5157 const int mb_y = s->mb_y & ~1; // top MB of the current pair
5158 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride; // left pair
5159 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride; // pair above
5161 unsigned int ctx = 0;
5163 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5166 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5170 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra mb_type (I4x4 / I16x16 variants / PCM) with CABAC.
// @param ctx_base index of the first context model for this slice type
// @param intra_slice nonzero in I slices (uses a neighbor-derived context)
// Returns 0 for I4x4, 25 for PCM, or 1..24 encoding the I16x16 pred mode,
// cbp_luma and cbp_chroma. (Some lines elided in this extract.)
5173 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5174 uint8_t *state= &h->cabac_state[ctx_base];
5178 MpegEncContext * const s = &h->s;
5179 const int mba_xy = h->left_mb_xy[0];
5180 const int mbb_xy = h->top_mb_xy;
// Context: number of available neighbors that are not I4x4.
5182 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5184 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5186 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5187 return 0; /* I4x4 */
5190 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5191 return 0; /* I4x4 */
// PCM is signalled by the CABAC terminate symbol.
5194 if( get_cabac_terminate( &h->cabac ) )
5195 return 25; /* PCM */
5197 mb_type = 1; /* I16x16 */
5198 if( get_cabac( &h->cabac, &state[1] ) )
5199 mb_type += 12; /* cbp_luma != 0 */
5201 if( get_cabac( &h->cabac, &state[2] ) ) {
5202 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5203 mb_type += 4 * 2; /* cbp_chroma == 2 */
5205 mb_type += 4 * 1; /* cbp_chroma == 1 */
// Last two bins select the 16x16 intra prediction mode (0..3).
5207 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5209 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
// Decodes mb_type with CABAC, dispatching on slice type. Intra types are
// delegated to decode_cabac_intra_mb_type with a slice-specific context base.
// (Some lines elided in this extract.)
5214 static int decode_cabac_mb_type( H264Context *h ) {
5215 MpegEncContext * const s = &h->s;
5217 if( h->slice_type == I_TYPE ) {
5218 return decode_cabac_intra_mb_type(h, 3, 1);
5219 } else if( h->slice_type == P_TYPE ) {
// First bin: 0 -> inter mb_type tree, 1 -> intra (offset by 5).
5220 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5222 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5223 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5224 return 0; /* P_L0_D16x16; */
5226 return 3; /* P_8x8; */
5228 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5229 return 2; /* P_L0_D8x16; */
5231 return 1; /* P_L0_D16x8; */
5234 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5236 } else if( h->slice_type == B_TYPE ) {
5237 const int mba_xy = h->left_mb_xy[0];
5238 const int mbb_xy = h->top_mb_xy;
// Context: count of available neighbors that are neither skip nor direct.
5242 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5243 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5245 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5246 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5249 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5250 return 0; /* B_Direct_16x16 */
5252 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5253 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Remaining B types: a 4-bit code, with 4 escape values (13..15 and the
// extension below) covering intra, 8x16/8x8 and the 5-bit tail.
5256 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5257 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5258 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5259 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5261 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5262 else if( bits == 13 ) {
5263 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5264 } else if( bits == 14 )
5265 return 11; /* B_L1_L0_8x16 */
5266 else if( bits == 15 )
5267 return 22; /* B_8x8 */
// 5th bin extends the code for the remaining bidirectional types.
5269 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5270 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5272 /* TODO SI/SP frames? */
// Decodes mb_skip_flag with CABAC. Context is the count of available
// neighbors (left, above) that are themselves not skipped; P/SP and B
// slices use separate context-model banks (11.. vs 24..).
5277 static int decode_cabac_mb_skip( H264Context *h) {
5278 MpegEncContext * const s = &h->s;
5279 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5280 const int mba_xy = mb_xy - 1;
5281 const int mbb_xy = mb_xy - s->mb_stride;
5284 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5286 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5289 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5290 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5292 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
// Decodes an intra 4x4 prediction mode with CABAC: one bin selects
// "use predicted mode"; otherwise 3 fixed bins give rem_intra4x4_pred_mode,
// adjusted so the predicted mode itself is skipped in the numbering.
5295 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5298 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5301 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5302 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5303 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5305 if( mode >= pred_mode )
// Decodes intra_chroma_pred_mode with CABAC: a truncated-unary code of up
// to 3 bins, the first using a context from the neighbors' non-DC modes.
5311 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5312 const int mba_xy = h->left_mb_xy[0];
5313 const int mbb_xy = h->top_mb_xy;
5317 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5318 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5321 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5324 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5327 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5329 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Map a 4x4-block scan index (0..15) to its x/y position inside the MB,
// and (block_idx_xy) the inverse mapping from (x,y) back to the scan index.
// NOTE(review): the rows of block_idx_xy are elided in this extract.
5335 static const uint8_t block_idx_x[16] = {
5336 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5338 static const uint8_t block_idx_y[16] = {
5339 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5341 static const uint8_t block_idx_xy[4][4] = {
// Decodes the 4-bit luma coded_block_pattern with CABAC: one bin per 8x8
// block, context derived from whether the left/top neighboring 8x8 blocks
// (possibly in adjacent MBs) were coded. (Some lines elided in this extract.)
5348 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5349 MpegEncContext * const s = &h->s;
5354 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5360 x = block_idx_x[4*i8x8];
5361 y = block_idx_y[4*i8x8];
// cbp of the left / top neighbor when that neighbor MB is in this slice.
5365 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5366 cbp_a = h->left_cbp;
5367 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5372 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5374 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5377 /* No need to test for skip as we put 0 for skip block */
5378 /* No need to test for IPCM as we put 1 for IPCM block */
5380 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5381 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5386 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5387 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5391 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decodes the chroma part of coded_block_pattern (0, 1 or 2):
 * 0 = no chroma coeffs, 1 = DC only, 2 = DC+AC. Two bins over
 * contexts 77+, each conditioned on the neighbours' chroma cbp. */
5397 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
/* chroma cbp of left/top neighbours lives in bits 4-5 of their cbp */
5401 cbp_a = (h->left_cbp>>4)&0x03;
5402 cbp_b = (h-> top_cbp>>4)&0x03;
5405 if( cbp_a > 0 ) ctx++;
5406 if( cbp_b > 0 ) ctx += 2;
5407 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin decides DC-only (1) vs DC+AC (2) */
5411 if( cbp_a == 2 ) ctx++;
5412 if( cbp_b == 2 ) ctx += 2;
5413 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta: a unary code over contexts 60+, then mapped to a
 * signed value (even count -> positive, odd -> negative).
 * NOTE(review): fragmentary listing -- ctx/val declarations, the
 * INT_MIN error return and the positive-value return are missing. */
5415 static int decode_cabac_mb_dqp( H264Context *h) {
5416 MpegEncContext * const s = &h->s;
/* previous MB in decoding order; wraps to the end of the row above */
5422 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5424 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
/* first-bin context depends on the previous MB having a non-zero dqp */
5426 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5429 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5435 if(val > 52) //prevent infinite loop
/* odd unary counts map to negative deltas */
5442 return -(val + 1)/2;
/* Decodes sub_mb_type for a P macroblock: a small binary tree over
 * contexts 21..23 selecting P_L0_8x8 / 8x4 / 4x8 / 4x4.
 * NOTE(review): fragmentary listing -- the return statements of the
 * individual leaves are missing. */
5444 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5445 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5447 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5449 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes sub_mb_type for a B macroblock: binary tree over contexts
 * 36..39, returning an index into b_sub_mb_type_info (0 = direct).
 * NOTE(review): fragmentary listing -- 'type' declaration/base values
 * and the final return are missing. */
5453 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5455 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5456 return 0; /* B_Direct_8x8 */
5457 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5458 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5460 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5461 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5462 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining two bins refine the mid-range sub-types */
5465 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5466 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context 399+ is offset by how many
 * neighbouring MBs already use the 8x8 transform. */
5470 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5471 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx for block n of the given list: a unary code over
 * contexts 54+, with the first-bin context derived from the left and
 * top cached reference indices (direct-mode neighbours count as 0 in B).
 * NOTE(review): fragmentary listing -- ctx computation details and the
 * final return of the decoded index are missing. */
5474 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5475 int refa = h->ref_cache[list][scan8[n] - 1];
5476 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices, neighbours coded as direct do not raise the context */
5480 if( h->slice_type == B_TYPE) {
5481 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5483 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5492 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decodes one motion-vector-difference component (l: 0=x, 1=y).
 * First-bin context from the summed neighbour mvd magnitudes (amvd);
 * magnitude is unary up to 9, then Exp-Golomb suffix in bypass mode,
 * then a bypass sign bit.
 * NOTE(review): fragmentary listing -- ctx/mvd init, the amvd<3 branch
 * and the positive-sign return are missing. */
5502 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5503 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5504 abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* contexts 40-46 for x component, 47-53 for y */
5505 int ctxbase = (l == 0) ? 40 : 47;
5510 else if( amvd > 32 )
5515 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, capped at 9 before switching to bypass exp-golomb */
5520 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5528 while( get_cabac_bypass( &h->cabac ) ) {
5533 if( get_cabac_bypass( &h->cabac ) )
/* sign bit: 1 -> negative */
5537 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Computes the coded_block_flag context for block category 'cat'
 * (0=luma DC, 1/2=luma AC/4x4, 3=chroma DC, 4=chroma AC) from the
 * left (nza) and top (nzb) neighbour non-zero indicators; the final
 * context is offset by 4 per category.
 * NOTE(review): fragmentary listing -- the ctx accumulation lines
 * between nza/nzb and the return are missing. */
5541 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* luma DC flag is cached in bit 8 of the neighbour cbp */
5546 nza = h->left_cbp&0x100;
5547 nzb = h-> top_cbp&0x100;
5548 } else if( cat == 1 || cat == 2 ) {
5549 nza = h->non_zero_count_cache[scan8[idx] - 1];
5550 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5551 } else if( cat == 3 ) {
/* chroma DC flags live in bits 6-7 of the neighbour cbp */
5552 nza = (h->left_cbp>>(6+idx))&0x01;
5553 nzb = (h-> top_cbp>>(6+idx))&0x01;
5556 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5557 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5566 return ctx + 4 * cat;
/* Decodes one residual block with CABAC: coded_block_flag, the
 * significance/last maps, then coefficient magnitudes and signs
 * (dequantizing with qmul when non-NULL). Returns <0 on error
 * (error paths are among the lines missing from this listing). */
5569 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5570 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context-table bases; second entry of the *_field_offset pairs is for
 * field (interlaced) decoding */
5571 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5572 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5573 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5574 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5575 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* per-position context maps for the 8x8 transform (63 scan positions) */
5576 static const int significant_coeff_flag_offset_8x8[63] = {
5577 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5578 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5579 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5580 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5582 static const int last_coeff_flag_offset_8x8[63] = {
5583 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5584 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5585 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5586 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5592 int coeff_count = 0;
5595 int abslevelgt1 = 0;
5597 uint8_t *significant_coeff_ctx_base;
5598 uint8_t *last_coeff_ctx_base;
5599 uint8_t *abs_level_m1_ctx_base;
5601 /* cat: 0-> DC 16x16 n = 0
5602 * 1-> AC 16x16 n = luma4x4idx
5603 * 2-> Luma4x4 n = luma4x4idx
5604 * 3-> DC Chroma n = iCbCr
5605 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5606 * 5-> Luma8x8 n = 4 * luma8x8idx
5609 /* read coded block flag */
5611 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: clear the cached non-zero count and return */
5612 if( cat == 1 || cat == 2 )
5613 h->non_zero_count_cache[scan8[n]] = 0;
5615 h->non_zero_count_cache[scan8[16+n]] = 0;
/* select context bases for this category / field mode */
5621 significant_coeff_ctx_base = h->cabac_state
5622 + significant_coeff_flag_offset[cat]
5623 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5624 last_coeff_ctx_base = h->cabac_state
5625 + last_significant_coeff_flag_offset[cat]
5626 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5627 abs_level_m1_ctx_base = h->cabac_state
5628 + coeff_abs_level_m1_offset[cat];
/* significance map: one sig bin per position, a "last" bin on each hit */
5631 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5632 for(last= 0; last < coefs; last++) { \
5633 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5634 if( get_cabac( &h->cabac, sig_ctx )) { \
5635 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5636 index[coeff_count++] = last; \
5637 if( get_cabac( &h->cabac, last_ctx ) ) { \
/* 8x8 blocks use per-position context maps; 4x4 uses the position itself */
5643 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5644 last_coeff_flag_offset_8x8[last] );
5646 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* the final position has no sig bin: it is implied non-zero */
5648 if( last == max_coeff -1 ) {
5649 index[coeff_count++] = last;
5651 assert(coeff_count > 0);
/* record where the non-zero coeffs are, per category */
5654 h->cbp_table[mb_xy] |= 0x100;
5655 else if( cat == 1 || cat == 2 )
5656 h->non_zero_count_cache[scan8[n]] = coeff_count;
5658 h->cbp_table[mb_xy] |= 0x40 << n;
5660 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5663 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* decode magnitudes/signs in reverse scan order */
5666 for( i = coeff_count - 1; i >= 0; i-- ) {
5667 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5668 int j= scantable[index[i]];
5670 if( get_cabac( &h->cabac, ctx ) == 0 ) {
/* |level| == 1: only a bypass sign bit follows */
5672 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5675 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5676 else block[j] = ( qmul[j] + 32) >> 6;
/* |level| > 1: unary up to 15 then bypass exp-golomb suffix */
5682 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5683 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5687 if( coeff_abs >= 15 ) {
5689 while( get_cabac_bypass( &h->cabac ) ) {
5690 coeff_abs += 1 << j;
5695 if( get_cabac_bypass( &h->cabac ) )
5696 coeff_abs += 1 << j ;
/* apply sign; dequantize when qmul is provided (NULL for DC blocks) */
5701 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5702 else block[j] = coeff_abs;
5704 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5705 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * In MBAFF frames the neighbour depends on the field/frame coding of
 * the current MB pair vs its neighbours' pairs. */
5714 void inline compute_mb_neighboors(H264Context *h)
5716 MpegEncContext * const s = &h->s;
5717 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* simple (non-MBAFF) defaults: MB above and MB to the left */
5718 h->top_mb_xy = mb_xy - s->mb_stride;
5719 h->left_mb_xy[0] = mb_xy - 1;
5720 if(h->mb_aff_frame){
5721 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5722 const int top_pair_xy = pair_xy - s->mb_stride;
5723 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5724 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5725 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5726 const int bottom = (s->mb_y & 1);
/* condition selecting a top neighbour one extra row up (head missing
 * from this listing) */
5728 ? !curr_mb_frame_flag // bottom macroblock
5729 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5731 h->top_mb_xy -= s->mb_stride;
/* mixed frame/field pairs take the left neighbour from the pair top */
5733 if (left_mb_frame_flag != curr_mb_frame_flag) {
5734 h->left_mb_xy[0] = pair_xy - 1;
5741 * decodes a macroblock
5742 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CABAC macroblock decode: skip flag, MB type, (sub-)partition
 * types, reference indices, motion vectors, cbp, dqp and residuals.
 * NOTE(review): this listing is fragmentary -- many statements, else
 * branches and closing braces are missing throughout the function. */
5744 static int decode_mb_cabac(H264Context *h) {
5745 MpegEncContext * const s = &h->s;
5746 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5747 int mb_type, partition_count, cbp = 0;
5748 int dct8x8_allowed= h->pps.transform_8x8_mode;
5750 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5752 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5753 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5754 /* read skip flags */
5755 if( decode_cabac_mb_skip( h ) ) {
/* skipped MB: reset per-MB state used by neighbour context derivation */
5758 h->cbp_table[mb_xy] = 0;
5759 h->chroma_pred_mode_table[mb_xy] = 0;
5760 h->last_qscale_diff = 0;
/* MBAFF: field/frame flag is decoded at the top MB of each pair */
5766 if(h->mb_aff_frame){
5767 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5768 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5770 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5772 h->prev_mb_skipped = 0;
5774 compute_mb_neighboors(h);
5775 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5776 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* map the decoded index to type/partition info per slice type */
5780 if( h->slice_type == B_TYPE ) {
5782 partition_count= b_mb_type_info[mb_type].partition_count;
5783 mb_type= b_mb_type_info[mb_type].type;
5786 goto decode_intra_mb;
5788 } else if( h->slice_type == P_TYPE ) {
5790 partition_count= p_mb_type_info[mb_type].partition_count;
5791 mb_type= p_mb_type_info[mb_type].type;
5794 goto decode_intra_mb;
5797 assert(h->slice_type == I_TYPE);
5799 partition_count = 0;
5800 cbp= i_mb_type_info[mb_type].cbp;
5801 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5802 mb_type= i_mb_type_info[mb_type].type;
5804 if(h->mb_field_decoding_flag)
5805 mb_type |= MB_TYPE_INTERLACED;
5807 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: raw samples follow in the bytestream */
5809 if(IS_INTRA_PCM(mb_type)) {
5813 // We assume these blocks are very rare so we dont optimize it.
5814 // FIXME The two following lines get the bitstream position in the cabac
5815 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5816 ptr= h->cabac.bytestream;
5817 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5819 // The pixels are stored in the same order as levels in h->mb array.
5820 for(y=0; y<16; y++){
5821 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5822 for(x=0; x<16; x++){
5823 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5824 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5828 const int index= 256 + 4*(y&3) + 32*(y>>2);
5830 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5831 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5835 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5837 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5838 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC decoder right after the raw PCM bytes */
5842 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5844 // All blocks are present
5845 h->cbp_table[mb_xy] = 0x1ef;
5846 h->chroma_pred_mode_table[mb_xy] = 0;
5847 // In deblocking, the quantizer is 0
5848 s->current_picture.qscale_table[mb_xy]= 0;
5849 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5850 // All coeffs are present
5851 memset(h->non_zero_count[mb_xy], 16, 16);
5852 s->current_picture.mb_type[mb_xy]= mb_type;
5856 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5858 if( IS_INTRA( mb_type ) ) {
5860 if( IS_INTRA4x4( mb_type ) ) {
5861 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
/* 8x8 transform: one pred mode shared per 8x8 block */
5862 mb_type |= MB_TYPE_8x8DCT;
5863 for( i = 0; i < 16; i+=4 ) {
5864 int pred = pred_intra_mode( h, i );
5865 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5866 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5869 for( i = 0; i < 16; i++ ) {
5870 int pred = pred_intra_mode( h, i );
5871 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5873 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5876 write_back_intra_pred_mode(h);
5877 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5879 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5880 if( h->intra16x16_pred_mode < 0 ) return -1;
5882 h->chroma_pred_mode_table[mb_xy] =
5883 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5885 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5886 if( h->chroma_pred_mode < 0 ) return -1;
/* --- 8x8 partitioned inter MB: sub types, refs, motion vectors --- */
5887 } else if( partition_count == 4 ) {
5888 int i, j, sub_partition_count[4], list, ref[2][4];
5890 if( h->slice_type == B_TYPE ) {
5891 for( i = 0; i < 4; i++ ) {
5892 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5893 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5894 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5896 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5897 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5898 pred_direct_motion(h, &mb_type);
5899 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5900 for( i = 0; i < 4; i++ )
5901 if( IS_DIRECT(h->sub_mb_type[i]) )
5902 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5906 for( i = 0; i < 4; i++ ) {
5907 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5908 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5909 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for every non-direct 8x8 block of each list */
5913 for( list = 0; list < 2; list++ ) {
5914 if( h->ref_count[list] > 0 ) {
5915 for( i = 0; i < 4; i++ ) {
5916 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5917 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5918 if( h->ref_count[list] > 1 )
5919 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5925 h->ref_cache[list][ scan8[4*i]+1 ]=
5926 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5932 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition */
5934 for(list=0; list<2; list++){
5936 if(IS_DIRECT(h->sub_mb_type[i])){
5937 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5940 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5942 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5943 const int sub_mb_type= h->sub_mb_type[i];
5944 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5945 for(j=0; j<sub_partition_count[i]; j++){
5948 const int index= 4*i + block_width*j;
5949 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5950 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5951 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5953 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5954 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5955 tprintf("final mv:%d %d\n", mx, my);
/* replicate mv/mvd into every 4x4 cell of the sub-partition */
5957 if(IS_SUB_8X8(sub_mb_type)){
5958 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5959 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5960 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5961 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5963 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5964 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5965 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5966 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5967 }else if(IS_SUB_8X4(sub_mb_type)){
5968 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5969 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5971 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5972 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5973 }else if(IS_SUB_4X8(sub_mb_type)){
5974 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5975 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5977 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5978 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5980 assert(IS_SUB_4X4(sub_mb_type));
5981 mv_cache[ 0 ][0]= mx;
5982 mv_cache[ 0 ][1]= my;
5984 mvd_cache[ 0 ][0]= mx - mpx;
5985 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this block: zero the 8x8 mv/mvd region */
5989 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5990 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5991 p[0] = p[1] = p[8] = p[9] = 0;
5992 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5996 } else if( IS_DIRECT(mb_type) ) {
5997 pred_direct_motion(h, &mb_type);
5998 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5999 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6000 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
6002 int list, mx, my, i, mpx, mpy;
6003 if(IS_16X16(mb_type)){
6004 for(list=0; list<2; list++){
6005 if(IS_DIR(mb_type, 0, list)){
6006 if(h->ref_count[list] > 0 ){
6007 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6008 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6011 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6013 for(list=0; list<2; list++){
6014 if(IS_DIR(mb_type, 0, list)){
6015 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6017 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6018 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6019 tprintf("final mv:%d %d\n", mx, my);
6021 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6022 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6024 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6027 else if(IS_16X8(mb_type)){
6028 for(list=0; list<2; list++){
6029 if(h->ref_count[list]>0){
6031 if(IS_DIR(mb_type, i, list)){
6032 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6033 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6035 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6039 for(list=0; list<2; list++){
6041 if(IS_DIR(mb_type, i, list)){
6042 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6043 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6044 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6045 tprintf("final mv:%d %d\n", mx, my);
6047 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6048 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6050 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6051 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6056 assert(IS_8X16(mb_type));
6057 for(list=0; list<2; list++){
6058 if(h->ref_count[list]>0){
6060 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6061 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6062 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6064 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6068 for(list=0; list<2; list++){
6070 if(IS_DIR(mb_type, i, list)){
6071 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6072 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6073 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6075 tprintf("final mv:%d %d\n", mx, my);
6076 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6077 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6079 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6080 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6087 if( IS_INTER( mb_type ) ) {
6088 h->chroma_pred_mode_table[mb_xy] = 0;
6089 write_back_motion( h, mb_type );
/* --- cbp (intra16x16 carries its cbp in the MB type) --- */
6092 if( !IS_INTRA16x16( mb_type ) ) {
6093 cbp = decode_cabac_mb_cbp_luma( h );
6094 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6097 h->cbp_table[mb_xy] = cbp;
6099 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6100 if( decode_cabac_mb_transform_size( h ) )
6101 mb_type |= MB_TYPE_8x8DCT;
6103 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
6105 if( cbp || IS_INTRA16x16( mb_type ) ) {
6106 const uint8_t *scan, *dc_scan;
6109 if(IS_INTERLACED(mb_type)){
6110 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6111 dc_scan= luma_dc_field_scan;
6113 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6114 dc_scan= luma_dc_zigzag_scan;
6117 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6118 if( dqp == INT_MIN ){
6119 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale into the legal 0..51 range */
6123 if(((unsigned)s->qscale) > 51){
6124 if(s->qscale<0) s->qscale+= 52;
6125 else s->qscale-= 52;
6127 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6129 if( IS_INTRA16x16( mb_type ) ) {
6131 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6132 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6135 for( i = 0; i < 16; i++ ) {
6136 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6137 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6141 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6145 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6146 if( cbp & (1<<i8x8) ) {
6147 if( IS_8x8DCT(mb_type) ) {
6148 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6149 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6152 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6153 const int index = 4*i8x8 + i4x4;
6154 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6155 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6159 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6160 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC blocks */
6167 for( c = 0; c < 2; c++ ) {
6168 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6169 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6176 for( c = 0; c < 2; c++ ) {
6177 for( i = 0; i < 4; i++ ) {
6178 const int index = 16 + 4 * c + i;
6179 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6180 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6185 uint8_t * const nnz= &h->non_zero_count_cache[0];
6186 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6187 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residuals at all: clear the whole non-zero count cache */
6190 uint8_t * const nnz= &h->non_zero_count_cache[0];
6191 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6192 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6193 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6196 s->current_picture.qscale_table[mb_xy]= s->qscale;
6197 write_back_non_zero_count(h);
/* Deblocks one vertical luma edge. bS<4 edges go through the DSP
 * normal-strength filter; bS==4 (intra edge) runs the strong filter
 * inline over the 16-pixel edge. */
6203 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6205 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6206 const int alpha = alpha_table[index_a];
6207 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* tc = -1 marks "do not filter" for the DSP routine */
6212 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6213 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6215 /* 16px edge length, because bS=4 is triggered by being at
6216 * the edge of an intra MB, so all 4 bS are the same */
6217 for( d = 0; d < 16; d++ ) {
6218 const int p0 = pix[-1];
6219 const int p1 = pix[-2];
6220 const int p2 = pix[-3];
6222 const int q0 = pix[0];
6223 const int q1 = pix[1];
6224 const int q2 = pix[2];
6226 if( ABS( p0 - q0 ) < alpha &&
6227 ABS( p1 - p0 ) < beta &&
6228 ABS( q1 - q0 ) < beta ) {
/* strong filtering when the edge gradient is small enough */
6230 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6231 if( ABS( p2 - p0 ) < beta)
6233 const int p3 = pix[-4];
6235 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6236 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6237 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6240 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6242 if( ABS( q2 - q0 ) < beta)
6244 const int q3 = pix[3];
6246 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6247 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6248 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6251 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6255 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6256 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6258 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge via the DSP helpers: normal filter
 * for bS<4, intra (strong) filter otherwise. */
6264 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6266 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6267 const int alpha = alpha_table[index_a];
6268 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc is tc0+1; 0 marks "do not filter" */
6273 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6274 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6276 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the vertical luma edge of an MBAFF macroblock pair, row by
 * row: bS and qp are selected per row depending on field/frame coding,
 * then the normal (bS<4) or strong (bS==4) filter is applied. */
6280 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6282 for( i = 0; i < 16; i++, pix += stride) {
6288 int bS_index = (i >> 1);
6289 if (h->mb_field_decoding_flag) {
6291 bS_index |= (i & 1);
6294 if( bS[bS_index] == 0 ) {
/* select qp (and hence alpha/beta) for this row's neighbour pairing */
6298 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6299 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6300 alpha = alpha_table[index_a];
6301 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6304 if( bS[bS_index] < 4 ) {
6305 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6306 /* 4px edge length */
6307 const int p0 = pix[-1];
6308 const int p1 = pix[-2];
6309 const int p2 = pix[-3];
6310 const int q0 = pix[0];
6311 const int q1 = pix[1];
6312 const int q2 = pix[2];
6314 if( ABS( p0 - q0 ) < alpha &&
6315 ABS( p1 - p0 ) < beta &&
6316 ABS( q1 - q0 ) < beta ) {
/* normal filtering: optionally adjust p1/q1, then clip p0/q0 delta */
6320 if( ABS( p2 - p0 ) < beta ) {
6321 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6324 if( ABS( q2 - q0 ) < beta ) {
6325 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6329 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6330 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6331 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6332 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6335 /* 4px edge length */
6336 const int p0 = pix[-1];
6337 const int p1 = pix[-2];
6338 const int p2 = pix[-3];
6340 const int q0 = pix[0];
6341 const int q1 = pix[1];
6342 const int q2 = pix[2];
6344 if( ABS( p0 - q0 ) < alpha &&
6345 ABS( p1 - p0 ) < beta &&
6346 ABS( q1 - q0 ) < beta ) {
/* bS==4 strong filter, same structure as filter_mb_edgev */
6348 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6349 if( ABS( p2 - p0 ) < beta)
6351 const int p3 = pix[-4];
6353 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6354 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6355 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6358 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6360 if( ABS( q2 - q0 ) < beta)
6362 const int q3 = pix[3];
6364 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6365 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6366 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6369 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6373 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6374 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6376 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks the vertical chroma edge of an MBAFF macroblock pair, row
 * by row, choosing bS and qp per row like the luma variant.
 * NOTE(review): fragmentary listing -- the bS_index computation for
 * chroma is among the missing lines. */
6381 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6383 for( i = 0; i < 8; i++, pix += stride) {
6391 if( bS[bS_index] == 0 ) {
6395 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6396 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6397 alpha = alpha_table[index_a];
6398 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6399 if( bS[bS_index] < 4 ) {
/* chroma normal filter: clip p0/q0 towards each other by tc */
6400 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6401 /* 2px edge length (because we use same bS than the one for luma) */
6402 const int p0 = pix[-1];
6403 const int p1 = pix[-2];
6404 const int q0 = pix[0];
6405 const int q1 = pix[1];
6407 if( ABS( p0 - q0 ) < alpha &&
6408 ABS( p1 - p0 ) < beta &&
6409 ABS( q1 - q0 ) < beta ) {
6410 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6412 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6413 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6414 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4 chroma strong filter */
6417 const int p0 = pix[-1];
6418 const int p1 = pix[-2];
6419 const int q0 = pix[0];
6420 const int q1 = pix[1];
6422 if( ABS( p0 - q0 ) < alpha &&
6423 ABS( p1 - p0 ) < beta &&
6424 ABS( q1 - q0 ) < beta ) {
6426 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6427 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6428 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (same logic as filter_mb_edgev but
 * stepping vertically through memory via pix_next = stride). */
6434 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6436 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6437 const int alpha = alpha_table[index_a];
6438 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6439 const int pix_next = stride;
/* tc = -1 marks "do not filter" for the DSP routine */
6444 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6445 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6447 /* 16px edge length, see filter_mb_edgev */
6448 for( d = 0; d < 16; d++ ) {
6449 const int p0 = pix[-1*pix_next];
6450 const int p1 = pix[-2*pix_next];
6451 const int p2 = pix[-3*pix_next];
6452 const int q0 = pix[0];
6453 const int q1 = pix[1*pix_next];
6454 const int q2 = pix[2*pix_next];
6456 if( ABS( p0 - q0 ) < alpha &&
6457 ABS( p1 - p0 ) < beta &&
6458 ABS( q1 - q0 ) < beta ) {
6460 const int p3 = pix[-4*pix_next];
6461 const int q3 = pix[ 3*pix_next];
/* strong filtering when the edge gradient is small enough */
6463 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6464 if( ABS( p2 - p0 ) < beta) {
6466 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6467 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6468 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6471 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6473 if( ABS( q2 - q0 ) < beta) {
6475 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6476 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6477 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6480 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are modified */
6484 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6485 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6487 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge (8 pixels wide).  Chroma uses only
 * the normal-strength dsputil filter; intra MB edges (bS==4) take the
 * dedicated *_intra path.  NOTE(review): lossy extraction -- the loop/
 * branch framing around these lines is missing from this view. */
6494 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6496 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6497 const int alpha = alpha_table[index_a];
6498 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc is tc0+1 (spec), 0 meaning "do not filter" */
6503 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6504 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6506 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocking driver for one macroblock: computes boundary strengths (bS)
 * and quantizers for every internal and border edge, then dispatches to
 * the edge filters above for luma and both chroma planes.  Handles the
 * MBAFF special cases (mixed frame/field MB pairs) separately.
 * NOTE(review): lossy extraction -- many framing lines (if/else arms,
 * braces, some declarations such as bS[], qp[], i, l, edge, dir) are
 * missing from this view; comments below are best-effort. */
6510 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6511 MpegEncContext * const s = &h->s;
6512 const int mb_xy= mb_x + mb_y*s->mb_stride;
6513 int first_vertical_edge_done = 0;
6515 /* FIXME: A given frame may occupy more than one position in
6516 * the reference list. So ref2frm should be populated with
6517 * frame numbers, not indices. */
/* maps ref_cache index (-2 = unavailable, -1 = intra/none) to a frame id */
6518 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6521 // left mb is in picture
6522 && h->slice_table[mb_xy-1] != 255
6523 // and current and left pair do not have the same interlaced type
6524 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6525 // and left mb is in the same slice if deblocking_filter == 2
6526 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6527 /* First vertical edge is different in MBAFF frames
6528 * There are 8 different bS to compute and 2 different Qp
6535 first_vertical_edge_done = 1;
/* 8 bS values: one per 2-pixel group along the 16-pixel MBAFF left edge */
6536 for( i = 0; i < 8; i++ ) {
6538 int b_idx= 8 + 4 + 8*y;
6539 int bn_idx= b_idx - 1;
6541 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6543 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6544 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6546 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6547 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6548 h->non_zero_count_cache[bn_idx] != 0 ) {
/* mv/ref mismatch test: bS=1 when refs differ or any mv component
 * differs by >= 1 full pel (4 in quarter-pel units) */
6553 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6554 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6555 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6556 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6563 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6564 // Do not use s->qscale as luma quantizer because it has not the same
6565 // value in IPCM macroblocks.
6566 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6567 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6568 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6569 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6570 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6571 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6574 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6575 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6576 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6577 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6578 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
/* --- regular (non-MBAFF-special) edges: 4 vertical then 4 horizontal --- */
6581 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6582 for( dir = 0; dir < 2; dir++ )
6585 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6586 const int mb_type = s->current_picture.mb_type[mb_xy];
6587 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table==255 means the neighbour is outside the picture: skip edge 0 */
6588 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* 16x16 skip MBs need only the MB-border edge, not the internal ones */
6590 const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
6591 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6592 // how often to recheck mv-based bS when iterating between edges
6593 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6594 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6595 // how often to recheck mv-based bS when iterating along each edge
6596 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6598 if (first_vertical_edge_done) {
6600 first_vertical_edge_done = 0;
6603 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6607 for( edge = start; edge < edges; edge++ ) {
6608 /* mbn_xy: neighbor macroblock */
6609 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6610 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with 8x8 DCT only every second internal edge exists */
6614 if( (edge&1) && IS_8x8DCT(mb_type) )
6617 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6618 && !IS_INTERLACED(mb_type)
6619 && IS_INTERLACED(mbn_type)
6621 // This is a special case in the norm where the filtering must
6622 // be done twice (one each of the field) even if we are in a
6623 // frame macroblock.
6625 unsigned int tmp_linesize = 2 * linesize;
6626 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6627 int mbn_xy = mb_xy - 2 * s->mb_stride;
6631 if( IS_INTRA(mb_type) ||
6632 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6633 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6636 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6639 // Do not use s->qscale as luma quantizer because it has not the same
6640 // value in IPCM macroblocks.
6641 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6642 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6643 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6644 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6645 chroma_qp = ( h->chroma_qp +
6646 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6647 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6648 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field of the pair: one row down, same doubled strides */
6651 mbn_xy += s->mb_stride;
6652 if( IS_INTRA(mb_type) ||
6653 IS_INTRA(mbn_type) ) {
6654 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6657 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6660 // Do not use s->qscale as luma quantizer because it has not the same
6661 // value in IPCM macroblocks.
6662 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6663 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6664 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6665 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6666 chroma_qp = ( h->chroma_qp +
6667 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6668 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6669 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* --- bS computation for the ordinary edges --- */
6672 if( IS_INTRA(mb_type) ||
6673 IS_INTRA(mbn_type) ) {
6676 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6677 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6686 bS[0] = bS[1] = bS[2] = bS[3] = value;
6691 if( edge & mask_edge ) {
6692 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6695 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6696 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6697 int bn_idx= b_idx - (dir ? 8:1);
6699 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6700 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6701 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6702 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6704 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4-block bS: nnz check first, then mv/ref mismatch */
6710 for( i = 0; i < 4; i++ ) {
6711 int x = dir == 0 ? edge : i;
6712 int y = dir == 0 ? i : edge;
6713 int b_idx= 8 + 4 + x + 8*y;
6714 int bn_idx= b_idx - (dir ? 8:1);
6716 if( h->non_zero_count_cache[b_idx] != 0 ||
6717 h->non_zero_count_cache[bn_idx] != 0 ) {
6723 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6724 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6725 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6726 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6734 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6739 // Do not use s->qscale as luma quantizer because it has not the same
6740 // value in IPCM macroblocks.
6741 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6742 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6743 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6744 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* vertical edges: chroma only every second edge (chroma is half-res) */
6746 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6747 if( (edge&1) == 0 ) {
6748 int chroma_qp = ( h->chroma_qp +
6749 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6750 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6751 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6754 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6755 if( (edge&1) == 0 ) {
6756 int chroma_qp = ( h->chroma_qp +
6757 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6758 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6759 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode all macroblocks of one slice.  Three decode paths: CABAC,
 * CAVLC, and a third (apparently disabled/legacy) decode_mb() loop.
 * Error-resilience ranges are reported to the MpegEncContext error
 * concealment via ff_er_add_slice().  Returns 0 on success (per the
 * visible end paths), negative on error.
 * NOTE(review): lossy extraction -- loop heads, braces, returns and
 * #if framing are missing from this view. */
6766 static int decode_slice(H264Context *h){
6767 MpegEncContext * const s = &h->s;
/* with data partitioning only AC errors/end markers are meaningful here */
6768 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6772 if( h->pps.cabac ) {
/* CABAC payload is byte-aligned after the slice header */
6776 align_get_bits( &s->gb );
6779 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6780 ff_init_cabac_decoder( &h->cabac,
6781 s->gb.buffer + get_bits_count(&s->gb)/8,
6782 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6783 /* calculate pre-state */
/* 460 CABAC contexts, initialised from qscale per spec clause 9.3.1.1 */
6784 for( i= 0; i < 460; i++ ) {
6786 if( h->slice_type == I_TYPE )
6787 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6789 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* state encoding: (pIdx<<1)|MPS, split at the 63/64 midpoint */
6792 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6794 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6798 int ret = decode_mb_cabac(h);
6801 if(ret>=0) hl_decode_mb(h);
6803 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6804 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6807 if(ret>=0) ret = decode_mb_cabac(h);
6809 if(ret>=0) hl_decode_mb(h);
6812 eos = get_cabac_terminate( &h->cabac );
/* bytestream may legally overread by 1 due to the CABAC refill */
6814 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6815 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6816 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6820 if( ++s->mb_x >= s->mb_width ) {
6822 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6824 if(h->mb_aff_frame) {
6829 if( eos || s->mb_y >= s->mb_height ) {
6830 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6831 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* --- CAVLC path --- */
6838 int ret = decode_mb_cavlc(h);
6840 if(ret>=0) hl_decode_mb(h);
6842 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6844 ret = decode_mb_cavlc(h);
6846 if(ret>=0) hl_decode_mb(h);
6851 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6852 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6857 if(++s->mb_x >= s->mb_width){
6859 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6861 if(h->mb_aff_frame) {
6864 if(s->mb_y >= s->mb_height){
6865 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit consumption distinguishes clean end from corrupt stream */
6867 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6868 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6872 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6879 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6880 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6881 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6882 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6886 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* --- third path: legacy decode_mb() loop.  NOTE(review): this path
 * uses s->gb (no '&') with get_bits_count, unlike the code above --
 * presumably dead/#if'd-out code; confirm against the full file. */
6895 for(;s->mb_y < s->mb_height; s->mb_y++){
6896 for(;s->mb_x < s->mb_width; s->mb_x++){
6897 int ret= decode_mb(h);
6902 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6903 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6908 if(++s->mb_x >= s->mb_width){
6910 if(++s->mb_y >= s->mb_height){
6911 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6912 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6916 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6923 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ /* garbled in extraction; surrounding lines (6911, 6924) read: if(get_bits_count(s->gb) >= s->gb.size_in_bits){ */
/* tail of the legacy decode_mb() loop in decode_slice(): clean end if
 * the bitstream was consumed exactly, error range otherwise */
6924 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6925 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6929 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6936 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6939 return -1; //not reached
/* Parse SEI "user data unregistered" (type 5).  The first 16 bytes are
 * the UUID; the payload after it is scanned for an x264 version banner
 * so the decoder can enable workarounds for known x264 bugs.
 * NOTE(review): lossy extraction -- the line NUL-terminating user_data
 * before the sscanf is not visible here; confirm it exists in the full
 * file, otherwise sscanf could read past the buffer. */
6942 static int decode_unregistered_user_data(H264Context *h, int size){
6943 MpegEncContext * const s = &h->s;
6944 uint8_t user_data[16+256];
6950 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6951 user_data[i]= get_bits(&s->gb, 8);
/* e==1 means the build number was successfully scanned */
6955 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6956 if(e==1 && build>=0)
6957 h->x264_build= build;
6959 if(s->avctx->debug & FF_DEBUG_BUGS)
6960 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes beyond what fit in user_data */
6963 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: a sequence of (type, size) messages, each field
 * coded as a run of 0xFF bytes plus a final byte.  Only unregistered
 * user data is interpreted; other message types are skipped.
 * NOTE(review): lossy extraction -- the switch framing is missing. */
6968 static int decode_sei(H264Context *h){
6969 MpegEncContext * const s = &h->s;
/* +16: need at least one more (type,size) byte pair to continue */
6971 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6976 type+= show_bits(&s->gb, 8);
6977 }while(get_bits(&s->gb, 8) == 255);
6981 size+= show_bits(&s->gb, 8);
6982 }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): the trailing ';' below looks like a 'return -1;'
 * continuation line lost in extraction -- confirm against full file */
6986 if(decode_unregistered_user_data(h, size) < 0);
6990 skip_bits(&s->gb, 8*size);
6993 //FIXME check bits here
6994 align_get_bits(&s->gb);
/* Parse (and discard) HRD parameters from the VUI (spec Annex E.1.2).
 * Nothing is stored; the syntax elements are only consumed so the
 * bitstream position stays correct. */
7000 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7001 MpegEncContext * const s = &h->s;
7003 cpb_count = get_ue_golomb(&s->gb) + 1;
7004 get_bits(&s->gb, 4); /* bit_rate_scale */
7005 get_bits(&s->gb, 4); /* cpb_size_scale */
7006 for(i=0; i<cpb_count; i++){
7007 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7008 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7009 get_bits1(&s->gb); /* cbr_flag */
7011 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7012 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7013 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7014 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse VUI parameters (spec Annex E.1.1).  Stores sample aspect ratio,
 * timing info and bitstream-restriction values into *sps; most other
 * fields are read only to keep bitstream position correct. */
7017 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7018 MpegEncContext * const s = &h->s;
7019 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7020 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7022 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7024 if( aspect_ratio_info_present_flag ) {
7025 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den, else look up the table */
7026 if( aspect_ratio_idc == EXTENDED_SAR ) {
7027 sps->sar.num= get_bits(&s->gb, 16);
7028 sps->sar.den= get_bits(&s->gb, 16);
7029 }else if(aspect_ratio_idc < 16){
7030 sps->sar= pixel_aspect[aspect_ratio_idc];
7032 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7039 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7041 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7042 get_bits1(&s->gb); /* overscan_appropriate_flag */
7045 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7046 get_bits(&s->gb, 3); /* video_format */
7047 get_bits1(&s->gb); /* video_full_range_flag */
7048 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7049 get_bits(&s->gb, 8); /* colour_primaries */
7050 get_bits(&s->gb, 8); /* transfer_characteristics */
7051 get_bits(&s->gb, 8); /* matrix_coefficients */
7055 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7056 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7057 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7060 sps->timing_info_present_flag = get_bits1(&s->gb);
7061 if(sps->timing_info_present_flag){
7062 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7063 sps->time_scale = get_bits_long(&s->gb, 32);
7064 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7067 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7068 if(nal_hrd_parameters_present_flag)
7069 decode_hrd_parameters(h, sps);
7070 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7071 if(vcl_hrd_parameters_present_flag)
7072 decode_hrd_parameters(h, sps);
7073 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7074 get_bits1(&s->gb); /* low_delay_hrd_flag */
7075 get_bits1(&s->gb); /* pic_struct_present_flag */
7077 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7078 if(sps->bitstream_restriction_flag){
7079 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7080 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7081 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7082 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7083 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames is kept: it bounds the output delay */
7084 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7085 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parse one scaling list (16 or 64 entries) in zig-zag order.
 * If the list is absent, or delta coding yields 0 for the first entry,
 * the caller-supplied default_list is copied instead.  A delta of 0
 * on a later entry repeats the previous value ('last').
 * NOTE(review): lossy extraction -- the else/if(next)/break framing
 * lines are missing from this view. */
7091 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, const uint8_t *default_list){
7092 MpegEncContext * const s = &h->s;
7093 int i, last = 8, next = 8;
7094 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7095 if(!get_bits1(&s->gb)) /* matrix not written, we use the default one */
7096 memcpy(factors, default_list, size*sizeof(uint8_t));
7098 for(i=0;i<size;i++){
/* values are mod-256 deltas from the previous entry */
7100 next = (last + get_se_golomb(&s->gb)) & 0xff;
7101 if(!i && !next){ /* matrix not written, we use the default one */
7102 memcpy(factors, default_list, size*sizeof(uint8_t));
7105 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per spec 7.4.2.1.1: an absent list falls back to the
 * previous list of the same class, to the SPS matrices (for a PPS when
 * the SPS carried matrices), or to the flat defaults. */
7109 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7110 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7111 MpegEncContext * const s = &h->s;
7112 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* fallback for the "first list of each class" (Intra-Y 4x4/Inter-Y 4x4/8x8) */
7113 const uint8_t *fallback[4] = {
7114 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7115 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7116 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7117 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7119 if(get_bits1(&s->gb)){
7120 sps->scaling_matrix_present |= is_sps;
7121 decode_scaling_list(h,scaling_matrix4[0],16,fallback[0]); // Intra, Y
7122 decode_scaling_list(h,scaling_matrix4[1],16,scaling_matrix4[0]); // Intra, Cr
7123 decode_scaling_list(h,scaling_matrix4[2],16,scaling_matrix4[1]); // Intra, Cb
7124 decode_scaling_list(h,scaling_matrix4[3],16,fallback[1]); // Inter, Y
7125 decode_scaling_list(h,scaling_matrix4[4],16,scaling_matrix4[3]); // Inter, Cr
7126 decode_scaling_list(h,scaling_matrix4[5],16,scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS, or in PPS when 8x8 transform is on */
7127 if(is_sps || pps->transform_8x8_mode){
7128 decode_scaling_list(h,scaling_matrix8[0],64,fallback[2]); // Intra, Y
7129 decode_scaling_list(h,scaling_matrix8[1],64,fallback[3]); // Inter, Y
7131 } else if(fallback_sps) {
7132 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7133 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parse a sequence parameter set NAL (spec 7.3.2.1) into
 * h->sps_buffer[sps_id].  High-profile extras (chroma format, bit
 * depths, scaling matrices) are handled for profile_idc >= 100.
 * NOTE(review): lossy extraction -- error returns and some framing
 * lines are missing from this view. */
7137 static inline int decode_seq_parameter_set(H264Context *h){
7138 MpegEncContext * const s = &h->s;
7139 int profile_idc, level_idc;
7143 profile_idc= get_bits(&s->gb, 8);
7144 get_bits1(&s->gb); //constraint_set0_flag
7145 get_bits1(&s->gb); //constraint_set1_flag
7146 get_bits1(&s->gb); //constraint_set2_flag
7147 get_bits1(&s->gb); //constraint_set3_flag
7148 get_bits(&s->gb, 4); // reserved
7149 level_idc= get_bits(&s->gb, 8);
7150 sps_id= get_ue_golomb(&s->gb);
7152 sps= &h->sps_buffer[ sps_id ];
7153 sps->profile_idc= profile_idc;
7154 sps->level_idc= level_idc;
7156 if(sps->profile_idc >= 100){ //high profile
7157 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7158 get_bits1(&s->gb); //residual_color_transform_flag
7159 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7160 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7161 sps->transform_bypass = get_bits1(&s->gb);
7162 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7164 sps->scaling_matrix_present = 0;
7166 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7167 sps->poc_type= get_ue_golomb(&s->gb);
7169 if(sps->poc_type == 0){ //FIXME #define
7170 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7171 } else if(sps->poc_type == 1){//FIXME #define
7172 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7173 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7174 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7175 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7177 for(i=0; i<sps->poc_cycle_length; i++)
7178 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7180 if(sps->poc_type > 2){
7181 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7185 sps->ref_frame_count= get_ue_golomb(&s->gb);
7186 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7187 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7189 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7190 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7191 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7192 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7193 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7196 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7197 if(!sps->frame_mbs_only_flag)
7198 sps->mb_aff= get_bits1(&s->gb);
7202 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7204 sps->crop= get_bits1(&s->gb);
7206 sps->crop_left = get_ue_golomb(&s->gb);
7207 sps->crop_right = get_ue_golomb(&s->gb);
7208 sps->crop_top = get_ue_golomb(&s->gb);
7209 sps->crop_bottom= get_ue_golomb(&s->gb);
7210 if(sps->crop_left || sps->crop_top){
7211 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7217 sps->crop_bottom= 0;
7220 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7221 if( sps->vui_parameters_present_flag )
7222 decode_vui_parameters(h, sps);
7224 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7225 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7226 sps_id, sps->profile_idc, sps->level_idc,
7228 sps->ref_frame_count,
7229 sps->mb_width, sps->mb_height,
7230 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7231 sps->direct_8x8_inference_flag ? "8B8" : "",
7232 sps->crop_left, sps->crop_right,
7233 sps->crop_top, sps->crop_bottom,
7234 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a picture parameter set NAL (spec 7.3.2.2) into
 * h->pps_buffer[pps_id].  FMO (slice_group_count > 1) is not supported;
 * the remaining slice-group syntax is kept only as spec excerpts.
 * NOTE(review): lossy extraction -- returns and framing are missing. */
7240 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7241 MpegEncContext * const s = &h->s;
7242 int pps_id= get_ue_golomb(&s->gb);
7243 PPS *pps= &h->pps_buffer[pps_id];
7245 pps->sps_id= get_ue_golomb(&s->gb);
7246 pps->cabac= get_bits1(&s->gb);
7247 pps->pic_order_present= get_bits1(&s->gb);
7248 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7249 if(pps->slice_group_count > 1 ){
7250 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7251 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7252 switch(pps->mb_slice_group_map_type){
7255 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7256 | run_length[ i ] |1 |ue(v) |
7261 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7263 | top_left_mb[ i ] |1 |ue(v) |
7264 | bottom_right_mb[ i ] |1 |ue(v) |
7272 | slice_group_change_direction_flag |1 |u(1) |
7273 | slice_group_change_rate_minus1 |1 |ue(v) |
7278 | slice_group_id_cnt_minus1 |1 |ue(v) |
7279 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7281 | slice_group_id[ i ] |1 |u(v) |
7286 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7287 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7288 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7289 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7293 pps->weighted_pred= get_bits1(&s->gb);
7294 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7295 pps->init_qp= get_se_golomb(&s->gb) + 26;
7296 pps->init_qs= get_se_golomb(&s->gb) + 26;
7297 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7298 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7299 pps->constrained_intra_pred= get_bits1(&s->gb);
7300 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* default: flat (all-16) scaling matrices */
7302 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7303 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing high-profile extension, present iff bits remain */
7305 if(get_bits_count(&s->gb) < bit_length){
7306 pps->transform_8x8_mode= get_bits1(&s->gb);
7307 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7308 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7311 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7312 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7313 pps_id, pps->sps_id,
7314 pps->cabac ? "CABAC" : "CAVLC",
7315 pps->slice_group_count,
7316 pps->ref_count[0], pps->ref_count[1],
7317 pps->weighted_pred ? "weighted" : "",
7318 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7319 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7320 pps->constrained_intra_pred ? "CONSTR" : "",
7321 pps->redundant_pic_cnt_present ? "REDU" : "",
7322 pps->transform_8x8_mode ? "8x8DCT" : ""
* finds the end of the current frame in the bitstream.
* @return the position of the first byte of the next frame, or -1
7333 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7336 ParseContext *pc = &(h->s.parse_context);
7337 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7338 // mb_addr= pc->mb_addr - 1;
/* scan for start codes; state is a sliding window of the last 4 bytes,
 * masked so the nal_ref_idc bits of the NAL header are ignored */
7340 for(i=0; i<=buf_size; i++){
/* 0x101/0x102/0x105: slice / partition-A / IDR-slice NAL types */
7341 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7342 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7343 if(pc->frame_start_found){
7344 // If there isn't one more byte in the buffer
7345 // the test on first_mb_in_slice cannot be done yet
7346 // do it at next call.
7347 if (i >= buf_size) break;
/* top bit set => ue(v) first_mb_in_slice == 0 => new frame starts */
7348 if (buf[i] & 0x80) {
7349 // first_mb_in_slice is 0, probably the first nal of a new
7351 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7353 pc->frame_start_found= 0;
7357 pc->frame_start_found = 1;
/* 0x107/0x108/0x109: SPS / PPS / AUD always terminate a frame */
7359 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7360 if(pc->frame_start_found){
7362 pc->frame_start_found= 0;
7367 state= (state<<8) | buf[i];
7371 return END_NOT_FOUND;
/* AVCodecParser callback: accumulate input until find_frame_end()
 * locates a full access unit, then hand the assembled frame back via
 * poutbuf/poutbuf_size (empty output until a frame is complete). */
7374 static int h264_parse(AVCodecParserContext *s,
7375 AVCodecContext *avctx,
7376 uint8_t **poutbuf, int *poutbuf_size,
7377 const uint8_t *buf, int buf_size)
7379 H264Context *h = s->priv_data;
7380 ParseContext *pc = &h->s.parse_context;
7383 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame buffers partial frames; <0 means "need more data" */
7385 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7391 *poutbuf = (uint8_t *)buf;
7392 *poutbuf_size = buf_size;
/* AVCodecParser split callback: return the offset of the in-band
 * extradata, i.e. everything up to (but not including) the first
 * non-SPS/PPS/AUD NAL after an SPS has been seen. */
7396 static int h264_split(AVCodecContext *avctx,
7397 const uint8_t *buf, int buf_size)
7400 uint32_t state = -1;
7403 for(i=0; i<=buf_size; i++){
7404 if((state&0xFFFFFF1F) == 0x107)
7406 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7408 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes that belong to the next start code */
7410 while(i>4 && buf[i-5]==0) i--;
7415 state= (state<<8) | buf[i];
/* Iterate over all NAL units in buf, dispatch each by nal_unit_type
 * (slices, partitions, SEI, SPS, PPS, ...), then finalize the current
 * picture (POC bookkeeping, reference marking).  Supports both Annex-B
 * start-code streams and AVC length-prefixed streams (h->is_avc).
 * Returns the number of bytes consumed (per visible end path).
 * NOTE(review): lossy extraction -- loop heads, case labels and some
 * framing lines are missing from this view. */
7421 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7422 MpegEncContext * const s = &h->s;
7423 AVCodecContext * const avctx= s->avctx;
7427 for(i=0; i<50; i++){
7428 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7432 s->current_picture_ptr= NULL;
7441 if(buf_index >= buf_size) break;
/* AVC mode: big-endian length prefix instead of a start code */
7443 for(i = 0; i < h->nal_length_size; i++)
7444 nalsize = (nalsize << 8) | buf[buf_index++];
7446 // start code prefix search
7447 for(; buf_index + 3 < buf_size; buf_index++){
7448 // this should allways succeed in the first iteration
7449 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7453 if(buf_index+3 >= buf_size) break;
/* decode_nal strips emulation-prevention bytes into ptr/dst_length */
7458 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7459 if(ptr[dst_length - 1] == 0) dst_length--;
7460 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7462 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7463 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7466 if (h->is_avc && (nalsize != consumed))
7467 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7469 buf_index += consumed;
/* skip non-reference NALs when hurrying / discarding non-refs */
7471 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7472 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7475 switch(h->nal_unit_type){
7477 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7479 init_get_bits(&s->gb, ptr, bit_length);
7481 h->inter_gb_ptr= &s->gb;
7482 s->data_partitioning = 0;
7484 if(decode_slice_header(h) < 0){
7485 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* only decode the slice when no skip/hurry rule discards it */
7488 if(h->redundant_pic_count==0 && s->hurry_up < 5
7489 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7490 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7491 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7492 && avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + its own bit context */
7496 init_get_bits(&s->gb, ptr, bit_length);
7498 h->inter_gb_ptr= NULL;
7499 s->data_partitioning = 1;
7501 if(decode_slice_header(h) < 0){
7502 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7506 init_get_bits(&h->intra_gb, ptr, bit_length);
7507 h->intra_gb_ptr= &h->intra_gb;
7510 init_get_bits(&h->inter_gb, ptr, bit_length);
7511 h->inter_gb_ptr= &h->inter_gb;
/* partition C arrived: decode if A (and the skip rules) allow it */
7513 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7515 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7516 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7517 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7518 && avctx->skip_frame < AVDISCARD_ALL)
7522 init_get_bits(&s->gb, ptr, bit_length);
7526 init_get_bits(&s->gb, ptr, bit_length);
7527 decode_seq_parameter_set(h);
7529 if(s->flags& CODEC_FLAG_LOW_DELAY)
7532 if(avctx->has_b_frames < 2)
7533 avctx->has_b_frames= !s->low_delay;
7536 init_get_bits(&s->gb, ptr, bit_length);
7538 decode_picture_parameter_set(h, bit_length);
7542 case NAL_END_SEQUENCE:
7543 case NAL_END_STREAM:
7544 case NAL_FILLER_DATA:
7546 case NAL_AUXILIARY_SLICE:
7549 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7553 if(!s->current_picture_ptr) return buf_index; //no frame
/* picture finalization: type, keyframe flag, POC state, ref marking */
7555 s->current_picture_ptr->pict_type= s->pict_type;
7556 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7558 h->prev_frame_num_offset= h->frame_num_offset;
7559 h->prev_frame_num= h->frame_num;
7560 if(s->current_picture_ptr->reference){
7561 h->prev_poc_msb= h->poc_msb;
7562 h->prev_poc_lsb= h->poc_lsb;
7564 if(s->current_picture_ptr->reference)
7565 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7575 * returns the number of bytes consumed for building the current frame
/*
 * Translate the internal decode position into the byte count reported back
 * to the caller. In CODEC_FLAG_TRUNCATED mode the parser buffers data
 * across calls, so pos is first rebased on parse_context.last_index and
 * clamped at 0. A zero result is bumped to 1 so the caller cannot spin
 * forever re-feeding the same buffer, and a position within 10 bytes of
 * the end is snapped to buf_size.
 * NOTE(review): the function's tail (return statement) is not visible in
 * this chunk — interior lines of the file are missing here.
 */
7577 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7578 if(s->flags&CODEC_FLAG_TRUNCATED){
7579 pos -= s->parse_context.last_index;
7580 if(pos<0) pos=0; // FIXME remove (unneeded?)
7584 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7585 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * AVCodec.decode callback: consume one packet of H.264 bitstream, decode
 * its NAL units, and (possibly) emit one picture in display order through
 * *pict / *data_size.
 * NOTE(review): interior lines of this function are missing from the
 * visible chunk; comments below describe only what the visible code shows.
 */
7591 static int decode_frame(AVCodecContext *avctx,
7592 void *data, int *data_size,
7593 uint8_t *buf, int buf_size)
7595 H264Context *h = avctx->priv_data;
7596 MpegEncContext *s = &h->s;
7597 AVFrame *pict = data;
7600 s->flags= avctx->flags;
7601 s->flags2= avctx->flags2;
7603 /* no supplementary picture */
7604 if (buf_size == 0) {
/* Truncated-input mode: let the built-in parser find the frame boundary
 * and accumulate partial packets before decoding. */
7608 if(s->flags&CODEC_FLAG_TRUNCATED){
7609 int next= find_frame_end(h, buf, buf_size);
7611 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7613 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata (MP4-style configuration record)
 * to extract the SPS/PPS sets and the NAL length prefix size. */
7616 if(h->is_avc && !h->got_avcC) {
7617 int i, cnt, nalsize;
7618 unsigned char *p = avctx->extradata;
7619 if(avctx->extradata_size < 7) {
7620 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7624 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7627 /* sps and pps in the avcC always have length coded with 2 bytes,
7628 so put a fake nal_length_size = 2 while parsing them */
7629 h->nal_length_size = 2;
7630 // Decode sps from avcC
7631 cnt = *(p+5) & 0x1f; // Number of sps
7633 for (i = 0; i < cnt; i++) {
7634 nalsize = BE_16(p) + 2; // 2-byte big-endian length field + payload
7635 if(decode_nal_units(h, p, nalsize) < 0) {
7636 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7641 // Decode pps from avcC
7642 cnt = *(p++); // Number of pps
7643 for (i = 0; i < cnt; i++) {
7644 nalsize = BE_16(p) + 2;
7645 if(decode_nal_units(h, p, nalsize) != nalsize) {
7646 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7651 // Now store right nal length size, that will be use to parse all other nals
7652 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7653 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata; parse it once
 * before the first picture. */
7657 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7658 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7662 buf_index=decode_nal_units(h, buf, buf_size);
7666 //FIXME do something with unavailable reference frames
7668 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7669 if(!s->current_picture_ptr){
7670 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7675 Picture *out = s->current_picture_ptr;
7676 #if 0 //decode order
7677 *data_size = sizeof(AVFrame);
7679 /* Sort B-frames into display order */
7680 Picture *cur = s->current_picture_ptr;
7681 Picture *prev = h->delayed_output_pic;
7686 int dropped_frame = 0;
/* Honor the stream's declared reorder depth when it exceeds our current
 * output delay. */
7689 if(h->sps.bitstream_restriction_flag
7690 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7691 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the just-decoded picture to the delayed-output queue. */
7695 while(h->delayed_pic[pics]) pics++;
7696 h->delayed_pic[pics++] = cur;
7697 if(cur->reference == 0)
7700 for(i=0; h->delayed_pic[i]; i++)
7701 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the queued picture with the lowest POC (stopping at a key frame)
 * as the next output candidate. */
7704 out = h->delayed_pic[0];
7705 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7706 if(h->delayed_pic[i]->poc < out->poc){
7707 out = h->delayed_pic[i];
/* Heuristically grow has_b_frames when output POCs go backwards, i.e. the
 * stream needs more reorder delay than currently assumed. */
7711 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7712 if(prev && pics <= s->avctx->has_b_frames)
7714 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7716 ((!cross_idr && prev && out->poc > prev->poc + 2)
7717 || cur->pict_type == B_TYPE)))
7720 s->avctx->has_b_frames++;
7723 else if(out_of_order)
/* Pop the chosen picture from the queue once the delay budget is used. */
7726 if(out_of_order || pics > s->avctx->has_b_frames){
7727 dropped_frame = (out != h->delayed_pic[out_idx]);
7728 for(i=out_idx; h->delayed_pic[i]; i++)
7729 h->delayed_pic[i] = h->delayed_pic[i+1];
7732 if(prev == out && !dropped_frame)
7735 *data_size = sizeof(AVFrame);
/* Release the reference held by the previously output picture. */
7736 if(prev && prev != out && prev->reference == 1)
7737 prev->reference = 0;
7738 h->delayed_output_pic = out;
7742 *pict= *(AVFrame*)out;
7744 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7747 assert(pict->data[0] || !*data_size);
7748 ff_print_debug_info(s, pict);
7749 //printf("out %d\n", (int)pict->data[0]);
7752 /* Return the Picture timestamp as the frame number */
7753 /* we substract 1 because it is added on utils.c */
7754 avctx->frame_number = s->picture_number - 1;
7756 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbours: a neighbour counts as available only if it lies inside the
 * picture AND belongs to the same slice (slice_table entry equals the
 * current slice_num).
 *   [0] top-left, [1] top, [2] top-right, [3] left; [4]/[5] are constants
 *   (see the FIXMEs). Indices are offsets from mb_xy in the mb_stride grid.
 * NOTE(review): the guard for the top row (s->mb_y) is not visible in this
 * chunk — interior lines are missing.
 */
7759 static inline void fill_mb_avail(H264Context *h){
7760 MpegEncContext * const s = &h->s;
7761 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7764 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7765 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7766 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7772 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7773 h->mb_avail[4]= 1; //FIXME move out
7774 h->mb_avail[5]= 0; //FIXME move out
7780 #define SIZE (COUNT*40)
/*
 * Self-test driver (presumably compiled under #ifdef TEST — the function
 * header is not visible in this chunk). Exercises, in order: exp-Golomb
 * write/read round-trips, the 4x4 (I)DCT, the quantizer, and the NAL
 * escape/unescape layer, printing a report for each stage.
 */
7786 // int int_temp[10000];
7788 AVCodecContext avctx;
7790 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb: encode 0..COUNT-1, decode back, compare --- */
7792 init_put_bits(&pb, temp, SIZE);
7793 printf("testing unsigned exp golomb\n");
7794 for(i=0; i<COUNT; i++){
7796 set_ue_golomb(&pb, i);
7797 STOP_TIMER("set_ue_golomb");
7799 flush_put_bits(&pb);
7801 init_get_bits(&gb, temp, 8*SIZE);
7802 for(i=0; i<COUNT; i++){
7805 s= show_bits(&gb, 24);
7808 j= get_ue_golomb(&gb);
7810 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7813 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb: same round-trip centered on 0 --- */
7817 init_put_bits(&pb, temp, SIZE);
7818 printf("testing signed exp golomb\n");
7819 for(i=0; i<COUNT; i++){
7821 set_se_golomb(&pb, i - COUNT/2);
7822 STOP_TIMER("set_se_golomb");
7824 flush_put_bits(&pb);
7826 init_get_bits(&gb, temp, 8*SIZE);
7827 for(i=0; i<COUNT; i++){
7830 s= show_bits(&gb, 24);
7833 j= get_se_golomb(&gb);
7834 if(j != i - COUNT/2){
7835 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7838 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip on random blocks; accumulate the absolute
 * reconstruction error and its maximum --- */
7841 printf("testing 4x4 (I)DCT\n");
7844 uint8_t src[16], ref[16];
7845 uint64_t error= 0, max_error=0;
7847 for(i=0; i<COUNT; i++){
7849 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7850 for(j=0; j<16; j++){
7851 ref[j]= random()%255;
7852 src[j]= random()%255;
7855 h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients to mimic the quant/dequant rounding of the codec
 * (factor 4, then *4/5 per odd row/column index). */
7858 for(j=0; j<16; j++){
7859 // printf("%d ", block[j]);
7860 block[j]= block[j]*4;
7861 if(j&1) block[j]= (block[j]*4 + 2)/5;
7862 if(j&4) block[j]= (block[j]*4 + 2)/5;
7866 s->dsp.h264_idct_add(ref, block, 4);
7867 /* for(j=0; j<16; j++){
7868 printf("%d ", ref[j]);
7872 for(j=0; j<16; j++){
7873 int diff= ABS(src[j] - ref[j]);
7876 max_error= FFMAX(max_error, diff);
7879 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values on random blocks --- */
7881 printf("testing quantizer\n");
7882 for(qp=0; qp<52; qp++){
7884 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape a random bitstream (with forced zero bytes) into
 * a NAL, unescape it, and verify length and content round-trip --- */
7888 printf("Testing NAL layer\n");
7890 uint8_t bitstream[COUNT];
7891 uint8_t nal[COUNT*2];
7893 memset(&h, 0, sizeof(H264Context));
7895 for(i=0; i<COUNT; i++){
7903 for(j=0; j<COUNT; j++){
7904 bitstream[j]= (random() % 255) + 1;
/* Sprinkle zero bytes at random positions to exercise the emulation-
 * prevention escaping. */
7907 for(j=0; j<zeros; j++){
7908 int pos= random() % COUNT;
7909 while(bitstream[pos] == 0){
7918 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7920 printf("encoding failed\n");
7924 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7928 if(out_length != COUNT){
7929 printf("incorrect length %d %d\n", out_length, COUNT);
7933 if(consumed != nal_length){
7934 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7938 if(memcmp(bitstream, out, COUNT)){
7939 printf("missmatch\n");
7944 printf("Testing RBSP\n");
/*
 * AVCodec.close callback: release per-context resources — the RBSP
 * unescape buffer and the decoder's lookup tables.
 * NOTE(review): the tail (MPV cleanup / return) is not visible in this
 * chunk — interior lines are missing.
 */
7952 static int decode_end(AVCodecContext *avctx)
7954 H264Context *h = avctx->priv_data;
7955 MpegEncContext *s = &h->s;
7957 av_freep(&h->rbsp_buffer);
7958 free_tables(h); //FIXME cleanup init stuff perhaps
7961 // memset(h, 0, sizeof(H264Context));
/*
 * Decoder registration entry. Capabilities: direct rendering (DR1),
 * truncated-input handling, and delayed output (B-frame reordering);
 * slice drawing is deliberately disabled (commented out).
 * NOTE(review): several initializer fields fall in lines missing from
 * this chunk.
 */
7967 AVCodec h264_decoder = {
7971 sizeof(H264Context),
7976 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7980 AVCodecParser h264_parser = {
7982 sizeof(H264Context),