2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
/* Poison macros: these MpegEncContext field names must not be used directly
 * in the H.264 code (mb_type carries the per-MB information instead). */
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
/* Cache indices of the separate luma/chroma DC coefficient blocks. */
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
/* Bit counts used when building the CAVLC VLC lookup tables below. */
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
/* Maximum parameter-set IDs per the H.264 spec (seq_parameter_set_id < 32,
 * pic_parameter_set_id < 256). */
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
/* Upper bound on buffered memory management control operations. */
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
/* NOTE(review): this extract omits the `typedef struct SPS{` opening and the
 * closing line (the embedded original line numbers jump); the fields below
 * mirror the H.264 SPS bitstream syntax elements named in their ///< tags. */
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
/* Frame cropping rectangle, in the units the bitstream delivers them. */
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
/* VUI (Video Usability Information) sub-fields. */
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
/* Per-SPS scaling lists: six 4x4 matrices and two 8x8 matrices. */
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
99 * Picture parameter set
/* NOTE(review): the `typedef struct PPS{` opening and closing lines are
 * elided from this extract; fields follow the H.264 PPS syntax. */
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
/* Per-PPS scaling lists, same layout as in SPS. */
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
122 * Memory management control operation opcode.
/* NOTE(review): the enumerator list and closing of this enum are elided from
 * the extract (original lines 125-134 are missing). */
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
/**
 * H264Context — per-decoder state. NOTE(review): many interior lines of this
 * struct are elided from the extract (embedded line numbers jump); comments
 * below describe only the fields that are visible here.
 */
146 typedef struct H264Context{
/* NAL unit type codes (subset visible in this extract). */
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
/* Scratch buffer for the escape-sequence-stripped RBSP payload. */
164 uint8_t *rbsp_buffer;
165 unsigned int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
/* Intra prediction state for the current macroblock. */
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
/* 5x8 cache layout: one row of top-neighbor context above a 4x4 block grid. */
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
/* Intra prediction function tables, indexed by prediction mode. */
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
/* Bitmasks of which neighboring samples exist for intra prediction
 * (set up per-MB in fill_caches()). */
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
/* Same 5x8 cache layout as intra4x4_pred_mode_cache, per reference list. */
208 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
209 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
/* Sentinel ref_cache values for unused lists / unavailable partitions. */
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
/* Macroblock-index -> 4x4-block-index remap table. */
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
/* Parameter sets: all parsed SPS/PPS plus the currently active ones. */
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
/* Dequantization tables, one entry per QP (0..51). */
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
/* Slice number per MB; used by fill_caches() to decide neighbor validity. */
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
/* POC (picture order count) computation state. */
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
/* Deblocking filter parameters from the slice header. */
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
/* B-frame direct mode state. */
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
/* Reference picture lists. */
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
/* Bit readers for the (data-partitioned) intra/inter coefficient streams. */
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
/* CABAC decoding state. */
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
/* Motion vector difference table/cache (CABAC context modelling). */
354 int16_t (*mvd_table[2])[2];
355 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
/* Coefficient scan orders; the _q0 variants are presumably the unscaled
 * (QP-independent) tables — TODO confirm against the elided init code. */
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 const uint8_t *zigzag_scan_q0;
362 const uint8_t *field_scan_q0;
/* CAVLC lookup tables, built once at init (init code elided from extract). */
367 static VLC coeff_token_vlc[4];
368 static VLC chroma_dc_coeff_token_vlc;
370 static VLC total_zeros_vlc[15];
371 static VLC chroma_dc_total_zeros_vlc[3];
373 static VLC run_vlc[6];
/* Forward declarations: SVQ3 variants of the transforms and the per-MB
 * deblocking filter, defined later in the file. */
376 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
377 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
378 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Packs two 16-bit values into one uint32_t, ordered so that a later
 * byte-wise store matches the mv_cache int16_t pair layout on either
 * endianness. NOTE(review): the #else/#endif and closing brace are elided
 * from this extract. */
380 static always_inline uint32_t pack16to32(int a, int b){
381 #ifdef WORDS_BIGENDIAN
382 return (b&0xFFFF) + (a<<16);
384 return (a&0xFFFF) + (b<<16);
390 * @param h height of the rectangle, should be a constant
391 * @param w width of the rectangle, should be a constant
392 * @param size the size of val (1 or 4), should be a constant
/* Fills a w x h rectangle of 1- or 4-byte elements with val, using the
 * widest aligned stores each (w,size) combination allows. With constant
 * w/h/size the compiler reduces this to a single branch-free case.
 * NOTE(review): several case-opening lines (e.g. the w==2&&h==2 branch
 * header) and the trailing else/closing brace are elided from this extract. */
394 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
395 uint8_t *p= (uint8_t*)vp;
396 assert(size==1 || size==4);
/* Alignment/stride preconditions for the multi-byte stores below. */
401 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
402 assert((stride&(w-1))==0);
403 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
/* val*0x0101 replicates a byte value across a 16-bit store (size==1). */
406 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
407 }else if(w==2 && h==4){
408 *(uint16_t*)(p + 0*stride)=
409 *(uint16_t*)(p + 1*stride)=
410 *(uint16_t*)(p + 2*stride)=
411 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
412 }else if(w==4 && h==1){
413 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
414 }else if(w==4 && h==2){
415 *(uint32_t*)(p + 0*stride)=
416 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
417 }else if(w==4 && h==4){
418 *(uint32_t*)(p + 0*stride)=
419 *(uint32_t*)(p + 1*stride)=
420 *(uint32_t*)(p + 2*stride)=
421 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
422 }else if(w==8 && h==1){
424 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
425 }else if(w==8 && h==2){
426 *(uint32_t*)(p + 0 + 0*stride)=
427 *(uint32_t*)(p + 4 + 0*stride)=
428 *(uint32_t*)(p + 0 + 1*stride)=
429 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
430 }else if(w==8 && h==4){
/* 64-bit stores: 0x0100000001 replicates a 32-bit val, 0x0101... a byte. */
431 *(uint64_t*)(p + 0*stride)=
432 *(uint64_t*)(p + 1*stride)=
433 *(uint64_t*)(p + 2*stride)=
434 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 }else if(w==16 && h==2){
436 *(uint64_t*)(p + 0+0*stride)=
437 *(uint64_t*)(p + 8+0*stride)=
438 *(uint64_t*)(p + 0+1*stride)=
439 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
440 }else if(w==16 && h==4){
441 *(uint64_t*)(p + 0+0*stride)=
442 *(uint64_t*)(p + 8+0*stride)=
443 *(uint64_t*)(p + 0+1*stride)=
444 *(uint64_t*)(p + 8+1*stride)=
445 *(uint64_t*)(p + 0+2*stride)=
446 *(uint64_t*)(p + 8+2*stride)=
447 *(uint64_t*)(p + 0+3*stride)=
448 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Loads the per-macroblock neighbor context (intra pred modes, non-zero
 * coefficient counts, motion vectors, reference indices, mvd, direct flags)
 * from the frame-wide tables into the small 5x8 caches used while decoding
 * one MB. for_deblock selects the relaxed neighbor rules used when the
 * caches are only needed for the deblocking filter.
 * NOTE(review): many interior lines (branch headers, loop headers, closing
 * braces) are elided from this extract — the embedded original line numbers
 * jump throughout.
 */
453 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
454 MpegEncContext * const s = &h->s;
455 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
456 int topleft_xy, top_xy, topright_xy, left_xy[2];
457 int topleft_type, top_type, topright_type, left_type[2];
461 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
462 // the actual condition is whether we're on the edge of a slice,
463 // and even then the intra and nnz parts are unnecessary.
464 if(for_deblock && h->slice_num == 1)
467 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* Default (progressive) neighbor MB addresses. */
469 top_xy = mb_xy - s->mb_stride;
470 topleft_xy = top_xy - 1;
471 topright_xy= top_xy + 1;
472 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbor addresses depend on the frame/field coding of the MB
 * pair above/left and of the current pair (spec 6.4.8). */
482 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
483 const int top_pair_xy = pair_xy - s->mb_stride;
484 const int topleft_pair_xy = top_pair_xy - 1;
485 const int topright_pair_xy = top_pair_xy + 1;
486 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
487 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
488 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
489 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
490 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
491 const int bottom = (s->mb_y & 1);
492 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
494 ? !curr_mb_frame_flag // bottom macroblock
495 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
497 top_xy -= s->mb_stride;
500 ? !curr_mb_frame_flag // bottom macroblock
501 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
503 topleft_xy -= s->mb_stride;
506 ? !curr_mb_frame_flag // bottom macroblock
507 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
509 topright_xy -= s->mb_stride;
511 if (left_mb_frame_flag != curr_mb_frame_flag) {
512 left_xy[1] = left_xy[0] = pair_xy - 1;
513 if (curr_mb_frame_flag) {
534 left_xy[1] += s->mb_stride;
/* Publish resolved neighbor addresses for later use (e.g. deblocking). */
547 h->top_mb_xy = top_xy;
548 h->left_mb_xy[0] = left_xy[0];
549 h->left_mb_xy[1] = left_xy[1];
/* Neighbor MB types: for deblocking any decoded MB (slice_table < 255)
 * counts; for decoding only same-slice neighbors are available. */
551 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
552 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
553 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
554 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
555 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
557 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
558 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
559 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
560 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
561 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample availability bitmasks: start from "all available" and
 * clear the bits corresponding to missing/inter (with constrained intra
 * pred) neighbors. */
564 if(IS_INTRA(mb_type)){
565 h->topleft_samples_available=
566 h->top_samples_available=
567 h->left_samples_available= 0xFFFF;
568 h->topright_samples_available= 0xEEEA;
570 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
571 h->topleft_samples_available= 0xB3FF;
572 h->top_samples_available= 0x33FF;
573 h->topright_samples_available= 0x26EA;
576 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
577 h->topleft_samples_available&= 0xDF5F;
578 h->left_samples_available&= 0x5F5F;
582 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
583 h->topleft_samples_available&= 0x7FFF;
585 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
586 h->topright_samples_available&= 0xFBFF;
/* Intra 4x4 prediction mode cache: copy the bottom row of the top
 * neighbor and the right column of the left neighbors. */
588 if(IS_INTRA4x4(mb_type)){
589 if(IS_INTRA4x4(top_type)){
590 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
591 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
592 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
593 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
596 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
601 h->intra4x4_pred_mode_cache[4+8*0]=
602 h->intra4x4_pred_mode_cache[5+8*0]=
603 h->intra4x4_pred_mode_cache[6+8*0]=
604 h->intra4x4_pred_mode_cache[7+8*0]= pred;
607 if(IS_INTRA4x4(left_type[i])){
608 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
609 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
612 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
617 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
618 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero coefficient count cache from the top/left neighbors;
 * 64 means "not available". */
633 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
635 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
636 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
637 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
638 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
640 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
641 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
643 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
644 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
647 h->non_zero_count_cache[4+8*0]=
648 h->non_zero_count_cache[5+8*0]=
649 h->non_zero_count_cache[6+8*0]=
650 h->non_zero_count_cache[7+8*0]=
652 h->non_zero_count_cache[1+8*0]=
653 h->non_zero_count_cache[2+8*0]=
655 h->non_zero_count_cache[1+8*3]=
656 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
660 for (i=0; i<2; i++) {
662 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
663 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
664 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
665 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
667 h->non_zero_count_cache[3+8*1 + 2*8*i]=
668 h->non_zero_count_cache[3+8*2 + 2*8*i]=
669 h->non_zero_count_cache[0+8*1 + 8*i]=
670 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Coded block pattern of the neighbors (CABAC context). */
677 h->top_cbp = h->cbp_table[top_xy];
678 } else if(IS_INTRA(mb_type)) {
685 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
686 } else if(IS_INTRA(mb_type)) {
692 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
695 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference index caches, per reference list. */
700 //FIXME direct mb can skip much of this
701 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
703 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
704 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
705 /*if(!h->mv_cache_clean[list]){
706 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
707 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
708 h->mv_cache_clean[list]= 1;
712 h->mv_cache_clean[list]= 0;
/* Top neighbor row of MVs/refs. */
714 if(IS_INTER(top_type)){
715 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
716 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
717 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
718 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
719 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
720 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
721 h->ref_cache[list][scan8[0] + 0 - 1*8]=
722 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
723 h->ref_cache[list][scan8[0] + 2 - 1*8]=
724 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
726 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
727 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
728 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
729 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
730 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left neighbor columns (two halves for MBAFF pairs). */
733 //FIXME unify cleanup or sth
734 if(IS_INTER(left_type[0])){
735 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
736 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
737 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
739 h->ref_cache[list][scan8[0] - 1 + 0*8]=
740 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
742 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
743 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
744 h->ref_cache[list][scan8[0] - 1 + 0*8]=
745 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
748 if(IS_INTER(left_type[1])){
749 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
750 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
751 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
752 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
753 h->ref_cache[list][scan8[0] - 1 + 2*8]=
754 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
756 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
757 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
/* NOTE(review): left_type[0] is used here although this is the
 * left_type[1] branch; the assert below documents why both are
 * expected to agree — verify against upstream before changing. */
758 h->ref_cache[list][scan8[0] - 1 + 2*8]=
759 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
760 assert((!left_type[0]) == (!left_type[1]));
/* Topleft/topright corners are only needed for MV prediction, not for
 * deblocking or spatial direct mode. */
763 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
766 if(IS_INTER(topleft_type)){
767 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
768 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
769 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
770 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
772 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
773 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
776 if(IS_INTER(topright_type)){
777 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
778 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
779 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
780 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
782 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
783 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Mark the cache cells that never have valid neighbors. */
787 h->ref_cache[list][scan8[5 ]+1] =
788 h->ref_cache[list][scan8[7 ]+1] =
789 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
790 h->ref_cache[list][scan8[4 ]] =
791 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
792 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
793 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
794 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
795 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
796 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* MVD cache (CABAC context modelling), same layout as the MV cache. */
799 /* XXX beurk, Load mvd */
800 if(IS_INTER(topleft_type)){
801 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
802 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
804 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
807 if(IS_INTER(top_type)){
808 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
809 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
810 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
811 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
812 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
814 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
815 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
816 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
817 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
819 if(IS_INTER(left_type[0])){
820 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
821 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
824 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
825 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
827 if(IS_INTER(left_type[1])){
828 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
829 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
830 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
832 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
833 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
835 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
836 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
837 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
838 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
839 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: load the per-8x8-block direct-mode flags of the neighbors. */
841 if(h->slice_type == B_TYPE){
842 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
844 if(IS_DIRECT(top_type)){
845 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
846 }else if(IS_8X8(top_type)){
847 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
848 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
849 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
851 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
855 if(IS_DIRECT(left_type[0])){
856 h->direct_cache[scan8[0] - 1 + 0*8]=
857 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
858 }else if(IS_8X8(left_type[0])){
859 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
860 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
861 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
863 h->direct_cache[scan8[0] - 1 + 0*8]=
864 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* 8x8 DCT usage of the two relevant neighbors (CABAC ctx for 8x8 mode). */
872 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the current MB's intra 4x4 prediction modes from the 5x8 cache
 * (bottom row + right column) back into the frame-wide table, where later
 * MBs will read them as top/left neighbor context. */
875 static inline void write_back_intra_pred_mode(H264Context *h){
876 MpegEncContext * const s = &h->s;
877 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Entries [0..3]: right column of the cache; [4..6]: bottom row. */
879 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
880 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
881 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
882 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
883 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
884 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
885 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
889 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remaps each cached intra4x4 mode through the top[]/left[] tables when the
 * corresponding neighbor samples are missing: -1 entries are invalid
 * (error logged), other entries substitute a DC variant that ignores the
 * missing side. Returns nonzero on error (return lines elided here). */
891 static inline int check_intra4x4_pred_mode(H264Context *h){
892 MpegEncContext * const s = &h->s;
893 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
894 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
897 if(!(h->top_samples_available&0x8000)){
899 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
904 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
909 if(!(h->left_samples_available&0x8000)){
911 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
913 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
916 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
922 } //FIXME cleanup like next
925 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode() but for the single 16x16/chroma
 * prediction mode: validates the range and substitutes DC variants when
 * top/left samples are unavailable (return statements elided here). */
927 static inline int check_intra_pred_mode(H264Context *h, int mode){
928 MpegEncContext * const s = &h->s;
929 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
930 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
/* Reject out-of-range modes from the bitstream before table lookup. */
932 if(mode < 0 || mode > 6) {
933 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
937 if(!(h->top_samples_available&0x8000)){
940 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 if(!(h->left_samples_available&0x8000)){
948 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
957 * gets the predicted intra4x4 prediction mode.
/* Spec 8.3.1.1: the predicted mode is min(left, top); a negative cached
 * mode means that neighbor is unavailable, in which case DC is predicted.
 * (The final `return min;` line is elided from this extract.) */
959 static inline int pred_intra_mode(H264Context *h, int n){
960 const int index8= scan8[n];
961 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
962 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
963 const int min= FFMIN(left, top);
965 tprintf("mode:%d %d min:%d\n", left ,top, min);
967 if(min<0) return DC_PRED;
/* Copies the current MB's non-zero coefficient counts from the 6x8 cache
 * back into the frame-wide non_zero_count table (luma right column/bottom
 * row plus the chroma entries), mirroring write_back_intra_pred_mode(). */
971 static inline void write_back_non_zero_count(H264Context *h){
972 MpegEncContext * const s = &h->s;
973 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Luma: entries [0..6]. */
975 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
976 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
977 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
978 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
979 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
980 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
981 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma Cb: entries [7..9]. */
983 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
984 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
985 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* Chroma Cr: entries [10..12]. */
987 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
988 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
989 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
993 * gets the predicted number of non zero coefficients.
994 * @param n block index
/* Predicts nnz from the left/top cached counts; 64 is the "unavailable"
 * sentinel. NOTE(review): the line combining left and top into `i` is
 * elided from this extract — only the averaging/rounding step is visible. */
996 static inline int pred_non_zero_count(H264Context *h, int n){
997 const int index8= scan8[n];
998 const int left= h->non_zero_count_cache[index8 - 1];
999 const int top = h->non_zero_count_cache[index8 - 8];
1002 if(i<64) i= (i+1)>>1;
1004 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the "C" (diagonal) predictor for MV prediction: the top-right
 * neighbor if available, otherwise the top-left one (spec 8.4.1.3.2).
 * Stores the MV pointer via *C and returns the matching reference index.
 * (The else-branch header and closing braces are elided from this extract.) */
1009 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1010 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1012 if(topright_ref != PART_NOT_AVAILABLE){
1013 *C= h->mv_cache[list][ i - 8 + part_width ];
1014 return topright_ref;
1016 tprintf("topright MV not available\n");
/* Fallback: use the top-left block as the diagonal predictor. */
1018 *C= h->mv_cache[list][ i - 8 - 1 ];
1019 return h->ref_cache[list][ i - 8 - 1 ];
1024 * gets the predicted MV.
1025 * @param n the block index
1026 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1027 * @param mx the x component of the predicted motion vector
1028 * @param my the y component of the predicted motion vector
/* Median motion-vector prediction from neighbours A (left), B (top), C
 * (diagonal, via fetch_diagonal_mv).  When exactly one neighbour shares the
 * target ref, that neighbour's MV is used; otherwise the component-wise
 * median of A/B/C.  NOTE(review): extract elides several branch bodies. */
1030 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1031 const int index8= scan8[n];
1032 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1033 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1034 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1035 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1037 int diagonal_ref, match_count;
1039 assert(part_width==1 || part_width==2 || part_width==4);
1049 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture as this block */
1050 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1051 tprintf("pred_motion match_count=%d\n", match_count);
1052 if(match_count > 1){ //most common
1053 *mx= mid_pred(A[0], B[0], C[0]);
1054 *my= mid_pred(A[1], B[1], C[1]);
1055 }else if(match_count==1){
1059 }else if(top_ref==ref){
/* no neighbour matches: only-left special case, otherwise median */
1067 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1071 *mx= mid_pred(A[0], B[0], C[0]);
1072 *my= mid_pred(A[1], B[1], C[1]);
1076 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1080 * gets the directionally predicted 16x8 MV.
1081 * @param n the block index
1082 * @param mx the x component of the predicted motion vector
1083 * @param my the y component of the predicted motion vector
/* Directional MV prediction for 16x8 partitions: the top partition prefers
 * the top neighbour (B), the bottom partition prefers the left neighbour (A);
 * falls back to the generic median predictor otherwise. */
1085 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1087 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1088 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1090 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1098 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1099 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1101 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1103 if(left_ref == ref){
/* directional shortcut did not apply: use the median predictor */
1111 pred_motion(h, n, 4, list, ref, mx, my);
1115 * gets the directionally predicted 8x16 MV.
1116 * @param n the block index
1117 * @param mx the x component of the predicted motion vector
1118 * @param my the y component of the predicted motion vector
/* Directional MV prediction for 8x16 partitions: the left partition prefers
 * the left neighbour (A), the right partition prefers the diagonal neighbour
 * (C); falls back to the generic median predictor otherwise. */
1120 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1122 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1123 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1125 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1127 if(left_ref == ref){
1136 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1138 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1140 if(diagonal_ref == ref){
/* directional shortcut did not apply: use the median predictor */
1148 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks: the MV is forced to (0,0) when a
 * neighbour is unavailable or a ref-0 neighbour has a zero MV; otherwise the
 * regular median prediction with ref 0 is used. */
1151 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1152 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1153 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1155 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1157 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1158 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1159 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1165 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes dist_scale_factor[] for temporal direct mode, one entry per
 * list-0 reference, from POC distances (tb/td/tx as in the temporal-direct
 * scaling equations).  td==0 (co-located ref at same POC) uses factor 256. */
1170 static inline void direct_dist_scale_factor(H264Context * const h){
1171 const int poc = h->s.current_picture_ptr->poc;
1172 const int poc1 = h->ref_list[1][0].poc;
1174 for(i=0; i<h->ref_count[0]; i++){
1175 int poc0 = h->ref_list[0][i].poc;
1176 int td = clip(poc1 - poc0, -128, 127);
1177 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1178 h->dist_scale_factor[i] = 256;
1180 int tb = clip(poc - poc0, -128, 127);
/* tx = round(16384/td); rounding term uses |td|/2 */
1181 int tx = (16384 + (ABS(td) >> 1)) / td;
1182 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* Stores the current picture's ref counts/POCs (so later B-frames can map
 * co-located refs), then builds map_col_to_list0[]: for each reference of the
 * co-located picture (ref_list[1][0]), the index of the list-0/1 entry with a
 * matching POC, or PART_NOT_AVAILABLE.  Skipped for non-B or spatial direct. */
1186 static inline void direct_ref_list_init(H264Context * const h){
1187 MpegEncContext * const s = &h->s;
1188 Picture * const ref1 = &h->ref_list[1][0];
1189 Picture * const cur = s->current_picture_ptr;
1191 if(cur->pict_type == I_TYPE)
1192 cur->ref_count[0] = 0;
1193 if(cur->pict_type != B_TYPE)
1194 cur->ref_count[1] = 0;
1195 for(list=0; list<2; list++){
1196 cur->ref_count[list] = h->ref_count[list];
1197 for(j=0; j<h->ref_count[list]; j++)
1198 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1200 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
/* map each co-located reference to our list entry with the same POC */
1202 for(list=0; list<2; list++){
1203 for(i=0; i<ref1->ref_count[list]; i++){
1204 const int poc = ref1->ref_poc[list][i];
1205 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1206 for(j=0; j<h->ref_count[list]; j++)
1207 if(h->ref_list[list][j].poc == poc){
1208 h->map_col_to_list0[list][i] = j;
/* Fills mv_cache/ref_cache for B_Direct macroblocks (and direct 8x8 sub-
 * blocks) using either spatial or temporal direct prediction, based on the
 * co-located macroblock of ref_list[1][0].  mb_type / sub_mb_type are chosen
 * from the co-located partitioning (direct_8x8_inference permitting).
 * NOTE(review): extract elides many lines; the overall branch structure is
 * spatial vs. temporal, each with a 16x16 fast path and a per-8x8 loop. */
1215 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1216 MpegEncContext * const s = &h->s;
1217 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1218 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1219 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1220 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1221 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1222 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1223 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1224 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1225 const int is_b8x8 = IS_8X8(*mb_type);
/* derive our (sub_)mb_type from the co-located macroblock's partitioning */
1229 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1230 /* FIXME save sub mb types from previous frames (or derive from MVs)
1231 * so we know exactly what block size to use */
1232 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1233 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1234 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1235 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1236 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1238 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1239 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1242 *mb_type |= MB_TYPE_DIRECT2;
1244 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* spatial direct: refs from neighbours, MVs from median prediction, with
 * zeroing where the co-located block is a (near-)stationary ref-0 block */
1246 if(h->direct_spatial_mv_pred){
1251 /* ref = min(neighbors) */
1252 for(list=0; list<2; list++){
1253 int refa = h->ref_cache[list][scan8[0] - 1];
1254 int refb = h->ref_cache[list][scan8[0] - 8];
1255 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1257 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1259 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1261 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1267 if(ref[0] < 0 && ref[1] < 0){
1268 ref[0] = ref[1] = 0;
1269 mv[0][0] = mv[0][1] =
1270 mv[1][0] = mv[1][1] = 0;
1272 for(list=0; list<2; list++){
1274 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1276 mv[list][0] = mv[list][1] = 0;
1281 *mb_type &= ~MB_TYPE_P0L1;
1282 sub_mb_type &= ~MB_TYPE_P0L1;
1283 }else if(ref[0] < 0){
1284 *mb_type &= ~MB_TYPE_P0L0;
1285 sub_mb_type &= ~MB_TYPE_P0L0;
1288 if(IS_16X16(*mb_type)){
1289 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1290 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1291 if(!IS_INTRA(mb_type_col)
1292 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1293 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1294 && (h->x264_build>33 || !h->x264_build)))){
1296 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1298 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1302 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1308 for(i8=0; i8<4; i8++){
1309 const int x8 = i8&1;
1310 const int y8 = i8>>1;
1312 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1314 h->sub_mb_type[i8] = sub_mb_type;
1316 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1317 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1318 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1319 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1322 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1323 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1324 && (h->x264_build>33 || !h->x264_build)))){
1325 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1326 if(IS_SUB_8X8(sub_mb_type)){
1327 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1328 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1330 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1332 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1335 for(i4=0; i4<4; i4++){
1336 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1337 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1339 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1341 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1347 }else{ /* direct temporal mv pred */
1348 if(IS_16X16(*mb_type)){
1349 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1350 if(IS_INTRA(mb_type_col)){
1351 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1352 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1353 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1355 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1356 : h->map_col_to_list0[1][l1ref1[0]];
1357 const int dist_scale_factor = h->dist_scale_factor[ref0];
1358 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1360 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1361 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1362 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1363 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1364 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1367 for(i8=0; i8<4; i8++){
1368 const int x8 = i8&1;
1369 const int y8 = i8>>1;
1370 int ref0, dist_scale_factor;
1371 const int16_t (*l1mv)[2]= l1mv0;
1373 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1375 h->sub_mb_type[i8] = sub_mb_type;
1376 if(IS_INTRA(mb_type_col)){
1377 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1378 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1379 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1380 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1384 ref0 = l1ref0[x8 + y8*h->b8_stride];
1386 ref0 = h->map_col_to_list0[0][ref0];
1388 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1391 dist_scale_factor = h->dist_scale_factor[ref0];
1393 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1394 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1395 if(IS_SUB_8X8(sub_mb_type)){
1396 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1397 int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1398 int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1399 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1400 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1402 for(i4=0; i4<4; i4++){
1403 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1404 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1405 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1406 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1407 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1408 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-macroblock MV/ref caches back to the frame-wide tables
 * (current_picture.motion_val / ref_index, and mvd_table for CABAC).
 * Unused lists are zeroed / marked LIST_NOT_USED.  NOTE(review): the loop
 * variables and several closing braces are elided in this extract. */
1415 static inline void write_back_motion(H264Context *h, int mb_type){
1416 MpegEncContext * const s = &h->s;
1417 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1418 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1421 for(list=0; list<2; list++){
1423 if(!USES_LIST(mb_type, list)){
1424 if(1){ //FIXME skip or never read if mb_type doesn't use it
1426 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1427 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1429 if( h->pps.cabac ) {
1430 /* FIXME needed ? */
1432 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1433 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1437 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1438 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list is used: copy cached MVs, MV deltas (CABAC) and ref indices out */
1445 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1446 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1448 if( h->pps.cabac ) {
1450 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1451 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1455 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1456 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
/* B+CABAC also needs the direct flags of the 8x8 sub-blocks */
1460 if(h->slice_type == B_TYPE && h->pps.cabac){
1461 if(IS_8X8(mb_type)){
1462 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1463 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1464 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1470 * Decodes a network abstraction layer unit.
1471 * @param consumed is the number of bytes used as input
1472 * @param length is the length of the array
1473 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1474 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header byte (ref idc / unit type), then un-escapes the RBSP:
 * 00 00 03 xx emulation-prevention sequences are collapsed by dropping the 03.
 * If no escape byte exists, src+1 is returned directly without copying. */
1476 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1480 // src[0]&0x80; //forbidden bit
1481 h->nal_ref_idc= src[0]>>5;
1482 h->nal_unit_type= src[0]&0x1F;
1486 for(i=0; i<length; i++)
1487 printf("%2X ", src[i]);
/* scan every other byte for a 00 00 0x pattern (escape or startcode) */
1489 for(i=0; i+1<length; i+=2){
1490 if(src[i]) continue;
1491 if(i>0 && src[i-1]==0) i--;
1492 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1494 /* startcode, so we must be past the end */
1501 if(i>=length-1){ //no escaped 0
1502 *dst_length= length;
1503 *consumed= length+1; //+1 for the header
1507 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1508 dst= h->rbsp_buffer;
1510 //printf("decoding esc\n");
1513 //remove escapes (very rare 1:2^22)
1514 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1515 if(src[si+2]==3){ //escape
1520 }else //next start code
1524 dst[di++]= src[si++];
1528 *consumed= si + 1;//+1 for the header
1529 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1535 * @param src the data which should be escaped
1536 * @param dst the target buffer, dst+1 == src is allowed as a special case
1537 * @param length the length of the src data
1538 * @param dst_length the length of the dst array
1539 * @returns length of escaped data in bytes or -1 if an error occurred
/* Inverse of decode_nal: writes the NAL header byte, then copies src into
 * dst inserting 03 emulation-prevention bytes after each 00 00 pair that
 * would otherwise form 00 00 0x.  Escape-free payloads are memcpy'd as-is. */
1541 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1542 int i, escape_count, si, di;
1546 assert(dst_length>0);
1548 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1550 if(length==0) return 1;
/* first pass: count how many escapes will be needed */
1553 for(i=0; i<length; i+=2){
1554 if(src[i]) continue;
1555 if(i>0 && src[i-1]==0)
1557 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1563 if(escape_count==0){
1565 memcpy(dst+1, src, length);
1569 if(length + escape_count + 1> dst_length)
1572 //this should be damn rare (hopefully)
1574 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1575 temp= h->rbsp_buffer;
1576 //printf("encoding esc\n");
/* second pass: copy with 03 inserted after each 00 00 before a byte <= 3 */
1581 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1582 temp[di++]= 0; si++;
1583 temp[di++]= 0; si++;
1585 temp[di++]= src[si++];
1588 temp[di++]= src[si++];
1590 memcpy(dst+1, temp, length+escape_count);
1592 assert(di == length+escape_count);
1598 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of MPEG-4
/* Writes the rbsp_stop_one_bit plus zero padding up to the next byte boundary.
 * NOTE(review): the put_bits call emitting the leading 1 bit is elided here. */
1600 static void encode_rbsp_trailing(PutBitContext *pb){
1603 length= (-put_bits_count(pb))&7;
1604 if(length) put_bits(pb, length, 0);
1609 * identifies the exact end of the bitstream
1610 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit at the end of the payload.
 * NOTE(review): the byte read into v and the return are elided in this extract. */
1612 static int decode_rbsp_trailing(uint8_t *src){
1616 tprintf("rbsp trailing %X\n", v);
1626 * idct transforms the 16 dc values and dequantizes them.
1627 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform of the 16 luma DC coefficients followed by
 * dequantization with qmul (rows into temp[], then columns written back).
 * The DC values live at strided positions inside the 16x16 block array
 * (x_offset/y_offset), hence the scattered indexing. */
1629 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1632 int temp[16]; //FIXME check if this is a good idea
1633 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1634 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1636 //memset(block, 64, 2*256);
/* horizontal (row) butterflies into temp[] */
1639 const int offset= y_offset[i];
1640 const int z0= block[offset+stride*0] + block[offset+stride*4];
1641 const int z1= block[offset+stride*0] - block[offset+stride*4];
1642 const int z2= block[offset+stride*1] - block[offset+stride*5];
1643 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterflies, dequantize and round (+128 >> 8) */
1652 const int offset= x_offset[i];
1653 const int z0= temp[4*0+i] + temp[4*2+i];
1654 const int z1= temp[4*0+i] - temp[4*2+i];
1655 const int z2= temp[4*1+i] - temp[4*3+i];
1656 const int z3= temp[4*1+i] + temp[4*3+i];
1658 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1659 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1660 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1661 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1667 * dct transforms the 16 dc values.
1668 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * same strided layout as the inverse, output halved (>>1), no quantization. */
1670 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1671 // const int qmul= dequant_coeff[qp][0];
1673 int temp[16]; //FIXME check if this is a good idea
1674 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1675 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal (row) butterflies into temp[] */
1678 const int offset= y_offset[i];
1679 const int z0= block[offset+stride*0] + block[offset+stride*4];
1680 const int z1= block[offset+stride*0] - block[offset+stride*4];
1681 const int z2= block[offset+stride*1] - block[offset+stride*5];
1682 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterflies, result scaled down by 2 */
1691 const int offset= x_offset[i];
1692 const int z0= temp[4*0+i] + temp[4*2+i];
1693 const int z1= temp[4*0+i] - temp[4*2+i];
1694 const int z2= temp[4*1+i] - temp[4*3+i];
1695 const int z3= temp[4*1+i] + temp[4*3+i];
1697 block[stride*0 +offset]= (z0 + z3)>>1;
1698 block[stride*2 +offset]= (z1 + z2)>>1;
1699 block[stride*8 +offset]= (z1 - z2)>>1;
1700 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC values
 * (a,b,c,d laid out on a 2x2 grid inside the strided block array). */
1708 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1709 const int stride= 16*2;
1710 const int xStride= 16;
1713 a= block[stride*0 + xStride*0];
1714 b= block[stride*0 + xStride*1];
1715 c= block[stride*1 + xStride*0];
1716 d= block[stride*1 + xStride*1];
/* butterfly combinations, then scale by qmul and >>7 */
1723 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1724 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1725 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1726 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC values (encoder side);
 * same layout as the inverse, no scaling or quantization. */
1730 static void chroma_dc_dct_c(DCTELEM *block){
1731 const int stride= 16*2;
1732 const int xStride= 16;
1735 a= block[stride*0 + xStride*0];
1736 b= block[stride*0 + xStride*1];
1737 c= block[stride*1 + xStride*0];
1738 d= block[stride*1 + xStride*1];
1745 block[stride*0 + xStride*0]= (a+c);
1746 block[stride*0 + xStride*1]= (e+b);
1747 block[stride*1 + xStride*0]= (a-c);
1748 block[stride*1 + xStride*1]= (e-b);
1753 * gets the chroma qp.
/* Maps the luma qscale (plus the PPS chroma offset, clipped to 0..51) through
 * the chroma_qp[] lookup table. */
1755 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1757 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Forward 4x4 H.264 integer transform of the difference src1-src2 (encoder):
 * horizontal pass on the pixel differences, then vertical pass in place. */
1762 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1764 //FIXME try int temp instead of block
/* horizontal pass: transform each row of differences */
1767 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1768 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1769 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1770 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1771 const int z0= d0 + d3;
1772 const int z3= d0 - d3;
1773 const int z1= d1 + d2;
1774 const int z2= d1 - d2;
1776 block[0 + 4*i]= z0 + z1;
1777 block[1 + 4*i]= 2*z3 + z2;
1778 block[2 + 4*i]= z0 - z1;
1779 block[3 + 4*i]= z3 - 2*z2;
/* vertical pass: transform each column in place */
1783 const int z0= block[0*4 + i] + block[3*4 + i];
1784 const int z3= block[0*4 + i] - block[3*4 + i];
1785 const int z1= block[1*4 + i] + block[2*4 + i];
1786 const int z2= block[1*4 + i] - block[2*4 + i];
1788 block[0*4 + i]= z0 + z1;
1789 block[1*4 + i]= 2*z3 + z2;
1790 block[2*4 + i]= z0 - z1;
1791 block[3*4 + i]= z3 - 2*z2;
1796 //FIXME need to check that this doesn't overflow signed 32 bit for low qp, i am not sure, it's very close
1797 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Quantizes a 4x4 block in scan order with dead-zone bias (intra gets the
 * larger bias, 1/3 vs 1/6) and returns the last non-zero index.  When
 * seperate_dc is set the DC coefficient uses its own table/shift (the two
 * variants below differ in QUANT_SHIFT-2 vs QUANT_SHIFT+1).
 * NOTE(review): the branch structure and loop header are partly elided. */
1798 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1800 const int * const quant_table= quant_coeff[qscale];
1801 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1802 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1803 const unsigned int threshold2= (threshold1<<1);
/* DC path, variant with qscale<6-style extra precision (shift reduced by 2) */
1809 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1810 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1811 const unsigned int dc_threshold2= (dc_threshold1<<1);
1813 int level= block[0]*quant_coeff[qscale+18][0];
1814 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1816 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1819 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1822 // last_non_zero = i;
/* DC path, variant with shift increased by 1 */
1827 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1828 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1829 const unsigned int dc_threshold2= (dc_threshold1<<1);
1831 int level= block[0]*quant_table[0];
1832 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1834 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1837 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1840 // last_non_zero = i;
/* AC coefficients in scan order; the unsigned range test skips the common
 * "quantizes to zero" case with a single compare */
1853 const int j= scantable[i];
1854 int level= block[j]*quant_table[j];
1856 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1857 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1858 if(((unsigned)(level+threshold1))>threshold2){
1860 level= (bias + level)>>QUANT_SHIFT;
1863 level= (bias - level)>>QUANT_SHIFT;
1872 return last_non_zero;
/* 4x4 vertical intra prediction: replicate the 4 top neighbour pixels
 * (read as one 32-bit word) into all 4 rows. */
1875 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
1876 const uint32_t a= ((uint32_t*)(src-stride))[0];
1877 ((uint32_t*)(src+0*stride))[0]= a;
1878 ((uint32_t*)(src+1*stride))[0]= a;
1879 ((uint32_t*)(src+2*stride))[0]= a;
1880 ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal intra prediction: each row is its left neighbour pixel
 * splatted across 4 bytes via *0x01010101. */
1883 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1884 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1885 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1886 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1887 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC intra prediction: mean of the 4 top and 4 left neighbours
 * (rounded, >>3), splatted into the whole block. */
1890 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1891 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1892 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
1894 ((uint32_t*)(src+0*stride))[0]=
1895 ((uint32_t*)(src+1*stride))[0]=
1896 ((uint32_t*)(src+2*stride))[0]=
1897 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction from the left neighbours only (top unavailable). */
1900 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1901 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1903 ((uint32_t*)(src+0*stride))[0]=
1904 ((uint32_t*)(src+1*stride))[0]=
1905 ((uint32_t*)(src+2*stride))[0]=
1906 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction from the top neighbours only (left unavailable). */
1909 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1910 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1912 ((uint32_t*)(src+0*stride))[0]=
1913 ((uint32_t*)(src+1*stride))[0]=
1914 ((uint32_t*)(src+2*stride))[0]=
1915 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction when no neighbours are available: constant 128. */
1918 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1919 ((uint32_t*)(src+0*stride))[0]=
1920 ((uint32_t*)(src+1*stride))[0]=
1921 ((uint32_t*)(src+2*stride))[0]=
1922 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros loading neighbour pixels into locals for the directional
 * 4x4 predictors below: t4..t7 = top-right row, l0..l3 = left column,
 * t0..t3 = top row.  (Comments must stay outside the \-continuations.) */
1926 #define LOAD_TOP_RIGHT_EDGE\
1927 const int t4= topright[0];\
1928 const int t5= topright[1];\
1929 const int t6= topright[2];\
1930 const int t7= topright[3];\
1932 #define LOAD_LEFT_EDGE\
1933 const int l0= src[-1+0*stride];\
1934 const int l1= src[-1+1*stride];\
1935 const int l2= src[-1+2*stride];\
1936 const int l3= src[-1+3*stride];\
1938 #define LOAD_TOP_EDGE\
1939 const int t0= src[ 0-1*stride];\
1940 const int t1= src[ 1-1*stride];\
1941 const int t2= src[ 2-1*stride];\
1942 const int t3= src[ 3-1*stride];\
/* 4x4 diagonal-down-right intra prediction: 3-tap smoothed values along the
 * top-left-to-bottom-right diagonal, built from left column, corner and top
 * row.  NOTE(review): the LOAD_* invocations and most diagonals are elided. */
1944 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1945 const int lt= src[-1-1*stride];
1949 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1951 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1954 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1958 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1961 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1963 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1964 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal-down-left intra prediction: 3-tap smoothing of the top and
 * top-right pixels (t0..t7); the last sample uses (t6 + 3*t7). */
1967 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1972 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1974 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1977 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1981 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1984 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1986 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1987 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right intra prediction: mix of 2-tap averages (>>1) and
 * 3-tap smoothed values (>>2) from the top row, corner and left column. */
1990 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1991 const int lt= src[-1-1*stride];
1994 const __attribute__((unused)) int unu= l3;
1997 src[1+2*stride]=(lt + t0 + 1)>>1;
1999 src[2+2*stride]=(t0 + t1 + 1)>>1;
2001 src[3+2*stride]=(t1 + t2 + 1)>>1;
2002 src[3+0*stride]=(t2 + t3 + 1)>>1;
2004 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2006 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2008 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2009 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2010 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2011 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left intra prediction: even rows use 2-tap averages, odd rows
 * 3-tap smoothing, sliding rightwards along the top/top-right pixels. */
2014 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2017 const __attribute__((unused)) int unu= t7;
2019 src[0+0*stride]=(t0 + t1 + 1)>>1;
2021 src[0+2*stride]=(t1 + t2 + 1)>>1;
2023 src[1+2*stride]=(t2 + t3 + 1)>>1;
2025 src[2+2*stride]=(t3 + t4+ 1)>>1;
2026 src[3+2*stride]=(t4 + t5+ 1)>>1;
2027 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2029 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2031 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2033 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2034 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up intra prediction from the left column l0..l3; samples
 * past the last left pixel saturate to l3 (hence the l3-heavy taps). */
2037 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2040 src[0+0*stride]=(l0 + l1 + 1)>>1;
2041 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2043 src[0+1*stride]=(l1 + l2 + 1)>>1;
2045 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2047 src[0+2*stride]=(l2 + l3 + 1)>>1;
2049 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down intra prediction: 2-tap averages and 3-tap smoothed
 * values from the left column, corner and top row. */
2058 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2059 const int lt= src[-1-1*stride];
2062 const __attribute__((unused)) int unu= t3;
2065 src[2+1*stride]=(lt + l0 + 1)>>1;
2067 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2068 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2069 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2071 src[2+2*stride]=(l0 + l1 + 1)>>1;
2073 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2075 src[2+3*stride]=(l1 + l2+ 1)>>1;
2077 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2078 src[0+3*stride]=(l2 + l3 + 1)>>1;
2079 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: copy the top neighbour row (as four
 * 32-bit words) into all 16 rows. */
2082 static void pred16x16_vertical_c(uint8_t *src, int stride){
2084 const uint32_t a= ((uint32_t*)(src-stride))[0];
2085 const uint32_t b= ((uint32_t*)(src-stride))[1];
2086 const uint32_t c= ((uint32_t*)(src-stride))[2];
2087 const uint32_t d= ((uint32_t*)(src-stride))[3];
2089 for(i=0; i<16; i++){
2090 ((uint32_t*)(src+i*stride))[0]= a;
2091 ((uint32_t*)(src+i*stride))[1]= b;
2092 ((uint32_t*)(src+i*stride))[2]= c;
2093 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal intra prediction: splat each row's left neighbour. */
2097 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2100 for(i=0; i<16; i++){
2101 ((uint32_t*)(src+i*stride))[0]=
2102 ((uint32_t*)(src+i*stride))[1]=
2103 ((uint32_t*)(src+i*stride))[2]=
2104 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC intra prediction: mean of the 16 left and 16 top neighbours
 * ((dc+16)>>5), splatted.  NOTE(review): the top-row accumulation loop is
 * elided in this extract. */
2108 static void pred16x16_dc_c(uint8_t *src, int stride){
2112 dc+= src[-1+i*stride];
2119 dc= 0x01010101*((dc + 16)>>5);
2121 for(i=0; i<16; i++){
2122 ((uint32_t*)(src+i*stride))[0]=
2123 ((uint32_t*)(src+i*stride))[1]=
2124 ((uint32_t*)(src+i*stride))[2]=
2125 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the left column only ((dc+8)>>4). */
2129 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2133 dc+= src[-1+i*stride];
2136 dc= 0x01010101*((dc + 8)>>4);
2138 for(i=0; i<16; i++){
2139 ((uint32_t*)(src+i*stride))[0]=
2140 ((uint32_t*)(src+i*stride))[1]=
2141 ((uint32_t*)(src+i*stride))[2]=
2142 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the top row only ((dc+8)>>4).  NOTE(review): the
 * top-row accumulation loop is elided in this extract. */
2146 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2152 dc= 0x01010101*((dc + 8)>>4);
2154 for(i=0; i<16; i++){
2155 ((uint32_t*)(src+i*stride))[0]=
2156 ((uint32_t*)(src+i*stride))[1]=
2157 ((uint32_t*)(src+i*stride))[2]=
2158 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours available: constant 128. */
2162 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2165 for(i=0; i<16; i++){
2166 ((uint32_t*)(src+i*stride))[0]=
2167 ((uint32_t*)(src+i*stride))[1]=
2168 ((uint32_t*)(src+i*stride))[2]=
2169 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane intra prediction: least-squares plane fit through the top row
 * and left column (gradients H, V), written with clipping via cm[].  The
 * svq3 flag selects SVQ3's alternative rounding and swaps H/V for bit-exact
 * compatibility with that codec. */
2173 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2176 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2177 const uint8_t * const src0 = src+7-stride;
2178 const uint8_t *src1 = src+8*stride-1;
2179 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2180 int H = src0[1] - src0[-1];
2181 int V = src1[0] - src2[ 0];
/* weighted sums of symmetric differences give the plane gradients */
2182 for(k=2; k<=8; ++k) {
2183 src1 += stride; src2 -= stride;
2184 H += k*(src0[k] - src0[-k]);
2185 V += k*(src1[0] - src2[ 0]);
2188 H = ( 5*(H/4) ) / 16;
2189 V = ( 5*(V/4) ) / 16;
2191 /* required for 100% accuracy */
2192 i = H; H = V; V = i;
2194 H = ( 5*H+32 ) >> 6;
2195 V = ( 5*V+32 ) >> 6;
2198 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2199 for(j=16; j>0; --j) {
2202 for(i=-16; i<0; i+=4) {
2203 src[16+i] = cm[ (b ) >> 5 ];
2204 src[17+i] = cm[ (b+ H) >> 5 ];
2205 src[18+i] = cm[ (b+2*H) >> 5 ];
2206 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Standard H.264 16x16 plane prediction (compat wrapper with svq3 = 0). */
2213 static void pred16x16_plane_c(uint8_t *src, int stride){
2214 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical intra prediction: copy the top row into all rows. */
2217 static void pred8x8_vertical_c(uint8_t *src, int stride){
2219 const uint32_t a= ((uint32_t*)(src-stride))[0];
2220 const uint32_t b= ((uint32_t*)(src-stride))[1];
2223 ((uint32_t*)(src+i*stride))[0]= a;
2224 ((uint32_t*)(src+i*stride))[1]= b;
//8x8 chroma horizontal prediction: fill each row with its left neighbour pixel.
2228 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2232 ((uint32_t*)(src+i*stride))[0]=
2233 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101; //byte replicated 8 wide
//8x8 chroma DC prediction with no neighbours available: fill with mid-grey.
2237 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2241 ((uint32_t*)(src+i*stride))[0]=
2242 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
//8x8 chroma DC from the left column only: the top 4 rows use the average of
//left pixels 0..3 (dc0), the bottom 4 rows the average of pixels 4..7 (dc2).
2246 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2252 dc0+= src[-1+i*stride];
2253 dc2+= src[-1+(i+4)*stride];
2255 dc0= 0x01010101*((dc0 + 2)>>2); //rounded averages, byte-replicated
2256 dc2= 0x01010101*((dc2 + 2)>>2);
2259 ((uint32_t*)(src+i*stride))[0]=
2260 ((uint32_t*)(src+i*stride))[1]= dc0;
2263 ((uint32_t*)(src+i*stride))[0]=
2264 ((uint32_t*)(src+i*stride))[1]= dc2;
//8x8 chroma DC from the top row only: the left 4 columns use the average of
//top pixels 0..3 (dc0), the right 4 columns pixels 4..7 (dc1), for all rows.
2268 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2274 dc0+= src[i-stride];
2275 dc1+= src[4+i-stride];
2277 dc0= 0x01010101*((dc0 + 2)>>2);
2278 dc1= 0x01010101*((dc1 + 2)>>2);
2281 ((uint32_t*)(src+i*stride))[0]= dc0;
2282 ((uint32_t*)(src+i*stride))[1]= dc1;
2285 ((uint32_t*)(src+i*stride))[0]= dc0;
2286 ((uint32_t*)(src+i*stride))[1]= dc1;
//8x8 chroma DC using both edges, one DC per 4x4 quadrant: top-left from
//left+top (dc0), top-right from top (dc1), bottom-left from left (dc2),
//bottom-right from the combined top-right/bottom-left sums (dc3).
2291 static void pred8x8_dc_c(uint8_t *src, int stride){
2293 int dc0, dc1, dc2, dc3;
2297 dc0+= src[-1+i*stride] + src[i-stride];
2298 dc1+= src[4+i-stride];
2299 dc2+= src[-1+(i+4)*stride];
2301 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2302 dc0= 0x01010101*((dc0 + 4)>>3);
2303 dc1= 0x01010101*((dc1 + 2)>>2);
2304 dc2= 0x01010101*((dc2 + 2)>>2);
2307 ((uint32_t*)(src+i*stride))[0]= dc0;
2308 ((uint32_t*)(src+i*stride))[1]= dc1;
2311 ((uint32_t*)(src+i*stride))[0]= dc2;
2312 ((uint32_t*)(src+i*stride))[1]= dc3;
//8x8 chroma plane (gradient) prediction, analogous to the 16x16 version but
//built from 4-sample edge gradients.
2316 static void pred8x8_plane_c(uint8_t *src, int stride){
2319 uint8_t *cm = cropTbl + MAX_NEG_CROP; //clip table: saturates results to 0..255
2320 const uint8_t * const src0 = src+3-stride;
2321 const uint8_t *src1 = src+4*stride-1;
2322 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2323 int H = src0[1] - src0[-1];
2324 int V = src1[0] - src2[ 0];
2325 for(k=2; k<=4; ++k) {
2326 src1 += stride; src2 -= stride;
2327 H += k*(src0[k] - src0[-k]);
2328 V += k*(src1[0] - src2[ 0]);
2330 H = ( 17*H+16 ) >> 5;
2331 V = ( 17*V+16 ) >> 5;
2333 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); //plane value at the first pixel
2334 for(j=8; j>0; --j) {
2337 src[0] = cm[ (b ) >> 5 ];
2338 src[1] = cm[ (b+ H) >> 5 ];
2339 src[2] = cm[ (b+2*H) >> 5 ];
2340 src[3] = cm[ (b+3*H) >> 5 ];
2341 src[4] = cm[ (b+4*H) >> 5 ];
2342 src[5] = cm[ (b+5*H) >> 5 ];
2343 src[6] = cm[ (b+6*H) >> 5 ];
2344 src[7] = cm[ (b+7*H) >> 5 ];
//pixel accessor relative to the top-left corner of the current 8x8 block
2349 #define SRC(x,y) src[(x)+(y)*stride]
//body of the PL(y) helper: 1-2-1 filtered left-edge sample l<y>
2351 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
//filtered left-edge samples l0..l7; l0 falls back to the left sample itself
//when the top-left neighbour is unavailable, l7 uses a 1-3 filter at the end
2352 #define PREDICT_8x8_LOAD_LEFT \
2353 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2354 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2355 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2356 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
//body of the PT(x) helper: 1-2-1 filtered top-edge sample t<x>
2359 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
//filtered top-edge samples t0..t7, with the same unavailability fallbacks
2360 #define PREDICT_8x8_LOAD_TOP \
2361 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2362 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2363 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2364 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2365 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
//body of the PTR(x) helper: 1-2-1 filtered top-right sample t<x>
2368 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
//filtered top-right samples t8..t15; replicated from the last top-row sample
//when the top-right neighbour is unavailable
2369 #define PREDICT_8x8_LOAD_TOPRIGHT \
2370 int t8, t9, t10, t11, t12, t13, t14, t15; \
2371 if(has_topright) { \
2372 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2373 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2374 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
//filtered top-left corner sample
2376 #define PREDICT_8x8_LOAD_TOPLEFT \
2377 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
//fill the whole 8x8 block with the byte-replicated 32-bit DC value v
2379 #define PREDICT_8x8_DC(v) \
2381 for( y = 0; y < 8; y++ ) { \
2382 ((uint32_t*)src)[0] = \
2383 ((uint32_t*)src)[1] = v; \
//8x8 luma DC prediction with no neighbours available: fill with mid-grey (0x80).
2387 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2389 PREDICT_8x8_DC(0x80808080);
//8x8 luma DC prediction from the filtered left-edge samples.
2391 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_LOAD_LEFT;
2394 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101; //rounded average, byte-replicated
//8x8 luma DC prediction from the filtered top-edge samples.
2397 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_TOP;
2400 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
//8x8 luma DC prediction from both filtered edges (16 samples in total).
2403 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2405 PREDICT_8x8_LOAD_LEFT;
2406 PREDICT_8x8_LOAD_TOP;
2407 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2408 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
//8x8 luma horizontal prediction: each row is filled with its filtered left sample.
2411 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2413 PREDICT_8x8_LOAD_LEFT;
2414 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2415 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2416 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
//8x8 luma vertical prediction: the filtered top samples form row 0, which is
//then replicated into rows 1..7 with 64-bit copies.
2419 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2422 PREDICT_8x8_LOAD_TOP;
2431 for( y = 1; y < 8; y++ )
2432 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
//8x8 luma diagonal-down-left prediction: every anti-diagonal is written once
//with a 1-2-1 filter of three consecutive top/top-right samples.
2434 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2436 PREDICT_8x8_LOAD_TOP;
2437 PREDICT_8x8_LOAD_TOPRIGHT;
2438 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2439 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2440 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2441 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2442 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2443 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2444 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2445 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2446 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2447 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2448 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2449 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2450 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2451 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2452 SRC(7,7)= (t14 + 3*t15 + 2) >> 2; //last corner uses a 1-3 filter
//8x8 luma diagonal-down-right prediction: each diagonal gets a 1-2-1 filter
//running across the left, top-left and top samples.
2454 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2456 PREDICT_8x8_LOAD_TOP;
2457 PREDICT_8x8_LOAD_LEFT;
2458 PREDICT_8x8_LOAD_TOPLEFT;
2459 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2460 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2461 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2462 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2463 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2464 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2465 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2466 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2467 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2468 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2469 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2470 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2471 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2473 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
//8x8 luma vertical-right prediction: alternating 2-tap rounded averages and
//1-2-1 filters of the left/top-left/top samples, shifted right every 2 rows.
2476 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2478 PREDICT_8x8_LOAD_TOP;
2479 PREDICT_8x8_LOAD_LEFT;
2480 PREDICT_8x8_LOAD_TOPLEFT;
2481 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2482 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2483 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2484 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2485 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2486 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2487 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2488 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2489 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2490 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2491 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2492 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2493 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2494 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2495 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2496 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2497 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2498 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2499 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2500 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2501 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2502 SRC(7,0)= (t6 + t7 + 1) >> 1;
//8x8 luma horizontal-down prediction: alternating 2-tap averages and 1-2-1
//filters of the left/top-left/top samples, shifted down every 2 columns.
2504 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2506 PREDICT_8x8_LOAD_TOP;
2507 PREDICT_8x8_LOAD_LEFT;
2508 PREDICT_8x8_LOAD_TOPLEFT;
2509 SRC(0,7)= (l6 + l7 + 1) >> 1;
2510 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2511 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2512 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2513 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2514 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2515 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2516 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2517 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2518 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2519 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2520 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2521 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2522 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2523 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2524 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2525 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2526 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2527 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2528 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2529 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2530 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
//8x8 luma vertical-left prediction: alternating 2-tap averages and 1-2-1
//filters of the top/top-right samples, shifted left every 2 rows.
2532 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2534 PREDICT_8x8_LOAD_TOP;
2535 PREDICT_8x8_LOAD_TOPRIGHT;
2536 SRC(0,0)= (t0 + t1 + 1) >> 1;
2537 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2538 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2539 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2540 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2541 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2542 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2543 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2544 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2545 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2546 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2547 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2548 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2549 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2550 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2551 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2552 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2553 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2554 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2555 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2556 SRC(7,6)= (t10 + t11 + 1) >> 1;
2557 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
//8x8 luma horizontal-up prediction from the filtered left samples; the
//bottom-right region beyond the last filter tap saturates to l7.
2559 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2561 PREDICT_8x8_LOAD_LEFT;
2562 SRC(0,0)= (l0 + l1 + 1) >> 1;
2563 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2564 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2565 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2566 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2567 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2568 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2569 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2570 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2571 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2572 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2573 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2574 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2575 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2576 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2577 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2578 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2579 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
//the helper macros above are private to the 8x8 luma predictors — undefine them
2581 #undef PREDICT_8x8_LOAD_LEFT
2582 #undef PREDICT_8x8_LOAD_TOP
2583 #undef PREDICT_8x8_LOAD_TOPLEFT
2584 #undef PREDICT_8x8_LOAD_TOPRIGHT
2585 #undef PREDICT_8x8_DC
//Motion-compensate one partition from one reference list: computes the
//quarter-pel luma position (mx,my), runs the selected qpel interpolation for
//luma and the eighth-pel chroma MC for cb/cr. When the vector reaches
//outside the picture (beyond the permitted edge area), the source is first
//copied through ff_emulated_edge_mc() into edge_emu_buffer.
2591 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2592 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2593 int src_x_offset, int src_y_offset,
2594 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2595 MpegEncContext * const s = &h->s;
2596 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; //1/4-pel luma coords
2597 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2598 const int luma_xy= (mx&3) + ((my&3)<<2); //subpel phase selects the qpel function
2599 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2600 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2601 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2602 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2603 int extra_height= extra_width;
2605 const int full_mx= mx>>2;
2606 const int full_my= my>>2;
2607 const int pic_width = 16*s->mb_width;
2608 const int pic_height = 16*s->mb_height;
2610 assert(pic->data[0]);
//subpel interpolation reads pixels around the block, so less edge is usable
2612 if(mx&7) extra_width -= 3;
2613 if(my&7) extra_height -= 3;
2615 if( full_mx < 0-extra_width
2616 || full_my < 0-extra_height
2617 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2618 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2619 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height)// the 2-pixel margin matches the filter's left/top taps
;
2624 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
//non-square partitions issue a second call shifted by delta (pixels or lines)
2626 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2629 if(s->flags&CODEC_FLAG_GRAY) return; //luma-only decoding requested
2632 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2633 src_cb= s->edge_emu_buffer;
2635 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7); //1/8-pel chroma phase
2638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2639 src_cr= s->edge_emu_buffer;
2641 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
//Unweighted prediction for one partition: predict from list0 and/or list1.
//For bi-prediction the first pass writes with the put operators and the
//second pass blends with the averaging operators.
2644 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2645 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2646 int x_offset, int y_offset,
2647 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2648 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2649 int list0, int list1){
2650 MpegEncContext * const s = &h->s;
2651 qpel_mc_func *qpix_op= qpix_put;
2652 h264_chroma_mc_func chroma_op= chroma_put;
//offsets are in chroma pixels; luma is twice as large
2654 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2655 dest_cb += x_offset + y_offset*s->uvlinesize;
2656 dest_cr += x_offset + y_offset*s->uvlinesize;
2657 x_offset += 8*s->mb_x; //make offsets absolute within the picture
2658 y_offset += 8*s->mb_y;
2661 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2662 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2663 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2664 qpix_op, chroma_op);
//switch to avg operators so a following list1 prediction blends with list0
2667 chroma_op= chroma_avg;
2671 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2672 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2673 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2674 qpix_op, chroma_op);
//Weighted prediction for one partition. When both lists are used, the list1
//prediction goes into the obmc_scratchpad temp buffers and the two are
//merged with either implicit weights (use_weight==2, table-driven) or
//explicit bi-weights. With a single list the prediction is made in place and
//the weight/offset is applied afterwards.
2678 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2679 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2680 int x_offset, int y_offset,
2681 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2682 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2683 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2684 int list0, int list1){
2685 MpegEncContext * const s = &h->s;
//offsets are in chroma pixels; luma is twice as large
2687 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2688 dest_cb += x_offset + y_offset*s->uvlinesize;
2689 dest_cr += x_offset + y_offset*s->uvlinesize;
2690 x_offset += 8*s->mb_x;
2691 y_offset += 8*s->mb_y;
2694 /* don't optimize for luma-only case, since B-frames usually
2695 * use implicit weights => chroma too. */
2696 uint8_t *tmp_cb = s->obmc_scratchpad;
2697 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2698 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2699 int refn0 = h->ref_cache[0][ scan8[n] ];
2700 int refn1 = h->ref_cache[1][ scan8[n] ];
//list0 prediction lands in the destination, list1 in the scratch buffers
2702 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2703 dest_y, dest_cb, dest_cr,
2704 x_offset, y_offset, qpix_put, chroma_put);
2705 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2706 tmp_y, tmp_cb, tmp_cr,
2707 x_offset, y_offset, qpix_put, chroma_put);
2709 if(h->use_weight == 2){
2710 int weight0 = h->implicit_weight[refn0][refn1];
2711 int weight1 = 64 - weight0; //implicit weights sum to 64
2712 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0);
2713 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
2714 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
2716 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2717 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2718 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2719 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2720 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2721 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2722 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2723 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2724 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2727 int list = list1 ? 1 : 0; //uni-directional: use whichever list is active
2728 int refn = h->ref_cache[list][ scan8[n] ];
2729 Picture *ref= &h->ref_list[list][refn];
2730 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2732 qpix_put, chroma_put);
2734 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2735 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2736 if(h->use_weight_chroma){
2737 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2738 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2739 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2740 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
//Per-partition MC dispatcher: routes to the weighted path for explicit
//weighting (use_weight==1) or for implicit weighting whose table entry is
//not the trivial 32/32 split — a 32/32 implicit weight is a plain average,
//which the standard (unweighted) path already produces.
2745 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2747 int x_offset, int y_offset,
2748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2750 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2751 int list0, int list1){
2752 if((h->use_weight==2 && list0 && list1
2753 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2754 || h->use_weight==1)
2755 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2756 x_offset, y_offset, qpix_put, chroma_put,
2757 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2759 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2760 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
//Inter-macroblock motion compensation dispatcher: partitions the MB by
//mb_type (16x16, 16x8, 8x16, or 8x8 with a sub_mb_type per 8x8 block) and
//issues mc_part() calls with the matching qpel/chroma function size index,
//weight-table slot, and per-partition prediction directions.
2763 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2764 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2765 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2766 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2767 MpegEncContext * const s = &h->s;
2768 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2769 const int mb_type= s->current_picture.mb_type[mb_xy];
2771 assert(IS_INTER(mb_type));
2773 if(IS_16X16(mb_type)){ //single 16x16 partition
2774 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2775 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2776 &weight_op[0], &weight_avg[0],
2777 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2778 }else if(IS_16X8(mb_type)){ //two 16x8 partitions, stacked vertically
2779 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2780 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2781 &weight_op[1], &weight_avg[1],
2782 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2783 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2784 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2785 &weight_op[1], &weight_avg[1],
2786 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2787 }else if(IS_8X16(mb_type)){ //two 8x16 partitions, side by side
2788 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2789 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2790 &weight_op[2], &weight_avg[2],
2791 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2792 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2793 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2794 &weight_op[2], &weight_avg[2],
2795 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2799 assert(IS_8X8(mb_type));
2802 const int sub_mb_type= h->sub_mb_type[i];
//8x8 block position inside the MB, in chroma-pixel units (doubled for luma)
2804 int x_offset= (i&1)<<2;
2805 int y_offset= (i&2)<<1;
2807 if(IS_SUB_8X8(sub_mb_type)){
2808 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2809 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2810 &weight_op[3], &weight_avg[3],
2811 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2812 }else if(IS_SUB_8X4(sub_mb_type)){
2813 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2814 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2815 &weight_op[4], &weight_avg[4],
2816 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2817 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2818 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2819 &weight_op[4], &weight_avg[4],
2820 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2821 }else if(IS_SUB_4X8(sub_mb_type)){
2822 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2823 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2824 &weight_op[5], &weight_avg[5],
2825 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2826 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2827 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2828 &weight_op[5], &weight_avg[5],
2829 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2832 assert(IS_SUB_4X4(sub_mb_type));
2834 int sub_x_offset= x_offset + 2*(j&1);
2835 int sub_y_offset= y_offset + (j&2);
2836 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2837 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2838 &weight_op[6], &weight_avg[6],
2839 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
//Builds all static CAVLC VLC tables (coeff_token, total_zeros, run); the
//'done' flag guards against repeated initialization from multiple contexts.
2846 static void decode_init_vlc(H264Context *h){
2847 static int done = 0;
2853 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2854 &chroma_dc_coeff_token_len [0], 1, 1,
2855 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2858 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2859 &coeff_token_len [i][0], 1, 1,
2860 &coeff_token_bits[i][0], 1, 1, 1);
2864 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2865 &chroma_dc_total_zeros_len [i][0], 1, 1,
2866 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2868 for(i=0; i<15; i++){
2869 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2870 &total_zeros_len [i][0], 1, 1,
2871 &total_zeros_bits[i][0], 1, 1, 1);
2875 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2876 &run_len [i][0], 1, 1,
2877 &run_bits[i][0], 1, 1, 1);
//long runs share the last (index 6) length/bits tables, with a deeper lookup
2879 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2880 &run_len [6][0], 1, 1,
2881 &run_bits[6][0], 1, 1, 1);
2886 * Sets the intra prediction function pointers.
//Installs the plain C intra prediction implementations into the context's
//function pointer tables (4x4, 8x8 luma, 8x8 chroma and 16x16 modes).
2888 static void init_pred_ptrs(H264Context *h){
2889 // MpegEncContext * const s = &h->s;
2891 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2892 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2893 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2894 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2895 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2896 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2897 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2898 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2899 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2900 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2901 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2902 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2904 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2905 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2906 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2907 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2908 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2909 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2910 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2911 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2912 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2913 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2914 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2915 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2917 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2918 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2919 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2920 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2921 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2922 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2923 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
//the 16x16 table reuses the *_PRED8x8 mode indices of the chroma table
2925 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2926 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2927 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2928 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2929 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2930 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2931 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
//Frees every per-context table allocated by alloc_tables(). slice_table is
//an alias into slice_table_base, so only the base is freed and the alias is
//cleared.
2934 static void free_tables(H264Context *h){
2935 av_freep(&h->intra4x4_pred_mode);
2936 av_freep(&h->chroma_pred_mode_table);
2937 av_freep(&h->cbp_table);
2938 av_freep(&h->mvd_table[0]);
2939 av_freep(&h->mvd_table[1]);
2940 av_freep(&h->direct_table);
2941 av_freep(&h->non_zero_count);
2942 av_freep(&h->slice_table_base);
2943 av_freep(&h->top_borders[1]);
2944 av_freep(&h->top_borders[0]);
2945 h->slice_table= NULL;
2947 av_freep(&h->mb2b_xy);
2948 av_freep(&h->mb2b8_xy);
2950 av_freep(&h->s.obmc_scratchpad);
//Builds the 8x8 dequant coefficient tables for all 52 QPs. There are two
//scaling matrices (intra/inter); when their scaling lists are identical the
//second table aliases the first's buffer.
2953 static void init_dequant8_coeff_table(H264Context *h){
2955 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2956 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2958 for(i=0; i<2; i++ ){
2959 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2960 h->dequant8_coeff[1] = h->dequant8_buffer[0]; //identical lists: share buffer 0
2964 for(q=0; q<52; q++){
2965 int shift = div6[q]; //coefficients double every 6 QP steps
2968 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2969 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
//Builds the six 4x4 dequant tables (one per scaling list) for all 52 QPs.
//Tables whose scaling lists compare equal share a buffer, and the
//coefficient layout is transposed when a non-reference IDCT is installed.
2974 static void init_dequant4_coeff_table(H264Context *h){
2976 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2977 for(i=0; i<6; i++ ){
2978 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2980 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2981 h->dequant4_coeff[i] = h->dequant4_buffer[j]; //reuse an earlier identical table
2988 for(q=0; q<52; q++){
2989 int shift = div6[q] + 2; //coefficients double every 6 QP steps (+2 base scaling)
2992 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2993 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2994 h->pps.scaling_matrix4[i][x]) << shift;
//Sets up all dequant tables; for transform-bypass (lossless) streams the
//QP 0 entries are forced to the neutral value 1<<6 so dequantization is a
//no-op there.
2999 static void init_dequant_tables(H264Context *h){
3001 init_dequant4_coeff_table(h);
3002 if(h->pps.transform_8x8_mode)
3003 init_dequant8_coeff_table(h);
3004 if(h->sps.transform_bypass){
3007 h->dequant4_coeff[i][0][x] = 1<<6;
3008 if(h->pps.transform_8x8_mode)
3011 h->dequant8_coeff[i][0][x] = 1<<6;
3018 * needs width/height
//Allocates all per-context tables whose size depends only on the frame
//dimensions (intra modes, CBP, MV deltas, slice map, mb-to-b-index maps).
//NOTE(review): CHECKED_ALLOCZ is defined outside this excerpt; it appears to
//bail out to a cleanup path on allocation failure — confirm.
3020 static int alloc_tables(H264Context *h){
3021 MpegEncContext * const s = &h->s;
3022 const int big_mb_num= s->mb_stride * (s->mb_height+1); //one extra MB row
3025 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3027 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3028 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3029 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t)) //Y + Cb + Cr
3030 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3031 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3033 if( h->pps.cabac ) {
3034 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3035 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3036 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3037 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
3040 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t)); //-1 marks "no slice"
3041 h->slice_table= h->slice_table_base + s->mb_stride + 1; //offset past the guard row/column
3043 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3044 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3045 for(y=0; y<s->mb_height; y++){
3046 for(x=0; x<s->mb_width; x++){
3047 const int mb_xy= x + y*s->mb_stride;
3048 const int b_xy = 4*x + 4*y*h->b_stride; //4x4 motion-block index of this MB
3049 const int b8_xy= 2*x + 2*y*h->b8_stride; //8x8 block index of this MB
3051 h->mb2b_xy [mb_xy]= b_xy;
3052 h->mb2b8_xy[mb_xy]= b8_xy;
3056 s->obmc_scratchpad = NULL; //allocated lazily in frame_start() (needs linesize)
3058 if(!h->dequant4_coeff[0])
3059 init_dequant_tables(h);
//Context setup shared by decoder paths: frame geometry, codec id, and flat
//(all-16) default scaling matrices used until an SPS/PPS overrides them.
3067 static void common_init(H264Context *h){
3068 MpegEncContext * const s = &h->s;
3070 s->width = s->avctx->width;
3071 s->height = s->avctx->height;
3072 s->codec_id= s->avctx->codec->id;
3076 h->dequant_coeff_pps= -1; //no PPS applied to the dequant tables yet
3077 s->unrestricted_mv=1;
3078 s->decode=1; //FIXME
3080 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3081 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
//AVCodec init callback: sets up MPV defaults, output format/pixel format,
//and inspects the extradata.
3084 static int decode_init(AVCodecContext *avctx){
3085 H264Context *h= avctx->priv_data;
3086 MpegEncContext * const s = &h->s;
3088 MPV_decode_defaults(s);
3093 s->out_format = FMT_H264;
3094 s->workaround_bugs= avctx->workaround_bugs;
3097 // s->decode_mb= ff_h263_decode_mb;
3099 avctx->pix_fmt= PIX_FMT_YUV420P;
//a leading extradata byte of 1 presumably marks avcC-style (length-prefixed
//NAL) extradata — NOTE(review): the handling itself lies outside this excerpt
3103 if(avctx->extradata_size > 0 && avctx->extradata &&
3104 *(char *)avctx->extradata == 1){
//Per-frame initialization: starts the MPV/error-resilience frame, fills the
//block_offset tables (pixel offset of each block inside the macroblock, for
//luma and chroma) and lazily allocates the bi-pred weighting scratch buffer.
3114 static int frame_start(H264Context *h){
3115 MpegEncContext * const s = &h->s;
3118 if(MPV_frame_start(s, s->avctx) < 0)
3120 ff_er_frame_start(s);
3122 assert(s->linesize && s->uvlinesize);
3124 for(i=0; i<16; i++){
3125 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
//entries 24+ use doubled line spacing — presumably for field-style access;
//confirm against the users of block_offset
3126 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3129 h->block_offset[16+i]=
3130 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3131 h->block_offset[24+16+i]=
3132 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3135 /* can't be in alloc_tables because linesize isn't known there.
3136 * FIXME: redo bipred weight to not require extra buffer? */
3137 if(!s->obmc_scratchpad)
3138 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize); //Y + Cb + Cr scratch
3140 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Save the right column (left_border) and bottom row (top_borders) of the
 * current macroblock so neighbouring macroblocks can use the unfiltered
 * pixels; used together with the deblocking filter. */
3144 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3145 MpegEncContext * const s = &h->s;
3149 src_cb -= uvlinesize;
3150 src_cr -= uvlinesize;
3152 // There are two lines saved, the line above the top macroblock of a pair,
3153 // and the line above the bottom macroblock
// left_border[0] is the top-left corner sample, taken from the saved top row
3154 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3155 for(i=1; i<17; i++){
3156 h->left_border[i]= src_y[15+i* linesize];
// save the 16 luma pixels of the macroblock's bottom row (two uint64 copies)
3159 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// chroma planes are skipped entirely in grayscale-only decoding
3162 if(!(s->flags&CODEC_FLAG_GRAY)){
3163 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3164 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3166 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3167 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3169 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3170 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Exchange (or copy, depending on the XCHG macro and the xchg flag) the
 * saved unfiltered border pixels with the picture, around intra prediction:
 * called with xchg=1 before prediction and xchg=0 after (see hl_decode_mb). */
3174 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3175 MpegEncContext * const s = &h->s;
// borders only exist where a left/top neighbour exists
3178 int deblock_left = (s->mb_x > 0);
3179 int deblock_top = (s->mb_y > 0);
// step back to the row above / column left of the macroblock
3181 src_y -= linesize + 1;
3182 src_cb -= uvlinesize + 1;
3183 src_cr -= uvlinesize + 1;
3185 #define XCHG(a,b,t,xchg)\
// left column: skip row 0 when there is no top neighbour
3192 for(i = !deblock_top; i<17; i++){
3193 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3198 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3199 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbour pixels, only while not at the last macroblock column
3200 if(s->mb_x+1 < s->mb_width){
3201 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3205 if(!(s->flags&CODEC_FLAG_GRAY)){
3207 for(i = !deblock_top; i<9; i++){
3208 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3209 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3213 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3214 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border: save the borders of a macroblock PAIR
 * (32 luma rows), keeping two top lines — one per macroblock of the pair —
 * in top_borders[0] and top_borders[1]. */
3219 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3220 MpegEncContext * const s = &h->s;
3223 src_y -= 2 * linesize;
3224 src_cb -= 2 * uvlinesize;
3225 src_cr -= 2 * uvlinesize;
3227 // There are two lines saved, the line above the top macroblock of a pair,
3228 // and the line above the bottom macroblock
3229 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3230 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 32 luma rows of the pair (indices 2..33)
3231 for(i=2; i<34; i++){
3232 h->left_border[i]= src_y[15+i* linesize];
// bottom two luma rows of the pair become the next pair's top borders
3235 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3236 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3237 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3238 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3240 if(!(s->flags&CODEC_FLAG_GRAY)){
3241 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3242 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3243 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3244 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3245 for(i=2; i<18; i++){
3246 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3247 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3249 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3250 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3251 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3252 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border: exchange/copy the saved border pixels of
 * a macroblock pair with the picture, around intra prediction. */
3256 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3257 MpegEncContext * const s = &h->s;
3260 int deblock_left = (s->mb_x > 0);
3261 int deblock_top = (s->mb_y > 0);
3263 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// step back two rows (the pair's two saved top lines) and one column
3265 src_y -= 2 * linesize + 1;
3266 src_cb -= 2 * uvlinesize + 1;
3267 src_cr -= 2 * uvlinesize + 1;
3269 #define XCHG(a,b,t,xchg)\
// skip the first two rows when there is no top neighbour
3276 for(i = (!deblock_top)<<1; i<34; i++){
3277 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3282 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3283 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3284 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3285 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3288 if(!(s->flags&CODEC_FLAG_GRAY)){
3290 for(i = (!deblock_top) << 1; i<18; i++){
3291 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3292 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3296 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3297 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3298 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3299 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* High-level decode of one macroblock: prediction (intra or motion
 * compensation), residual IDCT+add for luma and chroma, then the deblocking
 * filter. Handles PCM, transform-bypass, 8x8 DCT and MBAFF field pairs. */
3304 static void hl_decode_mb(H264Context *h){
3305 MpegEncContext * const s = &h->s;
3306 const int mb_x= s->mb_x;
3307 const int mb_y= s->mb_y;
3308 const int mb_xy= mb_x + mb_y*s->mb_stride;
3309 const int mb_type= s->current_picture.mb_type[mb_xy];
3310 uint8_t *dest_y, *dest_cb, *dest_cr;
3311 int linesize, uvlinesize /*dct_offset*/;
3313 int *block_offset = &h->block_offset[0];
3314 const unsigned int bottom = mb_y & 1;
// lossless path: qscale 0 with the SPS transform-bypass flag set
3315 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3316 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3317 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers for this macroblock in the current picture
3322 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3323 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3324 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// field macroblocks use doubled strides and the field block_offset table
3326 if (h->mb_field_decoding_flag) {
3327 linesize = s->linesize * 2;
3328 uvlinesize = s->uvlinesize * 2;
3329 block_offset = &h->block_offset[24];
3330 if(mb_y&1){ //FIXME move out of this func?
3331 dest_y -= s->linesize*15;
3332 dest_cb-= s->uvlinesize*7;
3333 dest_cr-= s->uvlinesize*7;
3336 linesize = s->linesize;
3337 uvlinesize = s->uvlinesize;
3338 // dct_offset = s->linesize * 16;
// pick the IDCT-add routines: bypass just adds the residual, 8x8 DCT and
// plain 4x4 each get a normal and a DC-only variant
3341 if(transform_bypass){
3343 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3344 }else if(IS_8x8DCT(mb_type)){
3345 idct_dc_add = s->dsp.h264_idct8_dc_add;
3346 idct_add = s->dsp.h264_idct8_add;
3348 idct_dc_add = s->dsp.h264_idct_dc_add;
3349 idct_add = s->dsp.h264_idct_add;
// --- PCM macroblock: raw samples are copied straight to the output ---
3352 if (IS_INTRA_PCM(mb_type)) {
3355 // The pixels are stored in h->mb array in the same order as levels,
3356 // copy them in output in the correct order.
3357 for(i=0; i<16; i++) {
3358 for (y=0; y<4; y++) {
3359 for (x=0; x<4; x++) {
3360 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3364 for(i=16; i<16+4; i++) {
3365 for (y=0; y<4; y++) {
3366 for (x=0; x<4; x++) {
3367 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3371 for(i=20; i<20+4; i++) {
3372 for (y=0; y<4; y++) {
3373 for (x=0; x<4; x++) {
3374 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// --- intra prediction (borders swapped in so prediction sees unfiltered
// neighbour pixels, swapped back out after) ---
3379 if(IS_INTRA(mb_type)){
3380 if(h->deblocking_filter) {
3381 if (h->mb_aff_frame) {
3383 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3385 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3389 if(!(s->flags&CODEC_FLAG_GRAY)){
3390 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3391 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// intra 4x4 / 8x8: per-block prediction immediately followed by residual add
3394 if(IS_INTRA4x4(mb_type)){
3396 if(IS_8x8DCT(mb_type)){
3397 for(i=0; i<16; i+=4){
3398 uint8_t * const ptr= dest_y + block_offset[i];
3399 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3400 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3401 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3402 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// nnz==1 with a DC coefficient: cheap DC-only IDCT
3404 if(nnz == 1 && h->mb[i*16])
3405 idct_dc_add(ptr, h->mb + i*16, linesize);
3407 idct_add(ptr, h->mb + i*16, linesize);
3411 for(i=0; i<16; i++){
3412 uint8_t * const ptr= dest_y + block_offset[i];
3414 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// modes that need the top-right samples fall back to replicating the
// last available top pixel when the neighbour is absent
3417 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3418 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3419 assert(mb_y || linesize <= block_offset[i]);
3420 if(!topright_avail){
3421 tr= ptr[3 - linesize]*0x01010101;
3422 topright= (uint8_t*) &tr;
3424 topright= ptr + 4 - linesize;
3428 h->pred4x4[ dir ](ptr, topright, linesize);
3429 nnz = h->non_zero_count_cache[ scan8[i] ];
3431 if(s->codec_id == CODEC_ID_H264){
3432 if(nnz == 1 && h->mb[i*16])
3433 idct_dc_add(ptr, h->mb + i*16, linesize);
3435 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 shares this decoder but uses its own IDCT
3437 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: whole-MB prediction plus separate luma DC transform
3442 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3443 if(s->codec_id == CODEC_ID_H264){
3444 if(!transform_bypass)
3445 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3447 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3449 if(h->deblocking_filter) {
3450 if (h->mb_aff_frame) {
// restore the pair's borders relative to the TOP macroblock of the pair
3452 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3453 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3454 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3456 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3460 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// --- inter macroblock: motion compensation (H.264 only) ---
3463 }else if(s->codec_id == CODEC_ID_H264){
3464 hl_motion(h, dest_y, dest_cb, dest_cr,
3465 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3466 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3467 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// --- luma residual for everything except intra4x4 (already added above) ---
3471 if(!IS_INTRA4x4(mb_type)){
3472 if(s->codec_id == CODEC_ID_H264){
3473 if(IS_INTRA16x16(mb_type)){
3474 for(i=0; i<16; i++){
3475 if(h->non_zero_count_cache[ scan8[i] ])
3476 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3477 else if(h->mb[i*16])
3478 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// inter: step 4 blocks at a time for 8x8 DCT, 1 otherwise
3481 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3482 for(i=0; i<16; i+=di){
3483 int nnz = h->non_zero_count_cache[ scan8[i] ];
3485 if(nnz==1 && h->mb[i*16])
3486 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3488 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3493 for(i=0; i<16; i++){
3494 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3495 uint8_t * const ptr= dest_y + block_offset[i];
3496 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// --- chroma residual (blocks 16..23; dest index picks cb vs cr) ---
3502 if(!(s->flags&CODEC_FLAG_GRAY)){
3503 uint8_t *dest[2] = {dest_cb, dest_cr};
3504 if(transform_bypass){
3505 idct_add = idct_dc_add = s->dsp.add_pixels4;
3507 idct_add = s->dsp.h264_idct_add;
3508 idct_dc_add = s->dsp.h264_idct_dc_add;
3509 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3510 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3512 if(s->codec_id == CODEC_ID_H264){
3513 for(i=16; i<16+8; i++){
3514 if(h->non_zero_count_cache[ scan8[i] ])
3515 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3516 else if(h->mb[i*16])
3517 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3520 for(i=16; i<16+8; i++){
3521 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3522 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3523 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// --- deblocking: MBAFF filters the whole pair once the bottom MB is done ---
3529 if(h->deblocking_filter) {
3530 if (h->mb_aff_frame) {
3531 const int mb_y = s->mb_y - 1;
3532 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3533 const int mb_xy= mb_x + mb_y*s->mb_stride;
3534 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3535 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// tmp watches one pixel to detect accidental modification (debug aid)
3536 uint8_t tmp = s->current_picture.data[1][384];
3537 if (!bottom) return;
3538 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3539 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3540 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3542 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3543 // TODO deblock a pair
3546 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3547 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3548 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3549 if (tmp != s->current_picture.data[1][384]) {
3550 tprintf("modified pixel 8,1 (1)\n");
3554 tprintf("call mbaff filter_mb\n");
3555 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3556 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3557 if (tmp != s->current_picture.data[1][384]) {
3558 tprintf("modified pixel 8,1 (2)\n");
3561 tprintf("call filter_mb\n");
3562 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3563 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3564 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3570 * fills the default_ref_list.
/* B slices: L0 is built from short refs sorted by POC (past first, then
 * future), L1 with the opposite order; long-term refs are appended to both.
 * P slices: short refs in decode order, then long-term refs. */
3572 static int fill_default_ref_list(H264Context *h){
3573 MpegEncContext * const s = &h->s;
3575 int smallest_poc_greater_than_current = -1;
3576 Picture sorted_short_ref[32];
3578 if(h->slice_type==B_TYPE){
3582 /* sort frame according to poc in B slice */
// selection sort over short_ref[] into sorted_short_ref[], ascending POC
3583 for(out_i=0; out_i<h->short_ref_count; out_i++){
3585 int best_poc=INT_MAX;
3587 for(i=0; i<h->short_ref_count; i++){
3588 const int poc= h->short_ref[i]->poc;
3589 if(poc > limit && poc < best_poc){
3595 assert(best_i != INT_MIN);
3598 sorted_short_ref[out_i]= *h->short_ref[best_i];
3599 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the future (POC >= current) references begin
3600 if (-1 == smallest_poc_greater_than_current) {
3601 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3602 smallest_poc_greater_than_current = out_i;
3608 if(s->picture_structure == PICT_FRAME){
3609 if(h->slice_type==B_TYPE){
3611 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3613 // find the largest poc
// L0 walks backward from the split point (past refs first),
// L1 walks forward (future refs first); j wraps at the ends
3614 for(list=0; list<2; list++){
3617 int step= list ? -1 : 1;
3619 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3620 while(j<0 || j>= h->short_ref_count){
3621 if(j != -99 && step == (list ? -1 : 1))
3624 j= smallest_poc_greater_than_current + (step>>1);
// only frame references (both fields used) qualify
3626 if(sorted_short_ref[j].reference != 3) continue;
3627 h->default_ref_list[list][index ]= sorted_short_ref[j];
3628 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// append long-term references, identified by their long-term index
3631 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3632 if(h->long_ref[i] == NULL) continue;
3633 if(h->long_ref[i]->reference != 3) continue;
3635 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3636 h->default_ref_list[ list ][index++].pic_id= i;;
3639 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3640 // swap the two first elements of L1 when
3641 // L0 and L1 are identical
3642 Picture temp= h->default_ref_list[1][0];
3643 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3644 h->default_ref_list[1][1] = temp;
3647 if(index < h->ref_count[ list ])
3648 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// non-B slice: short-term refs in buffer order, then long-term refs
3652 for(i=0; i<h->short_ref_count; i++){
3653 if(h->short_ref[i]->reference != 3) continue; //FIXME handle single-field references
3654 h->default_ref_list[0][index ]= *h->short_ref[i];
3655 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3657 for(i = 0; i < 16; i++){
3658 if(h->long_ref[i] == NULL) continue;
3659 if(h->long_ref[i]->reference != 3) continue;
3660 h->default_ref_list[0][index ]= *h->long_ref[i];
3661 h->default_ref_list[0][index++].pic_id= i;;
3663 if(index < h->ref_count[0])
3664 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3667 if(h->slice_type==B_TYPE){
3669 //FIXME second field not handled yet
3673 for (i=0; i<h->ref_count[0]; i++) {
3674 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3676 if(h->slice_type==B_TYPE){
3677 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): data[0] below indexes list 0, not list 1 — looks like a
// copy/paste slip in this debug trace; verify against upstream.
3678 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3685 static void print_short_term(H264Context *h);
3686 static void print_long_term(H264Context *h);
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering opcode move the
 * selected short/long-term picture to the current index.
 * Returns 0 on success, -1 on bitstream errors. */
3688 static int decode_ref_pic_list_reordering(H264Context *h){
3689 MpegEncContext * const s = &h->s;
3692 print_short_term(h);
3694 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3696 for(list=0; list<2; list++){
3697 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
3699 if(get_bits1(&s->gb)){
// pred tracks the running picture-number predictor (picNumLXPred)
3700 int pred= h->curr_pic_num;
3702 for(index=0; ; index++){
3703 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3706 Picture *ref = NULL;
// idc 3 terminates the reordering loop
3708 if(reordering_of_pic_nums_idc==3)
3711 if(index >= h->ref_count[list]){
3712 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3716 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term, picture number given as a +-difference
3717 if(reordering_of_pic_nums_idc<2){
3718 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3720 if(abs_diff_pic_num >= h->max_pic_num){
3721 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3725 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3726 else pred+= abs_diff_pic_num;
// modular wrap; max_pic_num is a power of two
3727 pred &= h->max_pic_num - 1;
3729 for(i= h->short_ref_count-1; i>=0; i--){
3730 ref = h->short_ref[i];
3731 assert(ref->reference == 3);
3732 assert(!ref->long_ref);
3733 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3737 ref->pic_id= ref->frame_num;
// idc 2: long-term, addressed directly by long_term_pic_idx
3739 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3740 ref = h->long_ref[pic_id];
3741 ref->pic_id= pic_id;
3742 assert(ref->reference == 3);
3743 assert(ref->long_ref);
3748 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3749 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift the list down to make room and insert ref at index
3751 for(i=index; i+1<h->ref_count[list]; i++){
3752 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3755 for(; i > index; i--){
3756 h->ref_list[list][i]= h->ref_list[list][i-1];
3758 h->ref_list[list][index]= *ref;
3761 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3767 if(h->slice_type!=B_TYPE) break;
// replace any "missing picture" holes with the current picture
3769 for(list=0; list<2; list++){
3770 for(index= 0; index < h->ref_count[list]; index++){
3771 if(!h->ref_list[list][index].data[0])
3772 h->ref_list[list][index]= s->current_picture;
3774 if(h->slice_type!=B_TYPE) break;
// temporal direct mode needs the distance scale factors
3777 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3778 direct_dist_scale_factor(h);
3779 direct_ref_list_init(h);
/* Parse pred_weight_table() (explicit weighted prediction) from the slice
 * header. Fills luma/chroma weights and offsets per list/ref, falling back
 * to the defaults (1<<denom weight, 0 offset) when a flag is absent; sets
 * use_weight / use_weight_chroma when any non-default value is seen. */
3783 static int pred_weight_table(H264Context *h){
3784 MpegEncContext * const s = &h->s;
3786 int luma_def, chroma_def;
3789 h->use_weight_chroma= 0;
3790 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3791 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3792 luma_def = 1<<h->luma_log2_weight_denom;
3793 chroma_def = 1<<h->chroma_log2_weight_denom;
3795 for(list=0; list<2; list++){
3796 for(i=0; i<h->ref_count[list]; i++){
3797 int luma_weight_flag, chroma_weight_flag;
3799 luma_weight_flag= get_bits1(&s->gb);
3800 if(luma_weight_flag){
3801 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3802 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3803 if( h->luma_weight[list][i] != luma_def
3804 || h->luma_offset[list][i] != 0)
3807 h->luma_weight[list][i]= luma_def;
3808 h->luma_offset[list][i]= 0;
3811 chroma_weight_flag= get_bits1(&s->gb);
3812 if(chroma_weight_flag){
// j iterates over the two chroma components (cb, cr)
3815 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3816 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3817 if( h->chroma_weight[list][i][j] != chroma_def
3818 || h->chroma_offset[list][i][j] != 0)
3819 h->use_weight_chroma= 1;
3824 h->chroma_weight[list][i][j]= chroma_def;
3825 h->chroma_offset[list][i][j]= 0;
// list 1 only exists for B slices
3829 if(h->slice_type != B_TYPE) break;
3831 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute the implicit weighted-prediction table for B slices: weights are
 * derived from POC distances (tb/td), clamped to 32/32 when the scale
 * factor leaves the permitted range. */
3835 static void implicit_weight_table(H264Context *h){
3836 MpegEncContext * const s = &h->s;
3838 int cur_poc = s->current_picture_ptr->poc;
// single symmetric reference pair: implicit weighting degenerates to
// plain averaging, so disable it entirely
3840 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3841 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3843 h->use_weight_chroma= 0;
// use_weight==2 selects the implicit mode downstream — TODO confirm
3848 h->use_weight_chroma= 2;
3849 h->luma_log2_weight_denom= 5;
3850 h->chroma_log2_weight_denom= 5;
3853 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3854 int poc0 = h->ref_list[0][ref0].poc;
3855 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3856 int poc1 = h->ref_list[1][ref1].poc;
// td/tb are the clipped POC distances from the H.264 spec (8.4.2.3.2)
3857 int td = clip(poc1 - poc0, -128, 127);
3859 int tb = clip(cur_poc - poc0, -128, 127);
3860 int tx = (16384 + (ABS(td) >> 1)) / td;
3861 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factors fall back to the equal 32/32 weighting
3862 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3863 h->implicit_weight[ref0][ref1] = 32;
3865 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3867 h->implicit_weight[ref0][ref1] = 32;
/* Drop the decoder's reference claim on pic. The picture stays referenced
 * while it is still queued for output (delayed_output_pic / delayed_pic[]);
 * the actual reference-count updates are in the lines elided from this
 * excerpt. */
3872 static inline void unreference_pic(H264Context *h, Picture *pic){
3875 if(pic == h->delayed_output_pic)
3878 for(i = 0; h->delayed_pic[i]; i++)
3879 if(pic == h->delayed_pic[i]){
3887 * instantaneous decoder refresh.
/* IDR handling: unreference and clear every long-term and short-term
 * reference picture, resetting both counts to zero. */
3889 static void idr(H264Context *h){
3892 for(i=0; i<16; i++){
3893 if (h->long_ref[i] != NULL) {
3894 unreference_pic(h, h->long_ref[i]);
3895 h->long_ref[i]= NULL;
3898 h->long_ref_count=0;
3900 for(i=0; i<h->short_ref_count; i++){
3901 unreference_pic(h, h->short_ref[i]);
3902 h->short_ref[i]= NULL;
3904 h->short_ref_count=0;
3907 /* forget old pics after a seek */
/* avcodec flush callback: discard the delayed-output queue and drop the
 * reference on the current picture so decoding can restart cleanly. */
3908 static void flush_dpb(AVCodecContext *avctx){
3909 H264Context *h= avctx->priv_data;
3912 h->delayed_pic[i]= NULL;
3913 h->delayed_output_pic= NULL;
3915 if(h->s.current_picture_ptr)
3916 h->s.current_picture_ptr->reference= 0;
3921 * @return the removed picture or NULL if an error occurs
/* Remove the short-term reference with the given frame_num from
 * short_ref[], compacting the array. Does NOT unreference the picture —
 * callers do that. */
3923 static Picture * remove_short(H264Context *h, int frame_num){
3924 MpegEncContext * const s = &h->s;
3927 if(s->avctx->debug&FF_DEBUG_MMCO)
3928 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3930 for(i=0; i<h->short_ref_count; i++){
3931 Picture *pic= h->short_ref[i];
3932 if(s->avctx->debug&FF_DEBUG_MMCO)
3933 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3934 if(pic->frame_num == frame_num){
3935 h->short_ref[i]= NULL;
// close the gap left by the removed entry
3936 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3937 h->short_ref_count--;
3946 * @return the removed picture or NULL if an error occurs
/* Remove long-term reference at index i (may be empty/NULL); decrements
 * long_ref_count only when a picture was actually present. Does not
 * unreference the picture — callers do that. */
3948 static Picture * remove_long(H264Context *h, int i){
3951 pic= h->long_ref[i];
3952 h->long_ref[i]= NULL;
3953 if(pic) h->long_ref_count--;
3959 * print short term list
/* Debug helper: dump the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3961 static void print_short_term(H264Context *h) {
3963 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3964 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3965 for(i=0; i<h->short_ref_count; i++){
3966 Picture *pic= h->short_ref[i];
3967 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3973 * print long term list
/* Debug helper: dump all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3975 static void print_long_term(H264Context *h) {
3977 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3978 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3979 for(i = 0; i < 16; i++){
3980 Picture *pic= h->long_ref[i];
3982 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3989 * Executes the reference picture marking (memory management control operations).
/* Apply the parsed MMCO opcodes to the DPB (spec 8.2.5): move/remove
 * short- and long-term references, then insert the current picture as a
 * short-term reference unless it was marked long-term. */
3991 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3992 MpegEncContext * const s = &h->s;
3994 int current_is_long=0;
3997 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3998 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4000 for(i=0; i<mmco_count; i++){
4001 if(s->avctx->debug&FF_DEBUG_MMCO)
4002 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4004 switch(mmco[i].opcode){
4005 case MMCO_SHORT2UNUSED:
4006 pic= remove_short(h, mmco[i].short_frame_num);
4008 unreference_pic(h, pic);
4009 else if(s->avctx->debug&FF_DEBUG_MMCO)
4010 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// promote a short-term ref to long-term; any picture already occupying
// the target long-term slot is evicted first
4012 case MMCO_SHORT2LONG:
4013 pic= remove_long(h, mmco[i].long_index);
4014 if(pic) unreference_pic(h, pic);
4016 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4017 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4018 h->long_ref_count++;
4020 case MMCO_LONG2UNUSED:
4021 pic= remove_long(h, mmco[i].long_index);
4023 unreference_pic(h, pic);
4024 else if(s->avctx->debug&FF_DEBUG_MMCO)
4025 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// MMCO_LONG: mark the CURRENT picture long-term at the given index
4028 pic= remove_long(h, mmco[i].long_index);
4029 if(pic) unreference_pic(h, pic);
4031 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4032 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4033 h->long_ref_count++;
4037 case MMCO_SET_MAX_LONG:
4038 assert(mmco[i].long_index <= 16);
4039 // just remove the long term which index is greater than new max
4040 for(j = mmco[i].long_index; j<16; j++){
4041 pic = remove_long(h, j);
4042 if (pic) unreference_pic(h, pic);
// MMCO_RESET (presumably): clear the whole DPB
4046 while(h->short_ref_count){
4047 pic= remove_short(h, h->short_ref[0]->frame_num);
4048 unreference_pic(h, pic);
4050 for(j = 0; j < 16; j++) {
4051 pic= remove_long(h, j);
4052 if(pic) unreference_pic(h, pic);
// unless marked long-term above, push the current picture onto the front
// of the short-term list (most recent first)
4059 if(!current_is_long){
4060 pic= remove_short(h, s->current_picture_ptr->frame_num);
4062 unreference_pic(h, pic);
4063 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4066 if(h->short_ref_count)
4067 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4069 h->short_ref[0]= s->current_picture_ptr;
4070 h->short_ref[0]->long_ref=0;
4071 h->short_ref_count++;
4074 print_short_term(h);
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get a synthetic MMCO_LONG when long_term_reference_flag is
 * set; otherwise either adaptive MMCO opcodes are read or, in sliding-
 * window mode, a SHORT2UNUSED is synthesized when the DPB is full. */
4079 static int decode_ref_pic_marking(H264Context *h){
4080 MpegEncContext * const s = &h->s;
4083 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag, mapped so that "set" yields 0
4084 s->broken_link= get_bits1(&s->gb) -1;
4085 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4086 if(h->mmco[0].long_index == -1)
4089 h->mmco[0].opcode= MMCO_LONG;
4093 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4094 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4095 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4097 h->mmco[i].opcode= opcode;
4098 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame number, mod wrap
4099 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4100 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4101 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4105 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4106 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4107 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4108 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4113 if(opcode > MMCO_LONG){
4114 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4117 if(opcode == MMCO_END)
// sliding window: when the DPB is full, drop the oldest short-term ref
4122 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4124 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4125 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4126 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Derive the picture order count for the current picture (spec 8.2.1),
 * implementing all three poc_type modes, and store the field/frame POCs
 * in the current Picture. */
4136 static int init_poc(H264Context *h){
4137 MpegEncContext * const s = &h->s;
4138 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4141 if(h->nal_unit_type == NAL_IDR_SLICE){
4142 h->frame_num_offset= 0;
// frame_num wrapped: bump the offset by one full cycle
4144 if(h->frame_num < h->prev_frame_num)
4145 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4147 h->frame_num_offset= h->prev_frame_num_offset;
// --- poc_type 0: explicit poc_lsb with msb wrap detection ---
4150 if(h->sps.poc_type==0){
4151 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4153 if(h->nal_unit_type == NAL_IDR_SLICE){
4158 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4159 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4160 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4161 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4163 h->poc_msb = h->prev_poc_msb;
4164 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4166 field_poc[1] = h->poc_msb + h->poc_lsb;
4167 if(s->picture_structure == PICT_FRAME)
4168 field_poc[1] += h->delta_poc_bottom;
// --- poc_type 1: poc derived from frame_num and the SPS offset cycle ---
4169 }else if(h->sps.poc_type==1){
4170 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4173 if(h->sps.poc_cycle_length != 0)
4174 abs_frame_num = h->frame_num_offset + h->frame_num;
4178 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4181 expected_delta_per_poc_cycle = 0;
4182 for(i=0; i < h->sps.poc_cycle_length; i++)
4183 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4185 if(abs_frame_num > 0){
4186 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4187 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4189 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4190 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4191 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4195 if(h->nal_ref_idc == 0)
4196 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4198 field_poc[0] = expectedpoc + h->delta_poc[0];
4199 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4201 if(s->picture_structure == PICT_FRAME)
4202 field_poc[1] += h->delta_poc[1];
// --- poc_type 2: poc follows decoding order (2*frame_num, -1 for non-refs)
4205 if(h->nal_unit_type == NAL_IDR_SLICE){
4208 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4209 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store per-field POCs; frame poc is the smaller of the two
4215 if(s->picture_structure != PICT_BOTTOM_FIELD)
4216 s->current_picture_ptr->field_poc[0]= field_poc[0];
4217 if(s->picture_structure != PICT_TOP_FIELD)
4218 s->current_picture_ptr->field_poc[1]= field_poc[1];
4219 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4220 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4226 * decodes a slice header.
4227 * this will also call MPV_common_init() and frame_start() as needed
4229 static int decode_slice_header(H264Context *h){
// NOTE(review): this excerpt is non-contiguous — intermediate source lines
// (closing braces, some declarations, else-branches) are missing from view.
4230 MpegEncContext * const s = &h->s;
4231 int first_mb_in_slice, pps_id;
4232 int num_ref_idx_active_override_flag;
// Maps the coded slice_type value (after modulo-5 reduction) to the internal picture-type enum.
4233 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4235 int default_ref_list_done = 0;
// A picture is a reference iff nal_ref_idc != 0; dropable is the complement.
4237 s->current_picture.reference= h->nal_ref_idc != 0;
4238 s->dropable= h->nal_ref_idc == 0;
4240 first_mb_in_slice= get_ue_golomb(&s->gb);
4242 slice_type= get_ue_golomb(&s->gb);
4244 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "fixed" for the whole picture (spec: slice_type - 5 repeats).
4249 h->slice_type_fixed=1;
4251 h->slice_type_fixed=0;
4253 slice_type= slice_type_map[ slice_type ];
// The default reference list can be built now if this is an I slice or the
// slice type did not change relative to the previous slice of this picture.
4254 if (slice_type == I_TYPE
4255 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4256 default_ref_list_done = 1;
4258 h->slice_type= slice_type;
4260 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// Look up the referenced PPS, then the SPS it points at; a zeroed entry
// (slice_group_count==0 / log2_max_frame_num==0) means "never parsed".
4262 pps_id= get_ue_golomb(&s->gb);
4264 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4267 h->pps= h->pps_buffer[pps_id];
4268 if(h->pps.slice_group_count == 0){
4269 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4273 h->sps= h->sps_buffer[ h->pps.sps_id ];
4274 if(h->sps.log2_max_frame_num == 0){
4275 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// Rebuild dequant tables only when the active PPS actually changed.
4279 if(h->dequant_coeff_pps != pps_id){
4280 h->dequant_coeff_pps = pps_id;
4281 init_dequant_tables(h);
// Derive picture dimensions from the SPS; mb_height doubles for field coding.
4284 s->mb_width= h->sps.mb_width;
4285 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4287 h->b_stride= s->mb_width*4 + 1;
4288 h->b8_stride= s->mb_width*2 + 1;
4290 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4291 if(h->sps.frame_mbs_only_flag)
4292 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4294 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
// A dimension change on an initialized context requires reinitialization
// (handling code not visible in this excerpt).
4296 if (s->context_initialized
4297 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4301 if (!s->context_initialized) {
4302 if (MPV_common_init(s) < 0)
// Scan tables: the C IDCT uses the standard zigzag order; otherwise a
// permuted order (T macro swaps the 2-bit halves of each index) is used
// so coefficients land where the optimized IDCT expects them.
4305 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4306 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4307 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4310 for(i=0; i<16; i++){
4311 #define T(x) (x>>2) | ((x<<2) & 0xF)
4312 h->zigzag_scan[i] = T(zigzag_scan[i]);
4313 h-> field_scan[i] = T( field_scan[i]);
// With transform bypass, qp==0 blocks must use the unpermuted scan tables.
4316 if(h->sps.transform_bypass){ //FIXME same ugly
4317 h->zigzag_scan_q0 = zigzag_scan;
4318 h->field_scan_q0 = field_scan;
4320 h->zigzag_scan_q0 = h->zigzag_scan;
4321 h->field_scan_q0 = h->field_scan;
4326 s->avctx->width = s->width;
4327 s->avctx->height = s->height;
4328 s->avctx->sample_aspect_ratio= h->sps.sar;
4329 if(!s->avctx->sample_aspect_ratio.den)
4330 s->avctx->sample_aspect_ratio.den = 1;
4332 if(h->sps.timing_info_present_flag){
4333 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Workaround: old x264 builds (<44) wrote timing info off by a factor of 2.
4334 if(h->x264_build > 0 && h->x264_build < 44)
4335 s->avctx->time_base.den *= 2;
4336 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4337 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// First slice of the picture: start a new frame.
4341 if(h->slice_num == 0){
4342 if(frame_start(h) < 0)
4346 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4347 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4349 h->mb_aff_frame = 0;
4350 if(h->sps.frame_mbs_only_flag){
4351 s->picture_structure= PICT_FRAME;
4353 if(get_bits1(&s->gb)) { //field_pic_flag
4354 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4356 s->picture_structure= PICT_FRAME;
// In MBAFF frames the slice address counts MB pairs, hence the shift.
4357 first_mb_in_slice <<= h->sps.mb_aff;
4358 h->mb_aff_frame = h->sps.mb_aff;
4362 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4363 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4364 if(s->mb_y >= s->mb_height){
// curr_pic_num / max_pic_num follow the spec: doubled (+1 bit) for fields.
4368 if(s->picture_structure==PICT_FRAME){
4369 h->curr_pic_num= h->frame_num;
4370 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4372 h->curr_pic_num= 2*h->frame_num;
4373 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4376 if(h->nal_unit_type == NAL_IDR_SLICE){
4377 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC-related slice-header syntax, dependent on sps.poc_type.
4380 if(h->sps.poc_type==0){
4381 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4383 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4384 h->delta_poc_bottom= get_se_golomb(&s->gb);
4388 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4389 h->delta_poc[0]= get_se_golomb(&s->gb);
4391 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4392 h->delta_poc[1]= get_se_golomb(&s->gb);
4397 if(h->pps.redundant_pic_cnt_present){
4398 h->redundant_pic_count= get_ue_golomb(&s->gb);
4401 //set defaults, might be overriden a few line later
4402 h->ref_count[0]= h->pps.ref_count[0];
4403 h->ref_count[1]= h->pps.ref_count[1];
4405 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4406 if(h->slice_type == B_TYPE){
4407 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4409 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4411 if(num_ref_idx_active_override_flag){
4412 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4413 if(h->slice_type==B_TYPE)
4414 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Sanity limit guarding against corrupt streams.
4416 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4417 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4423 if(!default_ref_list_done){
4424 fill_default_ref_list(h);
4427 if(decode_ref_pic_list_reordering(h) < 0)
// Explicit weighted prediction (P/SP, or B with bipred_idc==1) vs. the
// implicit weights derived from POC distances (B with bipred_idc==2).
4430 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4431 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4432 pred_weight_table(h);
4433 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4434 implicit_weight_table(h);
4438 if(s->current_picture.reference)
4439 decode_ref_pic_marking(h);
4441 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4442 h->cabac_init_idc = get_ue_golomb(&s->gb);
4444 h->last_qscale_diff = 0;
4445 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4446 if(s->qscale<0 || s->qscale>51){
4447 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4450 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4451 //FIXME qscale / qp ... stuff
// SP/SI slice-only syntax elements are parsed but currently unused.
4452 if(h->slice_type == SP_TYPE){
4453 get_bits1(&s->gb); /* sp_for_switch_flag */
4455 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4456 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking_filter uses inverted semantics vs. the bitstream's
// disable_deblocking_filter_idc (1<->0 swap below); offsets are coded halved.
4459 h->deblocking_filter = 1;
4460 h->slice_alpha_c0_offset = 0;
4461 h->slice_beta_offset = 0;
4462 if( h->pps.deblocking_filter_parameters_present ) {
4463 h->deblocking_filter= get_ue_golomb(&s->gb);
4464 if(h->deblocking_filter < 2)
4465 h->deblocking_filter^= 1; // 1<->0
4467 if( h->deblocking_filter ) {
4468 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4469 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// User-requested loop-filter skipping overrides the stream's setting.
4472 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4473 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4474 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4475 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4476 h->deblocking_filter= 0;
// NOTE(review): the literal '?' below cannot compile as-is; presumably this
// FMO (slice groups) fragment was disabled (e.g. inside #if 0) in the full
// source — the guarding preprocessor lines are missing from this excerpt.
4479 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4480 slice_group_change_cycle= get_bits(&s->gb, ?);
4485 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4486 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4488 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4490 av_get_pict_type_char(h->slice_type),
4491 pps_id, h->frame_num,
4492 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4493 h->ref_count[0], h->ref_count[1],
4495 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4497 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads the unary-coded CAVLC level_prefix: the number of leading zero bits
// before the terminating 1 bit. Uses the raw bitstream-reader macros so the
// whole prefix is consumed in one cache refill.
// NOTE(review): local declarations and the return statement are missing from
// this excerpt; presumably it returns log-1 (the zero count) — confirm
// against the full source.
4507 static inline int get_level_prefix(GetBitContext *gb){
4511 OPEN_READER(re, gb);
4512 UPDATE_CACHE(re, gb);
4513 buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB; log = zeros + 1.
4515 log= 32 - av_log2(buf);
4517 print_bin(buf>>(32-log), log);
4518 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Skip the zeros plus the terminating 1 bit.
4521 LAST_SKIP_BITS(re, gb, log);
4522 CLOSE_READER(re, gb);
// Checks whether the 8x8 transform may be used for the current macroblock:
// it is disallowed if any sub-macroblock partition is smaller than 8x8, or
// is a direct partition when direct_8x8_inference is off.
// NOTE(review): the surrounding loop over i and the return statements are
// missing from this excerpt.
4527 static inline int get_dct8x8_allowed(H264Context *h){
4530 if(!IS_SUB_8X8(h->sub_mb_type[i])
4531 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4538 * decodes a residual block (CAVLC entropy coding).
4539 * @param n block index (LUMA_DC_BLOCK_INDEX / CHROMA_DC_BLOCK_INDEX for DC blocks)
4540 * @param scantable scantable
4541 * @param qmul dequantization table, or NULL to store raw levels (DC path)
4542 * @param max_coeff number of coefficients in the block
4543 * @return <0 if an error occured
4544 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
// NOTE(review): this excerpt is non-contiguous — some declarations, braces
// and else-branches are missing from view.
4545 MpegEncContext * const s = &h->s;
// Selects which coeff_token VLC table to use based on the predicted
// non-zero-coefficient count of neighbouring blocks (capped at the last table).
4546 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4548 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4550 //FIXME put trailing_onex into the context
// coeff_token packs (total_coeff << 2) | trailing_ones.
4552 if(n == CHROMA_DC_BLOCK_INDEX){
4553 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4554 total_coeff= coeff_token>>2;
4556 if(n == LUMA_DC_BLOCK_INDEX){
4557 total_coeff= pred_non_zero_count(h, 0);
4558 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4559 total_coeff= coeff_token>>2;
4561 total_coeff= pred_non_zero_count(h, n);
4562 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4563 total_coeff= coeff_token>>2;
4564 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4568 //FIXME set last_non_zero?
4573 trailing_ones= coeff_token&3;
4574 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4575 assert(total_coeff<=16);
// Trailing ones carry only a sign bit each: 0 -> +1, 1 -> -1.
4577 for(i=0; i<trailing_ones; i++){
4578 level[i]= 1 - 2*get_bits1(gb);
// First non-trailing-one level: suffix_length starts at 0, or 1 when the
// block is dense (>10 coeffs) with fewer than 3 trailing ones.
4582 int level_code, mask;
4583 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4584 int prefix= get_level_prefix(gb);
4586 //first coefficient has suffix_length equal to 0 or 1
4587 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4589 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4591 level_code= (prefix<<suffix_length); //part
4592 }else if(prefix==14){
4594 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4596 level_code= prefix + get_bits(gb, 4); //part
4597 }else if(prefix==15){
// Escape code: 12-bit suffix.
4598 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4599 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4601 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// With <3 trailing ones the magnitudes below 2 are impossible, so shift.
4605 if(trailing_ones < 3) level_code += 2;
// Branchless zigzag-to-signed mapping: even level_code -> positive,
// odd -> negative.
4610 mask= -(level_code&1);
4611 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4614 //remaining coefficients have suffix_length > 0
4615 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented for subsequent levels.
4616 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4617 prefix = get_level_prefix(gb);
4619 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4620 }else if(prefix==15){
4621 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4623 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4626 mask= -(level_code&1);
4627 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4628 if(level_code > suffix_limit[suffix_length])
// total_zeros is only coded when the block is not completely full.
4633 if(total_coeff == max_coeff)
4636 if(n == CHROMA_DC_BLOCK_INDEX)
4637 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4639 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Two nearly identical scatter loops follow: the first stores raw levels
// (qmul==NULL, DC blocks), the second dequantizes via qmul.
4642 coeff_num = zeros_left + total_coeff - 1;
4643 j = scantable[coeff_num];
4645 block[j] = level[0];
4646 for(i=1;i<total_coeff;i++) {
4649 else if(zeros_left < 7){
4650 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4652 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4654 zeros_left -= run_before;
4655 coeff_num -= 1 + run_before;
4656 j= scantable[ coeff_num ];
4661 block[j] = (level[0] * qmul[j] + 32)>>6;
4662 for(i=1;i<total_coeff;i++) {
4665 else if(zeros_left < 7){
4666 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4668 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4670 zeros_left -= run_before;
4671 coeff_num -= 1 + run_before;
4672 j= scantable[ coeff_num ];
4674 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left here means the runs overran the block: corrupt stream.
4679 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4687 * decodes a P_SKIP or B_SKIP macroblock
4689 static void decode_mb_skip(H264Context *h){
// NOTE(review): excerpt is non-contiguous — mb_type declaration and some
// braces are missing from view.
4690 MpegEncContext * const s = &h->s;
4691 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Skipped MBs have no residual: clear all non-zero-count bookkeeping.
4694 memset(h->non_zero_count[mb_xy], 0, 16);
4695 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// MBAFF: the field/frame decoding flag is coded on the top MB of a pair
// when the pair starts with a skip.
4697 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4698 h->mb_field_decoding_flag= get_bits1(&s->gb);
4700 if(h->mb_field_decoding_flag)
4701 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct prediction.
4703 if( h->slice_type == B_TYPE )
4705 // just for fill_caches. pred_direct_motion will set the real mb_type
4706 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4708 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4709 pred_direct_motion(h, &mb_type);
4711 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4712 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP: 16x16 partition referencing picture 0 of list 0 with the
// predicted skip motion vector.
4718 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4720 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4721 pred_pskip_motion(h, &mx, &my);
4722 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4723 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4725 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4728 write_back_motion(h, mb_type);
4729 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4730 s->current_picture.qscale_table[mb_xy]= s->qscale;
4731 h->slice_table[ mb_xy ]= h->slice_num;
4732 h->prev_mb_skipped= 1;
4736 * decodes a macroblock (CAVLC entropy coding path)
4737 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4739 static int decode_mb_cavlc(H264Context *h){
// NOTE(review): this excerpt is non-contiguous — many intermediate lines
// (declarations, braces, else-branches) are missing from view.
4740 MpegEncContext * const s = &h->s;
4741 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4742 int mb_type, partition_count, cbp;
4743 int dct8x8_allowed= h->pps.transform_8x8_mode;
4745 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4747 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4748 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// mb_skip_run handling: a run of skipped MBs is coded once; consume it
// one MB at a time.
4750 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4751 if(s->mb_skip_run==-1)
4752 s->mb_skip_run= get_ue_golomb(&s->gb);
4754 if (s->mb_skip_run--) {
// MBAFF: field flag is coded on the top MB of each pair (or after a skip).
4759 if(h->mb_aff_frame){
4760 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4761 h->mb_field_decoding_flag = get_bits1(&s->gb);
4763 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4765 h->prev_mb_skipped= 0;
// Map the coded mb_type through the per-slice-type info tables; values past
// the inter range fall through to the intra decoding path.
4767 mb_type= get_ue_golomb(&s->gb);
4768 if(h->slice_type == B_TYPE){
4770 partition_count= b_mb_type_info[mb_type].partition_count;
4771 mb_type= b_mb_type_info[mb_type].type;
4774 goto decode_intra_mb;
4776 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4778 partition_count= p_mb_type_info[mb_type].partition_count;
4779 mb_type= p_mb_type_info[mb_type].type;
4782 goto decode_intra_mb;
4785 assert(h->slice_type == I_TYPE);
4788 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4792 cbp= i_mb_type_info[mb_type].cbp;
4793 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4794 mb_type= i_mb_type_info[mb_type].type;
4797 if(h->mb_field_decoding_flag)
4798 mb_type |= MB_TYPE_INTERLACED;
4800 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw samples follow, byte-aligned, in the same layout as h->mb.
4802 if(IS_INTRA_PCM(mb_type)){
4805 // we assume these blocks are very rare so we dont optimize it
4806 align_get_bits(&s->gb);
4808 // The pixels are stored in the same order as levels in h->mb array.
4809 for(y=0; y<16; y++){
4810 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4811 for(x=0; x<16; x++){
4812 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4813 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4817 const int index= 256 + 4*(y&3) + 32*(y>>2);
4819 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4820 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4824 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4826 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4827 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4831 // In deblocking, the quantizer is 0
4832 s->current_picture.qscale_table[mb_xy]= 0;
4833 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4834 // All coeffs are present
4835 memset(h->non_zero_count[mb_xy], 16, 16);
4837 s->current_picture.mb_type[mb_xy]= mb_type;
4841 fill_caches(h, mb_type, 0);
4844 if(IS_INTRA(mb_type)){
4845 // init_top_left_availability(h);
// Intra 4x4: optional 8x8 transform flag, then one prediction mode per
// 4x4 (or 8x8) block, coded as "use predicted mode" or a 3-bit remainder.
4846 if(IS_INTRA4x4(mb_type)){
4849 if(dct8x8_allowed && get_bits1(&s->gb)){
4850 mb_type |= MB_TYPE_8x8DCT;
4854 // fill_intra4x4_pred_table(h);
4855 for(i=0; i<16; i+=di){
4856 const int mode_coded= !get_bits1(&s->gb);
4857 const int predicted_mode= pred_intra_mode(h, i);
4861 const int rem_mode= get_bits(&s->gb, 3);
// rem_mode skips over the predicted mode, hence the conditional offset.
4862 if(rem_mode<predicted_mode)
4867 mode= predicted_mode;
4871 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4873 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4875 write_back_intra_pred_mode(h);
4876 if( check_intra4x4_pred_mode(h) < 0)
4879 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4880 if(h->intra16x16_pred_mode < 0)
4883 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4885 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4886 if(h->chroma_pred_mode < 0)
// 8x8 partitioning: read four sub_mb_types, then per-list reference
// indices and motion vectors for each sub-partition.
4888 }else if(partition_count==4){
4889 int i, j, sub_partition_count[4], list, ref[2][4];
4891 if(h->slice_type == B_TYPE){
4893 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4894 if(h->sub_mb_type[i] >=13){
4895 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4898 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4899 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4901 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4902 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4903 pred_direct_motion(h, &mb_type);
// Mark the interior corners unavailable so direct prediction of later
// partitions does not read stale cache entries.
4904 h->ref_cache[0][scan8[4]] =
4905 h->ref_cache[1][scan8[4]] =
4906 h->ref_cache[0][scan8[12]] =
4907 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4910 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4912 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4913 if(h->sub_mb_type[i] >=4){
4914 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4917 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4918 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// First pass: reference indices (truncated-Exp-Golomb against ref_count).
4922 for(list=0; list<2; list++){
4923 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4924 if(ref_count == 0) continue;
4925 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4929 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4930 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4931 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4940 dct8x8_allowed = get_dct8x8_allowed(h);
// Second pass: motion vector differences, added to the spatial predictor,
// then replicated into mv_cache according to the sub-partition shape.
4942 for(list=0; list<2; list++){
4943 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4944 if(ref_count == 0) continue;
4947 if(IS_DIRECT(h->sub_mb_type[i])) {
4948 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4951 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4952 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4954 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4955 const int sub_mb_type= h->sub_mb_type[i];
4956 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4957 for(j=0; j<sub_partition_count[i]; j++){
4959 const int index= 4*i + block_width*j;
4960 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4961 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4962 mx += get_se_golomb(&s->gb);
4963 my += get_se_golomb(&s->gb);
4964 tprintf("final mv:%d %d\n", mx, my);
4966 if(IS_SUB_8X8(sub_mb_type)){
4967 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4968 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4969 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4970 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4971 }else if(IS_SUB_8X4(sub_mb_type)){
4972 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4973 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4974 }else if(IS_SUB_4X8(sub_mb_type)){
4975 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4976 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4978 assert(IS_SUB_4X4(sub_mb_type));
4979 mv_cache[ 0 ][0]= mx;
4980 mv_cache[ 0 ][1]= my;
4984 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4990 }else if(IS_DIRECT(mb_type)){
4991 pred_direct_motion(h, &mb_type);
4992 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// Non-8x8 inter partitioning: 16x16, 16x8 or 8x16.
4994 int list, mx, my, i;
4995 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4996 if(IS_16X16(mb_type)){
4997 for(list=0; list<2; list++){
4998 if(h->ref_count[list]>0){
4999 if(IS_DIR(mb_type, 0, list)){
5000 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5001 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5003 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5006 for(list=0; list<2; list++){
5007 if(IS_DIR(mb_type, 0, list)){
5008 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5009 mx += get_se_golomb(&s->gb);
5010 my += get_se_golomb(&s->gb);
5011 tprintf("final mv:%d %d\n", mx, my);
5013 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5015 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5018 else if(IS_16X8(mb_type)){
5019 for(list=0; list<2; list++){
5020 if(h->ref_count[list]>0){
5022 if(IS_DIR(mb_type, i, list)){
5023 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5024 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5026 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5030 for(list=0; list<2; list++){
5032 if(IS_DIR(mb_type, i, list)){
5033 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5034 mx += get_se_golomb(&s->gb);
5035 my += get_se_golomb(&s->gb);
5036 tprintf("final mv:%d %d\n", mx, my);
5038 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5040 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5044 assert(IS_8X16(mb_type));
5045 for(list=0; list<2; list++){
5046 if(h->ref_count[list]>0){
5048 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5049 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5050 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5052 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5056 for(list=0; list<2; list++){
5058 if(IS_DIR(mb_type, i, list)){
5059 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5060 mx += get_se_golomb(&s->gb);
5061 my += get_se_golomb(&s->gb);
5062 tprintf("final mv:%d %d\n", mx, my);
5064 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5066 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5072 if(IS_INTER(mb_type))
5073 write_back_motion(h, mb_type);
// coded_block_pattern: intra16x16 MBs carry cbp inside their mb_type,
// everything else reads it here via the appropriate Golomb mapping table.
5075 if(!IS_INTRA16x16(mb_type)){
5076 cbp= get_ue_golomb(&s->gb);
5078 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5082 if(IS_INTRA4x4(mb_type))
5083 cbp= golomb_to_intra4x4_cbp[cbp];
5085 cbp= golomb_to_inter_cbp[cbp];
5088 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5089 if(get_bits1(&s->gb))
5090 mb_type |= MB_TYPE_8x8DCT;
5092 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual decoding: only when something is coded (cbp != 0 or intra16x16,
// which always carries a luma DC block).
5094 if(cbp || IS_INTRA16x16(mb_type)){
5095 int i8x8, i4x4, chroma_idx;
5096 int chroma_qp, dquant;
5097 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5098 const uint8_t *scan, *dc_scan;
5100 // fill_non_zero_count_cache(h);
// Pick frame or field scan; qp==0 uses the unpermuted *_q0 tables
// (transform-bypass case, see decode_slice_header).
5102 if(IS_INTERLACED(mb_type)){
5103 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5104 dc_scan= luma_dc_field_scan;
5106 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5107 dc_scan= luma_dc_zigzag_scan;
5110 dquant= get_se_golomb(&s->gb);
5112 if( dquant > 25 || dquant < -26 ){
5113 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec.
5117 s->qscale += dquant;
5118 if(((unsigned)s->qscale) > 51){
5119 if(s->qscale<0) s->qscale+= 52;
5120 else s->qscale-= 52;
5123 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
// Intra16x16: separate luma DC block, then 15-coefficient AC blocks.
5124 if(IS_INTRA16x16(mb_type)){
5125 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5126 return -1; //FIXME continue if partitioned and other return -1 too
5129 assert((cbp&15) == 0 || (cbp&15) == 15);
5132 for(i8x8=0; i8x8<4; i8x8++){
5133 for(i4x4=0; i4x4<4; i4x4++){
5134 const int index= i4x4 + 4*i8x8;
5135 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5141 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other MB types: one luma 8x8 group per set cbp bit, decoded either as
// four 4x4 blocks or, with 8x8 DCT, interleaved into one 8x8 block.
5144 for(i8x8=0; i8x8<4; i8x8++){
5145 if(cbp & (1<<i8x8)){
5146 if(IS_8x8DCT(mb_type)){
5147 DCTELEM *buf = &h->mb[64*i8x8];
5149 for(i4x4=0; i4x4<4; i4x4++){
5150 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5151 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// Collapse the four 4x4 nnz entries into one flag for the 8x8 block.
5154 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5155 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5157 for(i4x4=0; i4x4<4; i4x4++){
5158 const int index= i4x4 + 4*i8x8;
5160 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5166 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5167 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks first (cbp&0x30), then AC blocks (cbp&0x20).
5173 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5174 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5180 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5181 for(i4x4=0; i4x4<4; i4x4++){
5182 const int index= 16 + 4*chroma_idx + i4x4;
5183 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5189 uint8_t * const nnz= &h->non_zero_count_cache[0];
5190 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5191 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5194 uint8_t * const nnz= &h->non_zero_count_cache[0];
5195 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5196 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5197 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5199 s->current_picture.qscale_table[mb_xy]= s->qscale;
5200 write_back_non_zero_count(h);
// Decodes the MBAFF mb_field_decoding_flag with CABAC. The context index
// (70..72) is derived from whether the left and above MB pairs of the same
// slice are field-coded.
5205 static int decode_cabac_field_decoding_flag(H264Context *h) {
5206 MpegEncContext * const s = &h->s;
5207 const int mb_x = s->mb_x;
// Round down to the top MB of the current pair; neighbours are addressed
// pair-wise (above pair is two rows up).
5208 const int mb_y = s->mb_y & ~1;
5209 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5210 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5212 unsigned int ctx = 0;
5214 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5217 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5221 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra mb_type with CABAC, shared by I slices (intra_slice=1)
// and the intra escape path of P/B slices (intra_slice=0).
// Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16 variant
// (cbp_luma, cbp_chroma and prediction mode packed into the value).
// NOTE(review): excerpt is non-contiguous — some declarations and braces
// are missing from view.
5224 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5225 uint8_t *state= &h->cabac_state[ctx_base];
5229 MpegEncContext * const s = &h->s;
5230 const int mba_xy = h->left_mb_xy[0];
5231 const int mbb_xy = h->top_mb_xy;
// In intra slices the first bin's context depends on whether the left/top
// neighbours are non-I4x4.
5233 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5235 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5237 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5238 return 0; /* I4x4 */
5241 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5242 return 0; /* I4x4 */
// The terminate bin distinguishes I_PCM.
5245 if( get_cabac_terminate( &h->cabac ) )
5246 return 25; /* PCM */
5248 mb_type = 1; /* I16x16 */
5249 if( get_cabac( &h->cabac, &state[1] ) )
5250 mb_type += 12; /* cbp_luma != 0 */
5252 if( get_cabac( &h->cabac, &state[2] ) ) {
5253 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5254 mb_type += 4 * 2; /* cbp_chroma == 2 */
5256 mb_type += 4 * 1; /* cbp_chroma == 1 */
// Last two bins select the intra16x16 prediction mode (0..3).
5258 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5260 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
// Decodes mb_type with CABAC for I, P and B slices, delegating the intra
// variants to decode_cabac_intra_mb_type with the slice-appropriate
// context base and offset.
5265 static int decode_cabac_mb_type( H264Context *h ) {
5266 MpegEncContext * const s = &h->s;
5268 if( h->slice_type == I_TYPE ) {
5269 return decode_cabac_intra_mb_type(h, 3, 1);
5270 } else if( h->slice_type == P_TYPE ) {
// First bin: inter (0) vs. intra escape (1).
5271 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5273 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5274 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5275 return 0; /* P_L0_D16x16; */
5277 return 3; /* P_8x8; */
5279 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5280 return 2; /* P_L0_D8x16; */
5282 return 1; /* P_L0_D16x8; */
// Intra mb_types in P slices are offset by 5 past the inter types.
5285 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5287 } else if( h->slice_type == B_TYPE ) {
5288 const int mba_xy = h->left_mb_xy[0];
5289 const int mbb_xy = h->top_mb_xy;
// Context for the direct/non-direct bin depends on whether the
// neighbours are non-skip, non-direct MBs of the same slice.
5293 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5294 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5296 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5297 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5300 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5301 return 0; /* B_Direct_16x16 */
5303 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5304 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four fixed bins form a value 0..15 selecting among the remaining types;
// 13/14/15 are escapes (intra, B_L1_L0_8x16, B_8x8).
5307 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5308 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5309 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5310 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5312 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5313 else if( bits == 13 ) {
5314 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5315 } else if( bits == 14 )
5316 return 11; /* B_L1_L0_8x16 */
5317 else if( bits == 15 )
5318 return 22; /* B_8x8 */
// One more bin refines the remaining bi-directional 16x8/8x16 types.
5320 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5321 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5323 /* TODO SI/SP frames? */
// Decodes mb_skip_flag with CABAC. Context (0..2) counts how many of the
// left/above neighbours in the same slice are non-skip; P/SP and B slices
// use separate context groups (11.. vs 24..).
5328 static int decode_cabac_mb_skip( H264Context *h) {
5329 MpegEncContext * const s = &h->s;
5330 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5331 const int mba_xy = mb_xy - 1;
5332 const int mbb_xy = mb_xy - s->mb_stride;
5335 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5337 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5340 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5341 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5343 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
// Decodes the intra4x4 prediction mode for one 4x4 luma block.
// The first bin (state 68) is prev_intra4x4_pred_mode_flag; when it is set
// the predicted mode 'pred_mode' is used directly. Otherwise three bins of
// rem_intra4x4_pred_mode are read (all with state 69) and modes >= the
// prediction are shifted up by one so the prediction itself is never coded.
// NOTE(review): listing has gaps — the 'mode' declaration, the early
// return of pred_mode and the final return statements are missing here.
5346 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5349 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5352 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5353 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5354 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5356 if( mode >= pred_mode )
// Decodes intra_chroma_pred_mode as a truncated-unary code (max value 3)
// using states 64..67. The context for the first bin counts how many of
// the left/top neighbours (same slice) use a non-zero chroma pred mode.
// NOTE(review): listing has gaps — the ctx declaration/increments and the
// return statements for each truncated-unary prefix length are missing.
5362 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5363 const int mba_xy = h->left_mb_xy[0];
5364 const int mbb_xy = h->top_mb_xy;
5368 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5369 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5372 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5375 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5378 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5380 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Lookup tables mapping a 4x4 luma block's decoding-order index (0..15,
// 8x8-quadrant order) to its x/y position in 4x4-block units inside the
// macroblock, and (block_idx_xy) the inverse mapping from (x,y) back to
// the decoding-order index.
// NOTE(review): the initializer rows of block_idx_xy are missing from
// this listing.
5386 static const uint8_t block_idx_x[16] = {
5387 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5389 static const uint8_t block_idx_y[16] = {
5390 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5392 static const uint8_t block_idx_xy[4][4] = {
// Decodes the 4-bit luma coded_block_pattern, one CABAC bin per 8x8
// sub-block (states 73..76). The context for each bin is derived from the
// corresponding CBP bits of the left (cbp_a) and top (cbp_b) neighbours;
// within the current MB, already-decoded 8x8 blocks act as neighbours.
// NOTE(review): listing has gaps — the cbp/ctx declarations, the
// "neighbour is this MB" branches, the ctx increments and the final
// return are missing here.
5399 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5400 MpegEncContext * const s = &h->s;
5405 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5411 x = block_idx_x[4*i8x8];
5412 y = block_idx_y[4*i8x8];
5416 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5417 cbp_a = h->left_cbp;
5418 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5423 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5425 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5428 /* No need to test for skip as we put 0 for skip block */
5429 /* No need to test for IPCM as we put 1 for IPCM block */
5431 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5432 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5437 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5438 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5442 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
// Decodes the chroma part of the coded_block_pattern (0 = no chroma
// coefficients, 1 = DC only, 2 = DC + AC) with states 77..80. The context
// of each bin depends on the chroma CBP of the left and top neighbours
// (cached in left_cbp/top_cbp bits 4-5).
// NOTE(review): listing has gaps — the cbp_a/cbp_b/ctx declarations, the
// early "return 0" and the ctx reset between the two bins are missing.
5448 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5452 cbp_a = (h->left_cbp>>4)&0x03;
5453 cbp_b = (h-> top_cbp>>4)&0x03;
5456 if( cbp_a > 0 ) ctx++;
5457 if( cbp_b > 0 ) ctx += 2;
5458 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5462 if( cbp_a == 2 ) ctx++;
5463 if( cbp_b == 2 ) ctx += 2;
5464 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
// Decodes mb_qp_delta as a unary CABAC code (states 60..63). The first
// bin's context depends on whether the previous macroblock (mbn_xy, the
// MB to the left, or the last MB of the previous row at a row start) had
// a non-zero qp delta and coded coefficients. The unary value is then
// mapped to a signed delta; val > 52 aborts to prevent an infinite loop
// on corrupt streams.
// NOTE(review): listing has gaps — the ctx/val declarations, the loop
// body, the error return and the positive-delta return are missing.
5466 static int decode_cabac_mb_dqp( H264Context *h) {
5467 MpegEncContext * const s = &h->s;
5473 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5475 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5477 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5480 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5486 if(val > 52) //prevent infinite loop
5493 return -(val + 1)/2;
// Decodes sub_mb_type for a P macroblock (states 21..23); the decision
// tree selects among the four P sub-partition types (8x8, 8x4, 4x8, 4x4).
// NOTE(review): the return statements of each branch are missing from
// this listing.
5495 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5496 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5498 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5500 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// Decodes sub_mb_type for a B macroblock (states 36..39), returning an
// index into the B sub-mb-type table: 0 = B_Direct_8x8, 1/2 = B_L0/L1_8x8,
// then larger partitionings, up to B_L1_4x4/B_Bi_4x4 (11/12). Note that
// state 39 is deliberately reused for several bins.
// NOTE(review): listing has gaps — the 'type' declaration/initialization
// and the final return are missing here.
5504 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5506 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5507 return 0; /* B_Direct_8x8 */
5508 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5509 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5511 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5512 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5513 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5516 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5517 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// Decodes transform_size_8x8_flag. The context is the number of
// neighbouring macroblocks that use the 8x8 transform
// (h->neighbor_transform_size, 0..2), selecting among states 399..401.
5521 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5522 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// Decodes ref_idx for block n of reference list 'list' as a unary CABAC
// code (states 54..). The first bin's context is derived from the ref
// indices of the left and top neighbouring partitions; in B slices,
// direct-mode neighbours are excluded from the context derivation.
// NOTE(review): listing has gaps — the ctx/ref declarations, the ctx
// increments, the context progression inside the loop, the overflow guard
// and the final return are missing here.
5525 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5526 int refa = h->ref_cache[list][scan8[n] - 1];
5527 int refb = h->ref_cache[list][scan8[n] - 8];
5531 if( h->slice_type == B_TYPE) {
5532 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5534 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5543 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Decodes one motion-vector-difference component (l = 0 horizontal,
// 1 vertical) for block n of 'list', using UEG3 binarization: the first
// bin's context depends on the summed |mvd| of the left/top neighbours
// (amvd thresholds 32 and presumably 2 — the middle branch is missing
// from this listing); a unary prefix of up to 9 bins follows, then an
// exp-Golomb bypass suffix, then a bypass-coded sign.
// NOTE(review): listing has gaps — the ctx/mvd/k declarations, context
// progression, the suffix assembly and the final return are missing.
5553 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5554 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5555 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5556 int ctxbase = (l == 0) ? 40 : 47;
5561 else if( amvd > 32 )
5566 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5571 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5579 while( get_cabac_bypass( &h->cabac ) ) {
5584 if( get_cabac_bypass( &h->cabac ) )
5588 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
// Computes the context index for the coded_block_flag of a block of
// category 'cat' (see decode_cabac_residual for the cat/idx meaning).
// nza/nzb are the "neighbour has non-zero coefficients" flags taken from
// the appropriate cache for each category: luma-DC flag in bit 8 of the
// neighbour CBP, per-4x4 non_zero_count cache for luma AC/4x4 and chroma
// AC, chroma-DC flags in bits 6-7 of the neighbour CBP. The result is
// ctx (0..3, from nza/nzb) plus a per-category offset of 4*cat.
// NOTE(review): listing has gaps — the nza/nzb/ctx declarations, the
// cat==0 branch header and the ctx derivation from nza/nzb are missing.
5592 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5597 nza = h->left_cbp&0x100;
5598 nzb = h-> top_cbp&0x100;
5599 } else if( cat == 1 || cat == 2 ) {
5600 nza = h->non_zero_count_cache[scan8[idx] - 1];
5601 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5602 } else if( cat == 3 ) {
5603 nza = (h->left_cbp>>(6+idx))&0x01;
5604 nzb = (h-> top_cbp>>(6+idx))&0x01;
5607 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5608 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5617 return ctx + 4 * cat;
// Decodes one CABAC-coded residual block into 'block':
//   1. coded_block_flag (states 85+ctx) — early out if zero;
//   2. significance map (significant_coeff_flag / last_significant_
//      coeff_flag), with separate context tables for frame vs. field
//      coding and a dedicated position->context map for 8x8 blocks;
//   3. coefficient levels (coeff_abs_level_minus1, UEG0 binarization with
//      bypass suffix beyond 14) and bypass-coded signs, walking the
//      significant positions in reverse scan order.
// qmul, when non-NULL, dequantizes each level in place ((level*qmul[j]+32)>>6).
// NOTE(review): this listing has gaps — several declarations (index[],
// last, i, j, coeff_abs, abslevel1), the closing of the significance
// macro, the cat dispatch headers and various braces are missing.
5620 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5621 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
// Context-state base offsets, indexed by frame/field (field_offset) and
// by block category (per-cat offsets; entry 5 is the 8x8 set).
5622 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5623 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5624 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5625 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5626 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
// 8x8 blocks share contexts between several scan positions; these tables
// map scan position -> context index.
5627 static const int significant_coeff_flag_offset_8x8[63] = {
5628 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5629 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5630 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5631 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5633 static const int last_coeff_flag_offset_8x8[63] = {
5634 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5635 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5636 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5637 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5643 int coeff_count = 0;
5646 int abslevelgt1 = 0;
5648 uint8_t *significant_coeff_ctx_base;
5649 uint8_t *last_coeff_ctx_base;
5650 uint8_t *abs_level_m1_ctx_base;
5652 /* cat: 0-> DC 16x16 n = 0
5653 * 1-> AC 16x16 n = luma4x4idx
5654 * 2-> Luma4x4 n = luma4x4idx
5655 * 3-> DC Chroma n = iCbCr
5656 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5657 * 5-> Luma8x8 n = 4 * luma8x8idx
5660 /* read coded block flag */
5662 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// No coefficients: clear the relevant non_zero_count cache entry and bail.
5663 if( cat == 1 || cat == 2 )
5664 h->non_zero_count_cache[scan8[n]] = 0;
5666 h->non_zero_count_cache[scan8[16+n]] = 0;
5672 significant_coeff_ctx_base = h->cabac_state
5673 + significant_coeff_flag_offset[cat]
5674 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5675 last_coeff_ctx_base = h->cabac_state
5676 + last_significant_coeff_flag_offset[cat]
5677 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5678 abs_level_m1_ctx_base = h->cabac_state
5679 + coeff_abs_level_m1_offset[cat];
// Significance map: records each significant scan position in index[] and
// stops after the last_significant_coeff_flag fires.
5682 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5683 for(last= 0; last < coefs; last++) { \
5684 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5685 if( get_cabac( &h->cabac, sig_ctx )) { \
5686 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5687 index[coeff_count++] = last; \
5688 if( get_cabac( &h->cabac, last_ctx ) ) { \
5694 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5695 last_coeff_flag_offset_8x8[last] );
5697 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
// The very last scan position has no explicit significance bin.
5699 if( last == max_coeff -1 ) {
5700 index[coeff_count++] = last;
5702 assert(coeff_count > 0);
// Record the coefficient count in the per-category cache / cbp_table bits.
5705 h->cbp_table[mb_xy] |= 0x100;
5706 else if( cat == 1 || cat == 2 )
5707 h->non_zero_count_cache[scan8[n]] = coeff_count;
5709 h->cbp_table[mb_xy] |= 0x40 << n;
5711 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5714 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
// Levels and signs, in reverse scan order; the level context depends on
// how many levels ==1 / >1 were decoded so far (abslevel1/abslevelgt1).
5717 for( i = coeff_count - 1; i >= 0; i-- ) {
5718 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5719 int j= scantable[index[i]];
5721 if( get_cabac( &h->cabac, ctx ) == 0 ) {
// Level magnitude 1: only the sign remains (bypass coded).
5723 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5726 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5727 else block[j] = ( qmul[j] + 32) >> 6;
// Level magnitude >= 2: unary up to 14, then exp-Golomb bypass suffix.
5733 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5734 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5738 if( coeff_abs >= 15 ) {
5740 while( get_cabac_bypass( &h->cabac ) ) {
5741 coeff_abs += 1 << j;
5746 if( get_cabac_bypass( &h->cabac ) )
5747 coeff_abs += 1 << j ;
5752 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5753 else block[j] = coeff_abs;
5755 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5756 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
// Computes the top and left neighbour macroblock indices for the current
// MB. In the plain (non-MBAFF) case these are simply mb_xy - mb_stride
// and mb_xy - 1; in an MBAFF frame the neighbours are adjusted according
// to the frame/field coding of the current MB pair vs. its neighbours
// (e.g. the top neighbour of a field MB may be one stride further up).
// NOTE(review): listing has gaps — the condition opening before line
// 5779, the branch bodies and the bottom-of-pair left adjustment are
// partially missing.
5765 static void inline compute_mb_neighbors(H264Context *h)
5767 MpegEncContext * const s = &h->s;
5768 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5769 h->top_mb_xy = mb_xy - s->mb_stride;
5770 h->left_mb_xy[0] = mb_xy - 1;
5771 if(h->mb_aff_frame){
5772 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5773 const int top_pair_xy = pair_xy - s->mb_stride;
5774 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5775 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5776 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5777 const int bottom = (s->mb_y & 1);
5779 ? !curr_mb_frame_flag // bottom macroblock
5780 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5782 h->top_mb_xy -= s->mb_stride;
5784 if (left_mb_frame_flag != curr_mb_frame_flag) {
5785 h->left_mb_xy[0] = pair_xy - 1;
5792 * decodes a macroblock
5793 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Top-level CABAC macroblock decode: skip flag, mb_type, intra prediction
// modes or inter motion info, CBP, qp delta and all residual blocks.
// Fills the per-MB caches and tables used later by reconstruction and
// the deblocking filter.
// NOTE(review): this listing has many gaps — numerous declarations,
// else-branches, closing braces and return statements are missing
// throughout; the visible lines are reproduced unchanged.
5795 static int decode_mb_cabac(H264Context *h) {
5796 MpegEncContext * const s = &h->s;
5797 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5798 int mb_type, partition_count, cbp = 0;
5799 int dct8x8_allowed= h->pps.transform_8x8_mode;
5801 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5803 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- skip flag (P/B slices only) ---
5804 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5805 /* read skip flags */
5806 if( decode_cabac_mb_skip( h ) ) {
5809 h->cbp_table[mb_xy] = 0;
5810 h->chroma_pred_mode_table[mb_xy] = 0;
5811 h->last_qscale_diff = 0;
// --- MBAFF field decoding flag ---
5817 if(h->mb_aff_frame){
5818 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5819 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5821 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5823 h->prev_mb_skipped = 0;
// --- mb_type ---
5825 compute_mb_neighbors(h);
5826 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5827 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
// Translate the raw mb_type index through the per-slice-type info tables.
5831 if( h->slice_type == B_TYPE ) {
5833 partition_count= b_mb_type_info[mb_type].partition_count;
5834 mb_type= b_mb_type_info[mb_type].type;
5837 goto decode_intra_mb;
5839 } else if( h->slice_type == P_TYPE ) {
5841 partition_count= p_mb_type_info[mb_type].partition_count;
5842 mb_type= p_mb_type_info[mb_type].type;
5845 goto decode_intra_mb;
5848 assert(h->slice_type == I_TYPE);
5850 partition_count = 0;
5851 cbp= i_mb_type_info[mb_type].cbp;
5852 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5853 mb_type= i_mb_type_info[mb_type].type;
5855 if(h->mb_field_decoding_flag)
5856 mb_type |= MB_TYPE_INTERLACED;
5858 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow, bypassing the arithmetic coder ---
5860 if(IS_INTRA_PCM(mb_type)) {
5864 // We assume these blocks are very rare so we dont optimize it.
5865 // FIXME The two following lines get the bitstream position in the cabac
5866 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5867 ptr= h->cabac.bytestream;
5868 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5870 // The pixels are stored in the same order as levels in h->mb array.
5871 for(y=0; y<16; y++){
5872 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5873 for(x=0; x<16; x++){
5874 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5875 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5879 const int index= 256 + 4*(y&3) + 32*(y>>2);
5881 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5882 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5886 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5888 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5889 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
// Restart the CABAC decoder after the raw bytes.
5893 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5895 // All blocks are present
5896 h->cbp_table[mb_xy] = 0x1ef;
5897 h->chroma_pred_mode_table[mb_xy] = 0;
5898 // In deblocking, the quantizer is 0
5899 s->current_picture.qscale_table[mb_xy]= 0;
5900 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5901 // All coeffs are present
5902 memset(h->non_zero_count[mb_xy], 16, 16);
5903 s->current_picture.mb_type[mb_xy]= mb_type;
5907 fill_caches(h, mb_type, 0);
// --- intra prediction modes ---
5909 if( IS_INTRA( mb_type ) ) {
5911 if( IS_INTRA4x4( mb_type ) ) {
5912 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5913 mb_type |= MB_TYPE_8x8DCT;
5914 for( i = 0; i < 16; i+=4 ) {
5915 int pred = pred_intra_mode( h, i );
5916 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5917 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5920 for( i = 0; i < 16; i++ ) {
5921 int pred = pred_intra_mode( h, i );
5922 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5924 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5927 write_back_intra_pred_mode(h);
5928 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5930 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5931 if( h->intra16x16_pred_mode < 0 ) return -1;
5933 h->chroma_pred_mode_table[mb_xy] =
5934 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5936 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5937 if( h->chroma_pred_mode < 0 ) return -1;
// --- inter, 8x8 partitions: sub-mb types, refs, then mvds ---
5938 } else if( partition_count == 4 ) {
5939 int i, j, sub_partition_count[4], list, ref[2][4];
5941 if( h->slice_type == B_TYPE ) {
5942 for( i = 0; i < 4; i++ ) {
5943 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5944 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5945 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5947 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5948 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5949 pred_direct_motion(h, &mb_type);
5950 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5951 for( i = 0; i < 4; i++ )
5952 if( IS_DIRECT(h->sub_mb_type[i]) )
5953 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5957 for( i = 0; i < 4; i++ ) {
5958 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5959 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5960 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5964 for( list = 0; list < 2; list++ ) {
5965 if( h->ref_count[list] > 0 ) {
5966 for( i = 0; i < 4; i++ ) {
5967 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5968 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5969 if( h->ref_count[list] > 1 )
5970 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5976 h->ref_cache[list][ scan8[4*i]+1 ]=
5977 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5983 dct8x8_allowed = get_dct8x8_allowed(h);
5985 for(list=0; list<2; list++){
5987 if(IS_DIRECT(h->sub_mb_type[i])){
5988 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5991 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5993 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5994 const int sub_mb_type= h->sub_mb_type[i];
5995 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5996 for(j=0; j<sub_partition_count[i]; j++){
5999 const int index= 4*i + block_width*j;
6000 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6001 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6002 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6004 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6005 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6006 tprintf("final mv:%d %d\n", mx, my);
// Replicate mv/mvd into the cache cells covered by each sub-partition.
6008 if(IS_SUB_8X8(sub_mb_type)){
6009 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6010 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6011 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6012 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6014 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6015 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6016 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6017 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6018 }else if(IS_SUB_8X4(sub_mb_type)){
6019 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6020 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6022 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6023 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6024 }else if(IS_SUB_4X8(sub_mb_type)){
6025 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6026 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6028 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6029 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6031 assert(IS_SUB_4X4(sub_mb_type));
6032 mv_cache[ 0 ][0]= mx;
6033 mv_cache[ 0 ][1]= my;
6035 mvd_cache[ 0 ][0]= mx - mpx;
6036 mvd_cache[ 0 ][1]= my - mpy;
6040 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6041 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6042 p[0] = p[1] = p[8] = p[9] = 0;
6043 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- inter, direct mode ---
6047 } else if( IS_DIRECT(mb_type) ) {
6048 pred_direct_motion(h, &mb_type);
6049 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6050 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6051 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- inter, 16x16 / 16x8 / 8x16 partitions ---
6053 int list, mx, my, i, mpx, mpy;
6054 if(IS_16X16(mb_type)){
6055 for(list=0; list<2; list++){
6056 if(IS_DIR(mb_type, 0, list)){
6057 if(h->ref_count[list] > 0 ){
6058 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6059 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6062 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6064 for(list=0; list<2; list++){
6065 if(IS_DIR(mb_type, 0, list)){
6066 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6068 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6069 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6070 tprintf("final mv:%d %d\n", mx, my);
6072 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6073 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6075 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6078 else if(IS_16X8(mb_type)){
6079 for(list=0; list<2; list++){
6080 if(h->ref_count[list]>0){
6082 if(IS_DIR(mb_type, i, list)){
6083 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6084 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6086 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6090 for(list=0; list<2; list++){
6092 if(IS_DIR(mb_type, i, list)){
6093 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6094 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6095 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6096 tprintf("final mv:%d %d\n", mx, my);
6098 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6099 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6101 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6102 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6107 assert(IS_8X16(mb_type));
6108 for(list=0; list<2; list++){
6109 if(h->ref_count[list]>0){
6111 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6112 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6113 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6115 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6119 for(list=0; list<2; list++){
6121 if(IS_DIR(mb_type, i, list)){
6122 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6123 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6124 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6126 tprintf("final mv:%d %d\n", mx, my);
6127 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6128 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6130 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6131 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6138 if( IS_INTER( mb_type ) ) {
6139 h->chroma_pred_mode_table[mb_xy] = 0;
6140 write_back_motion( h, mb_type );
// --- coded block pattern ---
6143 if( !IS_INTRA16x16( mb_type ) ) {
6144 cbp = decode_cabac_mb_cbp_luma( h );
6145 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6148 h->cbp_table[mb_xy] = cbp;
6150 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6151 if( decode_cabac_mb_transform_size( h ) )
6152 mb_type |= MB_TYPE_8x8DCT;
6154 s->current_picture.mb_type[mb_xy]= mb_type;
// --- qp delta and residuals ---
6156 if( cbp || IS_INTRA16x16( mb_type ) ) {
6157 const uint8_t *scan, *dc_scan;
6160 if(IS_INTERLACED(mb_type)){
6161 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6162 dc_scan= luma_dc_field_scan;
6164 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6165 dc_scan= luma_dc_zigzag_scan;
6168 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6169 if( dqp == INT_MIN ){
6170 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// Wrap qscale back into [0,51] as required by the spec's modular qp arithmetic.
6174 if(((unsigned)s->qscale) > 51){
6175 if(s->qscale<0) s->qscale+= 52;
6176 else s->qscale-= 52;
6178 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6180 if( IS_INTRA16x16( mb_type ) ) {
6182 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6183 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6186 for( i = 0; i < 16; i++ ) {
6187 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6188 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6192 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6196 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6197 if( cbp & (1<<i8x8) ) {
6198 if( IS_8x8DCT(mb_type) ) {
6199 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6200 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6203 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6204 const int index = 4*i8x8 + i4x4;
6205 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6206 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6210 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6211 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6218 for( c = 0; c < 2; c++ ) {
6219 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6220 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6227 for( c = 0; c < 2; c++ ) {
6228 for( i = 0; i < 4; i++ ) {
6229 const int index = 16 + 4 * c + i;
6230 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6231 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6236 uint8_t * const nnz= &h->non_zero_count_cache[0];
6237 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6238 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6241 uint8_t * const nnz= &h->non_zero_count_cache[0];
6242 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6243 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6244 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6247 s->current_picture.qscale_table[mb_xy]= s->qscale;
6248 write_back_non_zero_count(h);
// Deblocks one vertical luma edge (16 pixels tall). For bS < 4 the work
// is delegated to the DSP h264_h_loop_filter_luma with per-4-line tc0
// values; bS == 4 (intra MB edge) runs the strong filter inline, choosing
// between the full 3-tap smoothing and the weak 1-pixel variant based on
// the alpha/4 + 2 threshold from the spec.
// NOTE(review): listing has gaps — the bS<4 / bS==4 branch structure
// around line 6263 and several closing braces are missing here.
6254 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6256 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6257 const int alpha = alpha_table[index_a];
6258 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6263 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6264 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6266 /* 16px edge length, because bS=4 is triggered by being at
6267 * the edge of an intra MB, so all 4 bS are the same */
6268 for( d = 0; d < 16; d++ ) {
6269 const int p0 = pix[-1];
6270 const int p1 = pix[-2];
6271 const int p2 = pix[-3];
6273 const int q0 = pix[0];
6274 const int q1 = pix[1];
6275 const int q2 = pix[2];
6277 if( ABS( p0 - q0 ) < alpha &&
6278 ABS( p1 - p0 ) < beta &&
6279 ABS( q1 - q0 ) < beta ) {
6281 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6282 if( ABS( p2 - p0 ) < beta)
6284 const int p3 = pix[-4];
6286 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6287 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6288 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6291 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6293 if( ABS( q2 - q0 ) < beta)
6295 const int q3 = pix[3];
6297 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6298 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6299 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6302 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6306 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6307 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6309 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
// Deblocks one vertical chroma edge. bS < 4 uses the DSP normal chroma
// filter with tc values offset by +1 (chroma convention); bS == 4 uses
// the DSP intra (strong) chroma filter.
// NOTE(review): listing has gaps — the tc declaration, the loop over i
// and the branch structure are missing here.
6315 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6317 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6318 const int alpha = alpha_table[index_a];
6319 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6324 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6325 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6327 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Deblocks one vertical luma edge of an MBAFF macroblock pair, line by
// line (16 rows). Each row picks its boundary strength (bS_index) and
// quantizer (qp_index) depending on whether the current MB is field- or
// frame-coded, then applies the normal (bS < 4) or strong (bS == 4)
// luma filter inline — the per-line qp selection is why the DSP helpers
// can't be used here.
// NOTE(review): listing has gaps — several local declarations (index_a,
// alpha, beta, tc, i_delta), a shift of bS_index in the field branch and
// various closing braces are missing here.
6331 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6333 for( i = 0; i < 16; i++, pix += stride) {
6339 int bS_index = (i >> 1);
6340 if (h->mb_field_decoding_flag) {
6342 bS_index |= (i & 1);
6345 if( bS[bS_index] == 0 ) {
6349 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6350 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6351 alpha = alpha_table[index_a];
6352 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6355 if( bS[bS_index] < 4 ) {
6356 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6357 /* 4px edge length */
6358 const int p0 = pix[-1];
6359 const int p1 = pix[-2];
6360 const int p2 = pix[-3];
6361 const int q0 = pix[0];
6362 const int q1 = pix[1];
6363 const int q2 = pix[2];
6365 if( ABS( p0 - q0 ) < alpha &&
6366 ABS( p1 - p0 ) < beta &&
6367 ABS( q1 - q0 ) < beta ) {
6371 if( ABS( p2 - p0 ) < beta ) {
6372 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6375 if( ABS( q2 - q0 ) < beta ) {
6376 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6380 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6381 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6382 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6383 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6386 /* 4px edge length */
6387 const int p0 = pix[-1];
6388 const int p1 = pix[-2];
6389 const int p2 = pix[-3];
6391 const int q0 = pix[0];
6392 const int q1 = pix[1];
6393 const int q2 = pix[2];
6395 if( ABS( p0 - q0 ) < alpha &&
6396 ABS( p1 - p0 ) < beta &&
6397 ABS( q1 - q0 ) < beta ) {
6399 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6400 if( ABS( p2 - p0 ) < beta)
6402 const int p3 = pix[-4];
6404 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6405 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6406 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6409 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6411 if( ABS( q2 - q0 ) < beta)
6413 const int q3 = pix[3];
6415 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6416 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6417 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6420 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6424 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6425 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6427 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Deblocks one vertical chroma edge of an MBAFF macroblock pair, line by
// line (8 rows). Mirrors filter_mb_mbaff_edgev but with the 2-pixel
// chroma filter: bS < 4 applies the clipped delta with tc = tc0 + 1,
// bS == 4 applies the strong 1-pixel chroma smoothing.
// NOTE(review): listing has gaps — the bS_index / qp_index / index_a /
// alpha / beta declarations and some braces are missing here.
6432 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6434 for( i = 0; i < 8; i++, pix += stride) {
6442 if( bS[bS_index] == 0 ) {
6446 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6447 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6448 alpha = alpha_table[index_a];
6449 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6450 if( bS[bS_index] < 4 ) {
6451 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6452 /* 2px edge length (because we use same bS than the one for luma) */
6453 const int p0 = pix[-1];
6454 const int p1 = pix[-2];
6455 const int q0 = pix[0];
6456 const int q1 = pix[1];
6458 if( ABS( p0 - q0 ) < alpha &&
6459 ABS( p1 - p0 ) < beta &&
6460 ABS( q1 - q0 ) < beta ) {
6461 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6463 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6464 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6465 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6468 const int p0 = pix[-1];
6469 const int p1 = pix[-2];
6470 const int q0 = pix[0];
6471 const int q1 = pix[1];
6473 if( ABS( p0 - q0 ) < alpha &&
6474 ABS( p1 - p0 ) < beta &&
6475 ABS( q1 - q0 ) < beta ) {
6477 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6478 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6479 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Horizontal luma deblocking of one 16-pixel edge. When the DSP fast path is
 * taken (bS < 4, lines elided here), tc is handed to the optimized
 * h264_v_loop_filter_luma; otherwise (bS == 4, intra) the strong filter is
 * applied per-column in C below.
 * NOTE(review): fragmentary chunk -- the bS<4 / bS==4 branch structure and
 * several closing braces are elided from view. */
6485 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6487     const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6488     const int alpha = alpha_table[index_a];
6489     const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* horizontal edge: neighboring samples are one row apart */
6490     const int pix_next = stride;
/* luma DSP convention: tc = tc0, with -1 meaning "skip this 4-pixel group" */
6495         tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6496         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6498         /* 16px edge length, see filter_mb_edgev */
6499         for( d = 0; d < 16; d++ ) {
6500             const int p0 = pix[-1*pix_next];
6501             const int p1 = pix[-2*pix_next];
6502             const int p2 = pix[-3*pix_next];
6503             const int q0 = pix[0];
6504             const int q1 = pix[1*pix_next];
6505             const int q2 = pix[2*pix_next];
6507             if( ABS( p0 - q0 ) < alpha &&
6508                 ABS( p1 - p0 ) < beta &&
6509                 ABS( q1 - q0 ) < beta ) {
6511                 const int p3 = pix[-4*pix_next];
6512                 const int q3 = pix[ 3*pix_next];
/* strong-filter eligibility: small luma step across the edge */
6514                 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* p-side: 3-tap vs 5-tap choice depends on local smoothness */
6515                     if( ABS( p2 - p0 ) < beta) {
6517                         pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6518                         pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6519                         pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6522                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q-side mirror of the p-side decision */
6524                     if( ABS( q2 - q0 ) < beta) {
6526                         pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6527                         pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6528                         pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6531                         pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak strong-filter fallback: only p0/q0 are modified */
6535                     pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6536                     pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6538                 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Horizontal chroma deblocking of one edge. Dispatches entirely to the DSP
 * loop-filter routines; unlike luma, chroma passes tc0+1 (0 meaning "skip")
 * and has a dedicated intra (bS==4) entry point.
 * NOTE(review): fragmentary chunk -- the branch between the two DSP calls is
 * elided from view. */
6545 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6547     const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6548     const int alpha = alpha_table[index_a];
6549     const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma DSP convention: tc = tc0 + 1, with 0 meaning "skip this group" */
6554         tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6555         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6557         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocking-filter driver for one macroblock: computes boundary strengths
 * (bS) and QP averages for every vertical and horizontal edge, then calls the
 * per-edge filter helpers on luma and both chroma planes. Handles the MBAFF
 * special cases (mixed field/frame neighbor pairs) separately.
 * NOTE(review): fragmentary chunk -- many declarations, else-branches and
 * closing braces are elided from view; statement order below is exactly as in
 * the original and must not be reordered. */
6561 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6562     MpegEncContext * const s = &h->s;
6563     const int mb_xy= mb_x + mb_y*s->mb_stride;
6564     int first_vertical_edge_done = 0;
6566     /* FIXME: A given frame may occupy more than one position in
6567      * the reference list. So ref2frm should be populated with
6568      * frame numbers, not indices. */
/* maps ref_cache values (-2..15, offset by +2) to comparable frame ids */
6569     static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6571     //for sufficiently low qp, filtering wouldn't do anything
6572     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6573     if(!h->mb_aff_frame){
6574         int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
6575         int qp = s->current_picture.qscale_table[mb_xy];
/* skip the whole mb if the averaged QP with each neighbor is below threshold */
6577            && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6578            && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: the left neighbor pair has a different interlacing
 * type, so the first vertical edge needs 8 bS values and 2 QPs */
6584         // left mb is in picture
6585         && h->slice_table[mb_xy-1] != 255
6586         // and current and left pair do not have the same interlaced type
6587         && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6588         // and left mb is in the same slice if deblocking_filter == 2
6589         && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6590         /* First vertical edge is different in MBAFF frames
6591          * There are 8 different bS to compute and 2 different Qp
6598         first_vertical_edge_done = 1;
6599         for( i = 0; i < 8; i++ ) {
6601             int b_idx= 8 + 4 + 8*y;
6602             int bn_idx= b_idx - 1;
/* pick the left neighbor mb matching this row's field parity */
6604             int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
/* intra on either side forces the strongest bS (value set in elided line) */
6606             if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6607                 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6609             } else if( h->non_zero_count_cache[b_idx] != 0 ||
6610                 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6611                        h->non_zero_count_cache[bn_idx] != 0 ) {
/* otherwise bS depends on ref/mv differences across the edge */
6616                 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6617                     if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6618                         ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6619                         ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6626         if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6627             // Do not use s->qscale as luma quantizer because it has not the same
6628             // value in IPCM macroblocks.
/* average QP with each of the two left mbs of the pair (one per field) */
6629             qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6630             chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6631                              get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6632             qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6633             chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6634                              get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6637             tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6638             { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6639             filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6640             filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6641             filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6644     /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6645     for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbor across the mb boundary in the current direction */
6648         const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6649         const int mb_type = s->current_picture.mb_type[mb_xy];
6650         const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 marks "outside picture": skip edge 0 then */
6651         int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* skipped 16x16 mbs only need the mb-boundary edge */
6653         const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6654                                   == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6655         // how often to recheck mv-based bS when iterating between edges
6656         const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6657                               (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6658         // how often to recheck mv-based bS when iterating along each edge
6659         const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6661         if (first_vertical_edge_done) {
/* the MBAFF block above already handled dir==0, edge 0 */
6663             first_vertical_edge_done = 0;
/* deblocking_filter==2: do not filter across slice boundaries */
6666         if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6670         for( edge = start; edge < edges; edge++ ) {
6671         /* mbn_xy: neighbor macroblock */
6672             const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6673             const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with 8x8 transform, interior 4x4 edges do not exist */
6677             if( (edge&1) && IS_8x8DCT(mb_type) )
6680             if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6681                 && !IS_INTERLACED(mb_type)
6682                 && IS_INTERLACED(mbn_type)
6684                 // This is a special case in the norm where the filtering must
6685                 // be done twice (one each of the field) even if we are in a
6686                 // frame macroblock.
/* filter the top edge once per field, with doubled strides */
6688                 unsigned int tmp_linesize = 2 *   linesize;
6689                 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6690                 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* --- first field --- */
6694                 if( IS_INTRA(mb_type) ||
6695                     IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6696                     bS[0] = bS[1] = bS[2] = bS[3] = 3;
6699                     av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6702                 // Do not use s->qscale as luma quantizer because it has not the same
6703                 // value in IPCM macroblocks.
6704                 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6705                 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6706                 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6707                 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6708                 chroma_qp = ( h->chroma_qp +
6709                               get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6710                 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6711                 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* --- second field: neighbor one mb row lower, output offset by one line --- */
6714                 mbn_xy += s->mb_stride;
6715                 if( IS_INTRA(mb_type) ||
6716                     IS_INTRA(mbn_type) ) {
6717                     bS[0] = bS[1] = bS[2] = bS[3] = 3;
6720                     av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6723                 // Do not use s->qscale as luma quantizer because it has not the same
6724                 // value in IPCM macroblocks.
6725                 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6726                 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6727                 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6728                 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6729                 chroma_qp = ( h->chroma_qp +
6730                               get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6731                 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6732                 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* normal (non-field-split) bS computation */
6735                 if( IS_INTRA(mb_type) ||
6736                     IS_INTRA(mbn_type) ) {
/* bS depends on whether the edge is an mb boundary in a field context */
6739                     if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6740                          || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6749                     bS[0] = bS[1] = bS[2] = bS[3] = value;
/* partition layout lets us skip recomputing bS on masked interior edges */
6754                     if( edge & mask_edge ) {
6755                         bS[0] = bS[1] = bS[2] = bS[3] = 0;
6758                     else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6759                         int b_idx= 8 + 4 + edge * (dir ? 8:1);
6760                         int bn_idx= b_idx - (dir ? 8:1);
6762                         for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6763                             v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6764                                  ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6765                                  ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6767                         bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 bS: nnz forces 2, ref/mv differences force 1 (values set in elided lines) */
6773                         for( i = 0; i < 4; i++ ) {
6774                             int x = dir == 0 ? edge : i;
6775                             int y = dir == 0 ? i    : edge;
6776                             int b_idx= 8 + 4 + x + 8*y;
6777                             int bn_idx= b_idx - (dir ? 8:1);
6779                             if( h->non_zero_count_cache[b_idx] != 0 ||
6780                                 h->non_zero_count_cache[bn_idx] != 0 ) {
6786                                 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6787                                     if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6788                                         ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6789                                         ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6797                 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6802             // Do not use s->qscale as luma quantizer because it has not the same
6803             // value in IPCM macroblocks.
6804             qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6805             //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6806             tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6807             { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* apply the filters; chroma edges exist only at even luma edges */
6809                 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6810                 if( (edge&1) == 0 ) {
6811                     int chroma_qp = ( h->chroma_qp +
6812                                       get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6813                     filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6814                     filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6817                 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6818                 if( (edge&1) == 0 ) {
6819                     int chroma_qp = ( h->chroma_qp +
6820                                       get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6821                     filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6822                     filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decodes one slice: CABAC or CAVLC entropy path, advancing the mb position,
 * calling hl_decode_mb() per macroblock, and reporting decoded regions to the
 * error-resilience layer via ff_er_add_slice(). Returns 0 on success, -1 on
 * error (and -1 at the end as a not-reached sentinel).
 * NOTE(review): fragmentary chunk -- loop headers, error branches and closing
 * braces are elided from view; only the mojibake on the former
 * "s->?gb"/"s->gb?." line has been repaired (to match the adjacent line). */
6829 static int decode_slice(H264Context *h){
6830     MpegEncContext * const s = &h->s;
/* in partitioned mode only AC errors/end markers are meaningful */
6831     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6835     if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6839         align_get_bits( &s->gb );
6842         ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6843         ff_init_cabac_decoder( &h->cabac,
6844                                s->gb.buffer + get_bits_count(&s->gb)/8,
6845                                ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6846         /* calculate pre-state */
6847         for( i= 0; i < 460; i++ ) {
6849             if( h->slice_type == I_TYPE )
6850                 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6852                 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: 2*(63-pre) for MPS=0, 2*(pre-64)+1 for MPS=1 */
6855                 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6857                 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6861             int ret = decode_mb_cabac(h);
6864             if(ret>=0) hl_decode_mb(h);
6866             /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6867             if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
/* MBAFF: decode the bottom mb of the pair immediately after the top */
6870                 if(ret>=0) ret = decode_mb_cabac(h);
6872                 if(ret>=0) hl_decode_mb(h);
6875             eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun by more than one byte indicates corrupt input */
6877             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6878                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6879                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6883             if( ++s->mb_x >= s->mb_width ) {
6885                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6887                 if(h->mb_aff_frame) {
6892             if( eos || s->mb_y >= s->mb_height ) {
6893                 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6894                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* ---- CAVLC path ---- */
6901             int ret = decode_mb_cavlc(h);
6903             if(ret>=0) hl_decode_mb(h);
6905             if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6907                 ret = decode_mb_cavlc(h);
6909                 if(ret>=0) hl_decode_mb(h);
6914                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6915                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6920             if(++s->mb_x >= s->mb_width){
6922                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6924                 if(h->mb_aff_frame) {
6927                 if(s->mb_y >= s->mb_height){
6928                     tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* ending exactly on the last bit is a clean slice end; otherwise an error */
6930                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6931                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6935                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mb_skip_run pending means the bitstream may legitimately be exhausted early */
6942             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6943                 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6944                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6945                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6949                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* legacy/simple decode path (guard lines elided from view) */
6958     for(;s->mb_y < s->mb_height; s->mb_y++){
6959         for(;s->mb_x < s->mb_width; s->mb_x++){
6960             int ret= decode_mb(h);
6965                 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6966                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6971             if(++s->mb_x >= s->mb_width){
6973                 if(++s->mb_y >= s->mb_height){
6974                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
6975                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6979                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* was corrupted as "s->?gb" / "s->gb?." -- repaired to match the line below */
6986         if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6987             if(get_bits_count(s->gb) == s->gb.size_in_bits){
6988                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6992                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6999         ff_draw_horiz_band(s, 16*s->mb_y, 16);
7002     return -1; //not reached
/* Parses an SEI "unregistered user data" payload. Copies up to
 * sizeof(user_data)-1 bytes, scans for an x264 version banner to record
 * h->x264_build (used elsewhere for bug workarounds), then skips any
 * remaining payload bytes.
 * NOTE(review): fragmentary chunk -- the NUL-termination of user_data and
 * the tail-skip loop header are elided from view. */
7005 static int decode_unregistered_user_data(H264Context *h, int size){
7006     MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text */
7007     uint8_t user_data[16+256];
7013     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7014         user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
7018     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7019     if(e==1 && build>=0)
7020         h->x264_build= build;
7022     if(s->avctx->debug & FF_DEBUG_BUGS)
7023         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip payload bytes beyond what fit in user_data */
7026         skip_bits(&s->gb, 8);
/* Parses SEI NAL units: reads (type, size) pairs -- each encoded as a run of
 * 0xFF bytes plus a final byte -- dispatches known payload types and skips
 * the rest, then realigns the bitstream.
 * NOTE(review): fragmentary chunk -- the switch over payload types is elided
 * from view. */
7031 static int decode_sei(H264Context *h){
7032     MpegEncContext * const s = &h->s;
7034     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum 255s until a byte < 255 terminates the run */
7039             type+= show_bits(&s->gb, 8);
7040         }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same ff_byte escape coding */
7044             size+= show_bits(&s->gb, 8);
7045         }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): trailing ';' makes this 'if' a no-op as shown -- the elided
 * following lines presumably held 'return -1;'; verify against the original */
7049                 if(decode_unregistered_user_data(h, size) < 0);
/* unknown payload types are skipped wholesale */
7053                 skip_bits(&s->gb, 8*size);
7056     //FIXME check bits here
7057     align_get_bits(&s->gb);
/* Parses (and discards) the HRD parameters of the VUI: cpb_cnt entries of
 * bit-rate/CPB-size/cbr triples followed by the four fixed delay-length
 * fields. Nothing is stored in sps; the call exists only to keep the
 * bitstream position correct.
 * NOTE(review): fragmentary chunk -- the declarations of cpb_count/i and a
 * closing brace are elided from view. */
7063 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7064     MpegEncContext * const s = &h->s;
7066     cpb_count = get_ue_golomb(&s->gb) + 1;
7067     get_bits(&s->gb, 4); /* bit_rate_scale */
7068     get_bits(&s->gb, 4); /* cpb_size_scale */
7069     for(i=0; i<cpb_count; i++){
7070         get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7071         get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7072         get_bits1(&s->gb);     /* cbr_flag */
7074     get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7075     get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7076     get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7077     get_bits(&s->gb, 5); /* time_offset_length */
/* Parses the VUI parameters of an SPS. Stores sample aspect ratio, timing
 * info and bitstream-restriction fields into sps; most other fields are read
 * only to keep the bitstream position correct.
 * NOTE(review): fragmentary chunk -- some error-return and closing-brace
 * lines are elided from view. */
7080 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7081     MpegEncContext * const s = &h->s;
7082     int aspect_ratio_info_present_flag, aspect_ratio_idc;
7083     int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7085     aspect_ratio_info_present_flag= get_bits1(&s->gb);
7087     if( aspect_ratio_info_present_flag ) {
7088         aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7089         if( aspect_ratio_idc == EXTENDED_SAR ) {
7090             sps->sar.num= get_bits(&s->gb, 16);
7091             sps->sar.den= get_bits(&s->gb, 16);
/* otherwise look up one of the predefined aspect ratios */
7092         }else if(aspect_ratio_idc < 16){
7093             sps->sar=  pixel_aspect[aspect_ratio_idc];
7095             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7102 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7104     if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
7105         get_bits1(&s->gb);      /* overscan_appropriate_flag */
7108     if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
7109         get_bits(&s->gb, 3);    /* video_format */
7110         get_bits1(&s->gb);      /* video_full_range_flag */
7111         if(get_bits1(&s->gb)){  /* colour_description_present_flag */
7112             get_bits(&s->gb, 8); /* colour_primaries */
7113             get_bits(&s->gb, 8); /* transfer_characteristics */
7114             get_bits(&s->gb, 8); /* matrix_coefficients */
7118     if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
7119         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
7120         get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
7123     sps->timing_info_present_flag = get_bits1(&s->gb);
7124     if(sps->timing_info_present_flag){
7125         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7126         sps->time_scale = get_bits_long(&s->gb, 32);
7127         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed but discarded (see decode_hrd_parameters) */
7130     nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7131     if(nal_hrd_parameters_present_flag)
7132         decode_hrd_parameters(h, sps);
7133     vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7134     if(vcl_hrd_parameters_present_flag)
7135         decode_hrd_parameters(h, sps);
7136     if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7137         get_bits1(&s->gb);     /* low_delay_hrd_flag */
7138     get_bits1(&s->gb);         /* pic_struct_present_flag */
7140     sps->bitstream_restriction_flag = get_bits1(&s->gb);
7141     if(sps->bitstream_restriction_flag){
7142         get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
7143         get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7144         get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7145         get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7146         get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames is the one restriction field actually kept */
7147         sps->num_reorder_frames = get_ue_golomb(&s->gb);
7148         get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parses one scaling list (16 or 64 entries, zig-zag order) with the H.264
 * delta coding. Falls back to fallback_list when the list is absent, or to
 * jvt_list when the first delta yields next==0 ("use default").
 * NOTE(review): fragmentary chunk -- the else-branch structure and a break
 * are elided from view. */
7154 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7155                                 const uint8_t *jvt_list, const uint8_t *fallback_list){
7156     MpegEncContext * const s = &h->s;
/* deltas are relative to the previous value, starting from 8 */
7157     int i, last = 8, next = 8;
7158     const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7159     if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7160         memcpy(factors, fallback_list, size*sizeof(uint8_t));
7162         for(i=0;i<size;i++){
/* values are mod-256 per the spec's (last + delta + 256) % 256 */
7164                 next = (last + get_se_golomb(&s->gb)) & 0xff;
7165             if(!i && !next){ /* matrix not written, we use the preset one */
7166                 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 after the first entry means "repeat last value" */
7169             last = factors[scan[i]] = next ? next : last;
/* Parses the full set of scaling matrices (six 4x4 and, when 8x8 transform
 * is possible, two 8x8). Fallback chain per the spec: a PPS list falls back
 * to the SPS list (if present) or the JVT defaults; within the set, later
 * lists fall back to the previous list of the same class. */
7173 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7174                                     uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7175     MpegEncContext * const s = &h->s;
/* PPS-level parsing can fall back to SPS matrices only if the SPS had any */
7176     int fallback_sps = !is_sps && sps->scaling_matrix_present;
7177     const uint8_t *fallback[4] = {
7178         fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7179         fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7180         fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7181         fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq_scaling_matrix_present_flag / pic_scaling_matrix_present_flag */
7183     if(get_bits1(&s->gb)){
7184         sps->scaling_matrix_present |= is_sps;
7185         decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7186         decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7187         decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7188         decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7189         decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7190         decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7191         if(is_sps || pps->transform_8x8_mode){
7192             decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
7193             decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
/* flag absent in PPS: inherit the SPS matrices wholesale */
7195     } else if(fallback_sps) {
7196         memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7197         memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parses a sequence parameter set NAL into h->sps_buffer[sps_id].
 * NOTE(review): fragmentary chunk -- error-return lines and some braces are
 * elided from view. No bounds check on sps_id against MAX_SPS_COUNT is
 * visible before indexing sps_buffer; the validation may be among the elided
 * lines -- verify against the original. */
7201 static inline int decode_seq_parameter_set(H264Context *h){
7202     MpegEncContext * const s = &h->s;
7203     int profile_idc, level_idc;
7207     profile_idc= get_bits(&s->gb, 8);
7208     get_bits1(&s->gb);   //constraint_set0_flag
7209     get_bits1(&s->gb);   //constraint_set1_flag
7210     get_bits1(&s->gb);   //constraint_set2_flag
7211     get_bits1(&s->gb);   //constraint_set3_flag
7212     get_bits(&s->gb, 4); // reserved
7213     level_idc= get_bits(&s->gb, 8);
7214     sps_id= get_ue_golomb(&s->gb);
7216     sps= &h->sps_buffer[ sps_id ];
7217     sps->profile_idc= profile_idc;
7218     sps->level_idc= level_idc;
7220     if(sps->profile_idc >= 100){ //high profile
/* chroma_format_idc == 3 adds the residual color transform flag */
7221         if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7222             get_bits1(&s->gb);  //residual_color_transform_flag
7223         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
7224         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
7225         sps->transform_bypass = get_bits1(&s->gb);
7226         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7228         sps->scaling_matrix_present = 0;
7230     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7231     sps->poc_type= get_ue_golomb(&s->gb);
7233     if(sps->poc_type == 0){ //FIXME #define
7234         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7235     } else if(sps->poc_type == 1){//FIXME #define
7236         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7237         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7238         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7239         sps->poc_cycle_length= get_ue_golomb(&s->gb);
7241         for(i=0; i<sps->poc_cycle_length; i++)
7242             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7244     if(sps->poc_type > 2){
7245         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7249     sps->ref_frame_count= get_ue_golomb(&s->gb);
/* bound the reference count against the decoder's picture pool size */
7250     if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7251         av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7253     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7254     sps->mb_width= get_ue_golomb(&s->gb) + 1;
7255     sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7256     if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7257        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7260     sps->frame_mbs_only_flag= get_bits1(&s->gb);
7261     if(!sps->frame_mbs_only_flag)
7262         sps->mb_aff= get_bits1(&s->gb);
7266     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7268     sps->crop= get_bits1(&s->gb);
7270         sps->crop_left  = get_ue_golomb(&s->gb);
7271         sps->crop_right = get_ue_golomb(&s->gb);
7272         sps->crop_top   = get_ue_golomb(&s->gb);
7273         sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping shifts the picture origin, which this decoder handles
 * only approximately */
7274         if(sps->crop_left || sps->crop_top){
7275             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7281         sps->crop_bottom= 0;
7284     sps->vui_parameters_present_flag= get_bits1(&s->gb);
7285     if( sps->vui_parameters_present_flag )
7286         decode_vui_parameters(h, sps);
7288     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7289         av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7290                sps_id, sps->profile_idc, sps->level_idc,
7292                sps->ref_frame_count,
7293                sps->mb_width, sps->mb_height,
7294                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7295                sps->direct_8x8_inference_flag ? "8B8" : "",
7296                sps->crop_left, sps->crop_right,
7297                sps->crop_top, sps->crop_bottom,
7298                sps->vui_parameters_present_flag ? "VUI" : ""
/* Parses a picture parameter set NAL into h->pps_buffer[pps_id]. FMO slice
 * groups are recognized but not supported (the syntax is only quoted in the
 * comment table below).
 * NOTE(review): fragmentary chunk -- error-return lines and some braces are
 * elided from view. No bounds check on pps_id against MAX_PPS_COUNT is
 * visible before indexing pps_buffer; the validation may be among the elided
 * lines -- verify against the original. */
7304 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7305     MpegEncContext * const s = &h->s;
7306     int pps_id= get_ue_golomb(&s->gb);
7307     PPS *pps= &h->pps_buffer[pps_id];
7309     pps->sps_id= get_ue_golomb(&s->gb);
7310     pps->cabac= get_bits1(&s->gb);
7311     pps->pic_order_present= get_bits1(&s->gb);
7312     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7313     if(pps->slice_group_count > 1 ){
7314         pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7315         av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* the following table quotes the un-implemented FMO syntax from the spec */
7316         switch(pps->mb_slice_group_map_type){
7319 |   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
7320 |    run_length[ i ]                               |1  |ue(v) |
7325 |   for( i = 0; i < num_slice_groups_minus1; i++ ) |   |      |
7327 |    top_left_mb[ i ]                              |1  |ue(v) |
7328 |    bottom_right_mb[ i ]                          |1  |ue(v) |
7336 |   slice_group_change_direction_flag              |1  |u(1)  |
7337 |   slice_group_change_rate_minus1                 |1  |ue(v) |
7342 |   slice_group_id_cnt_minus1                      |1  |ue(v) |
7343 |   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |  |      |
7345 |    slice_group_id[ i ]                           |1  |u(v)  |
7350     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7351     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7352     if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7353         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7357     pps->weighted_pred= get_bits1(&s->gb);
7358     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7359     pps->init_qp= get_se_golomb(&s->gb) + 26;
7360     pps->init_qs= get_se_golomb(&s->gb) + 26;
7361     pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7362     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7363     pps->constrained_intra_pred= get_bits1(&s->gb);
7364     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* flat default matrices (all 16) in case no scaling data follows */
7366     memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7367     memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing RBSP: high-profile PPS extension fields */
7369     if(get_bits_count(&s->gb) < bit_length){
7370         pps->transform_8x8_mode= get_bits1(&s->gb);
7371         decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7372         get_se_golomb(&s->gb);  //second_chroma_qp_index_offset
7375     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7376         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7377                pps_id, pps->sps_id,
7378                pps->cabac ? "CABAC" : "CAVLC",
7379                pps->slice_group_count,
7380                pps->ref_count[0], pps->ref_count[1],
7381                pps->weighted_pred ? "weighted" : "",
7382                pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7383                pps->deblocking_filter_parameters_present ? "LPAR" : "",
7384                pps->constrained_intra_pred ? "CONSTR" : "",
7385                pps->redundant_pic_cnt_present ? "REDU" : "",
7386                pps->transform_8x8_mode ? "8x8DCT" : ""
7394 * finds the end of the current frame in the bitstream.
7395 * @return the position of the first byte of the next frame, or -1
/* Scans buf for the boundary between two access units by tracking a rolling
 * 32-bit start-code state. Slice/IDR/DP-A NALs (types 1, 2, 5) whose
 * first_mb_in_slice ue(v) starts with a 1 bit (byte & 0x80) mark a new
 * frame; SPS/PPS/AUD NALs (7, 8, 9) also end the current frame.
 * Returns the split position, or END_NOT_FOUND. */
7397 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7400     ParseContext *pc = &(h->s.parse_context);
7401 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7402 //    mb_addr= pc->mb_addr - 1;
7404     for(i=0; i<=buf_size; i++){
/* mask 0xFFFFFF1F: match 00 00 01 + nal_unit_type, ignoring nal_ref_idc */
7405         if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7406             tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7407             if(pc->frame_start_found){
7408                 // If there isn't one more byte in the buffer
7409                 // the test on first_mb_in_slice cannot be done yet
7410                 // do it at next call.
7411                 if (i >= buf_size) break;
7412                 if (buf[i] & 0x80) {
7413                     // first_mb_in_slice is 0, probably the first nal of a new
7415                     tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7417                     pc->frame_start_found= 0;
7421                 pc->frame_start_found = 1;
/* SPS/PPS/AUD (types 7, 8, 9) always terminate a frame in progress */
7423         if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7424             if(pc->frame_start_found){
7426                 pc->frame_start_found= 0;
7431         state= (state<<8) | buf[i];
7435     return END_NOT_FOUND;
/* AVCodecParser callback: accumulates input via ff_combine_frame() until
 * find_frame_end() locates a complete access unit, then hands it out through
 * poutbuf/poutbuf_size. */
7438 static int h264_parse(AVCodecParserContext *s,
7439                       AVCodecContext *avctx,
7440                       uint8_t **poutbuf, int *poutbuf_size,
7441                       const uint8_t *buf, int buf_size)
7443     H264Context *h = s->priv_data;
7444     ParseContext *pc = &h->s.parse_context;
7447     next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame < 0: frame incomplete, output suppressed this call */
7449     if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7455     *poutbuf = (uint8_t *)buf;
7456     *poutbuf_size = buf_size;
/* Finds the split point between out-of-band headers (SPS/PPS/...) and the
 * first slice data in buf: scans with a rolling start-code state until an
 * SPS has been seen and a non-SPS/PPS/AUD NAL begins, then backs the split
 * position up over any preceding zero padding. */
7460 static int h264_split(AVCodecContext *avctx,
7461                       const uint8_t *buf, int buf_size)
/* state starts as all-ones so no false start-code match on the first bytes */
7464     uint32_t state = -1;
7467     for(i=0; i<=buf_size; i++){
/* 0x107: start code + SPS nal_unit_type (ref_idc masked out) */
7468         if((state&0xFFFFFF1F) == 0x107)
7470 /*        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* any non-SPS/PPS/AUD NAL after an SPS marks the start of picture data */
7472         if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* include trailing zero bytes before the start code in the header part */
7474             while(i>4 && buf[i-5]==0) i--;
7479         state= (state<<8) | buf[i];
// decode_nal_units: walk the buffer NAL by NAL — AVC/mp4 length-prefixed
// framing when h->is_avc, Annex-B start-code framing otherwise — and
// dispatch each NAL by nal_unit_type (slices, partitions, SPS, PPS, ...).
// Afterwards, record per-picture state needed for POC computation and run
// reference-picture marking.
// (Gapped listing: the outer loop header, several declarations, case labels
// and braces are missing from this view; code left byte-identical.)
7485 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7486 MpegEncContext * const s = &h->s;
7487 AVCodecContext * const avctx= s->avctx;
// debug hex dump of the first input bytes (normally compiled out)
7491 for(i=0; i<50; i++){
7492 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7496 s->current_picture_ptr= NULL;
7505 if(buf_index >= buf_size) break;
// AVC framing: read h->nal_length_size big-endian length bytes
7507 for(i = 0; i < h->nal_length_size; i++)
7508 nalsize = (nalsize << 8) | buf[buf_index++];
7510 // start code prefix search
7511 for(; buf_index + 3 < buf_size; buf_index++){
7512 // this should always succeed in the first iteration
7513 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7517 if(buf_index+3 >= buf_size) break;
// unescape the NAL (strip emulation-prevention bytes) into a scratch buffer
7522 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7523 if(ptr[dst_length - 1] == 0) dst_length--;
// bit_length excludes the rbsp_stop_one_bit and trailing zero padding
7524 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7526 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7527 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7530 if (h->is_avc && (nalsize != consumed))
7531 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7533 buf_index += consumed;
// skip non-reference NALs when the caller asked to discard non-refs
7535 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME don't discard SEI id
7536 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7539 switch(h->nal_unit_type){
// IDR slice: flush the reference picture lists before decoding
7541 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7543 init_get_bits(&s->gb, ptr, bit_length);
7545 h->inter_gb_ptr= &s->gb;
7546 s->data_partitioning = 0;
7548 if(decode_slice_header(h) < 0){
7549 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
// decode the slice only if it passes every configured skip/discard filter
7552 if(h->redundant_pic_count==0 && s->hurry_up < 5
7553 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7554 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7555 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7556 && avctx->skip_frame < AVDISCARD_ALL)
// data-partitioned slice, partition A: slice header only; data in B/C
7560 init_get_bits(&s->gb, ptr, bit_length);
7562 h->inter_gb_ptr= NULL;
7563 s->data_partitioning = 1;
7565 if(decode_slice_header(h) < 0){
7566 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
// partition B: intra residual data
7570 init_get_bits(&h->intra_gb, ptr, bit_length);
7571 h->intra_gb_ptr= &h->intra_gb;
// partition C: inter residual data
7574 init_get_bits(&h->inter_gb, ptr, bit_length);
7575 h->inter_gb_ptr= &h->inter_gb;
// decode once all partitions of a partitioned slice have arrived
7577 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7579 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7580 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7581 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7582 && avctx->skip_frame < AVDISCARD_ALL)
7586 init_get_bits(&s->gb, ptr, bit_length);
// sequence parameter set
7590 init_get_bits(&s->gb, ptr, bit_length);
7591 decode_seq_parameter_set(h);
7593 if(s->flags& CODEC_FLAG_LOW_DELAY)
7596 if(avctx->has_b_frames < 2)
7597 avctx->has_b_frames= !s->low_delay;
// picture parameter set
7600 init_get_bits(&s->gb, ptr, bit_length);
7602 decode_picture_parameter_set(h, bit_length);
7606 case NAL_END_SEQUENCE:
7607 case NAL_END_STREAM:
7608 case NAL_FILLER_DATA:
7610 case NAL_AUXILIARY_SLICE:
7613 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7617 if(!s->current_picture_ptr) return buf_index; //no frame
7619 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7620 s->current_picture_ptr->pict_type= s->pict_type;
// key frame only if the picture is intra AND arrived in an IDR NAL
7621 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
// remember frame_num / POC state for decoding the next picture
7623 h->prev_frame_num_offset= h->frame_num_offset;
7624 h->prev_frame_num= h->frame_num;
7625 if(s->current_picture_ptr->reference){
7626 h->prev_poc_msb= h->poc_msb;
7627 h->prev_poc_lsb= h->poc_lsb;
7629 if(s->current_picture_ptr->reference)
// apply memory-management control operations (reference picture marking)
7630 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7640 * returns the number of bytes consumed for building the current frame
7642 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// In truncated (parser-combined) mode 'pos' is relative to the internal
// parse buffer; rebase it onto the caller's buffer.
// (Gapped listing: the else branch and return are missing from this view.)
7643 if(s->flags&CODEC_FLAG_TRUNCATED){
7644 pos -= s->parse_context.last_index;
7645 if(pos<0) pos=0; // FIXME remove (unneeded?)
// clamp: always consume at least one byte (guarantees forward progress) and
// never report more than buf_size; the +10 slack absorbs small
// parser/decoder disagreements near the end of the buffer
7649 if(pos==0) pos=1; //avoid infinite loops (I doubt that's needed but ...)
7650 if(pos+10>buf_size) pos=buf_size; // oops ;)
// decode_frame: top-level AVCodec decode callback.  Handles one-time stream
// setup (avcC extradata for AVC/mp4 input, Annex-B extradata otherwise),
// decodes the NAL units of one frame, then reorders decoded pictures into
// display order (POC order) before returning one in *data.
// (Gapped listing: many lines, branches and braces are missing from this
// view; code left byte-identical.)
7656 static int decode_frame(AVCodecContext *avctx,
7657 void *data, int *data_size,
7658 uint8_t *buf, int buf_size)
7660 H264Context *h = avctx->priv_data;
7661 MpegEncContext *s = &h->s;
7662 AVFrame *pict = data;
7665 s->flags= avctx->flags;
7666 s->flags2= avctx->flags2;
7668 /* no supplementary picture */
7669 if (buf_size == 0) {
// caller may feed arbitrary chunks: combine them into whole frames first
7673 if(s->flags&CODEC_FLAG_TRUNCATED){
7674 int next= find_frame_end(h, buf, buf_size);
7676 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7678 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// one-time parse of the avcC extradata box (AVCDecoderConfigurationRecord)
7681 if(h->is_avc && !h->got_avcC) {
7682 int i, cnt, nalsize;
7683 unsigned char *p = avctx->extradata;
7684 if(avctx->extradata_size < 7) {
7685 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7689 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7692 /* sps and pps in the avcC always have length coded with 2 bytes,
7693 so put a fake nal_length_size = 2 while parsing them */
7694 h->nal_length_size = 2;
7695 // Decode sps from avcC
7696 cnt = *(p+5) & 0x1f; // Number of sps
7698 for (i = 0; i < cnt; i++) {
7699 nalsize = BE_16(p) + 2;
7700 if(decode_nal_units(h, p, nalsize) < 0) {
7701 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7706 // Decode pps from avcC
7707 cnt = *(p++); // Number of pps
7708 for (i = 0; i < cnt; i++) {
7709 nalsize = BE_16(p) + 2;
7710 if(decode_nal_units(h, p, nalsize) != nalsize) {
7711 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7716 // Now store the right nal length size, used to parse all other nals
7717 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7718 // Do not reparse avcC
// Annex-B extradata (SPS/PPS) is decoded once, before the first picture
7722 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7723 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7727 buf_index=decode_nal_units(h, buf, buf_size);
7731 //FIXME do something with unavailable reference frames
7733 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7734 if(!s->current_picture_ptr){
7735 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7740 Picture *out = s->current_picture_ptr;
7741 #if 0 //decode order
7742 *data_size = sizeof(AVFrame);
7744 /* Sort B-frames into display order */
7745 Picture *cur = s->current_picture_ptr;
7746 Picture *prev = h->delayed_output_pic;
7751 int dropped_frame = 0;
// honor the stream's declared reorder depth if larger than what we assumed
7754 if(h->sps.bitstream_restriction_flag
7755 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7756 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// append the current picture to the delayed-output queue
7760 while(h->delayed_pic[pics]) pics++;
7761 h->delayed_pic[pics++] = cur;
7762 if(cur->reference == 0)
7765 for(i=0; h->delayed_pic[i]; i++)
7766 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// pick the queued picture with the smallest POC as the output candidate
7769 out = h->delayed_pic[0];
7770 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7771 if(h->delayed_pic[i]->poc < out->poc){
7772 out = h->delayed_pic[i];
// grow has_b_frames heuristically if the stream reorders deeper than
// declared; otherwise emit (or drop) a picture from the queue
7776 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7777 if(prev && pics <= s->avctx->has_b_frames)
7779 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7781 ((!cross_idr && prev && out->poc > prev->poc + 2)
7782 || cur->pict_type == B_TYPE)))
7785 s->avctx->has_b_frames++;
7788 else if(out_of_order)
7791 if(out_of_order || pics > s->avctx->has_b_frames){
7792 dropped_frame = (out != h->delayed_pic[out_idx]);
// compact the queue, removing the emitted/dropped entry
7793 for(i=out_idx; h->delayed_pic[i]; i++)
7794 h->delayed_pic[i] = h->delayed_pic[i+1];
7797 if(prev == out && !dropped_frame)
7800 *data_size = sizeof(AVFrame);
// release the previously output picture if it is no longer referenced
7801 if(prev && prev != out && prev->reference == 1)
7802 prev->reference = 0;
7803 h->delayed_output_pic = out;
7807 *pict= *(AVFrame*)out;
7809 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7812 assert(pict->data[0] || !*data_size);
7813 ff_print_debug_info(s, pict);
7814 //printf("out %d\n", (int)pict->data[0]);
7817 /* Return the Picture timestamp as the frame number */
7818 /* we subtract 1 because it is added on utils.c */
7819 avctx->frame_number = s->picture_number - 1;
7821 return get_consumed_bytes(s, buf_index, buf_size);
// fill_mb_avail: record, for the current macroblock, which neighbours exist
// and belong to the same slice: [0]=above-left, [1]=above, [2]=above-right,
// [3]=left (offsets relative to mb_xy show the mapping).
// (Gapped listing: the guard/else branch for the top macroblock row is
// missing from this view; code left byte-identical.)
7824 static inline void fill_mb_avail(H264Context *h){
7825 MpegEncContext * const s = &h->s;
7826 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7829 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7830 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7831 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7837 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7838 h->mb_avail[4]= 1; //FIXME move out
7839 h->mb_avail[5]= 0; //FIXME move out
// Self-test harness (built under a TEST define): exercises the exp-Golomb
// bit writer/reader round trip, the 4x4 (I)DCT reconstruction error, the
// quantizer, and the NAL escape/unescape layer with pseudo-random data.
// (Gapped listing: the function header, most declarations and many braces
// are missing from this view; code left byte-identical.)
7845 #define SIZE (COUNT*40)
7851 // int int_temp[10000];
7853 AVCodecContext avctx;
7855 dsputil_init(&dsp, &avctx);
// --- unsigned exp-Golomb: write COUNT codes, read them back, compare ---
7857 init_put_bits(&pb, temp, SIZE);
7858 printf("testing unsigned exp golomb\n");
7859 for(i=0; i<COUNT; i++){
7861 set_ue_golomb(&pb, i);
7862 STOP_TIMER("set_ue_golomb");
7864 flush_put_bits(&pb);
7866 init_get_bits(&gb, temp, 8*SIZE);
7867 for(i=0; i<COUNT; i++){
7870 s= show_bits(&gb, 24);
7873 j= get_ue_golomb(&gb);
7875 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7878 STOP_TIMER("get_ue_golomb");
// --- signed exp-Golomb round trip (values centered around zero) ---
7882 init_put_bits(&pb, temp, SIZE);
7883 printf("testing signed exp golomb\n");
7884 for(i=0; i<COUNT; i++){
7886 set_se_golomb(&pb, i - COUNT/2);
7887 STOP_TIMER("set_se_golomb");
7889 flush_put_bits(&pb);
7891 init_get_bits(&gb, temp, 8*SIZE);
7892 for(i=0; i<COUNT; i++){
7895 s= show_bits(&gb, 24);
7898 j= get_se_golomb(&gb);
7899 if(j != i - COUNT/2){
7900 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7903 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: transform random blocks, accumulate the error ---
7906 printf("testing 4x4 (I)DCT\n");
7909 uint8_t src[16], ref[16];
7910 uint64_t error= 0, max_error=0;
7912 for(i=0; i<COUNT; i++){
7914 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7915 for(j=0; j<16; j++){
7916 ref[j]= random()%255;
7917 src[j]= random()%255;
7920 h264_diff_dct_c(block, src, ref, 4);
// crude scale/round of the coefficients before the inverse transform
7923 for(j=0; j<16; j++){
7924 // printf("%d ", block[j]);
7925 block[j]= block[j]*4;
7926 if(j&1) block[j]= (block[j]*4 + 2)/5;
7927 if(j&4) block[j]= (block[j]*4 + 2)/5;
7931 s->dsp.h264_idct_add(ref, block, 4);
7932 /* for(j=0; j<16; j++){
7933 printf("%d ", ref[j]);
7937 for(j=0; j<16; j++){
7938 int diff= ABS(src[j] - ref[j]);
7941 max_error= FFMAX(max_error, diff);
7944 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7946 printf("testing quantizer\n");
7947 for(qp=0; qp<52; qp++){
7949 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape a random bitstream, unescape, verify identity ---
7953 printf("Testing NAL layer\n");
7955 uint8_t bitstream[COUNT];
7956 uint8_t nal[COUNT*2];
7958 memset(&h, 0, sizeof(H264Context));
7960 for(i=0; i<COUNT; i++){
// non-zero bytes first ...
7968 for(j=0; j<COUNT; j++){
7969 bitstream[j]= (random() % 255) + 1;
// ... then sprinkle zero bytes so emulation-prevention escaping is exercised
7972 for(j=0; j<zeros; j++){
7973 int pos= random() % COUNT;
7974 while(bitstream[pos] == 0){
7983 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7985 printf("encoding failed\n");
7989 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7993 if(out_length != COUNT){
7994 printf("incorrect length %d %d\n", out_length, COUNT);
7998 if(consumed != nal_length){
7999 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8003 if(memcmp(bitstream, out, COUNT)){
8004 printf("missmatch\n");
8009 printf("Testing RBSP\n");
// decode_end: AVCodec close callback — free the RBSP scratch buffer and the
// per-context tables.  (Gapped listing: the trailing return is missing from
// this view; code left byte-identical.)
8017 static int decode_end(AVCodecContext *avctx)
8019 H264Context *h = avctx->priv_data;
8020 MpegEncContext *s = &h->s;
8022 av_freep(&h->rbsp_buffer);
8023 free_tables(h); //FIXME cleanup init stuff perhaps
8026 // memset(h, 0, sizeof(H264Context));
// Registration record for the H.264 decoder.  (Gapped listing: most
// initializer fields — name, type, id, callbacks — fall on lines missing
// from this view; code left byte-identical.)
8032 AVCodec h264_decoder = {
8036 sizeof(H264Context),
8041 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8045 AVCodecParser h264_parser = {
8047 sizeof(H264Context),