2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
99 * Picture parameter set
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
122 * Memory management control operation opcode.
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
146 typedef struct H264Context{
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
164 uint8_t *rbsp_buffer;
165 int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 uint8_t non_zero_count_cache[6*8] __align8;
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
208 int16_t mv_cache[2][5*8][2] __align8;
209 int8_t ref_cache[2][5*8] __align8;
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DCTELEM mb[16*24] __align8;
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
354 int16_t (*mvd_table[2])[2];
355 int16_t mvd_cache[2][5*8][2] __align8;
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 const uint8_t *zigzag_scan_q0;
362 const uint8_t *field_scan_q0;
367 static VLC coeff_token_vlc[4];
368 static VLC chroma_dc_coeff_token_vlc;
370 static VLC total_zeros_vlc[15];
371 static VLC chroma_dc_total_zeros_vlc[3];
373 static VLC run_vlc[6];
376 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
377 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
378 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
380 static always_inline uint32_t pack16to32(int a, int b){
381 #ifdef WORDS_BIGENDIAN
382 return (b&0xFFFF) + (a<<16);
384 return (a&0xFFFF) + (b<<16);
390 * @param h height of the rectangle, should be a constant
391 * @param w width of the rectangle, should be a constant
392 * @param size the size of val (1 or 4), should be a constant
/* Fills a w x h rectangle of 8-bit (size==1) or 32-bit (size==4) elements with
 * val, using the widest aligned stores available for each constant (w,h) pair.
 * For size==1 the byte value is replicated across the word via the
 * 0x0101.../0x01010101... multipliers.
 * NOTE(review): this excerpt is missing interleaved source lines (e.g. the
 * w==2&&h==1 branch head and the trailing else/closing brace). */
static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);

    /* the rectangle start must be aligned enough for the wide stores below */
    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
        *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
    }else if(w==2 && h==4){
        *(uint16_t*)(p + 0*stride)=
        *(uint16_t*)(p + 1*stride)=
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
    }else if(w==4 && h==1){
        *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==2){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==4){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)=
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==1){
        *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==2){
        *(uint32_t*)(p + 0 + 0*stride)=
        *(uint32_t*)(p + 4 + 0*stride)=
        *(uint32_t*)(p + 0 + 1*stride)=
        *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==4){
        /* 64-bit stores: for size==4 the same 32-bit val is duplicated into
         * both halves (0x0100000001ULL multiplier) */
        *(uint64_t*)(p + 0*stride)=
        *(uint64_t*)(p + 1*stride)=
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==2){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==4){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)=
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Fills the per-macroblock prediction caches (intra 4x4 modes, non-zero
 * counts, CBP, motion vectors, reference indices, CABAC mvd values and
 * B-direct flags) from the already-decoded neighbouring macroblocks
 * (top-left, top, top-right, left).
 * @param mb_type macroblock type of the current macroblock
 * @param for_deblock nonzero when the caches are filled for the deblocking
 *        filter only (then neighbours from other slices are usable)
 * NOTE(review): this excerpt is missing many interleaved source lines
 * (branch heads, else clauses, closing braces); the control flow shown is
 * partial and must not be restructured from this view alone.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];

    //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
    // the actual condition is whether we're on the edge of a slice,
    // and even then the intra and nnz parts are unnecessary.
    if(for_deblock && h->slice_num == 1)

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it

    /* frame-coded neighbour addresses; the MBAFF code below re-derives them */
    top_xy     = mb_xy  - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

        /* MBAFF: neighbours are derived from the macroblock *pair* address */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        /* move each top-side neighbour up one row when field/frame coding
         * differs between the current pair and that neighbour pair */
                ? !curr_mb_frame_flag            // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            top_xy -= s->mb_stride;
                ? !curr_mb_frame_flag            // bottom macroblock
                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
            topleft_xy -= s->mb_stride;
                ? !curr_mb_frame_flag            // bottom macroblock
                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
            topright_xy -= s->mb_stride;
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                left_xy[1] += s->mb_stride;

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

        /* deblocking: any already-decoded neighbour is usable
         * (slice_table < 255 — presumably 255 marks "not yet decoded";
         * TODO confirm against slice_table initialisation) */
        topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
        /* prediction: only neighbours in the same slice are usable */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* start from "everything available" and clear bits for neighbours
         * that are missing or unusable under constrained intra prediction */
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;

            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            /* bottom row of the top MB's 4x4 intra modes -> top row of cache */
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];

                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))

                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;

                /* left column: left_block[] remaps indices for MBAFF pairs */
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];

                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))

                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
        /* non-zero-count cache, top row: luma 4..7 come from the top MB's
         * bottom row, chroma from indices 9,8 (cb) and 12,11 (cr) */
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

        /* top MB unavailable: CABAC wants 0, CAVLC wants the 64 marker */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

            /* coded-block-pattern of the neighbours (for CABAC contexts) */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

//FIXME direct mb can skip much of this
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<1+(h->slice_type==B_TYPE); list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            /* MV/ref cache, top row: bottom row of block MVs from the top MB */
            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

            //FIXME unify cleanup or sth
            /* left column, upper half (rows 0-1 of the cache) */
            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* left column, lower half (rows 2-3; may be a different MB in MBAFF) */
            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                assert((!left_type[0]) == (!left_type[1]));

            if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))

            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* pad the cache cells that never have real neighbours */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

                /* XXX beurk, Load mvd */
                if(IS_INTER(topleft_type)){
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;

                if(IS_INTER(top_type)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;

                if(IS_INTER(left_type[0])){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;

                if(IS_INTER(left_type[1])){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;

                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                /* B slices: cache per-8x8-block direct flags of the neighbours */
                if(h->slice_type == B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                    if(IS_DIRECT(left_type[0])){
                        h->direct_cache[scan8[0] - 1 + 0*8]=
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    }else if(IS_8X8(left_type[0])){
                        int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
                        h->direct_cache[scan8[0] - 1 + 0*8]=
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;

    /* number of neighbours (top, left) that used the 8x8 transform */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the current macroblock's 4x4 intra prediction modes from the
 * scan8-ordered cache back to the persistent per-MB intra4x4_pred_mode
 * array (bottom row then right column), so the next MBs can predict from
 * them. NOTE(review): excerpt is missing trailing lines of this function. */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
889 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Checks whether the top/left neighbours required by each cached 4x4 intra
 * prediction mode are available; remaps DC-style modes to the usable variant
 * via the top[]/left[] tables and errors out (negative status) on modes that
 * cannot be salvaged.
 * NOTE(review): excerpt is missing the loop heads, the error returns and the
 * final return of this function. */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* mode -> replacement when the top (resp. left) samples are missing;
     * -1 entries keep the mode, negative status values signal an error */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    if(!(h->top_samples_available&0x8000)){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
            h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    if(!(h->left_samples_available&0x8000)){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
            h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;

} //FIXME cleanup like next
925 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Validates a 16x16/chroma intra prediction mode and, when the top or left
 * samples are unavailable, remaps it through the top[]/left[] tables to a
 * mode that only uses available samples; logs and fails on out-of-range or
 * unsalvageable modes.
 * NOTE(review): excerpt is missing the remap assignments, error returns and
 * the final return of this function. */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* mode -> replacement when top (resp. left) samples are missing;
     * -1 entries mean the mode is usable as-is */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    if(mode < 0 || mode > 6) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->left_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
957 * gets the predicted intra4x4 prediction mode.
/* Returns the predicted 4x4 intra mode for block n: the minimum of the left
 * and top neighbours' modes, or DC_PRED when either neighbour is marked
 * unavailable (negative cache value).
 * NOTE(review): excerpt is missing the trailing return of this function. */
static inline int pred_intra_mode(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min= FFMIN(left, top);

    tprintf("mode:%d %d min:%d\n", left ,top, min);

    if(min<0) return DC_PRED;
/* Copies the current macroblock's non-zero coefficient counts from the
 * scan8-ordered cache back to the persistent per-MB non_zero_count array
 * (luma bottom row / right column, then the chroma entries), so following
 * MBs can use them as neighbours. */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];

    /* chroma cb entries */
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];

    /* chroma cr entries */
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
993 * gets the predicted number of non zero coefficients.
994 * @param n block index
/* Predicts the number of non-zero coefficients of block n (for the CAVLC
 * coeff_token table selection) from the left and top neighbours' counts.
 * The value 64 marks "neighbour unavailable".
 * NOTE(review): excerpt is missing the lines that combine left/top into i
 * and the final return of this function. */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];

    /* both neighbours available: round the average up */
    if(i<64) i= (i+1)>>1;

    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetches the diagonal (top-right, falling back to top-left) neighbour MV
 * used by median MV prediction for the partition at cache index i.
 * @param C receives a pointer to the chosen neighbour's MV
 * @return the reference index of the chosen neighbour
 * NOTE(review): excerpt is missing the else branch head and closing lines. */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
        /* top-right unavailable: substitute the top-left neighbour */
        tprintf("topright MV not available\n");

        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
1024 * gets the predicted MV.
1025 * @param n the block index
1026 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1027 * @param mx the x component of the predicted motion vector
1028 * @param my the y component of the predicted motion vector
/* Median motion vector prediction for block n of the given list:
 * uses left (A), top (B) and diagonal (C) neighbours; when exactly one
 * neighbour has the same reference index, that neighbour's MV is used
 * directly, otherwise the component-wise median of A/B/C.
 * @param n the block index
 * @param part_width partition width in 4x4 units (1, 2 or 4)
 * @param ref the reference index the current partition uses
 * @param mx receives the predicted x component
 * @param my receives the predicted y component
 * NOTE(review): excerpt is missing the single-match branches and several
 * closing braces; control flow shown is partial. */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf("pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        }else if(top_ref==ref){
        /* only the left neighbour exists: take its MV verbatim */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        /* no usable match: fall back to the median */
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);

    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1080 * gets the directionally predicted 16x8 MV.
1081 * @param n the block index
1082 * @param mx the x component of the predicted motion vector
1083 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for a 16x8 partition: the top
 * partition prefers the top neighbour, the bottom partition prefers the
 * left neighbour; otherwise falls back to median prediction.
 * NOTE(review): the n==0/n!=0 branch structure and the *mx/*my assignments
 * are missing from this extract.
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
    const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
    tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
    const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
    tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    if(left_ref == ref){
    // fallback: standard median MV prediction
    pred_motion(h, n, 4, list, ref, mx, my);
1115 * gets the directionally predicted 8x16 MV.
1116 * @param n the block index
1117 * @param mx the x component of the predicted motion vector
1118 * @param my the y component of the predicted motion vector
/**
 * Gets the directionally predicted MV for an 8x16 partition: the left
 * partition prefers the left neighbour, the right partition prefers the
 * diagonal neighbour; otherwise falls back to median prediction.
 * NOTE(review): the n==0/n!=0 branch structure and the *mx/*my assignments
 * are missing from this extract.
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
    tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    if(left_ref == ref){
    diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
    tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
    if(diagonal_ref == ref){
    // fallback: standard median MV prediction
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock. Per the spec, the skip MV is
 * zero when a neighbour is unavailable or a neighbour uses ref 0 with a
 * zero MV; otherwise the normal median prediction is used.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
    tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
    pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factors for every list-0
 * reference (spec 8.4.1.2.3: tb/td POC-distance ratio in 8.8 fixed point).
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    const int poc = h->s.current_picture_ptr->poc;
    const int poc1 = h->ref_list[1][0].poc;         // POC of the list-1 anchor
    for(i=0; i<h->ref_count[0]; i++){
        int poc0 = h->ref_list[0][i].poc;
        int td = clip(poc1 - poc0, -128, 127);      // distance between the two references
        if(td == 0 /* FIXME || pic0 is a long-term ref */){
            h->dist_scale_factor[i] = 256;          // identity scale (1.0 in 8.8)
            int tb = clip(poc - poc0, -128, 127);   // distance current -> ref0
            int tx = (16384 + (ABS(td) >> 1)) / td; // rounded reciprocal of td
            h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Records the current picture's reference lists and, for temporal direct
 * mode, builds map_col_to_list0[]: for each reference of the co-located
 * (list-1 anchor) picture, the index of the same picture (matched by POC)
 * in our own list 0.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];   // co-located picture
    Picture * const cur = s->current_picture_ptr;
    if(cur->pict_type == I_TYPE)
        cur->ref_count[0] = 0;
    if(cur->pict_type != B_TYPE)
        cur->ref_count[1] = 0;
    for(list=0; list<2; list++){
        cur->ref_count[list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
    // spatial direct mode does not need the column-to-list0 mapping
    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
    for(list=0; list<2; list++){
        for(i=0; i<ref1->ref_count[list]; i++){
            const int poc = ref1->ref_poc[list][i];
            h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;  // default: no match
            for(j=0; j<h->ref_count[list]; j++)
                if(h->ref_list[list][j].poc == poc){
                    h->map_col_to_list0[list][i] = j;
/**
 * Fills the MV/ref caches for a B-direct macroblock (or the direct 8x8
 * sub-blocks of a B_8x8 macroblock), using either spatial or temporal
 * direct prediction depending on the slice header flag.
 * @param mb_type in/out: updated with the partitioning/list flags chosen
 * NOTE(review): several declarations (sub_mb_type, ref[], mv[], list, i8,
 * i4, mv_l0) and a number of closing braces are missing from this extract.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];      // co-located MB type
    const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
    const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
    const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
    const int is_b8x8 = IS_8X8(*mb_type);
    /* choose the partition size implied by the co-located macroblock */
    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
        /* FIXME save sub mb types from previous frames (or derive from MVs)
         * so we know exactly what block size to use */
        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
        *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
    }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
        *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
    *mb_type |= MB_TYPE_DIRECT2;
    tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
    if(h->direct_spatial_mv_pred){
        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            refc = h->ref_cache[list][scan8[0] - 8 - 1];    // top-left fallback
            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
        if(ref[0] < 0 && ref[1] < 0){
            // no neighbour references at all: use ref 0 / zero MV in both lists
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
            for(list=0; list<2; list++){
                pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                mv[list][0] = mv[list][1] = 0;
            *mb_type &= ~MB_TYPE_P0L1;
            sub_mb_type &= ~MB_TYPE_P0L1;
        }else if(ref[0] < 0){
            *mb_type &= ~MB_TYPE_P0L0;
            sub_mb_type &= ~MB_TYPE_P0L0;
        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            /* co-located block is stationary (|mv| <= 1, ref 0): force zero MV */
            if(!IS_INTRA(mb_type_col)
               && (   (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
        for(i8=0; i8<4; i8++){
            const int x8 = i8&1;
            const int y8 = i8>>1;
            if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
            h->sub_mb_type[i8] = sub_mb_type;
            fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
            fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
            fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
            /* per-8x8: zero out MVs where the co-located block is stationary */
            if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
                                          || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
                                              && (h->x264_build>33 || !h->x264_build)))){
                const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                    if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
                        fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                        fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                        if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
                            *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
    }else{ /* direct temporal mv pred */
        if(IS_16X16(*mb_type)){
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col)){
                // intra co-located MB: zero MVs, ref 0 in both lists
                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
                const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
                                                : h->map_col_to_list0[1][l1ref1[0]];
                const int dist_scale_factor = h->dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                // scale the co-located MV; list-1 MV is the complementary part
                mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
                mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
        for(i8=0; i8<4; i8++){
            const int x8 = i8&1;
            const int y8 = i8>>1;
            int ref0, dist_scale_factor;
            const int16_t (*l1mv)[2]= l1mv0;
            if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
            h->sub_mb_type[i8] = sub_mb_type;
            if(IS_INTRA(mb_type_col)){
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
            ref0 = l1ref0[x8 + y8*h->b8_stride];
            ref0 = h->map_col_to_list0[0][ref0];
            ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
            dist_scale_factor = h->dist_scale_factor[ref0];
            fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
            fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
            if(IS_SUB_8X8(sub_mb_type)){
                const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
                int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
                int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Writes the per-macroblock MV/ref caches back into the frame-wide
 * motion_val / ref_index (and CABAC mvd / direct) tables.
 * NOTE(review): the loop variables (list, y) and several closing braces are
 * missing from this extract.
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;   // 4x4-block coordinates
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;  // 8x8-block coordinates
    for(list=0; list<2; list++){
        if(!USES_LIST(mb_type, list)){
            // list unused by this MB: clear MVs and mark refs as unused
            if(1){ //FIXME skip or never read if mb_type doesn't use it
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
            if( h->pps.cabac ) {
                /* FIXME needed ? */
                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
        // copy each 4-MV row (two 64-bit stores) from the cache
        *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
        *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        if( h->pps.cabac ) {
            *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
        s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
        s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
    // record per-8x8 direct flags for CABAC context modelling in B slices
    if(h->slice_type == B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1470 * Decodes a network abstraction layer unit.
1471 * @param consumed is the number of bytes used as input
1472 * @param length is the length of the array
1473 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1474 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a network abstraction layer unit: parses the NAL header and
 * removes 0x03 emulation-prevention bytes from the RBSP.
 * @param consumed receives the number of input bytes used
 * @param length length of the src array
 * @param dst_length receives the number of decoded bytes
 * @returns decoded bytes; may be src+1 when no escapes were present
 * NOTE(review): declarations of i/si/di/dst and some braces are missing
 * from this extract.
 */
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
//    src[0]&0x80;    //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
    // scan two bytes at a time for a 00 00 0x pattern
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            /* startcode, so we must be past the end */
    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
    dst= h->rbsp_buffer;
//printf("decoding esc\n");
    //remove escapes (very rare 1:2^22)
    if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
        if(src[si+2]==3){ //escape
        }else //next start code
        dst[di++]= src[si++];
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1529 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1535 * @param src the data which should be escaped
1536 * @param dst the target buffer, dst+1 == src is allowed as a special case
1537 * @param length the length of the src data
1538 * @param dst_length the length of the dst array
1539 * @returns length of escaped data in bytes or -1 if an error occured
/**
 * Encodes a NAL unit: writes the header byte and inserts 0x03
 * emulation-prevention escapes into the payload.
 * @param dst the target buffer; dst+1 == src is allowed as a special case
 * @param length length of the src data
 * @param dst_length size of the dst array
 * @returns length of the escaped data in bytes, or -1 on error
 * NOTE(review): some loop structure/braces are missing from this extract.
 */
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
    int i, escape_count, si, di;
    assert(dst_length>0);
    // NAL header: nal_ref_idc (3 bits) + nal_unit_type (5 bits)
    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
    if(length==0) return 1;
    // first pass: count the escapes needed
    for(i=0; i<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0)
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
    if(escape_count==0){
        // nothing to escape: plain copy after the header byte
        memcpy(dst+1, src, length);
    if(length + escape_count + 1> dst_length)
    //this should be damn rare (hopefully)
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
    temp= h->rbsp_buffer;
//printf("encoding esc\n");
    // second pass: copy, inserting a 0x03 byte after each 00 00 pair
    if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
        temp[di++]= 0; si++;
        temp[di++]= 0; si++;
        temp[di++]= src[si++];
    temp[di++]= src[si++];
    memcpy(dst+1, temp, length+escape_count);
    assert(di == length+escape_count);
1598 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
/**
 * Writes the RBSP trailing bits: pads with zeros to the next byte boundary
 * (the 1,10,100,... alignment pattern — exactly the inverse of MPEG-4).
 * NOTE(review): the declaration of `length` and the put_bits() call writing
 * the stop bit itself are not visible in this extract.
 */
static void encode_rbsp_trailing(PutBitContext *pb){
    length= (-put_bits_count(pb))&7;        // bits needed to reach byte alignment
    if(length) put_bits(pb, length, 0);     // zero padding
1609 * identifies the exact end of the bitstream
1610 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream.
 * @return the length of the trailing bits, or 0 if damaged
 * NOTE(review): the body computing `v` and the return statement are not
 * visible in this extract.
 */
static int decode_rbsp_trailing(uint8_t *src){
    tprintf("rbsp trailing %X\n", v);
1626 * idct tranforms the 16 dc values and dequantize them.
1627 * @param qp quantization parameter
/**
 * 4x4 inverse Hadamard transform of the 16 luma DC values, with
 * dequantization folded into the output stage (qmul scaling, 8.8 rounding).
 * Results are written to the DC positions of the 16 4x4 blocks in `block`.
 * NOTE(review): the declarations of stride/i and the loop headers/temp[]
 * stores are missing from this extract.
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    //memset(block, 64, 2*256);
    // horizontal butterflies into temp[]
    const int offset= y_offset[i];
    const int z0= block[offset+stride*0] + block[offset+stride*4];
    const int z1= block[offset+stride*0] - block[offset+stride*4];
    const int z2= block[offset+stride*1] - block[offset+stride*5];
    const int z3= block[offset+stride*1] + block[offset+stride*5];
    // vertical butterflies + dequant back into the DC positions
    const int offset= x_offset[i];
    const int z0= temp[4*0+i] + temp[4*2+i];
    const int z1= temp[4*0+i] - temp[4*2+i];
    const int z2= temp[4*1+i] - temp[4*3+i];
    const int z3= temp[4*1+i] + temp[4*3+i];
    block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
    block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
    block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
    block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1667 * dct tranforms the 16 dc values.
1668 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side).
 * NOTE(review): the declarations of stride/i and the loop headers/temp[]
 * stores are missing from this extract.
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//  const int qmul= dequant_coeff[qp][0];
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    // horizontal butterflies into temp[]
    const int offset= y_offset[i];
    const int z0= block[offset+stride*0] + block[offset+stride*4];
    const int z1= block[offset+stride*0] - block[offset+stride*4];
    const int z2= block[offset+stride*1] - block[offset+stride*5];
    const int z3= block[offset+stride*1] + block[offset+stride*5];
    // vertical butterflies, halved, back into the DC positions
    const int offset= x_offset[i];
    const int z0= temp[4*0+i] + temp[4*2+i];
    const int z1= temp[4*0+i] - temp[4*2+i];
    const int z2= temp[4*1+i] - temp[4*3+i];
    const int z3= temp[4*1+i] + temp[4*3+i];
    block[stride*0 +offset]= (z0 + z3)>>1;
    block[stride*2 +offset]= (z1 + z2)>>1;
    block[stride*8 +offset]= (z1 - z2)>>1;
    block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the 4 chroma DC
 * values (stored at the DC positions of the four chroma 4x4 blocks).
 * NOTE(review): the int declarations and the butterfly computing `e` are
 * missing from this extract.
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];
    // 2x2 butterfly output, scaled by qmul with >>7 normalisation
    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side).
 * NOTE(review): the int declarations and the butterfly computing `e` are
 * missing from this extract.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];
    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
1753 * gets the chroma qp.
/**
 * Gets the chroma QP: applies the PPS chroma qp offset to the luma qscale,
 * clips the result to the legal 0..51 range and maps it through the
 * chroma_qp[] lookup table.
 */
static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
    return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Computes the 4x4 forward transform of the difference src1 - src2 into
 * `block` (row pass, then column pass in place).
 * NOTE(review): the declaration of `i` and the two for-loop headers are
 * missing from this extract.
 */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    //FIXME try int temp instead of block
    // row pass on the pixel differences
    const int d0= src1[0 + i*stride] - src2[0 + i*stride];
    const int d1= src1[1 + i*stride] - src2[1 + i*stride];
    const int d2= src1[2 + i*stride] - src2[2 + i*stride];
    const int d3= src1[3 + i*stride] - src2[3 + i*stride];
    const int z0= d0 + d3;
    const int z3= d0 - d3;
    const int z1= d1 + d2;
    const int z2= d1 - d2;
    block[0 + 4*i]= z0 + z1;
    block[1 + 4*i]= 2*z3 + z2;
    block[2 + 4*i]= z0 - z1;
    block[3 + 4*i]= z3 - 2*z2;
    // column pass in place
    const int z0= block[0*4 + i] + block[3*4 + i];
    const int z3= block[0*4 + i] - block[3*4 + i];
    const int z1= block[1*4 + i] + block[2*4 + i];
    const int z2= block[1*4 + i] - block[2*4 + i];
    block[0*4 + i]= z0 + z1;
    block[1*4 + i]= 2*z3 + z2;
    block[2*4 + i]= z0 - z1;
    block[3*4 + i]= z3 - 2*z2;
1796 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1797 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a block of coefficients with dead-zone rounding (bias 1/3 for
 * intra, 1/6 for inter). The DC coefficient optionally uses a separate
 * quantizer (finer for 16x16 luma DC, coarser for chroma DC).
 * @returns index of the last non-zero coefficient
 * NOTE(review): loop headers, else branches, the zeroing path and several
 * declarations (last_non_zero, i) are missing from this extract.
 */
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
    const int * const quant_table= quant_coeff[qscale];
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
    // threshold test below is |level| vs threshold done with one unsigned compare
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
    const unsigned int threshold2= (threshold1<<1);
    // luma 16x16 DC path: quantizer shifted down by 2 (finer), table offset +18
    const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
    const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
    const unsigned int dc_threshold2= (dc_threshold1<<1);
    int level= block[0]*quant_coeff[qscale+18][0];
    if(((unsigned)(level+dc_threshold1))>dc_threshold2){
        level= (dc_bias + level)>>(QUANT_SHIFT-2);
        level= (dc_bias - level)>>(QUANT_SHIFT-2);   // negative branch
    //  last_non_zero = i;
    // chroma DC path: quantizer shifted up by 1 (coarser)
    const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
    const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
    const unsigned int dc_threshold2= (dc_threshold1<<1);
    int level= block[0]*quant_table[0];
    if(((unsigned)(level+dc_threshold1))>dc_threshold2){
        level= (dc_bias + level)>>(QUANT_SHIFT+1);
        level= (dc_bias - level)>>(QUANT_SHIFT+1);   // negative branch
    //  last_non_zero = i;
    // AC coefficients in scan order
    const int j= scantable[i];
    int level= block[j]*quant_table[j];
//   if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//      || bias-level >= (1<<(QMAT_SHIFT - 3))){
    if(((unsigned)(level+threshold1))>threshold2){
        level= (bias + level)>>QUANT_SHIFT;
        level= (bias - level)>>QUANT_SHIFT;          // negative branch
    return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicate the 4 pixels directly above
 * into all four rows. `topright` is unused by this mode.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];  // whole top row in one load
    ((uint32_t*)(src+0*stride))[0]= a;
    ((uint32_t*)(src+1*stride))[0]= a;
    ((uint32_t*)(src+2*stride))[0]= a;
    ((uint32_t*)(src+3*stride))[0]= a;
}
/**
 * 4x4 horizontal intra prediction: replicate each row's left neighbour
 * across that row. `topright` is unused by this mode.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
}
/**
 * 4x4 DC intra prediction: fill the block with the rounded mean of the
 * 4 top and 4 left neighbouring pixels. `topright` is unused.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 DC prediction when only the left neighbours are available:
 * fill with the rounded mean of the 4 left pixels.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride]
                   + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 DC prediction when only the top neighbours are available:
 * fill with the rounded mean of the 4 top pixels.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
}
/**
 * 4x4 DC prediction when no neighbours are available: fill with the
 * mid-level value 128.
 * Fix: the function's closing brace was lost in extraction; restored.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
}
/* Helper macros for the directional 4x4 prediction functions below: each
 * loads four neighbouring edge pixels into const locals (t4..t7 top-right,
 * l0..l3 left, t0..t3 top). They expect `src`, `stride` and (for the
 * top-right edge) `topright` to be in scope at the point of use. */

#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];\

#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];\

#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal down-right intra prediction (3-tap smoothing along the
 * top-left -> bottom-right diagonal).
 * NOTE(review): the LOAD_LEFT_EDGE/LOAD_TOP_EDGE invocations and the
 * chained assignments for the interior diagonals are missing from this
 * extract.
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];   // top-left corner pixel
    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/**
 * 4x4 diagonal down-left intra prediction, using the top and top-right
 * edge pixels (t0..t7) with 3-tap smoothing.
 * NOTE(review): the LOAD_TOP_EDGE/LOAD_TOP_RIGHT_EDGE invocations and the
 * chained assignments for the interior diagonals are missing from this
 * extract.
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;   // last pixel extends t7
/**
 * 4x4 vertical-right intra prediction: 2-tap averages on the main
 * diagonals, 3-tap smoothing on the half-pel ones.
 * NOTE(review): the LOAD_* invocations and the chained assignments are
 * missing from this extract.
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];   // top-left corner pixel
    const __attribute__((unused)) int unu= l3;  // l3 is loaded but unused in this mode
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/**
 * 4x4 vertical-left intra prediction: 2-tap averages for even rows,
 * 3-tap smoothing for odd rows, sampled from the top/top-right edge.
 * NOTE(review): the LOAD_* invocations and the chained assignments are
 * missing from this extract.
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    const __attribute__((unused)) int unu= t7;  // t7 is loaded but unused in this mode
    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/**
 * 4x4 horizontal-up intra prediction, interpolating down the left edge
 * (l0..l3) with alternating 2-tap and 3-tap filters.
 * NOTE(review): the LOAD_LEFT_EDGE invocation and the trailing
 * l3-replication assignments are missing from this extract.
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/**
 * 4x4 horizontal-down intra prediction: 2-tap averages on the main
 * diagonals, 3-tap smoothing on the half-pel ones, sampled from the left
 * edge and top-left corner.
 * NOTE(review): the LOAD_* invocations and the chained assignments are
 * missing from this extract.
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const int lt= src[-1-1*stride];   // top-left corner pixel
    const __attribute__((unused)) int unu= t3;  // t3 is loaded but unused in this mode
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: replicate the 16 pixels above into all
 * 16 rows (four 32-bit copies per row).
 * Fix: the loop index declaration and closing braces were lost in
 * extraction; restored.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
    }
}
/**
 * 16x16 horizontal intra prediction: replicate each row's left neighbour
 * across the whole row.
 * Fix: the loop index declaration and closing braces were lost in
 * extraction; restored.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
    }
}
/**
 * 16x16 DC intra prediction: fill the block with the rounded mean of the
 * 16 left and 16 top neighbouring pixels (spec: (sum+16)>>5).
 * Fix: the declarations, loop headers and the top-row accumulation loop
 * were lost in extraction; restored per the H.264 spec DC rule.
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];   /* left column */
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];      /* top row */
    }

    dc= 0x01010101*((dc + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction when only the left neighbours are available:
 * fill with the rounded mean of the 16 left pixels ((sum+8)>>4).
 * Fix: the declarations and loop headers were lost in extraction;
 * restored per the H.264 spec DC rule.
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction when only the top neighbours are available:
 * fill with the rounded mean of the 16 top pixels ((sum+8)>>4).
 * Fix: the declarations, accumulation loop and loop headers were lost
 * in extraction; restored per the H.264 spec DC rule.
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dc= 0x01010101*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC prediction when no neighbours are available: fill with the
 * mid-level value 128.
 * Fix: the loop index declaration and closing braces were lost in
 * extraction; restored.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
    }
}
/**
 * 16x16 plane intra prediction: fits a linear gradient (H, V, offset a) to
 * the edge pixels and fills the block with the clipped plane values.
 * The svq3 flag selects SVQ3's slightly different gradient scaling
 * (required for bit-exact SVQ3 decoding).
 * NOTE(review): the declarations of i/j/k/a/b, the svq3 if/else structure
 * and the per-row b/a update statements are missing from this extract.
 */
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
    uint8_t *cm = cropTbl + MAX_NEG_CROP;   // clipping table to 0..255
    const uint8_t * const src0 = src+7-stride;      // centre of the top edge
    const uint8_t *src1 = src+8*stride-1;           // centre of the left edge
    const uint8_t *src2 = src1-2*stride;            // == src+6*stride-1;
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    // weighted sums of symmetric edge differences
    for(k=2; k<=8; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    // SVQ3 variant of the gradient scaling
    H = ( 5*(H/4) ) / 16;
    V = ( 5*(V/4) ) / 16;
    /* required for 100% accuracy */
    i = H; H = V; V = i;
    // standard H.264 scaling
    H = ( 5*H+32 ) >> 6;
    V = ( 5*V+32 ) >> 6;
    a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
    for(j=16; j>0; --j) {
        for(i=-16; i<0; i+=4) {
            src[16+i] = cm[ (b    ) >> 5 ];
            src[17+i] = cm[ (b+  H) >> 5 ];
            src[18+i] = cm[ (b+2*H) >> 5 ];
            src[19+i] = cm[ (b+3*H) >> 5 ];
/** 16x16 plane prediction, standard H.264 flavour (svq3 compat off). */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 (chroma) vertical intra prediction: replicate the 8 pixels above
 * into all 8 rows.
 * Fix: the loop index declaration and loop header were lost in
 * extraction; restored.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
    }
}
/**
 * 8x8 (chroma) horizontal intra prediction: replicate each row's left
 * neighbour across the row.
 * Fix: the loop index declaration and loop header were lost in
 * extraction; restored.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
    }
}
/* 8x8 chroma DC prediction with no neighbours: fill with mid-range 128. */
2237 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2241 ((uint32_t*)(src+i*stride))[0]=
2242 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 chroma DC prediction from the left column only: the top 4 rows use the
 * average of the upper-left 4 neighbours (dc0), the bottom 4 rows the average
 * of the lower-left 4 (dc2). NOTE(review): loop headers and the dc0/dc2
 * declarations are missing from this excerpt. */
2246 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2252 dc0+= src[-1+i*stride];
2253 dc2+= src[-1+(i+4)*stride];
/* Round, average over 4, and splat the byte into all 4 lanes. */
2255 dc0= 0x01010101*((dc0 + 2)>>2);
2256 dc2= 0x01010101*((dc2 + 2)>>2);
2259 ((uint32_t*)(src+i*stride))[0]=
2260 ((uint32_t*)(src+i*stride))[1]= dc0;
2263 ((uint32_t*)(src+i*stride))[0]=
2264 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 chroma DC prediction from the top row only: the left 4x8 half uses the
 * average of top pixels 0..3 (dc0), the right half pixels 4..7 (dc1).
 * NOTE(review): loop headers and declarations are missing from this excerpt. */
2268 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2274 dc0+= src[i-stride];
2275 dc1+= src[4+i-stride];
2277 dc0= 0x01010101*((dc0 + 2)>>2);
2278 dc1= 0x01010101*((dc1 + 2)>>2);
/* Both row groups get the same left/right DC pair. */
2281 ((uint32_t*)(src+i*stride))[0]= dc0;
2282 ((uint32_t*)(src+i*stride))[1]= dc1;
2285 ((uint32_t*)(src+i*stride))[0]= dc0;
2286 ((uint32_t*)(src+i*stride))[1]= dc1;
/* Full 8x8 chroma DC prediction: each 4x4 quadrant gets its own DC.
 * Top-left: left+top neighbours (dc0); top-right: top only (dc1);
 * bottom-left: left only (dc2); bottom-right: average of dc1+dc2 sums (dc3).
 * NOTE(review): loop headers are missing from this excerpt. */
2291 static void pred8x8_dc_c(uint8_t *src, int stride){
2293 int dc0, dc1, dc2, dc3;
2297 dc0+= src[-1+i*stride] + src[i-stride];
2298 dc1+= src[4+i-stride];
2299 dc2+= src[-1+(i+4)*stride];
/* dc3 must be computed before dc1/dc2 are overwritten with splatted values. */
2301 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2302 dc0= 0x01010101*((dc0 + 4)>>3);
2303 dc1= 0x01010101*((dc1 + 2)>>2);
2304 dc2= 0x01010101*((dc2 + 2)>>2);
2307 ((uint32_t*)(src+i*stride))[0]= dc0;
2308 ((uint32_t*)(src+i*stride))[1]= dc1;
2311 ((uint32_t*)(src+i*stride))[0]= dc2;
2312 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 chroma plane (gradient) prediction, the 8x8 analogue of
 * pred16x16_plane_c: gradients H/V from border samples, each pixel is the
 * clipped value of a + x*H + y*V (>>5). */
2316 static void pred8x8_plane_c(uint8_t *src, int stride){
2319 uint8_t *cm = cropTbl + MAX_NEG_CROP; /* clip-to-[0,255] lookup table */
2320 const uint8_t * const src0 = src+3-stride; /* centre of the top border row */
2321 const uint8_t *src1 = src+4*stride-1; /* left border, below the centre */
2322 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2323 int H = src0[1] - src0[-1];
2324 int V = src1[0] - src2[ 0];
2325 for(k=2; k<=4; ++k) {
2326 src1 += stride; src2 -= stride;
2327 H += k*(src0[k] - src0[-k]);
2328 V += k*(src1[0] - src2[ 0]);
/* Standard H.264 chroma gradient scaling: (17*g+16)>>5. */
2330 H = ( 17*H+16 ) >> 5;
2331 V = ( 17*V+16 ) >> 5;
/* NOTE(review): the per-row update of b between iterations is missing from
 * this excerpt. */
2333 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2334 for(j=8; j>0; --j) {
2337 src[0] = cm[ (b ) >> 5 ];
2338 src[1] = cm[ (b+ H) >> 5 ];
2339 src[2] = cm[ (b+2*H) >> 5 ];
2340 src[3] = cm[ (b+3*H) >> 5 ];
2341 src[4] = cm[ (b+4*H) >> 5 ];
2342 src[5] = cm[ (b+5*H) >> 5 ];
2343 src[6] = cm[ (b+6*H) >> 5 ];
2344 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) intra predictors. They load
 * and low-pass filter (1-2-1, or 1-3 at edges) the neighbouring samples into
 * local const ints: l0..l7 (left column), t0..t7 (top row), t8..t15
 * (top-right), lt (top-left corner). has_topleft/has_topright select the
 * fallback sample when a neighbour block is unavailable. */
2349 #define SRC(x,y) src[(x)+(y)*stride]
/* NOTE(review): the "#define PL(y)" header line for this filtered-left-sample
 * macro is missing from this excerpt. */
2351 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2352 #define PREDICT_8x8_LOAD_LEFT \
2353 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2354 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2355 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2356 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* NOTE(review): the "#define PT(x)" header line is likewise missing. */
2359 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2360 #define PREDICT_8x8_LOAD_TOP \
2361 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2362 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2363 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2364 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2365 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* NOTE(review): the "#define PTR(x)" header line is likewise missing. */
2368 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2369 #define PREDICT_8x8_LOAD_TOPRIGHT \
2370 int t8, t9, t10, t11, t12, t13, t14, t15; \
2371 if(has_topright) { \
2372 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2373 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2374 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2376 #define PREDICT_8x8_LOAD_TOPLEFT \
2377 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill all 8 rows (2 dwords each) of the 8x8 block with the dword v. */
2379 #define PREDICT_8x8_DC(v) \
2381 for( y = 0; y < 8; y++ ) { \
2382 ((uint32_t*)src)[0] = \
2383 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with mid-range 0x80. */
2387 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2389 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left column (l0..l7) only.
 * NOTE(review): the PREDICT_8x8_DC(dc) fill call is missing from this excerpt. */
2391 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_LOAD_LEFT;
2394 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top row (t0..t7) only.
 * NOTE(review): the PREDICT_8x8_DC(dc) fill call is missing from this excerpt. */
2397 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_TOP;
2400 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both the filtered left column and top row.
 * NOTE(review): the PREDICT_8x8_DC(dc) fill call is missing from this excerpt. */
2403 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2405 PREDICT_8x8_LOAD_LEFT;
2406 PREDICT_8x8_LOAD_TOP;
2407 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2408 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: row y is filled with the filtered left
 * sample l{y}, splatted to 8 bytes. */
2411 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2413 PREDICT_8x8_LOAD_LEFT;
2414 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2415 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2416 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: write the filtered top row t0..t7 into row 0,
 * then copy row 0 into the remaining rows as a 64-bit load/store.
 * NOTE(review): the lines storing t0..t7 into src[0..7] are missing from
 * this excerpt. */
2419 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2422 PREDICT_8x8_LOAD_TOP;
2431 for( y = 1; y < 8; y++ )
2432 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal down-left prediction: each anti-diagonal (constant x+y)
 * is filled with a 1-2-1 filtered value of the top/top-right samples. */
2434 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2436 PREDICT_8x8_LOAD_TOP;
2437 PREDICT_8x8_LOAD_TOPRIGHT;
2438 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2439 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2440 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2441 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2442 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2443 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2444 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2445 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2446 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2447 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2448 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2449 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2450 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2451 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2452 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal down-right prediction: diagonals (constant x-y) filled
 * with 1-2-1 filtered left/top-left/top samples. */
2454 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2456 PREDICT_8x8_LOAD_TOP;
2457 PREDICT_8x8_LOAD_LEFT;
2458 PREDICT_8x8_LOAD_TOPLEFT;
2459 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2460 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2461 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2462 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2463 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2464 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2465 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2466 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2467 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2468 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2469 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2470 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2471 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2473 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternating half-pel averages
 * ((a+b+1)>>1) and 1-2-1 filtered values along a ~vertical/right slope. */
2476 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2478 PREDICT_8x8_LOAD_TOP;
2479 PREDICT_8x8_LOAD_LEFT;
2480 PREDICT_8x8_LOAD_TOPLEFT;
2481 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2482 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2483 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2484 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2485 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2486 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2487 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2488 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2489 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2490 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2491 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2492 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2493 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2494 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2495 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2496 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2497 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2498 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2499 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2500 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2501 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2502 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: alternating half-pel averages and
 * 1-2-1 filtered values along a ~horizontal/down slope. */
2504 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2506 PREDICT_8x8_LOAD_TOP;
2507 PREDICT_8x8_LOAD_LEFT;
2508 PREDICT_8x8_LOAD_TOPLEFT;
2509 SRC(0,7)= (l6 + l7 + 1) >> 1;
2510 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2511 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2512 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2513 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2514 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2515 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2516 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2517 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2518 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2519 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2520 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2521 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2522 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2523 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2524 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2525 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2526 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2527 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2528 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2529 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2530 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: alternating half-pel averages and 1-2-1
 * filtered values from the top and top-right samples. */
2532 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2534 PREDICT_8x8_LOAD_TOP;
2535 PREDICT_8x8_LOAD_TOPRIGHT;
2536 SRC(0,0)= (t0 + t1 + 1) >> 1;
2537 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2538 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2539 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2540 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2541 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2542 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2543 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2544 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2545 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2546 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2547 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2548 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2549 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2550 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2551 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2552 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2553 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2554 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2555 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2556 SRC(7,6)= (t10 + t11 + 1) >> 1;
2557 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates upward along the left
 * column; positions past the last left sample are clamped to l7. */
2559 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2561 PREDICT_8x8_LOAD_LEFT;
2562 SRC(0,0)= (l0 + l1 + 1) >> 1;
2563 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2564 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2565 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2566 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2567 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2568 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2569 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2570 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2571 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2572 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2573 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2574 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2575 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* Remaining bottom-right region: flat fill with l7. */
2576 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2577 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2578 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2579 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* The 8x8 prediction helper macros are local to the predictors above;
 * undefine them to avoid leaking into the rest of the file. */
2581 #undef PREDICT_8x8_LOAD_LEFT
2582 #undef PREDICT_8x8_LOAD_TOP
2583 #undef PREDICT_8x8_LOAD_TOPLEFT
2584 #undef PREDICT_8x8_LOAD_TOPRIGHT
2585 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in one direction (list 0 or 1):
 * computes quarter-pel luma / eighth-pel chroma source positions from the
 * cached motion vector, falls back to ff_emulated_edge_mc when the reference
 * area extends outside the picture, then applies the qpel and chroma MC
 * functions. NOTE(review): several lines (emu flag handling, if(!square)
 * guard, chroma emu conditions) are missing from this excerpt. */
2591 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2592 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2593 int src_x_offset, int src_y_offset,
2594 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2595 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units, offset to this partition's position. */
2596 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2597 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2598 const int luma_xy= (mx&3) + ((my&3)<<2); /* qpel sub-position selects the MC function */
2599 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2600 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2601 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2602 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2603 int extra_height= extra_width;
2605 const int full_mx= mx>>2;
2606 const int full_my= my>>2;
2607 const int pic_width = 16*s->mb_width;
2608 const int pic_height = 16*s->mb_height;
2610 assert(pic->data[0]);
/* Sub-pel interpolation reads up to 3 extra pixels; shrink the usable edge. */
2612 if(mx&7) extra_width -= 3;
2613 if(my&7) extra_height -= 3;
/* Reference block (partly) outside the padded picture: interpolate from a
 * locally reconstructed edge buffer instead. */
2615 if( full_mx < 0-extra_width
2616 || full_my < 0-extra_height
2617 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2618 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2619 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2620 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2624 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
/* Non-square (e.g. 16x8 as two 8x8 ops) second half, offset by delta. */
2626 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2629 if(s->flags&CODEC_FLAG_GRAY) return;
2632 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2633 src_cb= s->edge_emu_buffer;
2635 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2639 src_cr= s->edge_emu_buffer;
2641 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/* Standard (unweighted) MC for one partition: run list 0 with "put"
 * functions, then, if bi-predicted, run list 1 with "avg" functions so the
 * two predictions are averaged in place. NOTE(review): the if(list0)/if(list1)
 * guard lines and the qpix_op=qpix_avg switch line are missing from this
 * excerpt. */
2644 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2645 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2646 int x_offset, int y_offset,
2647 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2648 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2649 int list0, int list1){
2650 MpegEncContext * const s = &h->s;
2651 qpel_mc_func *qpix_op= qpix_put;
2652 h264_chroma_mc_func chroma_op= chroma_put;
/* Offsets are in chroma pixels; luma uses twice the offset. */
2654 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2655 dest_cb += x_offset + y_offset*s->uvlinesize;
2656 dest_cr += x_offset + y_offset*s->uvlinesize;
2657 x_offset += 8*s->mb_x;
2658 y_offset += 8*s->mb_y;
2661 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2662 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2663 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2664 qpix_op, chroma_op);
/* Second direction averages onto the first prediction. */
2667 chroma_op= chroma_avg;
2671 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2672 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2673 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2674 qpix_op, chroma_op);
/* Weighted-prediction MC for one partition. Bi-directional case: predict
 * both lists into separate buffers (dest and the obmc scratchpad) and blend
 * with implicit (use_weight==2) or explicit bi-weights. Uni-directional
 * case: predict, then apply explicit per-ref weight/offset in place.
 * NOTE(review): the if(list0 && list1)/else branch lines and some closing
 * braces are missing from this excerpt. */
2678 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2679 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2680 int x_offset, int y_offset,
2681 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2682 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2683 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2684 int list0, int list1){
2685 MpegEncContext * const s = &h->s;
/* Offsets are in chroma pixels; luma uses twice the offset. */
2687 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2688 dest_cb += x_offset + y_offset*s->uvlinesize;
2689 dest_cr += x_offset + y_offset*s->uvlinesize;
2690 x_offset += 8*s->mb_x;
2691 y_offset += 8*s->mb_y;
2694 /* don't optimize for luma-only case, since B-frames usually
2695 * use implicit weights => chroma too. */
2696 uint8_t *tmp_cb = s->obmc_scratchpad;
2697 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2698 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2699 int refn0 = h->ref_cache[0][ scan8[n] ];
2700 int refn1 = h->ref_cache[1][ scan8[n] ];
2702 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2703 dest_y, dest_cb, dest_cr,
2704 x_offset, y_offset, qpix_put, chroma_put);
2705 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2706 tmp_y, tmp_cb, tmp_cr,
2707 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: weights sum to 64, offset 0, log2 denom 5. */
2709 if(h->use_weight == 2){
2710 int weight0 = h->implicit_weight[refn0][refn1];
2711 int weight1 = 64 - weight0;
2712 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0);
2713 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
2714 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
/* Explicit bi-weighting with per-reference weights and summed offsets. */
2716 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2717 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2718 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2719 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2720 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2721 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2722 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2723 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2724 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Uni-directional path: single list, weight applied in place. */
2727 int list = list1 ? 1 : 0;
2728 int refn = h->ref_cache[list][ scan8[n] ];
2729 Picture *ref= &h->ref_list[list][refn];
2730 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2732 qpix_put, chroma_put);
2734 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2735 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2736 if(h->use_weight_chroma){
2737 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2738 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2739 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2740 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch MC for one partition: use the weighted path when explicit
 * weighting is on (use_weight==1), or when implicit weighting (use_weight==2)
 * is bi-predictive with a non-trivial weight (!= 32, i.e. not a plain
 * average); otherwise fall through to the standard put/avg path. */
2745 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2747 int x_offset, int y_offset,
2748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2749 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2750 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2751 int list0, int list1){
2752 if((h->use_weight==2 && list0 && list1
2753 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2754 || h->use_weight==1)
2755 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2756 x_offset, y_offset, qpix_put, chroma_put,
2757 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2759 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2760 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Inter-macroblock motion compensation driver: decomposes the macroblock by
 * partition type (16x16, 16x8, 8x16, or 8x8 with sub-partitions down to 4x4)
 * and issues one mc_part() per partition with the matching MC-function and
 * weight-table sizes. NOTE(review): the 8x8 sub-partition loop headers and
 * the "int n= 4*i" line are missing from this excerpt. */
2763 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2764 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2765 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2766 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2767 MpegEncContext * const s = &h->s;
2768 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2769 const int mb_type= s->current_picture.mb_type[mb_xy];
2771 assert(IS_INTER(mb_type));
2773 if(IS_16X16(mb_type)){
2774 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2775 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2776 &weight_op[0], &weight_avg[0],
2777 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* Two 16x8 halves; delta=8 advances dest rows for the non-square op. */
2778 }else if(IS_16X8(mb_type)){
2779 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2780 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2781 &weight_op[1], &weight_avg[1],
2782 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2783 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2784 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2785 &weight_op[1], &weight_avg[1],
2786 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* Two 8x16 halves; delta=8*linesize steps a row of 8 for the second op. */
2787 }else if(IS_8X16(mb_type)){
2788 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2789 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2790 &weight_op[2], &weight_avg[2],
2791 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2792 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2793 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2794 &weight_op[2], &weight_avg[2],
2795 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 mode: each of the 4 sub-macroblocks has its own sub_mb_type. */
2799 assert(IS_8X8(mb_type));
2802 const int sub_mb_type= h->sub_mb_type[i];
2804 int x_offset= (i&1)<<2;
2805 int y_offset= (i&2)<<1;
2807 if(IS_SUB_8X8(sub_mb_type)){
2808 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2809 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2810 &weight_op[3], &weight_avg[3],
2811 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2812 }else if(IS_SUB_8X4(sub_mb_type)){
2813 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2814 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2815 &weight_op[4], &weight_avg[4],
2816 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2817 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2818 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2819 &weight_op[4], &weight_avg[4],
2820 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2821 }else if(IS_SUB_4X8(sub_mb_type)){
2822 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2823 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2824 &weight_op[5], &weight_avg[5],
2825 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2826 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2827 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2828 &weight_op[5], &weight_avg[5],
2829 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* Remaining case: four 4x4 blocks within this 8x8. */
2832 assert(IS_SUB_4X4(sub_mb_type));
2834 int sub_x_offset= x_offset + 2*(j&1);
2835 int sub_y_offset= y_offset + (j&2);
2836 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2837 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2838 &weight_op[6], &weight_avg[6],
2839 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* One-time construction of all CAVLC VLC tables (coeff_token, total_zeros,
 * run_before, and their chroma-DC variants) from the static length/bits
 * arrays. Guarded by a static flag so tables are built only once.
 * NOTE(review): the flag test/set and loop header lines are missing from
 * this excerpt; not thread-safe as written — TODO confirm callers serialize. */
2846 static void decode_init_vlc(H264Context *h){
2847 static int done = 0;
2853 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2854 &chroma_dc_coeff_token_len [0], 1, 1,
2855 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2858 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2859 &coeff_token_len [i][0], 1, 1,
2860 &coeff_token_bits[i][0], 1, 1, 1);
2864 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2865 &chroma_dc_total_zeros_len [i][0], 1, 1,
2866 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2868 for(i=0; i<15; i++){
2869 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2870 &total_zeros_len [i][0], 1, 1,
2871 &total_zeros_bits[i][0], 1, 1, 1);
2875 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2876 &run_len [i][0], 1, 1,
2877 &run_bits[i][0], 1, 1, 1);
/* Runs of 7+ use a separate, larger table (index 6 of the run arrays). */
2879 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2880 &run_len [6][0], 1, 1,
2881 &run_bits[6][0], 1, 1, 1);
/**
2886 * Sets the intra prediction function pointers.
 * Fills the 4x4, 8x8-luma, 8x8-chroma, and 16x16 prediction tables with the
 * plain C implementations; platform-specific code may override these later.
 */
2888 static void init_pred_ptrs(H264Context *h){
2889 // MpegEncContext * const s = &h->s;
2891 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2892 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2893 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2894 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2895 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2896 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2897 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2898 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2899 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2900 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2901 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2902 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2904 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2905 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2906 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2907 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2908 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2909 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2910 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2911 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2912 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2913 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2914 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2915 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2917 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2918 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2919 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2920 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2921 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2922 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2923 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
/* 16x16 modes reuse the 8x8-chroma mode indices. */
2925 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2926 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2927 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2928 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2929 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2930 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2931 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/* Releases all per-sequence tables allocated by alloc_tables(). av_freep
 * both frees and NULLs each pointer, so repeated calls are safe; slice_table
 * is only a view into slice_table_base and is just reset to NULL. */
2934 static void free_tables(H264Context *h){
2935 av_freep(&h->intra4x4_pred_mode);
2936 av_freep(&h->chroma_pred_mode_table);
2937 av_freep(&h->cbp_table);
2938 av_freep(&h->mvd_table[0]);
2939 av_freep(&h->mvd_table[1]);
2940 av_freep(&h->direct_table);
2941 av_freep(&h->non_zero_count);
2942 av_freep(&h->slice_table_base);
2943 av_freep(&h->top_borders[1]);
2944 av_freep(&h->top_borders[0]);
2945 h->slice_table= NULL;
2947 av_freep(&h->mb2b_xy);
2948 av_freep(&h->mb2b8_xy);
2950 av_freep(&h->s.obmc_scratchpad);
/* Builds the 8x8 dequantization tables (one per QP 0..51) for intra/inter
 * scaling lists. If both scaling matrices are identical, the second table
 * aliases the first. NOTE(review): inner loop headers and the idx
 * computation from q are missing from this excerpt. */
2953 static void init_dequant8_coeff_table(H264Context *h){
2955 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2956 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2958 for(i=0; i<2; i++ ){
/* Identical matrices: share the table instead of recomputing it. */
2959 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2960 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2964 for(q=0; q<52; q++){
2965 int shift = div6[q];
2968 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2969 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the six 4x4 dequantization tables (one per scaling list, per QP
 * 0..51). Lists with identical scaling matrices share a buffer. The table is
 * stored transposed when a non-reference IDCT implementation is in use.
 * NOTE(review): the inner j loop header and the idx computation from q are
 * missing from this excerpt. */
2974 static void init_dequant4_coeff_table(H264Context *h){
2976 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2977 for(i=0; i<6; i++ ){
2978 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* Reuse an earlier list's table when the matrices match. */
2980 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2981 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2988 for(q=0; q<52; q++){
2989 int shift = div6[q] + 2;
2992 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2993 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2994 h->pps.scaling_matrix4[i][x]) << shift;
/* Builds all dequant tables for the current PPS; when transform bypass
 * (lossless) is enabled, QP 0 entries are forced to the identity scale 1<<6.
 * NOTE(review): the loop headers over i/x are missing from this excerpt. */
2999 static void init_dequant_tables(H264Context *h){
3001 init_dequant4_coeff_table(h);
3002 if(h->pps.transform_8x8_mode)
3003 init_dequant8_coeff_table(h);
3004 if(h->sps.transform_bypass){
3007 h->dequant4_coeff[i][0][x] = 1<<6;
3008 if(h->pps.transform_8x8_mode)
3011 h->dequant8_coeff[i][0][x] = 1<<6;
/**
3018 * needs width/height
 * Allocates all per-sequence decoder tables sized from mb_width/mb_height
 * (plus one extra macroblock row of padding) and precomputes the
 * macroblock-to-motion-vector index maps. Returns 0 on success; CHECKED_ALLOCZ
 * presumably jumps to a cleanup label on failure — the fail path and return
 * are missing from this excerpt.
 */
3020 static int alloc_tables(H264Context *h){
3021 MpegEncContext * const s = &h->s;
3022 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3025 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3027 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3028 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3029 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3030 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3031 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables. */
3033 if( h->pps.cabac ) {
3034 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3035 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3036 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3037 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table skips the padding row/column. */
3040 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3041 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3043 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3044 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* Map each macroblock to its 4x4-block (b) and 8x8-block (b8) indices. */
3045 for(y=0; y<s->mb_height; y++){
3046 for(x=0; x<s->mb_width; x++){
3047 const int mb_xy= x + y*s->mb_stride;
3048 const int b_xy = 4*x + 4*y*h->b_stride;
3049 const int b8_xy= 2*x + 2*y*h->b8_stride;
3051 h->mb2b_xy [mb_xy]= b_xy;
3052 h->mb2b8_xy[mb_xy]= b8_xy;
/* Scratchpad is allocated lazily in frame_start (needs linesize). */
3056 s->obmc_scratchpad = NULL;
3058 if(!h->dequant4_coeff[0])
3059 init_dequant_tables(h);
/* Initialization shared by decoder (and encoder) paths: copies dimensions
 * from the AVCodecContext and seeds the PPS scaling matrices with the flat
 * default (all 16, i.e. no custom scaling). */
3067 static void common_init(H264Context *h){
3068 MpegEncContext * const s = &h->s;
3070 s->width = s->avctx->width;
3071 s->height = s->avctx->height;
3072 s->codec_id= s->avctx->codec->id;
3076 h->dequant_coeff_pps= -1; /* no PPS has been used to build dequant tables yet */
3077 s->unrestricted_mv=1;
3078 s->decode=1; //FIXME
3080 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3081 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets up MpegEncContext defaults, the H.264 output
 * format and pixel format, and detects AVCC-style extradata (first byte == 1
 * means length-prefixed NALs rather than Annex B start codes).
 * NOTE(review): the common_init call, thread/extradata handling lines and
 * return are missing from this excerpt. */
3084 static int decode_init(AVCodecContext *avctx){
3085 H264Context *h= avctx->priv_data;
3086 MpegEncContext * const s = &h->s;
3088 MPV_decode_defaults(s);
3093 s->out_format = FMT_H264;
3094 s->workaround_bugs= avctx->workaround_bugs;
3097 // s->decode_mb= ff_h263_decode_mb;
3099 avctx->pix_fmt= PIX_FMT_YUV420P;
/* AVCC extradata: configuration-record version byte is 1. */
3103 if(avctx->extradata_size > 0 && avctx->extradata &&
3104 *(char *)avctx->extradata == 1){
/* Per-frame setup: starts MPV/error-resilience frame handling, precomputes
 * the per-block pixel offsets used during reconstruction (entries 0..23 for
 * frame, 24..47 presumably for field/MBAFF given the doubled stride — TODO
 * confirm), and lazily allocates the bi-weight scratch buffer.
 * NOTE(review): the chroma loop header and return lines are missing from
 * this excerpt. */
3114 static int frame_start(H264Context *h){
3115 MpegEncContext * const s = &h->s;
3118 if(MPV_frame_start(s, s->avctx) < 0)
3120 ff_er_frame_start(s);
3122 assert(s->linesize && s->uvlinesize);
/* scan8 maps block index -> cache position; &7 / >>3 recover x / y. */
3124 for(i=0; i<16; i++){
3125 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3126 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3129 h->block_offset[16+i]=
3130 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3131 h->block_offset[24+16+i]=
3132 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3135 /* can't be in alloc_tables because linesize isn't known there.
3136 * FIXME: redo bipred weight to not require extra buffer? */
3137 if(!s->obmc_scratchpad)
3138 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3140 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
// Save the right column and bottom row of the current macroblock into
// h->left_border / h->top_borders so the deblocking filter of the next
// macroblock can restore the unfiltered pixels it needs.
3144 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3145 MpegEncContext * const s = &h->s;
3149 src_cb -= uvlinesize;
3150 src_cr -= uvlinesize;
3152 // There are two lines saved, the line above the top macroblock of a pair,
3153 // and the line above the bottom macroblock
3154 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3155 for(i=1; i<17; i++){
3156 h->left_border[i]= src_y[15+i* linesize]; // rightmost luma column (16 rows)
     // Bottom luma row (16 bytes) saved as two 64-bit stores.
3159 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3162 if(!(s->flags&CODEC_FLAG_GRAY)){ // chroma only when not decoding gray
3163 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3164 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3166 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3167 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3169 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3170 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swap (xchg!=0) or copy the saved border pixels with the picture edges around
// the current macroblock. Used before/after intra prediction so prediction
// reads unfiltered neighbor samples while the picture keeps filtered ones.
3174 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3175 MpegEncContext * const s = &h->s;
3178 int deblock_left = (s->mb_x > 0);
3179 int deblock_top = (s->mb_y > 0);
3181 src_y -= linesize + 1; // step back to include the top/left border pixel
3182 src_cb -= uvlinesize + 1;
3183 src_cr -= uvlinesize + 1;
3185 #define XCHG(a,b,t,xchg)\
3192 for(i = !deblock_top; i<17; i++){
3193 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3198 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3199 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3200 if(s->mb_x+1 < s->mb_width){ // also exchange the top-right neighbor's border
3201 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3205 if(!(s->flags&CODEC_FLAG_GRAY)){
3207 for(i = !deblock_top; i<9; i++){
3208 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3209 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3213 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3214 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border: saves borders for a whole macroblock
// pair (32 luma rows, two saved top lines — top_borders[0] and [1]).
3219 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3220 MpegEncContext * const s = &h->s;
3223 src_y -= 2 * linesize;
3224 src_cb -= 2 * uvlinesize;
3225 src_cr -= 2 * uvlinesize;
3227 // There are two lines saved, the line above the top macroblock of a pair,
3228 // and the line above the bottom macroblock
3229 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3230 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3231 for(i=2; i<34; i++){
3232 h->left_border[i]= src_y[15+i* linesize]; // rightmost luma column, 32 rows
     // Last two luma rows of the pair -> the two saved top lines.
3235 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3236 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3237 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3238 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3240 if(!(s->flags&CODEC_FLAG_GRAY)){
3241 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3242 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3243 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3244 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3245 for(i=2; i<18; i++){
3246 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3247 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3249 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3250 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3251 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3252 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF variant of xchg_mb_border: exchange/copy saved borders for a
// macroblock pair (two top lines, 32-row left column) around intra prediction.
3256 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3257 MpegEncContext * const s = &h->s;
3260 int deblock_left = (s->mb_x > 0);
3261 int deblock_top = (s->mb_y > 0);
3263 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3265 src_y -= 2 * linesize + 1; // step back two rows (pair) plus left border pixel
3266 src_cb -= 2 * uvlinesize + 1;
3267 src_cr -= 2 * uvlinesize + 1;
3269 #define XCHG(a,b,t,xchg)\
3276 for(i = (!deblock_top)<<1; i<34; i++){ // skip the two top rows at picture top
3277 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3282 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3283 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3284 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3285 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3288 if(!(s->flags&CODEC_FLAG_GRAY)){
3290 for(i = (!deblock_top) << 1; i<18; i++){
3291 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3292 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3296 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3297 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3298 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3299 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level decode of one macroblock: computes destination pointers,
// runs intra prediction or inter motion compensation, adds residuals
// (H.264 IDCT or SVQ3 path), and applies/schedules the deblocking filter.
3304 static void hl_decode_mb(H264Context *h){
3305 MpegEncContext * const s = &h->s;
3306 const int mb_x= s->mb_x;
3307 const int mb_y= s->mb_y;
3308 const int mb_xy= mb_x + mb_y*s->mb_stride;
3309 const int mb_type= s->current_picture.mb_type[mb_xy];
3310 uint8_t *dest_y, *dest_cb, *dest_cr;
3311 int linesize, uvlinesize /*dct_offset*/;
3313 int *block_offset = &h->block_offset[0];
3314 const unsigned int bottom = mb_y & 1; // bottom MB of an MBAFF pair
3315 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3316 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     // Destination pointers into the current picture planes.
3321 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3322 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3323 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
     // Field decoding: double strides and use the field block-offset table.
3325 if (h->mb_field_decoding_flag) {
3326 linesize = s->linesize * 2;
3327 uvlinesize = s->uvlinesize * 2;
3328 block_offset = &h->block_offset[24];
3329 if(mb_y&1){ //FIXME move out of this func?
3330 dest_y -= s->linesize*15;
3331 dest_cb-= s->uvlinesize*7;
3332 dest_cr-= s->uvlinesize*7;
3335 linesize = s->linesize;
3336 uvlinesize = s->uvlinesize;
3337 // dct_offset = s->linesize * 16;
     // Pick the residual-add function: plain pixel add under lossless
     // transform bypass, otherwise the 8x8 or 4x4 IDCT-add.
3340 idct_add = transform_bypass
3341 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3342 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
     // PCM macroblock: raw samples were parsed into h->mb; just copy them out.
3344 if (IS_INTRA_PCM(mb_type)) {
3347 // The pixels are stored in h->mb array in the same order as levels,
3348 // copy them in output in the correct order.
3349 for(i=0; i<16; i++) {
3350 for (y=0; y<4; y++) {
3351 for (x=0; x<4; x++) {
3352 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3356 for(i=16; i<16+4; i++) {
3357 for (y=0; y<4; y++) {
3358 for (x=0; x<4; x++) {
3359 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3363 for(i=20; i<20+4; i++) {
3364 for (y=0; y<4; y++) {
3365 for (x=0; x<4; x++) {
3366 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
     // Intra macroblock: temporarily swap in unfiltered borders (if the
     // deblocker runs), then do chroma/luma prediction.
3371 if(IS_INTRA(mb_type)){
3372 if(h->deblocking_filter) {
3373 if (h->mb_aff_frame) {
3375 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3377 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3381 if(!(s->flags&CODEC_FLAG_GRAY)){
3382 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3383 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3386 if(IS_INTRA4x4(mb_type)){
3388 if(IS_8x8DCT(mb_type)){
3389 for(i=0; i<16; i+=4){ // 8x8 intra: one prediction+IDCT per 8x8 block
3390 uint8_t * const ptr= dest_y + block_offset[i];
3391 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3392 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3393 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3394 if(h->non_zero_count_cache[ scan8[i] ])
3395 idct_add(ptr, h->mb + i*16, linesize);
3398 for(i=0; i<16; i++){ // 4x4 intra: predict then add residual per block
3399 uint8_t * const ptr= dest_y + block_offset[i];
3401 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
     // Modes needing top-right samples: synthesize them by replicating
     // the last available top pixel when the neighbor is unavailable.
3404 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3405 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3406 assert(mb_y || linesize <= block_offset[i]);
3407 if(!topright_avail){
3408 tr= ptr[3 - linesize]*0x01010101;
3409 topright= (uint8_t*) &tr;
3411 topright= ptr + 4 - linesize;
3415 h->pred4x4[ dir ](ptr, topright, linesize);
3416 if(h->non_zero_count_cache[ scan8[i] ]){
3417 if(s->codec_id == CODEC_ID_H264)
3418 idct_add(ptr, h->mb + i*16, linesize);
3420 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); // SVQ3 shares this decoder
     // 16x16 intra: full-MB prediction plus DC-coefficient dequant/IDCT.
3425 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3426 if(s->codec_id == CODEC_ID_H264){
3427 if(!transform_bypass)
3428 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3430 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3432 if(h->deblocking_filter) {
3433 if (h->mb_aff_frame) {
3435 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3436 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3437 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3439 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0); // restore filtered borders
3443 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
     // Inter macroblock (H.264 only): motion compensation.
3446 }else if(s->codec_id == CODEC_ID_H264){
3447 hl_motion(h, dest_y, dest_cb, dest_cr,
3448 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3449 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3450 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     // Luma residuals for non-intra4x4 macroblocks.
3454 if(!IS_INTRA4x4(mb_type)){
3455 if(s->codec_id == CODEC_ID_H264){
3456 const int di = IS_8x8DCT(mb_type) ? 4 : 1; // step 4 blocks at a time for 8x8 DCT
3457 for(i=0; i<16; i+=di){
3458 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3459 uint8_t * const ptr= dest_y + block_offset[i];
3460 idct_add(ptr, h->mb + i*16, linesize);
3464 for(i=0; i<16; i++){
3465 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3466 uint8_t * const ptr= dest_y + block_offset[i];
3467 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     // Chroma residuals: dequant chroma DC, then per-4x4 IDCT-add.
3473 if(!(s->flags&CODEC_FLAG_GRAY)){
3474 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3475 if(!transform_bypass){
3476 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3477 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3479 if(s->codec_id == CODEC_ID_H264){
3480 for(i=16; i<16+4; i++){
3481 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3482 uint8_t * const ptr= dest_cb + block_offset[i];
3483 idct_add(ptr, h->mb + i*16, uvlinesize);
3486 for(i=20; i<20+4; i++){
3487 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3488 uint8_t * const ptr= dest_cr + block_offset[i];
3489 idct_add(ptr, h->mb + i*16, uvlinesize);
3493 for(i=16; i<16+4; i++){
3494 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3495 uint8_t * const ptr= dest_cb + block_offset[i];
3496 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3499 for(i=20; i<20+4; i++){
3500 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3501 uint8_t * const ptr= dest_cr + block_offset[i];
3502 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
     // Deblocking: for MBAFF filter the whole pair once the bottom MB is done;
     // otherwise back up borders and filter this macroblock now.
3508 if(h->deblocking_filter) {
3509 if (h->mb_aff_frame) {
3510 const int mb_y = s->mb_y - 1; // shadows outer mb_y: top MB of the pair
3511 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3512 const int mb_xy= mb_x + mb_y*s->mb_stride;
3513 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3514 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3515 uint8_t tmp = s->current_picture.data[1][384]; // debug canary pixel (see tprintf below)
3516 if (!bottom) return;
3517 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3518 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3519 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3521 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3522 // TODO deblock a pair
3525 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3526 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3527 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3528 if (tmp != s->current_picture.data[1][384]) {
3529 tprintf("modified pixel 8,1 (1)\n");
3533 tprintf("call mbaff filter_mb\n");
3534 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3535 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3536 if (tmp != s->current_picture.data[1][384]) {
3537 tprintf("modified pixel 8,1 (2)\n");
3540 tprintf("call filter_mb\n");
3541 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3542 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3543 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3549 * fills the default_ref_list (L0, and L1 for B slices) from the short/long
     * term reference arrays, ordered per the H.264 default reference list rules.
3551 static int fill_default_ref_list(H264Context *h){
3552 MpegEncContext * const s = &h->s;
3554 int smallest_poc_greater_than_current = -1;
3555 Picture sorted_short_ref[32];
3557 if(h->slice_type==B_TYPE){
3561 /* sort frame according to poc in B slice */
     // Selection sort of short refs by ascending POC; also record the first
     // entry whose POC is >= the current picture's POC (the L0/L1 split point).
3562 for(out_i=0; out_i<h->short_ref_count; out_i++){
3564 int best_poc=INT_MAX;
3566 for(i=0; i<h->short_ref_count; i++){
3567 const int poc= h->short_ref[i]->poc;
3568 if(poc > limit && poc < best_poc){
3574 assert(best_i != INT_MIN);
3577 sorted_short_ref[out_i]= *h->short_ref[best_i];
3578 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3579 if (-1 == smallest_poc_greater_than_current) {
3580 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3581 smallest_poc_greater_than_current = out_i;
3587 if(s->picture_structure == PICT_FRAME){
3588 if(h->slice_type==B_TYPE){
3590 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3592 // find the largest poc
     // B slice: walk outward from the split point — forward for L0, backward
     // for L1 — then append long-term refs by index.
3593 for(list=0; list<2; list++){
3596 int step= list ? -1 : 1;
3598 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3599 while(j<0 || j>= h->short_ref_count){
3600 if(j != -99 && step == (list ? -1 : 1))
3603 j= smallest_poc_greater_than_current + (step>>1);
3605 if(sorted_short_ref[j].reference != 3) continue;
3606 h->default_ref_list[list][index ]= sorted_short_ref[j];
3607 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3610 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3611 if(h->long_ref[i] == NULL) continue;
3612 if(h->long_ref[i]->reference != 3) continue;
3614 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3615 h->default_ref_list[ list ][index++].pic_id= i;; // NOTE(review): stray double semicolon, harmless
3618 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3619 // swap the two first elements of L1 when
3620 // L0 and L1 are identical
3621 Picture temp= h->default_ref_list[1][0];
3622 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3623 h->default_ref_list[1][1] = temp;
3626 if(index < h->ref_count[ list ])
3627 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
     // P slice: short-term refs in stored order, then long-term refs.
3631 for(i=0; i<h->short_ref_count; i++){
3632 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3633 h->default_ref_list[0][index ]= *h->short_ref[i];
3634 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3636 for(i = 0; i < 16; i++){
3637 if(h->long_ref[i] == NULL) continue;
3638 if(h->long_ref[i]->reference != 3) continue;
3639 h->default_ref_list[0][index ]= *h->long_ref[i];
3640 h->default_ref_list[0][index++].pic_id= i;; // NOTE(review): stray double semicolon, harmless
3642 if(index < h->ref_count[0])
3643 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3646 if(h->slice_type==B_TYPE){
3648 //FIXME second field blah
3652 for (i=0; i<h->ref_count[0]; i++) {
3653 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3655 if(h->slice_type==B_TYPE){
3656 for (i=0; i<h->ref_count[1]; i++) {
3657 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
// Forward declarations: the debug dump helpers are defined further below but
// are called from decode_ref_pic_list_reordering.
3664 static void print_short_term(H264Context *h);
3665 static void print_long_term(H264Context *h);
// Parse ref_pic_list_reordering() from the slice header and apply the
// reordering commands to h->ref_list, starting from the default lists.
3667 static int decode_ref_pic_list_reordering(H264Context *h){
3668 MpegEncContext * const s = &h->s;
3671 print_short_term(h);
3673 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3675 for(list=0; list<2; list++){
3676 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3678 if(get_bits1(&s->gb)){ // ref_pic_list_reordering_flag
3679 int pred= h->curr_pic_num;
3681 for(index=0; ; index++){
3682 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3685 Picture *ref = NULL;
3687 if(reordering_of_pic_nums_idc==3) // 3 == end of reordering commands
3690 if(index >= h->ref_count[list]){
3691 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3695 if(reordering_of_pic_nums_idc<3){
3696 if(reordering_of_pic_nums_idc<2){ // 0/1: short-term, by pic_num difference
3697 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3699 if(abs_diff_pic_num >= h->max_pic_num){
3700 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3704 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3705 else pred+= abs_diff_pic_num;
3706 pred &= h->max_pic_num - 1; // wrap modulo max_pic_num
3708 for(i= h->short_ref_count-1; i>=0; i--){
3709 ref = h->short_ref[i];
3710 assert(ref->reference == 3);
3711 assert(!ref->long_ref);
3712 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3716 ref->pic_id= ref->frame_num;
3718 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3719 ref = h->long_ref[pic_id];
3720 ref->pic_id= pic_id;
3721 assert(ref->reference == 3);
3722 assert(ref->long_ref);
3727 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3728 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
     // Insert ref at 'index': shift down the entry that matches it (or the
     // tail), per the spec's reordering process.
3730 for(i=index; i+1<h->ref_count[list]; i++){
3731 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3734 for(; i > index; i--){
3735 h->ref_list[list][i]= h->ref_list[list][i-1];
3737 h->ref_list[list][index]= *ref;
3740 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3746 if(h->slice_type!=B_TYPE) break; // only B slices have a second list
     // Replace missing (NULL data) entries with the current picture.
3748 for(list=0; list<2; list++){
3749 for(index= 0; index < h->ref_count[list]; index++){
3750 if(!h->ref_list[list][index].data[0])
3751 h->ref_list[list][index]= s->current_picture;
3753 if(h->slice_type!=B_TYPE) break;
3756 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3757 direct_dist_scale_factor(h);
3758 direct_ref_list_init(h);
// Parse pred_weight_table() from the slice header: explicit luma/chroma
// weights and offsets per reference, setting h->use_weight[_chroma] when any
// entry differs from the defaults (1<<denom weight, 0 offset).
3762 static int pred_weight_table(H264Context *h){
3763 MpegEncContext * const s = &h->s;
3765 int luma_def, chroma_def;
3768 h->use_weight_chroma= 0;
3769 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3770 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3771 luma_def = 1<<h->luma_log2_weight_denom; // default (identity) luma weight
3772 chroma_def = 1<<h->chroma_log2_weight_denom;
3774 for(list=0; list<2; list++){
3775 for(i=0; i<h->ref_count[list]; i++){
3776 int luma_weight_flag, chroma_weight_flag;
3778 luma_weight_flag= get_bits1(&s->gb);
3779 if(luma_weight_flag){
3780 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3781 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3782 if( h->luma_weight[list][i] != luma_def
3783 || h->luma_offset[list][i] != 0)
3786 h->luma_weight[list][i]= luma_def;
3787 h->luma_offset[list][i]= 0;
3790 chroma_weight_flag= get_bits1(&s->gb);
3791 if(chroma_weight_flag){
3794 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb); // j loops over Cb/Cr (loop header not visible here)
3795 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3796 if( h->chroma_weight[list][i][j] != chroma_def
3797 || h->chroma_offset[list][i][j] != 0)
3798 h->use_weight_chroma= 1;
3803 h->chroma_weight[list][i][j]= chroma_def;
3804 h->chroma_offset[list][i][j]= 0;
3808 if(h->slice_type != B_TYPE) break; // P slices only have list 0
3810 h->use_weight= h->use_weight || h->use_weight_chroma;
// Compute implicit bi-prediction weights from POC distances (td/tb scaling);
// falls back to equal 32/32 weights when the scale factor is out of range.
3814 static void implicit_weight_table(H264Context *h){
3815 MpegEncContext * const s = &h->s;
3817 int cur_poc = s->current_picture_ptr->poc;
3819 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3820 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
     // Current picture is temporally centered between the two refs:
     // implicit weighting degenerates to plain averaging.
3822 h->use_weight_chroma= 0;
3827 h->use_weight_chroma= 2;
3828 h->luma_log2_weight_denom= 5;
3829 h->chroma_log2_weight_denom= 5;
3832 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3833 int poc0 = h->ref_list[0][ref0].poc;
3834 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3835 int poc1 = h->ref_list[1][ref1].poc;
3836 int td = clip(poc1 - poc0, -128, 127); // temporal distance between refs
3838 int tb = clip(cur_poc - poc0, -128, 127);
3839 int tx = (16384 + (ABS(td) >> 1)) / td;
3840 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3841 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3842 h->implicit_weight[ref0][ref1] = 32; // out of range -> equal weights
3844 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3846 h->implicit_weight[ref0][ref1] = 32;
// Drop the reference status of a picture, unless it is still queued for
// output (delayed_output_pic or in the delayed_pic list).
3851 static inline void unreference_pic(H264Context *h, Picture *pic){
3854 if(pic == h->delayed_output_pic)
3857 for(i = 0; h->delayed_pic[i]; i++)
3858 if(pic == h->delayed_pic[i]){
3866 * instantaneous decoder refresh: unreference and clear every long-term and
     * short-term reference picture.
3868 static void idr(H264Context *h){
3871 for(i=0; i<16; i++){
3872 if (h->long_ref[i] != NULL) {
3873 unreference_pic(h, h->long_ref[i]);
3874 h->long_ref[i]= NULL;
3877 h->long_ref_count=0;
3879 for(i=0; i<h->short_ref_count; i++){
3880 unreference_pic(h, h->short_ref[i]);
3881 h->short_ref[i]= NULL;
3883 h->short_ref_count=0;
3886 /* forget old pics after a seek: clear the delayed-output queue and drop the
     * reference flag of the picture currently being decoded */
3887 static void flush_dpb(AVCodecContext *avctx){
3888 H264Context *h= avctx->priv_data;
3891 h->delayed_pic[i]= NULL;
3892 h->delayed_output_pic= NULL;
3894 if(h->s.current_picture_ptr)
3895 h->s.current_picture_ptr->reference= 0;
     * Remove the short-term reference with the given frame_num from
     * h->short_ref, compacting the array.
3900 * @return the removed picture or NULL if an error occurs
3902 static Picture * remove_short(H264Context *h, int frame_num){
3903 MpegEncContext * const s = &h->s;
3906 if(s->avctx->debug&FF_DEBUG_MMCO)
3907 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3909 for(i=0; i<h->short_ref_count; i++){
3910 Picture *pic= h->short_ref[i];
3911 if(s->avctx->debug&FF_DEBUG_MMCO)
3912 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3913 if(pic->frame_num == frame_num){
3914 h->short_ref[i]= NULL;
3915 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3916 h->short_ref_count--;
     * Remove (and return) the long-term reference at index i.
3925 * @return the removed picture or NULL if an error occurs
3927 static Picture * remove_long(H264Context *h, int i){
3930 pic= h->long_ref[i];
3931 h->long_ref[i]= NULL;
3932 if(pic) h->long_ref_count--; // slot may already have been empty
3938 * print short term list (debug helper, active only with FF_DEBUG_MMCO)
3940 static void print_short_term(H264Context *h) {
3942 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3943 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3944 for(i=0; i<h->short_ref_count; i++){
3945 Picture *pic= h->short_ref[i];
3946 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3952 * print long term list (debug helper, active only with FF_DEBUG_MMCO)
3954 static void print_long_term(H264Context *h) {
3956 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3957 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3958 for(i = 0; i < 16; i++){
3959 Picture *pic= h->long_ref[i];
3961 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3968 * Executes the reference picture marking (memory management control operations),
     * then inserts the current picture at the head of the short-term list unless
     * an MMCO marked it long-term.
3970 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3971 MpegEncContext * const s = &h->s;
3973 int current_is_long=0;
3976 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3977 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3979 for(i=0; i<mmco_count; i++){
3980 if(s->avctx->debug&FF_DEBUG_MMCO)
3981 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3983 switch(mmco[i].opcode){
3984 case MMCO_SHORT2UNUSED: // mark a short-term ref unused
3985 pic= remove_short(h, mmco[i].short_frame_num);
3987 unreference_pic(h, pic);
3988 else if(s->avctx->debug&FF_DEBUG_MMCO)
3989 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
3991 case MMCO_SHORT2LONG: // move a short-term ref to a long-term slot
3992 pic= remove_long(h, mmco[i].long_index);
3993 if(pic) unreference_pic(h, pic); // slot may have been occupied
3995 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3996 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3997 h->long_ref_count++;
3999 case MMCO_LONG2UNUSED: // mark a long-term ref unused
4000 pic= remove_long(h, mmco[i].long_index);
4002 unreference_pic(h, pic);
4003 else if(s->avctx->debug&FF_DEBUG_MMCO)
4004 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
4007 pic= remove_long(h, mmco[i].long_index);
4008 if(pic) unreference_pic(h, pic);
4010 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr; // MMCO_LONG: current pic becomes long-term
4011 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4012 h->long_ref_count++;
4016 case MMCO_SET_MAX_LONG:
4017 assert(mmco[i].long_index <= 16);
4018 // just remove the long term which index is greater than new max
4019 for(j = mmco[i].long_index; j<16; j++){
4020 pic = remove_long(h, j);
4021 if (pic) unreference_pic(h, pic);
4025 while(h->short_ref_count){ // MMCO_RESET path: drop all references
4026 pic= remove_short(h, h->short_ref[0]->frame_num);
4027 unreference_pic(h, pic);
4029 for(j = 0; j < 16; j++) {
4030 pic= remove_long(h, j);
4031 if(pic) unreference_pic(h, pic);
4038 if(!current_is_long){
     // A short ref with the current frame_num must not already exist.
4039 pic= remove_short(h, s->current_picture_ptr->frame_num);
4041 unreference_pic(h, pic);
4042 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4045 if(h->short_ref_count)
4046 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4048 h->short_ref[0]= s->current_picture_ptr;
4049 h->short_ref[0]->long_ref=0;
4050 h->short_ref_count++;
4053 print_short_term(h);
// Parse dec_ref_pic_marking() from the slice header into h->mmco[]; for
// sliding-window mode synthesizes a SHORT2UNUSED op when the DPB is full.
4058 static int decode_ref_pic_marking(H264Context *h){
4059 MpegEncContext * const s = &h->s;
4062 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4063 s->broken_link= get_bits1(&s->gb) -1; // no_output_of_prior_pics_flag
4064 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4065 if(h->mmco[0].long_index == -1)
4068 h->mmco[0].opcode= MMCO_LONG;
4072 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4073 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4074 MMCOOpcode opcode= get_ue_golomb(&s->gb);; // NOTE(review): stray double semicolon, harmless
4076 h->mmco[i].opcode= opcode;
4077 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4078 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4079 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4080 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4084 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4085 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4086 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4087 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4092 if(opcode > MMCO_LONG){
4093 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4096 if(opcode == MMCO_END)
     // Sliding window: when the DPB is full, emit an op removing the
     // oldest short-term reference.
4101 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4103 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4104 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4105 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Compute the picture order count (POC) for the current picture using the
// SPS poc_type (0: lsb/msb wraparound, 1: explicit cycle offsets, 2: derived
// from frame_num), then store the field/frame POCs on the current picture.
4115 static int init_poc(H264Context *h){
4116 MpegEncContext * const s = &h->s;
4117 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4120 if(h->nal_unit_type == NAL_IDR_SLICE){
4121 h->frame_num_offset= 0;
4123 if(h->frame_num < h->prev_frame_num) // frame_num wrapped
4124 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4126 h->frame_num_offset= h->prev_frame_num_offset;
4129 if(h->sps.poc_type==0){
4130 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4132 if(h->nal_unit_type == NAL_IDR_SLICE){
     // Detect poc_lsb wraparound in either direction to update poc_msb.
4137 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4138 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4139 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4140 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4142 h->poc_msb = h->prev_poc_msb;
4143 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4145 field_poc[1] = h->poc_msb + h->poc_lsb;
4146 if(s->picture_structure == PICT_FRAME)
4147 field_poc[1] += h->delta_poc_bottom;
4148 }else if(h->sps.poc_type==1){
4149 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4152 if(h->sps.poc_cycle_length != 0)
4153 abs_frame_num = h->frame_num_offset + h->frame_num;
4157 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4160 expected_delta_per_poc_cycle = 0;
4161 for(i=0; i < h->sps.poc_cycle_length; i++)
4162 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4164 if(abs_frame_num > 0){
4165 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4166 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4168 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4169 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4170 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4174 if(h->nal_ref_idc == 0)
4175 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4177 field_poc[0] = expectedpoc + h->delta_poc[0];
4178 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4180 if(s->picture_structure == PICT_FRAME)
4181 field_poc[1] += h->delta_poc[1];
     // poc_type 2 (implicit in the else path): POC derived from frame_num,
     // non-reference pictures get odd values one less than the next reference.
4184 if(h->nal_unit_type == NAL_IDR_SLICE){
4187 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4188 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4194 if(s->picture_structure != PICT_BOTTOM_FIELD)
4195 s->current_picture_ptr->field_poc[0]= field_poc[0];
4196 if(s->picture_structure != PICT_TOP_FIELD)
4197 s->current_picture_ptr->field_poc[1]= field_poc[1];
4198 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4199 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4205 * decodes a slice header.
4206 * this will also call MPV_common_init() and frame_start() as needed
4208 static int decode_slice_header(H264Context *h){
4209 MpegEncContext * const s = &h->s;
4210 int first_mb_in_slice, pps_id;
4211 int num_ref_idx_active_override_flag;
4212 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4214 int default_ref_list_done = 0;
4216 s->current_picture.reference= h->nal_ref_idc != 0;
4217 s->dropable= h->nal_ref_idc == 0;
4219 first_mb_in_slice= get_ue_golomb(&s->gb);
4221 slice_type= get_ue_golomb(&s->gb);
4223 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4228 h->slice_type_fixed=1;
4230 h->slice_type_fixed=0;
4232 slice_type= slice_type_map[ slice_type ];
4233 if (slice_type == I_TYPE
4234 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4235 default_ref_list_done = 1;
4237 h->slice_type= slice_type;
4239 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4241 pps_id= get_ue_golomb(&s->gb);
4243 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4246 h->pps= h->pps_buffer[pps_id];
4247 if(h->pps.slice_group_count == 0){
4248 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4252 h->sps= h->sps_buffer[ h->pps.sps_id ];
4253 if(h->sps.log2_max_frame_num == 0){
4254 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4258 if(h->dequant_coeff_pps != pps_id){
4259 h->dequant_coeff_pps = pps_id;
4260 init_dequant_tables(h);
4263 s->mb_width= h->sps.mb_width;
4264 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4266 h->b_stride= s->mb_width*4 + 1;
4267 h->b8_stride= s->mb_width*2 + 1;
4269 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4270 if(h->sps.frame_mbs_only_flag)
4271 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4273 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4275 if (s->context_initialized
4276 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4280 if (!s->context_initialized) {
4281 if (MPV_common_init(s) < 0)
4284 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4285 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4286 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4289 for(i=0; i<16; i++){
4290 #define T(x) (x>>2) | ((x<<2) & 0xF)
4291 h->zigzag_scan[i] = T(zigzag_scan[i]);
4292 h-> field_scan[i] = T( field_scan[i]);
4295 if(h->sps.transform_bypass){ //FIXME same ugly
4296 h->zigzag_scan_q0 = zigzag_scan;
4297 h->field_scan_q0 = field_scan;
4299 h->zigzag_scan_q0 = h->zigzag_scan;
4300 h->field_scan_q0 = h->field_scan;
4305 s->avctx->width = s->width;
4306 s->avctx->height = s->height;
4307 s->avctx->sample_aspect_ratio= h->sps.sar;
4308 if(!s->avctx->sample_aspect_ratio.den)
4309 s->avctx->sample_aspect_ratio.den = 1;
4311 if(h->sps.timing_info_present_flag){
4312 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4313 if(h->x264_build > 0 && h->x264_build < 44)
4314 s->avctx->time_base.den *= 2;
4315 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4316 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4320 if(h->slice_num == 0){
4321 if(frame_start(h) < 0)
4325 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4326 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4328 h->mb_aff_frame = 0;
4329 if(h->sps.frame_mbs_only_flag){
4330 s->picture_structure= PICT_FRAME;
4332 if(get_bits1(&s->gb)) { //field_pic_flag
4333 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4335 s->picture_structure= PICT_FRAME;
4336 first_mb_in_slice <<= h->sps.mb_aff;
4337 h->mb_aff_frame = h->sps.mb_aff;
4341 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4342 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4343 if(s->mb_y >= s->mb_height){
4347 if(s->picture_structure==PICT_FRAME){
4348 h->curr_pic_num= h->frame_num;
4349 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4351 h->curr_pic_num= 2*h->frame_num;
4352 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4355 if(h->nal_unit_type == NAL_IDR_SLICE){
4356 get_ue_golomb(&s->gb); /* idr_pic_id */
4359 if(h->sps.poc_type==0){
4360 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4362 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4363 h->delta_poc_bottom= get_se_golomb(&s->gb);
4367 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4368 h->delta_poc[0]= get_se_golomb(&s->gb);
4370 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4371 h->delta_poc[1]= get_se_golomb(&s->gb);
4376 if(h->pps.redundant_pic_cnt_present){
4377 h->redundant_pic_count= get_ue_golomb(&s->gb);
4380 //set defaults, might be overriden a few line later
4381 h->ref_count[0]= h->pps.ref_count[0];
4382 h->ref_count[1]= h->pps.ref_count[1];
4384 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4385 if(h->slice_type == B_TYPE){
4386 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4388 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4390 if(num_ref_idx_active_override_flag){
4391 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4392 if(h->slice_type==B_TYPE)
4393 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4395 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4396 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4402 if(!default_ref_list_done){
4403 fill_default_ref_list(h);
4406 if(decode_ref_pic_list_reordering(h) < 0)
4409 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4410 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4411 pred_weight_table(h);
4412 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4413 implicit_weight_table(h);
4417 if(s->current_picture.reference)
4418 decode_ref_pic_marking(h);
4420 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4421 h->cabac_init_idc = get_ue_golomb(&s->gb);
4423 h->last_qscale_diff = 0;
4424 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4425 if(s->qscale<0 || s->qscale>51){
4426 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4429 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4430 //FIXME qscale / qp ... stuff
4431 if(h->slice_type == SP_TYPE){
4432 get_bits1(&s->gb); /* sp_for_switch_flag */
4434 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4435 get_se_golomb(&s->gb); /* slice_qs_delta */
4438 h->deblocking_filter = 1;
4439 h->slice_alpha_c0_offset = 0;
4440 h->slice_beta_offset = 0;
4441 if( h->pps.deblocking_filter_parameters_present ) {
4442 h->deblocking_filter= get_ue_golomb(&s->gb);
4443 if(h->deblocking_filter < 2)
4444 h->deblocking_filter^= 1; // 1<->0
4446 if( h->deblocking_filter ) {
4447 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4448 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4451 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4452 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4453 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4454 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4455 h->deblocking_filter= 0;
4458 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4459 slice_group_change_cycle= get_bits(&s->gb, ?);
4464 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4465 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4467 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4469 av_get_pict_type_char(h->slice_type),
4470 pps_id, h->frame_num,
4471 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4472 h->ref_count[0], h->ref_count[1],
4474 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4476 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads the unary level_prefix of a CAVLC coefficient level: counts leading
// zero bits up to the first 1 bit using the raw bit-reader macros.
4486 static inline int get_level_prefix(GetBitContext *gb){
4490 OPEN_READER(re, gb);
4491 UPDATE_CACHE(re, gb);
4492 buf=GET_CACHE(re, gb);
// position of the first set bit from the MSB; log-1 is the prefix value
4494 log= 32 - av_log2(buf);
4496 print_bin(buf>>(32-log), log);
4497 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume the zeros plus the terminating 1 bit
4500 LAST_SKIP_BITS(re, gb, log);
4501 CLOSE_READER(re, gb);
// Returns whether the 8x8 transform may be used for the current MB: every
// sub-partition must be 8x8, and direct sub-MBs additionally require the
// SPS direct_8x8_inference_flag.
4506 static inline int get_dct8x8_allowed(H264Context *h){
4509 if(!IS_SUB_8X8(h->sub_mb_type[i])
4510 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4517 * decodes a residual block.
4518 * @param n block index
4519 * @param scantable scantable
4520 * @param max_coeff number of coefficients in the block
4521 * @return <0 if an error occured
// CAVLC residual decoding: coeff_token (total coeffs + trailing ones), then
// levels, total_zeros and run_before, written into block[] in scan order.
// qmul==NULL means DC-only path (no dequantization here).
4523 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4524 MpegEncContext * const s = &h->s;
// selects one of 4 coeff_token VLC tables by the predicted nC context
4525 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4527 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4529 //FIXME put trailing_onex into the context
// chroma DC uses its own small VLC; luma DC predicts nC from block 0
4531 if(n == CHROMA_DC_BLOCK_INDEX){
4532 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4533 total_coeff= coeff_token>>2;
4535 if(n == LUMA_DC_BLOCK_INDEX){
4536 total_coeff= pred_non_zero_count(h, 0);
4537 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4538 total_coeff= coeff_token>>2;
4540 total_coeff= pred_non_zero_count(h, n);
4541 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4542 total_coeff= coeff_token>>2;
4543 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4547 //FIXME set last_non_zero?
// coeff_token low 2 bits = number of trailing +/-1 coefficients
4552 trailing_ones= coeff_token&3;
4553 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4554 assert(total_coeff<=16);
// trailing ones carry only a sign bit each: bit 0 -> +1, bit 1 -> -1
4556 for(i=0; i<trailing_ones; i++){
4557 level[i]= 1 - 2*get_bits1(gb);
4561 int level_code, mask;
4562 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4563 int prefix= get_level_prefix(gb);
4565 //first coefficient has suffix_length equal to 0 or 1
4566 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4568 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4570 level_code= (prefix<<suffix_length); //part
4571 }else if(prefix==14){
4573 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4575 level_code= prefix + get_bits(gb, 4); //part
4576 }else if(prefix==15){
// escape: 12-bit suffix for large levels
4577 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4578 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4580 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// if <3 trailing ones, level magnitude 1 is impossible, shift codes by 2
4584 if(trailing_ones < 3) level_code += 2;
// map even/odd level_code to signed level: even -> +, odd -> -
4589 mask= -(level_code&1);
4590 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4593 //remaining coefficients have suffix_length > 0
4594 for(;i<total_coeff;i++) {
// thresholds at which suffix_length grows for subsequent levels
4595 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4596 prefix = get_level_prefix(gb);
4598 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4599 }else if(prefix==15){
4600 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4602 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4605 mask= -(level_code&1);
4606 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4607 if(level_code > suffix_limit[suffix_length])
// if the block is full there is no room for zeros, skip total_zeros
4612 if(total_coeff == max_coeff)
4615 if(n == CHROMA_DC_BLOCK_INDEX)
4616 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4618 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// place coefficients from the highest scan position downwards;
// two copies of the loop: raw DC path (qmul==NULL) and dequantizing path
4621 coeff_num = zeros_left + total_coeff - 1;
4622 j = scantable[coeff_num];
4624 block[j] = level[0];
4625 for(i=1;i<total_coeff;i++) {
4628 else if(zeros_left < 7){
4629 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4631 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4633 zeros_left -= run_before;
4634 coeff_num -= 1 + run_before;
4635 j= scantable[ coeff_num ];
// dequantizing path: (level * qmul + rounding) >> 6
4640 block[j] = (level[0] * qmul[j] + 32)>>6;
4641 for(i=1;i<total_coeff;i++) {
4644 else if(zeros_left < 7){
4645 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4647 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4649 zeros_left -= run_before;
4650 coeff_num -= 1 + run_before;
4651 j= scantable[ coeff_num ];
4653 block[j]= (level[i] * qmul[j] + 32)>>6;
// consistency check: runs must not consume more zeros than signaled
4658 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4666 * decodes a P_SKIP or B_SKIP macroblock
4668 static void decode_mb_skip(H264Context *h){
4669 MpegEncContext * const s = &h->s;
4670 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// skipped MBs have no residual: clear the non-zero-count state
4673 memset(h->non_zero_count[mb_xy], 0, 16);
4674 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// MBAFF: field flag is signaled on the top MB of a pair when it is skipped
4676 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4677 h->mb_field_decoding_flag= get_bits1(&s->gb);
4679 if(h->mb_field_decoding_flag)
4680 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct prediction
4682 if( h->slice_type == B_TYPE )
4684 // just for fill_caches. pred_direct_motion will set the real mb_type
4685 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4687 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4688 pred_direct_motion(h, &mb_type);
4690 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4691 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP: 16x16 list-0 prediction with the P-skip predicted MV and ref 0
4697 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4699 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4700 pred_pskip_motion(h, &mx, &my);
4701 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4702 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4704 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// commit the skip MB to the picture-level arrays
4707 write_back_motion(h, mb_type);
4708 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4709 s->current_picture.qscale_table[mb_xy]= s->qscale;
4710 h->slice_table[ mb_xy ]= h->slice_num;
4711 h->prev_mb_skipped= 1;
4715 * decodes a macroblock
4716 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// CAVLC macroblock layer: skip handling, mb_type, intra modes or inter
// motion, CBP, delta-QP and residual decoding.
// NOTE(review): lines appear elided in this copy; code kept byte-identical.
4718 static int decode_mb_cavlc(H264Context *h){
4719 MpegEncContext * const s = &h->s;
4720 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4721 int mb_type, partition_count, cbp;
4722 int dct8x8_allowed= h->pps.transform_8x8_mode;
4724 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4726 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4727 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// P/B slices start with an mb_skip_run counter (-1 = not yet read)
4729 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4730 if(s->mb_skip_run==-1)
4731 s->mb_skip_run= get_ue_golomb(&s->gb);
4733 if (s->mb_skip_run--) {
// MBAFF: read the field flag on the first MB of a pair (or after a skip)
4738 if(h->mb_aff_frame){
4739 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4740 h->mb_field_decoding_flag = get_bits1(&s->gb);
4742 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4744 h->prev_mb_skipped= 0;
// mb_type is slice-type dependent; intra types in P/B slices are offset
// past the inter types and fall through to decode_intra_mb
4746 mb_type= get_ue_golomb(&s->gb);
4747 if(h->slice_type == B_TYPE){
4749 partition_count= b_mb_type_info[mb_type].partition_count;
4750 mb_type= b_mb_type_info[mb_type].type;
4753 goto decode_intra_mb;
4755 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4757 partition_count= p_mb_type_info[mb_type].partition_count;
4758 mb_type= p_mb_type_info[mb_type].type;
4761 goto decode_intra_mb;
4764 assert(h->slice_type == I_TYPE);
4767 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// intra16x16 mb_type encodes cbp and prediction mode; unpack them
4771 cbp= i_mb_type_info[mb_type].cbp;
4772 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4773 mb_type= i_mb_type_info[mb_type].type;
4776 if(h->mb_field_decoding_flag)
4777 mb_type |= MB_TYPE_INTERLACED;
4779 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw samples follow, byte-aligned, copied straight into h->mb
4781 if(IS_INTRA_PCM(mb_type)){
4784 // we assume these blocks are very rare so we dont optimize it
4785 align_get_bits(&s->gb);
4787 // The pixels are stored in the same order as levels in h->mb array.
4788 for(y=0; y<16; y++){
4789 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4790 for(x=0; x<16; x++){
4791 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4792 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4796 const int index= 256 + 4*(y&3) + 32*(y>>2);
4798 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4799 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4803 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4805 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4806 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4810 // In deblocking, the quantizer is 0
4811 s->current_picture.qscale_table[mb_xy]= 0;
4812 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4813 // All coeffs are present
4814 memset(h->non_zero_count[mb_xy], 16, 16);
4816 s->current_picture.mb_type[mb_xy]= mb_type;
4820 fill_caches(h, mb_type, 0);
4823 if(IS_INTRA(mb_type)){
4824 // init_top_left_availability(h);
4825 if(IS_INTRA4x4(mb_type)){
// transform_size_8x8_flag: promotes I4x4 to 8x8 transform blocks
4828 if(dct8x8_allowed && get_bits1(&s->gb)){
4829 mb_type |= MB_TYPE_8x8DCT;
4833 // fill_intra4x4_pred_table(h);
// each 4x4 (or 8x8) block: either use the predicted mode or a 3-bit
// remainder that skips over the predicted mode
4834 for(i=0; i<16; i+=di){
4835 const int mode_coded= !get_bits1(&s->gb);
4836 const int predicted_mode= pred_intra_mode(h, i);
4840 const int rem_mode= get_bits(&s->gb, 3);
4841 if(rem_mode<predicted_mode)
4846 mode= predicted_mode;
4850 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4852 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4854 write_back_intra_pred_mode(h);
4855 if( check_intra4x4_pred_mode(h) < 0)
4858 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4859 if(h->intra16x16_pred_mode < 0)
4862 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4864 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4865 if(h->chroma_pred_mode < 0)
// 8x8 partitioned inter MB: per-partition sub_mb_type, refs and MVs
4867 }else if(partition_count==4){
4868 int i, j, sub_partition_count[4], list, ref[2][4];
4870 if(h->slice_type == B_TYPE){
4872 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4873 if(h->sub_mb_type[i] >=13){
4874 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4877 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4878 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// any direct sub-block triggers direct-motion prediction for the MB
4880 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4881 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4882 pred_direct_motion(h, &mb_type);
4883 h->ref_cache[0][scan8[4]] =
4884 h->ref_cache[1][scan8[4]] =
4885 h->ref_cache[0][scan8[12]] =
4886 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4889 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4891 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4892 if(h->sub_mb_type[i] >=4){
4893 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4896 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4897 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// first pass: reference indices per list and 8x8 partition
4901 for(list=0; list<2; list++){
4902 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4903 if(ref_count == 0) continue;
4904 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4908 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4909 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4910 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4919 dct8x8_allowed = get_dct8x8_allowed(h);
// second pass: motion vector differences, spread into the mv cache
// according to sub-partition shape (8x8 / 8x4 / 4x8 / 4x4)
4921 for(list=0; list<2; list++){
4922 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4923 if(ref_count == 0) continue;
4926 if(IS_DIRECT(h->sub_mb_type[i])) {
4927 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4930 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4931 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4933 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4934 const int sub_mb_type= h->sub_mb_type[i];
4935 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4936 for(j=0; j<sub_partition_count[i]; j++){
4938 const int index= 4*i + block_width*j;
4939 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4940 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4941 mx += get_se_golomb(&s->gb);
4942 my += get_se_golomb(&s->gb);
4943 tprintf("final mv:%d %d\n", mx, my);
4945 if(IS_SUB_8X8(sub_mb_type)){
4946 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4947 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4948 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4949 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4950 }else if(IS_SUB_8X4(sub_mb_type)){
4951 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4952 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4953 }else if(IS_SUB_4X8(sub_mb_type)){
4954 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4955 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4957 assert(IS_SUB_4X4(sub_mb_type));
4958 mv_cache[ 0 ][0]= mx;
4959 mv_cache[ 0 ][1]= my;
4963 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4969 }else if(IS_DIRECT(mb_type)){
4970 pred_direct_motion(h, &mb_type);
4971 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// non-partitioned inter MB: 16x16, 16x8 or 8x16 motion
4973 int list, mx, my, i;
4974 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4975 if(IS_16X16(mb_type)){
4976 for(list=0; list<2; list++){
4977 if(h->ref_count[list]>0){
4978 if(IS_DIR(mb_type, 0, list)){
4979 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4980 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4982 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4985 for(list=0; list<2; list++){
4986 if(IS_DIR(mb_type, 0, list)){
4987 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4988 mx += get_se_golomb(&s->gb);
4989 my += get_se_golomb(&s->gb);
4990 tprintf("final mv:%d %d\n", mx, my);
4992 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4994 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4997 else if(IS_16X8(mb_type)){
4998 for(list=0; list<2; list++){
4999 if(h->ref_count[list]>0){
5001 if(IS_DIR(mb_type, i, list)){
5002 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5003 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5005 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5009 for(list=0; list<2; list++){
5011 if(IS_DIR(mb_type, i, list)){
5012 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5013 mx += get_se_golomb(&s->gb);
5014 my += get_se_golomb(&s->gb);
5015 tprintf("final mv:%d %d\n", mx, my);
5017 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5019 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5023 assert(IS_8X16(mb_type));
5024 for(list=0; list<2; list++){
5025 if(h->ref_count[list]>0){
5027 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5028 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5029 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5031 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5035 for(list=0; list<2; list++){
5037 if(IS_DIR(mb_type, i, list)){
5038 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5039 mx += get_se_golomb(&s->gb);
5040 my += get_se_golomb(&s->gb);
5041 tprintf("final mv:%d %d\n", mx, my);
5043 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5045 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5051 if(IS_INTER(mb_type))
5052 write_back_motion(h, mb_type);
// coded_block_pattern (not present for intra16x16, which encodes it in
// mb_type); different golomb->cbp mapping for intra4x4 vs inter
5054 if(!IS_INTRA16x16(mb_type)){
5055 cbp= get_ue_golomb(&s->gb);
5057 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5061 if(IS_INTRA4x4(mb_type))
5062 cbp= golomb_to_intra4x4_cbp[cbp];
5064 cbp= golomb_to_inter_cbp[cbp];
// inter transform_size_8x8_flag is sent after cbp, only if luma is coded
5067 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5068 if(get_bits1(&s->gb))
5069 mb_type |= MB_TYPE_8x8DCT;
5071 s->current_picture.mb_type[mb_xy]= mb_type;
5073 if(cbp || IS_INTRA16x16(mb_type)){
5074 int i8x8, i4x4, chroma_idx;
5075 int chroma_qp, dquant;
5076 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5077 const uint8_t *scan, *dc_scan;
5079 // fill_non_zero_count_cache(h);
// field MBs use the field scan; qp==0 selects the bypass (q0) tables
5081 if(IS_INTERLACED(mb_type)){
5082 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5083 dc_scan= luma_dc_field_scan;
5085 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5086 dc_scan= luma_dc_zigzag_scan;
5089 dquant= get_se_golomb(&s->gb);
5091 if( dquant > 25 || dquant < -26 ){
5092 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec
5096 s->qscale += dquant;
5097 if(((unsigned)s->qscale) > 51){
5098 if(s->qscale<0) s->qscale+= 52;
5099 else s->qscale-= 52;
5102 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
// intra16x16: separate luma DC block, then 15-coeff AC blocks
5103 if(IS_INTRA16x16(mb_type)){
5104 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5105 return -1; //FIXME continue if partitioned and other return -1 too
5108 assert((cbp&15) == 0 || (cbp&15) == 15);
5111 for(i8x8=0; i8x8<4; i8x8++){
5112 for(i4x4=0; i4x4<4; i4x4++){
5113 const int index= i4x4 + 4*i8x8;
5114 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5120 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// non-intra16x16: decode luma per 8x8, as 8x8 transform or 4x4 blocks
5123 for(i8x8=0; i8x8<4; i8x8++){
5124 if(cbp & (1<<i8x8)){
5125 if(IS_8x8DCT(mb_type)){
5126 DCTELEM *buf = &h->mb[64*i8x8];
5128 for(i4x4=0; i4x4<4; i4x4++){
5129 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5130 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// 8x8 transform stores a single nnz flag for the whole 8x8 block
5133 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5134 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5136 for(i4x4=0; i4x4<4; i4x4++){
5137 const int index= i4x4 + 4*i8x8;
5139 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5145 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5146 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (cbp&0x30), then chroma AC (cbp&0x20)
5152 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5153 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5159 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5160 for(i4x4=0; i4x4<4; i4x4++){
5161 const int index= 16 + 4*chroma_idx + i4x4;
5162 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
// no chroma AC coded: clear the chroma nnz entries
5168 uint8_t * const nnz= &h->non_zero_count_cache[0];
5169 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5170 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// nothing coded at all: clear every nnz entry for this MB
5173 uint8_t * const nnz= &h->non_zero_count_cache[0];
5174 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5175 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5176 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5178 s->current_picture.qscale_table[mb_xy]= s->qscale;
5179 write_back_non_zero_count(h);
// CABAC mb_field_decoding_flag: context (0..2) counts how many of the left
// and above MB pairs (same slice) are already field-coded.
5184 static int decode_cabac_field_decoding_flag(H264Context *h) {
5185 MpegEncContext * const s = &h->s;
5186 const int mb_x = s->mb_x;
// &~1: address the top MB of the current MB pair
5187 const int mb_y = s->mb_y & ~1;
5188 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
// -2 rows: the pair above, not the bottom MB of this pair
5189 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5191 unsigned int ctx = 0;
5193 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5196 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5200 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC intra mb_type tree, shared by I slices (intra_slice=1, with
// neighbor-based context) and intra MBs in P/B slices (intra_slice=0).
// Returns 0 for I4x4, 25 for I_PCM, 1..24 for the I16x16 variants
// (cbp_luma / cbp_chroma / pred mode packed into the index).
5203 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5204 uint8_t *state= &h->cabac_state[ctx_base];
5208 MpegEncContext * const s = &h->s;
5209 const int mba_xy = h->left_mb_xy[0];
5210 const int mbb_xy = h->top_mb_xy;
// context = number of non-I4x4 neighbors in the same slice
5212 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5214 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5216 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5217 return 0; /* I4x4 */
5220 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5221 return 0; /* I4x4 */
// the terminate symbol selects I_PCM
5224 if( get_cabac_terminate( &h->cabac ) )
5225 return 25; /* PCM */
5227 mb_type = 1; /* I16x16 */
5228 if( get_cabac( &h->cabac, &state[1] ) )
5229 mb_type += 12; /* cbp_luma != 0 */
5231 if( get_cabac( &h->cabac, &state[2] ) ) {
5232 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5233 mb_type += 4 * 2; /* cbp_chroma == 2 */
5235 mb_type += 4 * 1; /* cbp_chroma == 1 */
// two final bits select the 16x16 intra prediction mode
5237 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5239 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
// CABAC mb_type decoding, dispatching on slice type: I slices go straight
// to the intra tree; P slices decode the 4 inter shapes or fall through to
// intra (+5 offset); B slices decode a prefix/suffix bit pattern.
5244 static int decode_cabac_mb_type( H264Context *h ) {
5245 MpegEncContext * const s = &h->s;
5247 if( h->slice_type == I_TYPE ) {
5248 return decode_cabac_intra_mb_type(h, 3, 1);
5249 } else if( h->slice_type == P_TYPE ) {
// first bit: 0 = inter MB, 1 = intra MB (suffix decoded below)
5250 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5252 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5253 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5254 return 0; /* P_L0_D16x16; */
5256 return 3; /* P_8x8; */
5258 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5259 return 2; /* P_L0_D8x16; */
5261 return 1; /* P_L0_D16x8; */
// intra mb_types in P slices are offset by 5 past the inter types
5264 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5266 } else if( h->slice_type == B_TYPE ) {
5267 const int mba_xy = h->left_mb_xy[0];
5268 const int mbb_xy = h->top_mb_xy;
// context counts non-skip/non-direct neighbors in the same slice
5272 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5273 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5275 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5276 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5279 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5280 return 0; /* B_Direct_16x16 */
5282 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5283 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix; special values 13..15 select intra / 8x16 / 8x8 paths
5286 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5287 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5288 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5289 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5291 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5292 else if( bits == 13 ) {
5293 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5294 } else if( bits == 14 )
5295 return 11; /* B_L1_L0_8x16 */
5296 else if( bits == 15 )
5297 return 22; /* B_8x8 */
// one more bit extends the 4-bit prefix to a 5-bit code
5299 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5300 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5302 /* TODO SI/SP frames? */
// CABAC mb_skip_flag: context (0..2) counts non-skipped left/above
// neighbors in the same slice; P/SP and B slices use different state bases.
5307 static int decode_cabac_mb_skip( H264Context *h) {
5308 MpegEncContext * const s = &h->s;
5309 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5310 const int mba_xy = mb_xy - 1;
5311 const int mbb_xy = mb_xy - s->mb_stride;
5314 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5316 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5319 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5320 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5322 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
/* Decode the intra 4x4 prediction mode for one block.
 * First bin (context 68) selects "use predicted mode"; otherwise a
 * 3-bit remaining-mode value is read with context 69 (LSB first) and
 * adjusted against pred_mode so the predicted mode itself is skipped. */
5325 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5328 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// rem_intra4x4_pred_mode: three fixed-length bins, all on context 69.
5331 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5332 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5333 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Modes >= the predicted one are shifted up by one in the bitstream.
5335 if( mode >= pred_mode )
/* Decode intra_chroma_pred_mode (0..3) as a truncated unary code.
 * Context for the first bin depends on whether the left/top neighbour
 * macroblocks used a non-zero chroma prediction mode. */
5341 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5342 const int mba_xy = h->left_mb_xy[0];
5343 const int mbb_xy = h->top_mb_xy;
5347 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5348 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5351 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
// Truncated unary: first bin context-modelled, remaining bins on ctx 64+3.
5354 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5357 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5359 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Mapping tables between the 4x4 luma block scan order and (x, y)
 * block coordinates inside the macroblock.  block_idx_xy is the
 * inverse mapping (its interior rows are elided from this view). */
5365 static const uint8_t block_idx_x[16] = {
5366 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5368 static const uint8_t block_idx_y[16] = {
5369 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5371 static const uint8_t block_idx_xy[4][4] = {
/* Decode the luma part of coded_block_pattern: one context-modelled
 * bin per 8x8 block (contexts 73..76).  The context depends on the
 * cbp bits of the neighbouring 8x8 blocks (left/top, possibly from
 * the previous macroblock row/column). */
5378 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5379 MpegEncContext * const s = &h->s;
5384 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
// Top-left 4x4 coordinate of this 8x8 block.
5390 x = block_idx_x[4*i8x8];
5391 y = block_idx_y[4*i8x8];
// Fetch neighbour cbp only if the neighbour exists and is in this slice.
5395 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5396 cbp_a = h->left_cbp;
5397 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5402 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5404 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5407 /* No need to test for skip as we put 0 for skip block */
5408 /* No need to test for IPCM as we put 1 for IPCM block */
// Context increments when a neighbouring 8x8 block had no coefficients.
5410 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5411 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5416 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5417 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5421 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decode the chroma part of coded_block_pattern (0, 1 or 2) using
 * contexts 77..84.  Context selection uses the chroma cbp of the
 * left and top neighbour macroblocks (bits 4..5 of their cbp). */
5427 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5431 cbp_a = (h->left_cbp>>4)&0x03;
5432 cbp_b = (h-> top_cbp>>4)&0x03;
// First bin: "any chroma coefficients at all?"
5435 if( cbp_a > 0 ) ctx++;
5436 if( cbp_b > 0 ) ctx += 2;
5437 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin distinguishes DC-only (1) from DC+AC (2).
5441 if( cbp_a == 2 ) ctx++;
5442 if( cbp_b == 2 ) ctx += 2;
5443 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decode mb_qp_delta as a unary code on contexts 60..63 and map the
 * unsigned value to a signed delta (even -> positive, odd -> negative).
 * NOTE(review): the INT_MIN error return the caller checks for is on a
 * line elided from this view — presumably taken when val exceeds 52. */
5445 static int decode_cabac_mb_dqp( H264Context *h) {
5446 MpegEncContext * const s = &h->s;
// Neighbour used for context: previous macroblock in decoding order,
// wrapping to the end of the previous row at the left edge.
5452 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5454 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5456 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5459 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5465 if(val > 52) //prevent infinite loop
// Odd values map to negative deltas: -(val+1)/2.
5472 return -(val + 1)/2;
/* Decode sub_mb_type for one 8x8 partition of a P macroblock
 * (binarization per H.264 9.3.2.5, contexts 21..23). */
5474 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5475 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5477 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5479 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decode sub_mb_type for one 8x8 partition of a B macroblock
 * (contexts 36..39).  Returns an index into b_sub_mb_type_info:
 * 0 = B_Direct_8x8, 1/2 = B_L0/L1_8x8, 11/12 = B_L1/Bi_4x4, etc. */
5483 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5485 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5486 return 0; /* B_Direct_8x8 */
5487 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5488 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5490 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5491 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5492 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two suffix bins, both on context 39.
5495 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5496 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decode transform_size_8x8_flag; the context (399..401) is selected
 * by how many neighbouring macroblocks use the 8x8 transform. */
5500 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5501 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for partition n of the given list as a unary code on
 * contexts 54..59.  Context for the first bin depends on the ref_idx
 * of the left/top neighbouring partitions; in B slices, neighbours
 * coded in direct mode do not contribute. */
5504 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5505 int refa = h->ref_cache[list][scan8[n] - 1];
5506 int refb = h->ref_cache[list][scan8[n] - 8];
5510 if( h->slice_type == B_TYPE) {
5511 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5513 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5522 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decode one motion vector difference component (l==0: x, l==1: y)
 * using UEG3 binarization: up to 9 context-modelled unary bins
 * (ctxbase 40 for x, 47 for y), then an exp-Golomb suffix and a sign
 * bit in bypass mode.  The initial context is picked from the sum of
 * the neighbours' absolute mvd values (thresholds 2 and 32). */
5532 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5533 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5534 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5535 int ctxbase = (l == 0) ? 40 : 47;
5540 else if( amvd > 32 )
5545 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary prefix, capped at 9 bins.
5550 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb suffix in bypass mode for values >= 9.
5558 while( get_cabac_bypass( &h->cabac ) ) {
5563 if( get_cabac_bypass( &h->cabac ) )
// Sign bit, also in bypass mode.
5567 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Compute the context offset for the coded_block_flag of block idx in
 * category cat (see the cat legend in decode_cabac_residual).  The
 * neighbours' non-zero status (nza/nzb) comes from different caches
 * depending on the block category; the final context is
 * ctx + 4*cat where ctx encodes the two neighbour flags. */
5571 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
// cat 0: luma DC — non-zero status stored in bit 8 of the cbp caches.
5576 nza = h->left_cbp&0x100;
5577 nzb = h-> top_cbp&0x100;
5578 } else if( cat == 1 || cat == 2 ) {
// cat 1/2: luma AC / 4x4 — use the per-4x4-block non-zero-count cache.
5579 nza = h->non_zero_count_cache[scan8[idx] - 1];
5580 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5581 } else if( cat == 3 ) {
// cat 3: chroma DC — non-zero status stored in bits 6..7 of the cbp caches.
5582 nza = (h->left_cbp>>(6+idx))&0x01;
5583 nzb = (h-> top_cbp>>(6+idx))&0x01;
// cat 4: chroma AC — chroma blocks start at scan8[16].
5586 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5587 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5596 return ctx + 4 * cat;
/* Decode one residual block with CABAC: coded_block_flag, significance
 * map, last-coefficient flags, then coefficient levels and signs in
 * reverse scan order.  Dequantization with qmul is applied inline
 * unless qmul is NULL (DC blocks).  Updates the non-zero-count cache
 * and cbp_table so later blocks pick the right contexts.
 * Returns 0 (error paths are on lines elided from this view). */
5599 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5600 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* Context tables: frame vs field coding use different significance
 * context banks (index [h->mb_field_decoding_flag]); index [cat]
 * selects the per-category offset, with dedicated 8x8 entries. */
5601 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5602 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5603 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5604 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5605 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
// Per-position context indices for the 8x8 significance map.
5606 static const int significant_coeff_flag_offset_8x8[63] = {
5607 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5608 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5609 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5610 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5612 static const int last_coeff_flag_offset_8x8[63] = {
5613 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5614 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5615 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5616 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5622 int coeff_count = 0;
5625 int abslevelgt1 = 0;
5627 uint8_t *significant_coeff_ctx_base;
5628 uint8_t *last_coeff_ctx_base;
5629 uint8_t *abs_level_m1_ctx_base;
5631 /* cat: 0-> DC 16x16 n = 0
5632 * 1-> AC 16x16 n = luma4x4idx
5633 * 2-> Luma4x4 n = luma4x4idx
5634 * 3-> DC Chroma n = iCbCr
5635 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5636 * 5-> Luma8x8 n = 4 * luma8x8idx
5639 /* read coded block flag */
5641 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
// Flag cleared: record zero coefficients and return (early exit elided).
5642 if( cat == 1 || cat == 2 )
5643 h->non_zero_count_cache[scan8[n]] = 0;
5645 h->non_zero_count_cache[scan8[16+n]] = 0;
5651 significant_coeff_ctx_base = h->cabac_state
5652 + significant_coeff_flag_offset[cat]
5653 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5654 last_coeff_ctx_base = h->cabac_state
5655 + last_significant_coeff_flag_offset[cat]
5656 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5657 abs_level_m1_ctx_base = h->cabac_state
5658 + coeff_abs_level_m1_offset[cat];
/* Significance-map scan: records each significant position in index[]
 * and stops when the last_significant_coeff flag fires. */
5661 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5662 for(last= 0; last < coefs; last++) { \
5663 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5664 if( get_cabac( &h->cabac, sig_ctx )) { \
5665 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5666 index[coeff_count++] = last; \
5667 if( get_cabac( &h->cabac, last_ctx ) ) { \
// 8x8 blocks use the position-dependent context tables above.
5673 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5674 last_coeff_flag_offset_8x8[last] );
5676 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
// The final scan position has no explicit significance flag.
5678 if( last == max_coeff -1 ) {
5679 index[coeff_count++] = last;
5681 assert(coeff_count > 0);
// Publish non-zero status for neighbour context derivation.
5684 h->cbp_table[mb_xy] |= 0x100;
5685 else if( cat == 1 || cat == 2 )
5686 h->non_zero_count_cache[scan8[n]] = coeff_count;
5688 h->cbp_table[mb_xy] |= 0x40 << n;
5690 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5693 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* Level decoding in reverse scan order: unary "abs level minus 1"
 * prefix with adaptive contexts, exp-Golomb escape past 14, bypass
 * sign bit; dequantize via qmul when provided. */
5696 for( i = coeff_count - 1; i >= 0; i-- ) {
5697 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5698 int j= scantable[index[i]];
5700 if( get_cabac( &h->cabac, ctx ) == 0 ) {
// Level magnitude 1: only the sign remains.
5702 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5705 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5706 else block[j] = ( qmul[j] + 32) >> 6;
5712 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5713 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
// Escape: exp-Golomb suffix in bypass mode for |level| >= 15.
5717 if( coeff_abs >= 15 ) {
5719 while( get_cabac_bypass( &h->cabac ) ) {
5720 coeff_abs += 1 << j;
5725 if( get_cabac_bypass( &h->cabac ) )
5726 coeff_abs += 1 << j ;
5731 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5732 else block[j] = coeff_abs;
5734 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5735 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Compute the top and left neighbour macroblock addresses for the
 * current macroblock.  For plain frame coding these are simply
 * mb_xy - mb_stride and mb_xy - 1; with MBAFF the neighbours must be
 * adjusted when the current pair and a neighbour pair differ in
 * frame/field coding mode. */
5744 void inline compute_mb_neighboors(H264Context *h)
5746 MpegEncContext * const s = &h->s;
5747 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5748 h->top_mb_xy = mb_xy - s->mb_stride;
5749 h->left_mb_xy[0] = mb_xy - 1;
5750 if(h->mb_aff_frame){
// Address of the top macroblock of the current MB pair, and of the pair above.
5751 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5752 const int top_pair_xy = pair_xy - s->mb_stride;
5753 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5754 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5755 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5756 const int bottom = (s->mb_y & 1);
5758 ? !curr_mb_frame_flag // bottom macroblock
5759 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
// Move the top neighbour up one row when the coding modes require it.
5761 h->top_mb_xy -= s->mb_stride;
5763 if (left_mb_frame_flag != curr_mb_frame_flag) {
5764 h->left_mb_xy[0] = pair_xy - 1;
5771 * decodes a macroblock with CABAC entropy coding: skip flag, mb_type,
5771 * prediction information (intra modes or references + motion vectors),
5771 * coded_block_pattern, qp delta and the residual blocks.
5772 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5774 static int decode_mb_cabac(H264Context *h) {
5775 MpegEncContext * const s = &h->s;
5776 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5777 int mb_type, partition_count, cbp = 0;
5778 int dct8x8_allowed= h->pps.transform_8x8_mode;
5780 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5782 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5783 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5784 /* read skip flags */
5785 if( decode_cabac_mb_skip( h ) ) {
// Skipped MB: clear per-MB side info so neighbour contexts stay correct.
5788 h->cbp_table[mb_xy] = 0;
5789 h->chroma_pred_mode_table[mb_xy] = 0;
5790 h->last_qscale_diff = 0;
5796 if(h->mb_aff_frame){
// mb_field_decoding_flag is only coded for the top MB of a pair (or
// after a skipped top MB); otherwise it is inferred.
5797 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5798 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5800 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5802 h->prev_mb_skipped = 0;
5804 compute_mb_neighboors(h);
5805 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5806 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
// Translate the decoded mb_type index through the per-slice-type tables.
5810 if( h->slice_type == B_TYPE ) {
5812 partition_count= b_mb_type_info[mb_type].partition_count;
5813 mb_type= b_mb_type_info[mb_type].type;
5816 goto decode_intra_mb;
5818 } else if( h->slice_type == P_TYPE ) {
5820 partition_count= p_mb_type_info[mb_type].partition_count;
5821 mb_type= p_mb_type_info[mb_type].type;
5824 goto decode_intra_mb;
5827 assert(h->slice_type == I_TYPE);
5829 partition_count = 0;
5830 cbp= i_mb_type_info[mb_type].cbp;
5831 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5832 mb_type= i_mb_type_info[mb_type].type;
5834 if(h->mb_field_decoding_flag)
5835 mb_type |= MB_TYPE_INTERLACED;
5837 h->slice_table[ mb_xy ]= h->slice_num;
5839 if(IS_INTRA_PCM(mb_type)) {
/* I_PCM: raw samples follow in the bytestream; the CABAC engine is
 * re-initialized afterwards. */
5843 // We assume these blocks are very rare so we dont optimize it.
5844 // FIXME The two following lines get the bitstream position in the cabac
5845 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5846 ptr= h->cabac.bytestream;
5847 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5849 // The pixels are stored in the same order as levels in h->mb array.
5850 for(y=0; y<16; y++){
5851 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5852 for(x=0; x<16; x++){
5853 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5854 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5858 const int index= 256 + 4*(y&3) + 32*(y>>2);
5860 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5861 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5865 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5867 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5868 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
// Restart the CABAC decoder after the raw PCM payload.
5872 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5874 // All blocks are present
5875 h->cbp_table[mb_xy] = 0x1ef;
5876 h->chroma_pred_mode_table[mb_xy] = 0;
5877 // In deblocking, the quantizer is 0
5878 s->current_picture.qscale_table[mb_xy]= 0;
5879 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5880 // All coeffs are present
5881 memset(h->non_zero_count[mb_xy], 16, 16);
5882 s->current_picture.mb_type[mb_xy]= mb_type;
5886 fill_caches(h, mb_type, 0);
5888 if( IS_INTRA( mb_type ) ) {
// Intra: read 4x4 prediction modes (per 8x8 when 8x8 DCT) or validate
// the 16x16 mode, then the chroma prediction mode.
5890 if( IS_INTRA4x4( mb_type ) ) {
5891 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5892 mb_type |= MB_TYPE_8x8DCT;
5893 for( i = 0; i < 16; i+=4 ) {
5894 int pred = pred_intra_mode( h, i );
5895 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5896 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5899 for( i = 0; i < 16; i++ ) {
5900 int pred = pred_intra_mode( h, i );
5901 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5903 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5906 write_back_intra_pred_mode(h);
5907 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5909 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5910 if( h->intra16x16_pred_mode < 0 ) return -1;
5912 h->chroma_pred_mode_table[mb_xy] =
5913 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5915 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5916 if( h->chroma_pred_mode < 0 ) return -1;
5917 } else if( partition_count == 4 ) {
/* 8x8 partitions: read the four sub_mb_types, then per-list reference
 * indices and motion vectors for each sub-partition. */
5918 int i, j, sub_partition_count[4], list, ref[2][4];
5920 if( h->slice_type == B_TYPE ) {
5921 for( i = 0; i < 4; i++ ) {
5922 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5923 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5924 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5926 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5927 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5928 pred_direct_motion(h, &mb_type);
5929 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5930 for( i = 0; i < 4; i++ )
5931 if( IS_DIRECT(h->sub_mb_type[i]) )
// Mark direct sub-blocks so decode_cabac_mb_ref skips them for context.
5932 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5936 for( i = 0; i < 4; i++ ) {
5937 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5938 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5939 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5943 for( list = 0; list < 2; list++ ) {
5944 if( h->ref_count[list] > 0 ) {
5945 for( i = 0; i < 4; i++ ) {
5946 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5947 if(IS_DIR(h->sub_mb_type[i], 0, list)){
// ref_idx only coded when more than one reference is available.
5948 if( h->ref_count[list] > 1 )
5949 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5955 h->ref_cache[list][ scan8[4*i]+1 ]=
5956 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5962 dct8x8_allowed = get_dct8x8_allowed(h);
5964 for(list=0; list<2; list++){
5966 if(IS_DIRECT(h->sub_mb_type[i])){
5967 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5970 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5972 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5973 const int sub_mb_type= h->sub_mb_type[i];
5974 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5975 for(j=0; j<sub_partition_count[i]; j++){
5978 const int index= 4*i + block_width*j;
5979 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5980 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
// Predict, then add the decoded motion vector difference.
5981 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5983 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5984 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5985 tprintf("final mv:%d %d\n", mx, my);
// Replicate mv/mvd into every 4x4 cache cell the sub-partition covers.
5987 if(IS_SUB_8X8(sub_mb_type)){
5988 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5989 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5990 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5991 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5993 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5994 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5995 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5996 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5997 }else if(IS_SUB_8X4(sub_mb_type)){
5998 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5999 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6001 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6002 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6003 }else if(IS_SUB_4X8(sub_mb_type)){
6004 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6005 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6007 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6008 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6010 assert(IS_SUB_4X4(sub_mb_type));
6011 mv_cache[ 0 ][0]= mx;
6012 mv_cache[ 0 ][1]= my;
6014 mvd_cache[ 0 ][0]= mx - mpx;
6015 mvd_cache[ 0 ][1]= my - mpy;
// Unused list for this 8x8 block: zero the mv/mvd cache cells.
6019 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6020 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6021 p[0] = p[1] = p[8] = p[9] = 0;
6022 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6026 } else if( IS_DIRECT(mb_type) ) {
6027 pred_direct_motion(h, &mb_type);
6028 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6029 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6030 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* Inter 16x16 / 16x8 / 8x16: reference indices first, then one mv
 * (with its mvd) per partition and list. */
6032 int list, mx, my, i, mpx, mpy;
6033 if(IS_16X16(mb_type)){
6034 for(list=0; list<2; list++){
6035 if(IS_DIR(mb_type, 0, list)){
6036 if(h->ref_count[list] > 0 ){
6037 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6038 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6041 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6043 for(list=0; list<2; list++){
6044 if(IS_DIR(mb_type, 0, list)){
6045 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6047 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6048 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6049 tprintf("final mv:%d %d\n", mx, my);
6051 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6052 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6054 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6057 else if(IS_16X8(mb_type)){
6058 for(list=0; list<2; list++){
6059 if(h->ref_count[list]>0){
6061 if(IS_DIR(mb_type, i, list)){
6062 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6063 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6065 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6069 for(list=0; list<2; list++){
6071 if(IS_DIR(mb_type, i, list)){
6072 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6073 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6074 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6075 tprintf("final mv:%d %d\n", mx, my);
6077 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6078 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6080 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6081 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6086 assert(IS_8X16(mb_type));
6087 for(list=0; list<2; list++){
6088 if(h->ref_count[list]>0){
6090 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6091 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6092 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6094 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6098 for(list=0; list<2; list++){
6100 if(IS_DIR(mb_type, i, list)){
6101 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6102 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6103 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6105 tprintf("final mv:%d %d\n", mx, my);
6106 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6107 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6109 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6110 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6117 if( IS_INTER( mb_type ) ) {
6118 h->chroma_pred_mode_table[mb_xy] = 0;
6119 write_back_motion( h, mb_type );
// cbp is decoded for everything except I16x16 (which implies it).
6122 if( !IS_INTRA16x16( mb_type ) ) {
6123 cbp = decode_cabac_mb_cbp_luma( h );
6124 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6127 h->cbp_table[mb_xy] = cbp;
6129 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6130 if( decode_cabac_mb_transform_size( h ) )
6131 mb_type |= MB_TYPE_8x8DCT;
6133 s->current_picture.mb_type[mb_xy]= mb_type;
6135 if( cbp || IS_INTRA16x16( mb_type ) ) {
/* Residual decoding: choose scan tables, read mb_qp_delta, then
 * decode luma (DC+AC for I16x16, 4x4 or 8x8 otherwise) and chroma. */
6136 const uint8_t *scan, *dc_scan;
6139 if(IS_INTERLACED(mb_type)){
6140 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6141 dc_scan= luma_dc_field_scan;
6143 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6144 dc_scan= luma_dc_zigzag_scan;
6147 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6148 if( dqp == INT_MIN ){
6149 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// Wrap qscale back into the legal 0..51 range after adding the delta.
6153 if(((unsigned)s->qscale) > 51){
6154 if(s->qscale<0) s->qscale+= 52;
6155 else s->qscale-= 52;
6157 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6159 if( IS_INTRA16x16( mb_type ) ) {
6161 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6162 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6165 for( i = 0; i < 16; i++ ) {
6166 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6167 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6171 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6175 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6176 if( cbp & (1<<i8x8) ) {
6177 if( IS_8x8DCT(mb_type) ) {
6178 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6179 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6182 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6183 const int index = 4*i8x8 + i4x4;
6184 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6185 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6189 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6190 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6197 for( c = 0; c < 2; c++ ) {
6198 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6199 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6206 for( c = 0; c < 2; c++ ) {
6207 for( i = 0; i < 4; i++ ) {
6208 const int index = 16 + 4 * c + i;
6209 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6210 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6215 uint8_t * const nnz= &h->non_zero_count_cache[0];
6216 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6217 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear every non-zero-count cache entry.
6220 uint8_t * const nnz= &h->non_zero_count_cache[0];
6221 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6222 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6223 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6226 s->current_picture.qscale_table[mb_xy]= s->qscale;
6227 write_back_non_zero_count(h);
/* Deblock one vertical luma edge (16 pixels tall).
 * bS < 4: normal filtering, delegated to the DSP h_loop_filter_luma
 * with per-4-pixel tc0 clipping values.  bS == 4 (intra edge):
 * strong filtering done inline below. */
6233 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6235 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6236 const int alpha = alpha_table[index_a];
6237 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
// tc[i] = -1 disables filtering for that 4-pixel group in the DSP routine.
6242 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6243 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6245 /* 16px edge length, because bS=4 is triggered by being at
6246 * the edge of an intra MB, so all 4 bS are the same */
6247 for( d = 0; d < 16; d++ ) {
6248 const int p0 = pix[-1];
6249 const int p1 = pix[-2];
6250 const int p2 = pix[-3];
6252 const int q0 = pix[0];
6253 const int q1 = pix[1];
6254 const int q2 = pix[2];
// Edge activity thresholds (H.264 8.7.2.3): filter only smooth edges.
6256 if( ABS( p0 - q0 ) < alpha &&
6257 ABS( p1 - p0 ) < beta &&
6258 ABS( q1 - q0 ) < beta ) {
6260 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
// Strong filter: modify up to three pixels on each side.
6261 if( ABS( p2 - p0 ) < beta)
6263 const int p3 = pix[-4];
6265 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6266 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6267 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6270 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6272 if( ABS( q2 - q0 ) < beta)
6274 const int q3 = pix[3];
6276 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6277 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6278 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6281 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback: only p0 and q0 are modified.
6285 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6286 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6288 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge.  Both the normal (bS < 4) and
 * intra (bS == 4) cases are delegated to DSP routines; chroma tc
 * values are tc0+1 per the spec. */
6294 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6296 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6297 const int alpha = alpha_table[index_a];
6298 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6303 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6304 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6306 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock one vertical luma edge of an MBAFF macroblock pair.
 * Processed one row at a time because alpha/beta/tc can differ per
 * row: qp[] holds two quantizers and bS[] eight strengths, and the
 * row-to-index mapping depends on field vs frame decoding. */
6310 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6312 for( i = 0; i < 16; i++, pix += stride) {
6318 int bS_index = (i >> 1);
6319 if (h->mb_field_decoding_flag) {
6321 bS_index |= (i & 1);
6324 if( bS[bS_index] == 0 ) {
// Select the quantizer (and derived thresholds) for this row.
6328 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6329 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6330 alpha = alpha_table[index_a];
6331 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6334 if( bS[bS_index] < 4 ) {
// Normal filtering with tc clipping (H.264 8.7.2.3).
6335 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6336 /* 4px edge length */
6337 const int p0 = pix[-1];
6338 const int p1 = pix[-2];
6339 const int p2 = pix[-3];
6340 const int q0 = pix[0];
6341 const int q1 = pix[1];
6342 const int q2 = pix[2];
6344 if( ABS( p0 - q0 ) < alpha &&
6345 ABS( p1 - p0 ) < beta &&
6346 ABS( q1 - q0 ) < beta ) {
6350 if( ABS( p2 - p0 ) < beta ) {
6351 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6354 if( ABS( q2 - q0 ) < beta ) {
6355 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6359 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6360 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6361 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6362 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// bS == 4: strong (intra-edge) filtering, same maths as filter_mb_edgev.
6365 /* 4px edge length */
6366 const int p0 = pix[-1];
6367 const int p1 = pix[-2];
6368 const int p2 = pix[-3];
6370 const int q0 = pix[0];
6371 const int q1 = pix[1];
6372 const int q2 = pix[2];
6374 if( ABS( p0 - q0 ) < alpha &&
6375 ABS( p1 - p0 ) < beta &&
6376 ABS( q1 - q0 ) < beta ) {
6378 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6379 if( ABS( p2 - p0 ) < beta)
6381 const int p3 = pix[-4];
6383 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6384 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6385 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6388 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6390 if( ABS( q2 - q0 ) < beta)
6392 const int q3 = pix[3];
6394 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6395 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6396 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6399 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6403 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6404 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6406 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one vertical chroma edge of an MBAFF macroblock pair, one
 * row at a time (8 chroma rows).  Chroma only ever modifies p0/q0;
 * the boundary strengths are reused from the corresponding luma edge. */
6411 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6413 for( i = 0; i < 8; i++, pix += stride) {
6421 if( bS[bS_index] == 0 ) {
6425 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6426 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6427 alpha = alpha_table[index_a];
6428 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6429 if( bS[bS_index] < 4 ) {
// Chroma normal filter: tc = tc0 + 1 per the spec.
6430 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6431 /* 2px edge length (because we use same bS than the one for luma) */
6432 const int p0 = pix[-1];
6433 const int p1 = pix[-2];
6434 const int q0 = pix[0];
6435 const int q1 = pix[1];
6437 if( ABS( p0 - q0 ) < alpha &&
6438 ABS( p1 - p0 ) < beta &&
6439 ABS( q1 - q0 ) < beta ) {
6440 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6442 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6443 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6444 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// bS == 4: strong chroma filter (averaging, no tc clipping).
6447 const int p0 = pix[-1];
6448 const int p1 = pix[-2];
6449 const int q0 = pix[0];
6450 const int q1 = pix[1];
6452 if( ABS( p0 - q0 ) < alpha &&
6453 ABS( p1 - p0 ) < beta &&
6454 ABS( q1 - q0 ) < beta ) {
6456 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6457 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6458 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge — the vertical-direction analogue
 * of filter_mb_edgev: pixel offsets are multiples of the stride
 * instead of +/-1.  bS < 4 goes to the DSP v_loop_filter_luma;
 * bS == 4 uses the inline strong filter. */
6464 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6466 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6467 const int alpha = alpha_table[index_a];
6468 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6469 const int pix_next = stride;
6474 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6475 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6477 /* 16px edge length, see filter_mb_edgev */
6478 for( d = 0; d < 16; d++ ) {
6479 const int p0 = pix[-1*pix_next];
6480 const int p1 = pix[-2*pix_next];
6481 const int p2 = pix[-3*pix_next];
6482 const int q0 = pix[0];
6483 const int q1 = pix[1*pix_next];
6484 const int q2 = pix[2*pix_next];
6486 if( ABS( p0 - q0 ) < alpha &&
6487 ABS( p1 - p0 ) < beta &&
6488 ABS( q1 - q0 ) < beta ) {
6490 const int p3 = pix[-4*pix_next];
6491 const int q3 = pix[ 3*pix_next];
6493 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
// Strong filter: up to three pixels modified on each side.
6494 if( ABS( p2 - p0 ) < beta) {
6496 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6497 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6498 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6501 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6503 if( ABS( q2 - q0 ) < beta) {
6505 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6506 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6507 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6510 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback: only p0 and q0 are modified.
6514 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6515 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6517 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge via the dsp vtable.
 * Chroma convention differs from luma: tc[i] = tc0 + 1 for filtered
 * groups and 0 (not -1) for unfiltered ones; bS==4 edges go through the
 * dedicated intra routine with no tc table at all. */
6524 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6526 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6527 const int alpha = alpha_table[index_a];
6528 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6533 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6534 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6536 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Apply the in-loop deblocking filter to one decoded macroblock.
 * Computes boundary strengths (bS) for every internal and border edge,
 * derives averaged QPs across MB boundaries, and dispatches to the
 * edge-filter helpers for luma and both chroma planes.
 * img_y/img_cb/img_cr point at this MB's top-left pixel in each plane. */
6540 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6541 MpegEncContext * const s = &h->s;
6542 const int mb_xy= mb_x + mb_y*s->mb_stride;
6543 int first_vertical_edge_done = 0;
6545 /* FIXME: A given frame may occupy more than one position in
6546 * the reference list. So ref2frm should be populated with
6547 * frame numbers, not indices. */
/* Maps ref_cache values (-2 = unavailable, -1 = intra/none handled via +2
 * offset) to comparable frame ids for the bS motion test below. */
6548 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/* --- MBAFF special case: left edge between MB pairs of different
 * interlace type needs 8 bS values and 2 QPs instead of 4/1. --- */
6551 // left mb is in picture
6552 && h->slice_table[mb_xy-1] != 255
6553 // and current and left pair do not have the same interlaced type
6554 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6555 // and left mb is in the same slice if deblocking_filter == 2
6556 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6557 /* First vertical edge is different in MBAFF frames
6558 * There are 8 different bS to compute and 2 different Qp
6565 first_vertical_edge_done = 1;
6566 for( i = 0; i < 8; i++ ) {
6568 int b_idx= 8 + 4 + 8*y;
6569 int bn_idx= b_idx - 1;
/* Neighbour selection differs between field and frame decoding of the
 * current MB: field rows alternate, frame rows split top/bottom. */
6571 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
/* bS derivation: intra on either side -> strongest; any coded
 * coefficients -> medium; else compare refs and motion vectors. */
6573 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6574 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6576 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6577 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6578 h->non_zero_count_cache[bn_idx] != 0 ) {
6583 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6584 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6585 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6586 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6593 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6594 // Do not use s->qscale as luma quantizer because it has not the same
6595 // value in IPCM macroblocks.
/* One averaged QP per member of the left MB pair. */
6596 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6597 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6598 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6599 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6600 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6601 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6604 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6605 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6606 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6607 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6608 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
/* --- Regular path: iterate both edge directions and up to 4 edges. --- */
6611 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6612 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour MB across the MB-border edge (edge 0). */
6615 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6616 const int mb_type = s->current_picture.mb_type[mb_xy];
6617 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table == 255 marks "outside the picture": skip the border edge. */
6618 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* Skipped 16x16 MBs on both sides need only the border edge filtered. */
6620 const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
6621 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6622 // how often to recheck mv-based bS when iterating between edges
6623 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6624 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6625 // how often to recheck mv-based bS when iterating along each edge
6626 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6628 if (first_vertical_edge_done) {
6630 first_vertical_edge_done = 0;
/* deblocking_filter==2: do not cross slice boundaries. */
6633 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6637 for( edge = start; edge < edges; edge++ ) {
6638 /* mbn_xy: neighbor macroblock */
6639 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6640 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform leaves no coefficients on odd internal edges. */
6644 if( (edge&1) && IS_8x8DCT(mb_type) )
6647 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6648 && !IS_INTERLACED(mb_type)
6649 && IS_INTERLACED(mbn_type)
6651 // This is a special case in the norm where the filtering must
6652 // be done twice (one each of the field) even if we are in a
6653 // frame macroblock.
6655 unsigned int tmp_linesize = 2 * linesize;
6656 unsigned int tmp_uvlinesize = 2 * uvlinesize;
/* Neighbour is the top field MB of the pair above the current pair. */
6657 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* First pass: filter against the top field of the MB pair above. */
6661 if( IS_INTRA(mb_type) ||
6662 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6663 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6666 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6669 // Do not use s->qscale as luma quantizer because it has not the same
6670 // value in IPCM macroblocks.
6671 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6672 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6673 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6674 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6675 chroma_qp = ( h->chroma_qp +
6676 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6677 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6678 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* Second pass: advance to the bottom field MB and filter the other
 * field's edge, offset by one (uv)line into the frame MB. */
6681 mbn_xy += s->mb_stride;
6682 if( IS_INTRA(mb_type) ||
6683 IS_INTRA(mbn_type) ) {
6684 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6687 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6690 // Do not use s->qscale as luma quantizer because it has not the same
6691 // value in IPCM macroblocks.
6692 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6693 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6694 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6695 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6696 chroma_qp = ( h->chroma_qp +
6697 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6698 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6699 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Normal bS derivation for this edge. */
6702 if( IS_INTRA(mb_type) ||
6703 IS_INTRA(mbn_type) ) {
6706 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6707 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6716 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* mask_edge: edges fully inside one partition carry no mv discontinuity. */
6721 if( edge & mask_edge ) {
6722 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* mask_par0 fast path: one ref/mv comparison covers the whole edge. */
6725 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6726 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6727 int bn_idx= b_idx - (dir ? 8:1);
6729 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6730 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6731 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6732 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6734 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Slow path: per-4x4-block bS along the edge. */
6740 for( i = 0; i < 4; i++ ) {
6741 int x = dir == 0 ? edge : i;
6742 int y = dir == 0 ? i : edge;
6743 int b_idx= 8 + 4 + x + 8*y;
6744 int bn_idx= b_idx - (dir ? 8:1);
6746 if( h->non_zero_count_cache[b_idx] != 0 ||
6747 h->non_zero_count_cache[bn_idx] != 0 ) {
6753 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6754 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6755 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6756 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6764 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
/* Finally filter the edge: luma always, chroma only on even edges
 * (chroma is subsampled, so edges map 2:1). */
6769 // Do not use s->qscale as luma quantizer because it has not the same
6770 // value in IPCM macroblocks.
6771 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6772 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6773 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6774 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6776 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6777 if( (edge&1) == 0 ) {
6778 int chroma_qp = ( h->chroma_qp +
6779 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6780 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6781 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6784 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6785 if( (edge&1) == 0 ) {
6786 int chroma_qp = ( h->chroma_qp +
6787 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6788 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6789 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode one slice: CABAC path, CAVLC path, and (dead) partitioned path.
 * Walks MB by MB, calling the entropy decoder then hl_decode_mb(), and
 * reports decoded/errored regions to the error concealment via
 * ff_er_add_slice().  Returns 0 on slice end, -1 on error.
 * FIX: line "if(get_bits_count(s->?gb) >= s->gb?.size_in_bits)" contained
 * corrupted '?' tokens (invalid C); restored to s->gb / s->gb.size_in_bits,
 * matching the identical test on the following line. */
6796 static int decode_slice(H264Context *h){
6797 MpegEncContext * const s = &h->s;
6798 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6802 if( h->pps.cabac ) {
/* CABAC slices start byte-aligned. */
6806 align_get_bits( &s->gb );
6809 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6810 ff_init_cabac_decoder( &h->cabac,
6811 s->gb.buffer + get_bits_count(&s->gb)/8,
6812 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6813 /* calculate pre-state */
/* Context initialisation per the spec: pre in [1,126], split into
 * (state, MPS) pairs around the midpoint 64. */
6814 for( i= 0; i < 460; i++ ) {
6816 if( h->slice_type == I_TYPE )
6817 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6819 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6822 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6824 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6828 int ret = decode_mb_cabac(h);
6831 if(ret>=0) hl_decode_mb(h);
6833 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6834 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6837 if(ret>=0) ret = decode_mb_cabac(h);
6839 if(ret>=0) hl_decode_mb(h);
/* end_of_slice_flag, decoded with the terminate context. */
6842 eos = get_cabac_terminate( &h->cabac );
6844 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6845 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6846 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6850 if( ++s->mb_x >= s->mb_width ) {
6852 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6854 if(h->mb_aff_frame) {
6859 if( eos || s->mb_y >= s->mb_height ) {
6860 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6861 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6868 int ret = decode_mb_cavlc(h);
6870 if(ret>=0) hl_decode_mb(h);
6872 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6874 ret = decode_mb_cavlc(h);
6876 if(ret>=0) hl_decode_mb(h);
6881 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6882 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6887 if(++s->mb_x >= s->mb_width){
6889 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6891 if(h->mb_aff_frame) {
6894 if(s->mb_y >= s->mb_height){
6895 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exact bit consumption distinguishes a clean slice end from overread. */
6897 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6898 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6902 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6909 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6910 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6911 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6912 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6916 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Legacy/disabled path (note: uses s->gb without '&' unlike above). */
6925 for(;s->mb_y < s->mb_height; s->mb_y++){
6926 for(;s->mb_x < s->mb_width; s->mb_x++){
6927 int ret= decode_mb(h);
6932 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6933 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6938 if(++s->mb_x >= s->mb_width){
6940 if(++s->mb_y >= s->mb_height){
6941 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6942 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6946 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6953 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6954 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6955 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6959 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6966 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6969 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of the given size.
 * The first 16 bytes are the UUID; the text after it is scanned for an
 * x264 version banner so known encoder bugs can be worked around via
 * h->x264_build.  Remaining payload bytes are skipped. */
6972 static int decode_unregistered_user_data(H264Context *h, int size){
6973 MpegEncContext * const s = &h->s;
6974 uint8_t user_data[16+256];
/* Copy at most sizeof(user_data)-1 bytes, leaving room for a terminator.
 * NOTE(review): the sscanf below assumes user_data is NUL-terminated;
 * the terminating store is not visible here — confirm it exists. */
6980 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6981 user_data[i]= get_bits(&s->gb, 8);
6985 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6986 if(e==1 && build>=0)
6987 h->x264_build= build;
6989 if(s->avctx->debug & FF_DEBUG_BUGS)
6990 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip whatever part of the payload did not fit in the local buffer. */
6993 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: a sequence of (type, size, payload) messages.
 * Type and size use the spec's byte-wise extension coding where each
 * 0xFF byte adds 255 and the first non-0xFF byte terminates the value. */
6998 static int decode_sei(H264Context *h){
6999 MpegEncContext * const s = &h->s;
7001 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7006 type+= show_bits(&s->gb, 8);
7007 }while(get_bits(&s->gb, 8) == 255);
7011 size+= show_bits(&s->gb, 8);
7012 }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): the trailing ';' makes this 'if' a no-op statement —
 * looks like it should guard a 'return -1;'.  Verify against intent. */
7016 if(decode_unregistered_user_data(h, size) < 0);
/* Unknown/unhandled payload types are skipped wholesale. */
7020 skip_bits(&s->gb, 8*size);
7023 //FIXME check bits here
7024 align_get_bits(&s->gb);
/* Parse (and discard) HRD parameters from the VUI (spec Annex E).
 * Nothing is stored; the fields are only consumed to keep the
 * bitstream position correct. */
7030 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7031 MpegEncContext * const s = &h->s;
7033 cpb_count = get_ue_golomb(&s->gb) + 1;
7034 get_bits(&s->gb, 4); /* bit_rate_scale */
7035 get_bits(&s->gb, 4); /* cpb_size_scale */
7036 for(i=0; i<cpb_count; i++){
7037 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7038 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7039 get_bits1(&s->gb); /* cbr_flag */
7041 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7042 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7043 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7044 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse VUI parameters (spec Annex E).  Stores sample aspect ratio,
 * timing info, and bitstream restrictions into *sps; everything else is
 * consumed and discarded to keep the bit reader in sync. */
7047 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7048 MpegEncContext * const s = &h->s;
7049 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7050 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7052 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7054 if( aspect_ratio_info_present_flag ) {
7055 aspect_ratio_idc= get_bits(&s->gb, 8);
/* idc 255 (EXTENDED_SAR) carries an explicit num/den pair;
 * idc 1..15 indexes the predefined pixel_aspect table. */
7056 if( aspect_ratio_idc == EXTENDED_SAR ) {
7057 sps->sar.num= get_bits(&s->gb, 16);
7058 sps->sar.den= get_bits(&s->gb, 16);
7059 }else if(aspect_ratio_idc < 16){
7060 sps->sar= pixel_aspect[aspect_ratio_idc];
7062 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7069 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7071 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7072 get_bits1(&s->gb); /* overscan_appropriate_flag */
7075 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7076 get_bits(&s->gb, 3); /* video_format */
7077 get_bits1(&s->gb); /* video_full_range_flag */
7078 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7079 get_bits(&s->gb, 8); /* colour_primaries */
7080 get_bits(&s->gb, 8); /* transfer_characteristics */
7081 get_bits(&s->gb, 8); /* matrix_coefficients */
7085 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7086 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7087 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7090 sps->timing_info_present_flag = get_bits1(&s->gb);
7091 if(sps->timing_info_present_flag){
7092 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7093 sps->time_scale = get_bits_long(&s->gb, 32);
7094 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* Both HRD variants share the same syntax; parsed twice if both present. */
7097 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7098 if(nal_hrd_parameters_present_flag)
7099 decode_hrd_parameters(h, sps);
7100 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7101 if(vcl_hrd_parameters_present_flag)
7102 decode_hrd_parameters(h, sps);
7103 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7104 get_bits1(&s->gb); /* low_delay_hrd_flag */
7105 get_bits1(&s->gb); /* pic_struct_present_flag */
7107 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7108 if(sps->bitstream_restriction_flag){
7109 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7110 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7111 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7112 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7113 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7114 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7115 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parse one quantisation scaling list (16 or 64 entries, spec 7.3.2.1.1).
 * If the list is absent, fallback_list is copied; if the first delta makes
 * next == 0, the JVT default list is used.  Otherwise entries are delta
 * coded in zigzag order, with a value of 0 meaning "repeat previous". */
7121 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7122 const uint8_t *jvt_list, const uint8_t *fallback_list){
7123 MpegEncContext * const s = &h->s;
7124 int i, last = 8, next = 8;
7125 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7126 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7127 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7129 for(i=0;i<size;i++){
/* delta_scale is signed; values wrap modulo 256. */
7131 next = (last + get_se_golomb(&s->gb)) & 0xff;
7132 if(!i && !next){ /* matrix not written, we use the preset one */
7133 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7136 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS.
 * is_sps selects the fallback rules: a PPS falls back to the SPS
 * matrices when the SPS transmitted them, otherwise to the flat/default
 * lists per spec 7.3.2.1.1 fall-back rules A and B. */
7140 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7141 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7142 MpegEncContext * const s = &h->s;
7143 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7144 const uint8_t *fallback[4] = {
7145 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7146 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7147 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7148 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7150 if(get_bits1(&s->gb)){
7151 sps->scaling_matrix_present |= is_sps;
/* List order per spec Table 7-2: Y, Cb, Cr (intra), then Y, Cb, Cr (inter);
 * each non-Y list falls back to the previously decoded list. */
7152 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7153 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cb
7154 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cr
7155 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7156 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cb
7157 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cr
7158 if(is_sps || pps->transform_8x8_mode){
7159 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7160 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7162 } else if(fallback_sps) {
7163 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7164 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parse a sequence parameter set NAL (spec 7.3.2.1) into
 * h->sps_buffer[sps_id].  Returns 0 on success (error paths not all
 * visible in this excerpt). */
7168 static inline int decode_seq_parameter_set(H264Context *h){
7169 MpegEncContext * const s = &h->s;
7170 int profile_idc, level_idc;
7174 profile_idc= get_bits(&s->gb, 8);
7175 get_bits1(&s->gb); //constraint_set0_flag
7176 get_bits1(&s->gb); //constraint_set1_flag
7177 get_bits1(&s->gb); //constraint_set2_flag
7178 get_bits1(&s->gb); //constraint_set3_flag
7179 get_bits(&s->gb, 4); // reserved
7180 level_idc= get_bits(&s->gb, 8);
7181 sps_id= get_ue_golomb(&s->gb);
7183 sps= &h->sps_buffer[ sps_id ];
7184 sps->profile_idc= profile_idc;
7185 sps->level_idc= level_idc;
/* High-profile extension fields (chroma format, bit depth, 8x8 lists). */
7187 if(sps->profile_idc >= 100){ //high profile
7188 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7189 get_bits1(&s->gb); //residual_color_transform_flag
7190 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7191 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7192 sps->transform_bypass = get_bits1(&s->gb);
7193 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7195 sps->scaling_matrix_present = 0;
7197 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7198 sps->poc_type= get_ue_golomb(&s->gb);
/* POC type selects how picture order count is derived (spec 8.2.1). */
7200 if(sps->poc_type == 0){ //FIXME #define
7201 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7202 } else if(sps->poc_type == 1){//FIXME #define
7203 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7204 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7205 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7206 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7208 for(i=0; i<sps->poc_cycle_length; i++)
7209 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7211 if(sps->poc_type > 2){
7212 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7216 sps->ref_frame_count= get_ue_golomb(&s->gb);
7217 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7218 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7220 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7221 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7222 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard 16*mb dimensions against overflow before the generic size check. */
7223 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7224 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7227 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7228 if(!sps->frame_mbs_only_flag)
7229 sps->mb_aff= get_bits1(&s->gb);
7233 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7235 sps->crop= get_bits1(&s->gb);
7237 sps->crop_left = get_ue_golomb(&s->gb);
7238 sps->crop_right = get_ue_golomb(&s->gb);
7239 sps->crop_top = get_ue_golomb(&s->gb);
7240 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Left/top cropping would shift the picture origin, which this decoder
 * does not implement; warn instead of failing. */
7241 if(sps->crop_left || sps->crop_top){
7242 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7248 sps->crop_bottom= 0;
7251 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7252 if( sps->vui_parameters_present_flag )
7253 decode_vui_parameters(h, sps);
7255 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7256 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7257 sps_id, sps->profile_idc, sps->level_idc,
7259 sps->ref_frame_count,
7260 sps->mb_width, sps->mb_height,
7261 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7262 sps->direct_8x8_inference_flag ? "8B8" : "",
7263 sps->crop_left, sps->crop_right,
7264 sps->crop_top, sps->crop_bottom,
7265 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a picture parameter set NAL (spec 7.3.2.2) into
 * h->pps_buffer[pps_id].  bit_length is used to detect the optional
 * trailing high-profile fields (transform_8x8_mode, scaling matrices,
 * second_chroma_qp_index_offset). */
7271 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7272 MpegEncContext * const s = &h->s;
7273 int pps_id= get_ue_golomb(&s->gb);
7274 PPS *pps= &h->pps_buffer[pps_id];
7276 pps->sps_id= get_ue_golomb(&s->gb);
7277 pps->cabac= get_bits1(&s->gb);
7278 pps->pic_order_present= get_bits1(&s->gb);
7279 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (slice groups) is parsed far enough to report but not supported;
 * the commented table below is the spec syntax kept for reference. */
7280 if(pps->slice_group_count > 1 ){
7281 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7282 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7283 switch(pps->mb_slice_group_map_type){
7286 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7287 | run_length[ i ] |1 |ue(v) |
7292 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7294 | top_left_mb[ i ] |1 |ue(v) |
7295 | bottom_right_mb[ i ] |1 |ue(v) |
7303 | slice_group_change_direction_flag |1 |u(1) |
7304 | slice_group_change_rate_minus1 |1 |ue(v) |
7309 | slice_group_id_cnt_minus1 |1 |ue(v) |
7310 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7312 | slice_group_id[ i ] |1 |u(v) |
7317 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7318 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7319 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7320 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7324 pps->weighted_pred= get_bits1(&s->gb);
7325 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7326 pps->init_qp= get_se_golomb(&s->gb) + 26;
7327 pps->init_qs= get_se_golomb(&s->gb) + 26;
7328 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7329 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7330 pps->constrained_intra_pred= get_bits1(&s->gb);
7331 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* Default scaling matrices are flat (all 16s) unless overridden below. */
7333 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7334 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* more_rbsp_data(): optional high-profile PPS extension. */
7336 if(get_bits_count(&s->gb) < bit_length){
7337 pps->transform_8x8_mode= get_bits1(&s->gb);
7338 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7339 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7342 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7343 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7344 pps_id, pps->sps_id,
7345 pps->cabac ? "CABAC" : "CAVLC",
7346 pps->slice_group_count,
7347 pps->ref_count[0], pps->ref_count[1],
7348 pps->weighted_pred ? "weighted" : "",
7349 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7350 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7351 pps->constrained_intra_pred ? "CONSTR" : "",
7352 pps->redundant_pic_cnt_present ? "REDU" : "",
7353 pps->transform_8x8_mode ? "8x8DCT" : ""
7361 * finds the end of the current frame in the bitstream.
7362 * @return the position of the first byte of the next frame, or -1
/* Scans for NAL start codes (00 00 01 xx) and uses the NAL type plus the
 * first_mb_in_slice bit to decide where one access unit ends and the
 * next begins.  State machine state persists across calls in pc. */
7364 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7367 ParseContext *pc = &(h->s.parse_context);
7368 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7369 // mb_addr= pc->mb_addr - 1;
7371 for(i=0; i<=buf_size; i++){
/* NAL types 1/2/5: slice data — a new frame starts when a slice with
 * first_mb_in_slice == 0 follows an already-started frame. */
7372 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7373 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7374 if(pc->frame_start_found){
7375 // If there isn't one more byte in the buffer
7376 // the test on first_mb_in_slice cannot be done yet
7377 // do it at next call.
7378 if (i >= buf_size) break;
/* ue(v) first_mb_in_slice == 0 encodes as a leading 1 bit. */
7379 if (buf[i] & 0x80) {
7380 // first_mb_in_slice is 0, probably the first nal of a new
7382 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7384 pc->frame_start_found= 0;
7388 pc->frame_start_found = 1;
/* NAL types 7/8/9 (SPS/PPS/AUD) always terminate a started frame. */
7390 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7391 if(pc->frame_start_found){
7393 pc->frame_start_found= 0;
7398 state= (state<<8) | buf[i];
7402 return END_NOT_FOUND;
/* AVCodecParser callback: locate a complete frame in the input and hand
 * it back via poutbuf.  Buffers partial frames across calls through
 * ff_combine_frame() / the ParseContext. */
7405 static int h264_parse(AVCodecParserContext *s,
7406 AVCodecContext *avctx,
7407 uint8_t **poutbuf, int *poutbuf_size,
7408 const uint8_t *buf, int buf_size)
7410 H264Context *h = s->priv_data;
7411 ParseContext *pc = &h->s.parse_context;
7414 next= find_frame_end(h, buf, buf_size);
/* Frame incomplete: stash the data and output nothing this call. */
7416 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7422 *poutbuf = (uint8_t *)buf;
7423 *poutbuf_size = buf_size;
/* AVCodecParser split callback: return the size of the leading
 * extradata-like header (SPS/PPS run) in buf, i.e. the offset of the
 * first NAL that is not SPS/PPS/AUD after an SPS was seen. */
7427 static int h264_split(AVCodecContext *avctx,
7428 const uint8_t *buf, int buf_size)
7431 uint32_t state = -1;
7434 for(i=0; i<=buf_size; i++){
/* 0x107: SPS start code — remember that a header sequence began. */
7435 if((state&0xFFFFFF1F) == 0x107)
7437 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* Any other NAL after the SPS ends the header; back up over the start
 * code and any zero padding preceding it. */
7439 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7441 while(i>4 && buf[i-5]==0) i--;
7446 state= (state<<8) | buf[i];
/* Walk the input buffer NAL unit by NAL unit, unescape each one and
 * dispatch on nal_unit_type (slices, data partitions, SEI, SPS, PPS,
 * ...).  Handles both AVC ("mp4"-style length-prefixed) and Annex-B
 * (start-code-prefixed) layouts.  Returns the number of bytes consumed.
 * NOTE(review): interior lines are missing from this excerpt (variable
 * declarations, loop header, several braces). */
7452 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7453 MpegEncContext * const s = &h->s;
7454 AVCodecContext * const avctx= s->avctx;
/* Debug dump of the first input bytes. */
7458 for(i=0; i<50; i++){
7459 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7463 s->current_picture_ptr= NULL;
7472 if(buf_index >= buf_size) break;
/* AVC layout: each NAL is preceded by a big-endian length field of
 * h->nal_length_size bytes. */
7474 for(i = 0; i < h->nal_length_size; i++)
7475 nalsize = (nalsize << 8) | buf[buf_index++];
7477 // Annex-B layout: search for the 00 00 01 start code prefix
7478 for(; buf_index + 3 < buf_size; buf_index++){
7479 // this should always succeed in the first iteration
7480 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7484 if(buf_index+3 >= buf_size) break;
/* Unescape emulation-prevention bytes; ptr points at the RBSP. */
7489 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7490 if(ptr[dst_length - 1] == 0) dst_length--;
/* Strip the rbsp_stop_one_bit and trailing zero bits to get the exact
 * payload length in bits. */
7491 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7493 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7494 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7497 if (h->is_avc && (nalsize != consumed))
7498 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7500 buf_index += consumed;
/* Skip non-reference NALs when the caller asked to drop them. */
7502 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME don't discard SEI id
7503 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7506 switch(h->nal_unit_type){
/* IDR slice: reset the reference-picture state. */
7508 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7510 init_get_bits(&s->gb, ptr, bit_length);
7512 h->inter_gb_ptr= &s->gb;
7513 s->data_partitioning = 0;
7515 if(decode_slice_header(h) < 0){
7516 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Decode the slice unless frame-skipping options say otherwise. */
7519 if(h->redundant_pic_count==0 && s->hurry_up < 5
7520 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7521 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7522 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7523 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header + partition-A data; B/C follow in
 * separate NAL units. */
7527 init_get_bits(&s->gb, ptr, bit_length);
7529 h->inter_gb_ptr= NULL;
7530 s->data_partitioning = 1;
7532 if(decode_slice_header(h) < 0){
7533 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Data partition B: intra coefficients. */
7537 init_get_bits(&h->intra_gb, ptr, bit_length);
7538 h->intra_gb_ptr= &h->intra_gb;
/* Data partition C: inter coefficients; once all partitions are in,
 * the slice can be decoded (subject to the skip options below). */
7541 init_get_bits(&h->inter_gb, ptr, bit_length);
7542 h->inter_gb_ptr= &h->inter_gb;
7544 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7546 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7547 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7548 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7549 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message. */
7553 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7557 init_get_bits(&s->gb, ptr, bit_length);
7558 decode_seq_parameter_set(h);
7560 if(s->flags& CODEC_FLAG_LOW_DELAY)
7563 if(avctx->has_b_frames < 2)
7564 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7567 init_get_bits(&s->gb, ptr, bit_length);
7569 decode_picture_parameter_set(h, bit_length);
/* Recognized-but-ignored NAL types fall through without work. */
7573 case NAL_END_SEQUENCE:
7574 case NAL_END_STREAM:
7575 case NAL_FILLER_DATA:
7577 case NAL_AUXILIARY_SLICE:
7580 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7584 if(!s->current_picture_ptr) return buf_index; //no frame
/* Per-frame bookkeeping: picture metadata, POC/frame_num history, and
 * reference picture marking (MMCO) for reference frames. */
7586 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7587 s->current_picture_ptr->pict_type= s->pict_type;
7588 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7590 h->prev_frame_num_offset= h->frame_num_offset;
7591 h->prev_frame_num= h->frame_num;
7592 if(s->current_picture_ptr->reference){
7593 h->prev_poc_msb= h->poc_msb;
7594 h->prev_poc_lsb= h->poc_lsb;
7596 if(s->current_picture_ptr->reference)
7597 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7607 * returns the number of bytes consumed for building the current frame
7609 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* In truncated mode the parse context may hold bytes from a previous
 * call; subtract them so the caller is told the right consumption. */
7610 if(s->flags&CODEC_FLAG_TRUNCATED){
7611 pos -= s->parse_context.last_index;
7612 if(pos<0) pos=0; // FIXME remove (unneeded?)
/* Clamp to [1, buf_size] so the caller always makes forward progress. */
7616 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7617 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Main decode entry point: parse extradata (avcC) on the first call if
 * needed, decode the NAL units of one frame, then reorder delayed
 * pictures into display order and output at most one AVFrame.
 * NOTE(review): interior lines are missing from this excerpt (several
 * declarations, braces and returns). */
7623 static int decode_frame(AVCodecContext *avctx,
7624 void *data, int *data_size,
7625 uint8_t *buf, int buf_size)
7627 H264Context *h = avctx->priv_data;
7628 MpegEncContext *s = &h->s;
7629 AVFrame *pict = data;
7632 s->flags= avctx->flags;
7633 s->flags2= avctx->flags2;
7635 /* no supplementary picture */
7636 if (buf_size == 0) {
/* Truncated-input mode: buffer partial frames across calls. */
7640 if(s->flags&CODEC_FLAG_TRUNCATED){
7641 int next= find_frame_end(h, buf, buf_size);
7643 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7645 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* First call with AVC (ISO/mp4) extradata: parse the avcC box to pull
 * out the SPS/PPS sets and the NAL length-field size. */
7648 if(h->is_avc && !h->got_avcC) {
7649 int i, cnt, nalsize;
7650 unsigned char *p = avctx->extradata;
7651 if(avctx->extradata_size < 7) {
7652 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7656 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7659 /* sps and pps in the avcC always have length coded with 2 bytes,
7660 so put a fake nal_length_size = 2 while parsing them */
7661 h->nal_length_size = 2;
7662 // Decode sps from avcC
7663 cnt = *(p+5) & 0x1f; // Number of sps
7665 for (i = 0; i < cnt; i++) {
7666 nalsize = BE_16(p) + 2;
7667 if(decode_nal_units(h, p, nalsize) < 0) {
7668 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7673 // Decode pps from avcC
7674 cnt = *(p++); // Number of pps
7675 for (i = 0; i < cnt; i++) {
7676 nalsize = BE_16(p) + 2;
7677 if(decode_nal_units(h, p, nalsize) != nalsize) {
7678 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7683 // Now store right nal length size, that will be use to parse all other nals
7684 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7685 // Do not reparse avcC
/* Annex-B extradata (e.g. from a raw stream): decode it once up front. */
7689 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7690 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7694 buf_index=decode_nal_units(h, buf, buf_size);
7698 //FIXME do something with unavailable reference frames
7700 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7701 if(!s->current_picture_ptr){
7702 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7707 Picture *out = s->current_picture_ptr;
7708 #if 0 //decode order
7709 *data_size = sizeof(AVFrame);
7711 /* Sort B-frames into display order */
7712 Picture *cur = s->current_picture_ptr;
7713 Picture *prev = h->delayed_output_pic;
7718 int dropped_frame = 0;
/* Honor the stream's declared reorder depth if present. */
7721 if(h->sps.bitstream_restriction_flag
7722 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7723 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the delayed-output queue. */
7727 while(h->delayed_pic[pics]) pics++;
7728 h->delayed_pic[pics++] = cur;
7729 if(cur->reference == 0)
7732 for(i=0; h->delayed_pic[i]; i++)
7733 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Choose the queued picture with the smallest POC as the next one to
 * display (stopping at a keyframe boundary). */
7736 out = h->delayed_pic[0];
7737 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7738 if(h->delayed_pic[i]->poc < out->poc){
7739 out = h->delayed_pic[i];
/* Detect out-of-order output and grow has_b_frames if the stream turns
 * out to need more reorder delay than assumed. */
7743 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7744 if(prev && pics <= s->avctx->has_b_frames)
7746 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7748 ((!cross_idr && prev && out->poc > prev->poc + 2)
7749 || cur->pict_type == B_TYPE)))
7752 s->avctx->has_b_frames++;
7755 else if(out_of_order)
/* Pop the chosen picture from the queue once enough delay has built up. */
7758 if(out_of_order || pics > s->avctx->has_b_frames){
7759 dropped_frame = (out != h->delayed_pic[out_idx]);
7760 for(i=out_idx; h->delayed_pic[i]; i++)
7761 h->delayed_pic[i] = h->delayed_pic[i+1];
7764 if(prev == out && !dropped_frame)
7767 *data_size = sizeof(AVFrame);
/* Release the previously output picture's reference flag. */
7768 if(prev && prev != out && prev->reference == 1)
7769 prev->reference = 0;
7770 h->delayed_output_pic = out;
7774 *pict= *(AVFrame*)out;
7776 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7779 assert(pict->data[0] || !*data_size);
7780 ff_print_debug_info(s, pict);
7781 //printf("out %d\n", (int)pict->data[0]);
7784 /* Return the Picture timestamp as the frame number */
7785 /* we subtract 1 because it is added in utils.c */
7786 avctx->frame_number = s->picture_number - 1;
7788 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the macroblocks
 * neighboring the current one (top-left, top, top-right, left);
 * a neighbor is available only if it belongs to the same slice. */
7791 static inline void fill_mb_avail(H264Context *h){
7792 MpegEncContext * const s = &h->s;
7793 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7796 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7797 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7798 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7804 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7805 h->mb_avail[4]= 1; //FIXME move out
7806 h->mb_avail[5]= 0; //FIXME move out
/* Self-test / benchmark harness (normally compiled under an #ifdef TEST
 * main()): exercises exp-Golomb coding, the 4x4 (I)DCT round trip, the
 * quantizer, and NAL escaping/unescaping.
 * NOTE(review): the enclosing function header and many interior lines
 * are missing from this excerpt. */
7812 #define SIZE (COUNT*40)
7818 // int int_temp[10000];
7820 AVCodecContext avctx;
7822 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb: write COUNT codes, read them back --- */
7824 init_put_bits(&pb, temp, SIZE);
7825 printf("testing unsigned exp golomb\n");
7826 for(i=0; i<COUNT; i++){
7828 set_ue_golomb(&pb, i);
7829 STOP_TIMER("set_ue_golomb");
7831 flush_put_bits(&pb);
7833 init_get_bits(&gb, temp, 8*SIZE);
7834 for(i=0; i<COUNT; i++){
7837 s= show_bits(&gb, 24);
7840 j= get_ue_golomb(&gb);
7842 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7845 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb: same round trip with signed values --- */
7849 init_put_bits(&pb, temp, SIZE);
7850 printf("testing signed exp golomb\n");
7851 for(i=0; i<COUNT; i++){
7853 set_se_golomb(&pb, i - COUNT/2);
7854 STOP_TIMER("set_se_golomb");
7856 flush_put_bits(&pb);
7858 init_get_bits(&gb, temp, 8*SIZE);
7859 for(i=0; i<COUNT; i++){
7862 s= show_bits(&gb, 24);
7865 j= get_se_golomb(&gb);
7866 if(j != i - COUNT/2){
7867 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7870 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round trip on random blocks, measuring error --- */
7873 printf("testing 4x4 (I)DCT\n");
7876 uint8_t src[16], ref[16];
7877 uint64_t error= 0, max_error=0;
7879 for(i=0; i<COUNT; i++){
7881 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7882 for(j=0; j<16; j++){
7883 ref[j]= random()%255;
7884 src[j]= random()%255;
7887 h264_diff_dct_c(block, src, ref, 4);
/* Crude quantize/dequantize approximation before the inverse transform. */
7890 for(j=0; j<16; j++){
7891 // printf("%d ", block[j]);
7892 block[j]= block[j]*4;
7893 if(j&1) block[j]= (block[j]*4 + 2)/5;
7894 if(j&4) block[j]= (block[j]*4 + 2)/5;
7898 s->dsp.h264_idct_add(ref, block, 4);
7899 /* for(j=0; j<16; j++){
7900 printf("%d ", ref[j]);
/* Accumulate reconstruction error statistics. */
7904 for(j=0; j<16; j++){
7905 int diff= ABS(src[j] - ref[j]);
7908 max_error= FFMAX(max_error, diff);
7911 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values --- */
7913 printf("testing quantizer\n");
7914 for(qp=0; qp<52; qp++){
7916 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round trip on random bitstreams --- */
7920 printf("Testing NAL layer\n");
7922 uint8_t bitstream[COUNT];
7923 uint8_t nal[COUNT*2];
7925 memset(&h, 0, sizeof(H264Context));
7927 for(i=0; i<COUNT; i++){
/* Fill with nonzero bytes, then punch in some zeros (the case the
 * escaping exists for). */
7935 for(j=0; j<COUNT; j++){
7936 bitstream[j]= (random() % 255) + 1;
7939 for(j=0; j<zeros; j++){
7940 int pos= random() % COUNT;
7941 while(bitstream[pos] == 0){
7950 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7952 printf("encoding failed\n");
7956 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7960 if(out_length != COUNT){
7961 printf("incorrect length %d %d\n", out_length, COUNT);
7965 if(consumed != nal_length){
7966 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7970 if(memcmp(bitstream, out, COUNT)){
7971 printf("missmatch\n");
7976 printf("Testing RBSP\n");
/* Codec close callback: free the decoder's per-context allocations.
 * NOTE(review): lines are missing from this excerpt (e.g. the return). */
7984 static int decode_end(AVCodecContext *avctx)
7986 H264Context *h = avctx->priv_data;
7987 MpegEncContext *s = &h->s;
/* Release the NAL unescape scratch buffer and the per-frame tables. */
7989 av_freep(&h->rbsp_buffer);
7990 free_tables(h); //FIXME cleanup init stuff perhaps
7993 // memset(h, 0, sizeof(H264Context));
/* Decoder registration table; most initializer fields are missing from
 * this excerpt. */
7999 AVCodec h264_decoder = {
8003 sizeof(H264Context),
/* DR1 (direct rendering), truncated-input support, and delayed output
 * (needed for B-frame reordering); slice drawing is disabled. */
8008 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8012 AVCodecParser h264_parser = {
8014 sizeof(H264Context),