2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
99 * Picture parameter set
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
122 * Memory management control operation opcode.
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
146 typedef struct H264Context{
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
164 uint8_t *rbsp_buffer;
165 unsigned int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
208 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
209 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
354 int16_t (*mvd_table[2])[2];
355 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 uint8_t zigzag_scan8x8[64];
362 uint8_t zigzag_scan8x8_cavlc[64];
363 const uint8_t *zigzag_scan_q0;
364 const uint8_t *field_scan_q0;
365 const uint8_t *zigzag_scan8x8_q0;
366 const uint8_t *zigzag_scan8x8_cavlc_q0;
371 static VLC coeff_token_vlc[4];
372 static VLC chroma_dc_coeff_token_vlc;
374 static VLC total_zeros_vlc[15];
375 static VLC chroma_dc_total_zeros_vlc[3];
377 static VLC run_vlc[6];
380 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
381 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
382 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
384 static always_inline uint32_t pack16to32(int a, int b){
385 #ifdef WORDS_BIGENDIAN
/* big-endian host: b goes in the low half, a in the high half */
386 return (b&0xFFFF) + (a<<16);
/* little-endian host: a in the low half, b in the high half
 * NOTE(review): the #else/#endif and closing brace between these two
 * returns are missing from this excerpt — presumably an #else separates
 * them; confirm against the full file. */
388 return (a&0xFFFF) + (b<<16);
394 * @param h height of the rectangle, should be a constant
395 * @param w width of the rectangle, should be a constant
396 * @param size the size of val (1 or 4), should be a constant
398 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
/* Fills a w x h rectangle of 1- or 4-byte cells with val, using the widest
 * store the (compile-time constant) dimensions allow: 16-bit for w==2,
 * 32-bit for w==4, 64-bit for w==8/16.  For size==1 the byte value is
 * replicated across the word via the 0x0101... multipliers.
 * NOTE(review): several branches (e.g. w==1, w==2&&h==2, w==8&&h==1 first
 * store, w==16&&h==1) and the closing brace are missing from this excerpt. */
399 uint8_t *p= (uint8_t*)vp;
400 assert(size==1 || size==4);
/* alignment/stride preconditions so the wide stores below are legal */
405 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
406 assert((stride&(w-1))==0);
407 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
410 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
411 }else if(w==2 && h==4){
412 *(uint16_t*)(p + 0*stride)=
413 *(uint16_t*)(p + 1*stride)=
414 *(uint16_t*)(p + 2*stride)=
415 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
416 }else if(w==4 && h==1){
417 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
418 }else if(w==4 && h==2){
419 *(uint32_t*)(p + 0*stride)=
420 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
421 }else if(w==4 && h==4){
422 *(uint32_t*)(p + 0*stride)=
423 *(uint32_t*)(p + 1*stride)=
424 *(uint32_t*)(p + 2*stride)=
425 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
426 }else if(w==8 && h==1){
428 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
429 }else if(w==8 && h==2){
430 *(uint32_t*)(p + 0 + 0*stride)=
431 *(uint32_t*)(p + 4 + 0*stride)=
432 *(uint32_t*)(p + 0 + 1*stride)=
433 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
434 }else if(w==8 && h==4){
/* w==8 with size==4 means two 32-bit cells per row -> one 64-bit store;
 * 0x0100000001ULL duplicates val into both halves */
435 *(uint64_t*)(p + 0*stride)=
436 *(uint64_t*)(p + 1*stride)=
437 *(uint64_t*)(p + 2*stride)=
438 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
439 }else if(w==16 && h==2){
440 *(uint64_t*)(p + 0+0*stride)=
441 *(uint64_t*)(p + 8+0*stride)=
442 *(uint64_t*)(p + 0+1*stride)=
443 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
444 }else if(w==16 && h==4){
445 *(uint64_t*)(p + 0+0*stride)=
446 *(uint64_t*)(p + 8+0*stride)=
447 *(uint64_t*)(p + 0+1*stride)=
448 *(uint64_t*)(p + 8+1*stride)=
449 *(uint64_t*)(p + 0+2*stride)=
450 *(uint64_t*)(p + 8+2*stride)=
451 *(uint64_t*)(p + 0+3*stride)=
452 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
457 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
/* Loads all per-macroblock prediction caches (intra modes, non-zero-count,
 * motion vectors, reference indices, mvd for CABAC, direct flags) from the
 * neighbouring macroblocks into the small 8-wide scratch arrays in
 * H264Context, so the decode loops can index neighbours uniformly via scan8.
 * for_deblock selects a reduced variant used by the deblocking filter.
 * NOTE(review): a large number of original lines are missing from this
 * excerpt (if-headers, loop headers, closing braces); comments below only
 * describe what the visible statements do. */
458 MpegEncContext * const s = &h->s;
459 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
460 int topleft_xy, top_xy, topright_xy, left_xy[2];
461 int topleft_type, top_type, topright_type, left_type[2];
465 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
466 // the actual condition is whether we're on the edge of a slice,
467 // and even then the intra and nnz parts are unnecessary.
468 if(for_deblock && h->slice_num == 1)
471 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* Progressive-frame neighbour addresses: row above is mb_stride away. */
473 top_xy = mb_xy - s->mb_stride;
474 topleft_xy = top_xy - 1;
475 topright_xy= top_xy + 1;
476 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF path: neighbours are derived per macroblock *pair* (s->mb_y & ~1),
 * then adjusted depending on whether each neighbour pair is frame- or
 * field-coded and whether this is the top or bottom MB of its pair. */
486 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
487 const int top_pair_xy = pair_xy - s->mb_stride;
488 const int topleft_pair_xy = top_pair_xy - 1;
489 const int topright_pair_xy = top_pair_xy + 1;
490 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
491 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
492 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
493 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
494 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
495 const int bottom = (s->mb_y & 1);
496 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
498 ? !curr_mb_frame_flag // bottom macroblock
499 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
501 top_xy -= s->mb_stride;
504 ? !curr_mb_frame_flag // bottom macroblock
505 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
507 topleft_xy -= s->mb_stride;
510 ? !curr_mb_frame_flag // bottom macroblock
511 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
513 topright_xy -= s->mb_stride;
/* left neighbour crosses a frame/field coding boundary: re-derive it from
 * the pair address */
515 if (left_mb_frame_flag != curr_mb_frame_flag) {
516 left_xy[1] = left_xy[0] = pair_xy - 1;
517 if (curr_mb_frame_flag) {
538 left_xy[1] += s->mb_stride;
/* publish resolved neighbour addresses for later stages */
551 h->top_mb_xy = top_xy;
552 h->left_mb_xy[0] = left_xy[0];
553 h->left_mb_xy[1] = left_xy[1];
/* Neighbour mb_type lookup.  The "< 255" variant (deblock path) accepts any
 * decoded MB; the "== h->slice_num" variant restricts neighbours to the
 * current slice, treating out-of-slice MBs as unavailable (type 0). */
555 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
556 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
557 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
558 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
559 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
561 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
562 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
563 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
564 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
565 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: per-4x4-block availability bitmasks of neighbouring
 * samples; start from "all available" and clear bits for each missing or
 * (with constrained_intra_pred) inter-coded neighbour. */
568 if(IS_INTRA(mb_type)){
569 h->topleft_samples_available=
570 h->top_samples_available=
571 h->left_samples_available= 0xFFFF;
572 h->topright_samples_available= 0xEEEA;
574 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
575 h->topleft_samples_available= 0xB3FF;
576 h->top_samples_available= 0x33FF;
577 h->topright_samples_available= 0x26EA;
580 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
581 h->topleft_samples_available&= 0xDF5F;
582 h->left_samples_available&= 0x5F5F;
586 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
587 h->topleft_samples_available&= 0x7FFF;
589 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
590 h->topright_samples_available&= 0xFBFF;
/* intra4x4 mode cache: copy the bottom row of the top neighbour and the
 * right column of the left neighbour(s); unavailable neighbours get a
 * fallback ("pred" is set in lines not visible here). */
592 if(IS_INTRA4x4(mb_type)){
593 if(IS_INTRA4x4(top_type)){
594 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
595 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
596 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
597 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
600 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
605 h->intra4x4_pred_mode_cache[4+8*0]=
606 h->intra4x4_pred_mode_cache[5+8*0]=
607 h->intra4x4_pred_mode_cache[6+8*0]=
608 h->intra4x4_pred_mode_cache[7+8*0]= pred;
611 if(IS_INTRA4x4(left_type[i])){
612 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
613 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
616 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
621 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
622 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-count cache: neighbour CBF/nnz values; 64 is the "unavailable"
 * marker, and in CABAC inter MBs unavailable neighbours count as 0. */
637 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
639 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
640 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
641 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
642 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
644 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
645 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
647 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
648 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
651 h->non_zero_count_cache[4+8*0]=
652 h->non_zero_count_cache[5+8*0]=
653 h->non_zero_count_cache[6+8*0]=
654 h->non_zero_count_cache[7+8*0]=
656 h->non_zero_count_cache[1+8*0]=
657 h->non_zero_count_cache[2+8*0]=
659 h->non_zero_count_cache[1+8*3]=
660 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
664 for (i=0; i<2; i++) {
666 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
667 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
668 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
669 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
671 h->non_zero_count_cache[3+8*1 + 2*8*i]=
672 h->non_zero_count_cache[3+8*2 + 2*8*i]=
673 h->non_zero_count_cache[0+8*1 + 8*i]=
674 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* neighbour coded-block-pattern for CABAC context derivation */
681 h->top_cbp = h->cbp_table[top_xy];
682 } else if(IS_INTRA(mb_type)) {
689 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
690 } else if(IS_INTRA(mb_type)) {
696 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
699 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct MBs: load neighbour motion vectors and reference indices
 * into mv_cache/ref_cache for each active list (2 lists for B slices). */
704 //FIXME direct mb can skip much of this
705 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
707 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
708 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
709 /*if(!h->mv_cache_clean[list]){
710 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
711 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
712 h->mv_cache_clean[list]= 1;
716 h->mv_cache_clean[list]= 0;
/* top row: 4 MVs from the bottom b-row of the top MB, 2 ref indices */
718 if(IS_INTER(top_type)){
719 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
720 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
721 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
722 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
723 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
724 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
725 h->ref_cache[list][scan8[0] + 0 - 1*8]=
726 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
727 h->ref_cache[list][scan8[0] + 2 - 1*8]=
728 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
730 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
731 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
732 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
733 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
734 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
737 //FIXME unify cleanup or sth
/* left column, upper half (left_xy[0]) */
738 if(IS_INTER(left_type[0])){
739 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
740 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
742 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
743 h->ref_cache[list][scan8[0] - 1 + 0*8]=
744 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
746 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
747 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
748 h->ref_cache[list][scan8[0] - 1 + 0*8]=
749 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* left column, lower half (left_xy[1]) */
752 if(IS_INTER(left_type[1])){
753 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
754 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
756 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
757 h->ref_cache[list][scan8[0] - 1 + 2*8]=
758 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
760 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
761 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
762 h->ref_cache[list][scan8[0] - 1 + 2*8]=
763 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
764 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright are only needed for spatial MV prediction, not for
 * deblocking or temporal direct */
767 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
770 if(IS_INTER(topleft_type)){
771 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
772 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
773 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
774 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
776 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
777 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
780 if(IS_INTER(topright_type)){
781 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
782 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
783 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
784 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
786 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
787 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* pad the cache cells that never correspond to a real neighbour */
791 h->ref_cache[list][scan8[5 ]+1] =
792 h->ref_cache[list][scan8[7 ]+1] =
793 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
794 h->ref_cache[list][scan8[4 ]] =
795 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
796 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
797 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
798 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
799 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
800 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: mirror the same neighbour layout for motion-vector
 * differences (mvd), used for context selection */
803 /* XXX beurk, Load mvd */
804 if(IS_INTER(topleft_type)){
805 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
806 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
811 if(IS_INTER(top_type)){
812 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
813 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
814 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
815 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
816 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
818 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
819 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
820 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
821 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
823 if(IS_INTER(left_type[0])){
824 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
825 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
826 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
828 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
829 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
831 if(IS_INTER(left_type[1])){
832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
833 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
834 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
836 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
837 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
839 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
840 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
841 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
842 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
843 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the per-8x8 "direct" flags of the neighbours */
845 if(h->slice_type == B_TYPE){
846 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
848 if(IS_DIRECT(top_type)){
849 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
850 }else if(IS_8X8(top_type)){
851 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
852 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
853 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
855 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
859 if(IS_DIRECT(left_type[0])){
860 h->direct_cache[scan8[0] - 1 + 0*8]=
861 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
862 }else if(IS_8X8(left_type[0])){
863 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
864 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
865 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
867 h->direct_cache[scan8[0] - 1 + 0*8]=
868 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* count how many of the top/left neighbours used the 8x8 transform
 * (used as CABAC context for transform_size_8x8_flag) */
876 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
879 static inline void write_back_intra_pred_mode(H264Context *h){
/* Stores the right column and bottom row of the intra4x4 mode cache back
 * into the per-MB intra4x4_pred_mode table, so the MBs to the right and
 * below can read them as neighbours.
 * NOTE(review): the closing lines of this function are missing from this
 * excerpt. */
880 MpegEncContext * const s = &h->s;
881 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
883 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
884 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
885 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
886 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
887 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
888 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
889 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
893 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
895 static inline int check_intra4x4_pred_mode(H264Context *h){
/* Validates the cached intra4x4 prediction modes against the sample
 * availability masks: modes that need an unavailable top/left neighbour
 * are remapped via the lookup tables (DC variants), and a mode with no
 * legal remap (-1) is a bitstream error.
 * NOTE(review): loop headers, the error returns and final return are
 * missing from this excerpt. */
896 MpegEncContext * const s = &h->s;
/* -1 entries mark modes that are invalid without the corresponding
 * neighbour; other entries are the replacement mode */
897 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
898 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
901 if(!(h->top_samples_available&0x8000)){
903 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
905 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
908 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
913 if(!(h->left_samples_available&0x8000)){
915 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
917 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
920 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
926 } //FIXME cleanup like next
929 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
931 static inline int check_intra_pred_mode(H264Context *h, int mode){
/* Same idea as check_intra4x4_pred_mode but for 16x16/chroma prediction:
 * range-checks `mode` and remaps it via top[]/left[] when the needed
 * neighbour samples are unavailable; returns a negative value on error.
 * NOTE(review): the remap assignments and returns are missing from this
 * excerpt. */
932 MpegEncContext * const s = &h->s;
933 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
934 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
936 if(mode < 0 || mode > 6) {
937 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
941 if(!(h->top_samples_available&0x8000)){
944 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
949 if(!(h->left_samples_available&0x8000)){
952 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
961 * gets the predicted intra4x4 prediction mode.
963 static inline int pred_intra_mode(H264Context *h, int n){
/* Predicted intra4x4 mode for block n: the minimum of the left and top
 * neighbours' modes; if either is unavailable (<0) the prediction falls
 * back to DC_PRED.
 * NOTE(review): the non-DC return and closing brace are missing from this
 * excerpt. */
964 const int index8= scan8[n];
965 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
966 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
967 const int min= FFMIN(left, top);
969 tprintf("mode:%d %d min:%d\n", left ,top, min);
971 if(min<0) return DC_PRED;
975 static inline void write_back_non_zero_count(H264Context *h){
/* Stores the right column and bottom row of the non-zero-count cache
 * (luma [0..6], then the chroma cb/cr edges) back into the per-MB
 * non_zero_count table for use by later macroblocks.
 * NOTE(review): trailing lines of this function are missing from this
 * excerpt. */
976 MpegEncContext * const s = &h->s;
977 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
979 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
980 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
981 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
982 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
983 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
984 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
985 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
987 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
988 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
989 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
991 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
992 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
993 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
997 * gets the predicted number of non zero coefficients.
998 * @param n block index
1000 static inline int pred_non_zero_count(H264Context *h, int n){
/* Predicts the non-zero coefficient count for block n from the left and
 * top cached counts; 64 is the "unavailable" marker, and i<64 means at
 * least one neighbour was available, so the sum is averaged (rounded up).
 * NOTE(review): the declaration/initialization of `i` (presumably
 * left+top) and the return are missing from this excerpt — confirm
 * against the full file. */
1001 const int index8= scan8[n];
1002 const int left= h->non_zero_count_cache[index8 - 1];
1003 const int top = h->non_zero_count_cache[index8 - 8];
1006 if(i<64) i= (i+1)>>1;
1008 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
1013 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
/* Returns the reference index of the top-right neighbour block and points
 * *C at its motion vector; when top-right is unavailable it falls back to
 * the top-left neighbour, per the H.264 MV prediction rules.
 * NOTE(review): lines between the two paths (likely MBAFF handling and a
 * brace) are missing from this excerpt. */
1014 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1016 if(topright_ref != PART_NOT_AVAILABLE){
1017 *C= h->mv_cache[list][ i - 8 + part_width ];
1018 return topright_ref;
1020 tprintf("topright MV not available\n");
/* fall back to the top-left neighbour (i - 8 - 1) */
1022 *C= h->mv_cache[list][ i - 8 - 1 ];
1023 return h->ref_cache[list][ i - 8 - 1 ];
1028 * gets the predicted MV.
1029 * @param n the block index
1030 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1031 * @param mx the x component of the predicted motion vector
1032 * @param my the y component of the predicted motion vector
1034 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1035 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour (MV + reference index) */
1036 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1037 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1038 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1039 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1041 int diagonal_ref, match_count;
1043 assert(part_width==1 || part_width==2 || part_width==4);
/* C = diagonal neighbour (top-right, with top-left fallback) */
1053 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
1054 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1055 tprintf("pred_motion match_count=%d\n", match_count);
1056 if(match_count > 1){ //most common
/* two or more matches: component-wise median of A, B, C */
1057 *mx= mid_pred(A[0], B[0], C[0]);
1058 *my= mid_pred(A[1], B[1], C[1]);
1059 }else if(match_count==1){
/* exactly one neighbour matches ref: take its MV directly
 * (assignments for the individual cases are on elided lines) */
1063 }else if(top_ref==ref){
/* no match: median unless only the left neighbour exists */
1071 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1075 *mx= mid_pred(A[0], B[0], C[0]);
1076 *my= mid_pred(A[1], B[1], C[1]);
1080 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1084 * gets the directionally predicted 16x8 MV.
1085 * @param n the block index
1086 * @param mx the x component of the predicted motion vector
1087 * @param my the y component of the predicted motion vector
1089 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top 16x8 partition: prefer the above neighbour's MV when it uses ref
 * (the early return for that case is on an elided line) */
1091 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1092 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1094 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom 16x8 partition: prefer the left neighbour's MV */
1102 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1103 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1105 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1107 if(left_ref == ref){
/* directional predictor not usable: fall back to the median predictor */
1115 pred_motion(h, n, 4, list, ref, mx, my);
1119 * gets the directionally predicted 8x16 MV.
1120 * @param n the block index
1121 * @param mx the x component of the predicted motion vector
1122 * @param my the y component of the predicted motion vector
1124 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: prefer the left neighbour's MV when it uses ref */
1126 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1127 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1129 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1131 if(left_ref == ref){
/* right 8x16 partition: prefer the diagonal (top-right) neighbour */
1140 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1142 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1144 if(diagonal_ref == ref){
/* directional predictor not usable: fall back to the median predictor */
1152 pred_motion(h, n, 2, list, ref, mx, my);
/* MV prediction for P_Skip macroblocks: the MV is (0,0) when a neighbour
 * is missing or a neighbour uses ref 0 with a zero MV; otherwise the
 * normal median predictor for a 16x16 partition with ref 0 is used. */
1155 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1156 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1157 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1159 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1161 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1162 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1163 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1169 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Precomputes the temporal-direct-mode distance scale factors, one per
 * list-0 reference, from the POC distances (tb/td) between the current
 * picture, each list-0 reference and the first list-1 reference. */
1174 static inline void direct_dist_scale_factor(H264Context * const h){
1175 const int poc = h->s.current_picture_ptr->poc;
1176 const int poc1 = h->ref_list[1][0].poc;
1178 for(i=0; i<h->ref_count[0]; i++){
1179 int poc0 = h->ref_list[0][i].poc;
1180 int td = clip(poc1 - poc0, -128, 127);
1181 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs: neutral scale (256 == 1.0 in 8.8 fixed point) */
1182 h->dist_scale_factor[i] = 256;
1184 int tb = clip(poc - poc0, -128, 127);
/* tx = round(16384/td); factor = clip(tb*tx/64) per the H.264 spec */
1185 int tx = (16384 + (ABS(td) >> 1)) / td;
1186 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/* Stores the current picture's reference counts/POCs (for later use as a
 * co-located picture) and, for B-frames with temporal direct prediction,
 * builds map_col_to_list0[]: a map from the co-located picture's
 * reference indices to the current list-0 indices with matching POC. */
1190 static inline void direct_ref_list_init(H264Context * const h){
1191 MpegEncContext * const s = &h->s;
1192 Picture * const ref1 = &h->ref_list[1][0];
1193 Picture * const cur = s->current_picture_ptr;
1195 if(cur->pict_type == I_TYPE)
1196 cur->ref_count[0] = 0;
1197 if(cur->pict_type != B_TYPE)
1198 cur->ref_count[1] = 0;
1199 for(list=0; list<2; list++){
1200 cur->ref_count[list] = h->ref_count[list];
1201 for(j=0; j<h->ref_count[list]; j++)
1202 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the mapping is only needed for temporal direct B-frames */
1204 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1206 for(list=0; list<2; list++){
1207 for(i=0; i<ref1->ref_count[list]; i++){
1208 const int poc = ref1->ref_poc[list][i];
1209 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1210 for(j=0; j<h->ref_count[list]; j++)
1211 if(h->ref_list[list][j].poc == poc){
1212 h->map_col_to_list0[list][i] = j;
/* Derives the motion vectors and reference indices of a B_Direct
 * macroblock (or of its direct 8x8 sub-blocks when is_b8x8), using either
 * spatial or temporal direct prediction, and fills the mv/ref caches.
 * *mb_type is updated to reflect the chosen partitioning. */
1219 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1220 MpegEncContext * const s = &h->s;
1221 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1222 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1223 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* co-located MB data from the first list-1 reference picture */
1224 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1225 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1226 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1227 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1228 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1229 const int is_b8x8 = IS_8X8(*mb_type);
/* choose partition size from the co-located MB type */
1233 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1234 /* FIXME save sub mb types from previous frames (or derive from MVs)
1235 * so we know exactly what block size to use */
1236 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1237 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1238 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1239 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1240 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1242 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1243 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1246 *mb_type |= MB_TYPE_DIRECT2;
1248 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1250 if(h->direct_spatial_mv_pred){
/* --- spatial direct mode --- */
1255 /* ref = min(neighbors) */
1256 for(list=0; list<2; list++){
1257 int refa = h->ref_cache[list][scan8[0] - 1];
1258 int refb = h->ref_cache[list][scan8[0] - 8];
1259 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1261 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1263 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1265 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour reference in either list: use ref 0, MV (0,0) */
1271 if(ref[0] < 0 && ref[1] < 0){
1272 ref[0] = ref[1] = 0;
1273 mv[0][0] = mv[0][1] =
1274 mv[1][0] = mv[1][1] = 0;
1276 for(list=0; list<2; list++){
1278 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1280 mv[list][0] = mv[list][1] = 0;
/* drop the prediction direction whose reference is invalid */
1285 *mb_type &= ~MB_TYPE_P0L1;
1286 sub_mb_type &= ~MB_TYPE_P0L1;
1287 }else if(ref[0] < 0){
1288 *mb_type &= ~MB_TYPE_P0L0;
1289 sub_mb_type &= ~MB_TYPE_P0L0;
1292 if(IS_16X16(*mb_type)){
1293 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1294 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* colocated-is-static test: zero the MVs when the co-located block
 * uses ref 0 with a (near-)zero MV */
1295 if(!IS_INTRA(mb_type_col)
1296 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1297 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1298 && (h->x264_build>33 || !h->x264_build)))){
1300 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1302 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1306 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1308 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1309 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* per-8x8 spatial direct: same derivation for each direct sub-block */
1312 for(i8=0; i8<4; i8++){
1313 const int x8 = i8&1;
1314 const int y8 = i8>>1;
1316 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1318 h->sub_mb_type[i8] = sub_mb_type;
1320 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1321 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1323 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1326 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1327 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1328 && (h->x264_build>33 || !h->x264_build)))){
1329 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1330 if(IS_SUB_8X8(sub_mb_type)){
1331 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1332 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1334 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1336 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1339 for(i4=0; i4<4; i4++){
1340 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1341 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1343 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1345 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1351 }else{ /* direct temporal mv pred */
1352 if(IS_16X16(*mb_type)){
/* list-1 ref is always 0; list-0 MV is the scaled co-located MV */
1353 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1354 if(IS_INTRA(mb_type_col)){
1355 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1356 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1357 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1359 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1360 : h->map_col_to_list0[1][l1ref1[0]];
1361 const int dist_scale_factor = h->dist_scale_factor[ref0];
1362 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* mv_l0 = scale*mv_col, mv_l1 = mv_l0 - mv_col (H.264 8.4.1.2.3) */
1364 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1365 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1366 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1367 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1368 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
/* per-8x8 temporal direct */
1371 for(i8=0; i8<4; i8++){
1372 const int x8 = i8&1;
1373 const int y8 = i8>>1;
1374 int ref0, dist_scale_factor;
1375 const int16_t (*l1mv)[2]= l1mv0;
1377 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1379 h->sub_mb_type[i8] = sub_mb_type;
1380 if(IS_INTRA(mb_type_col)){
1381 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1382 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1383 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1384 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located reference index into the current list 0 */
1388 ref0 = l1ref0[x8 + y8*h->b8_stride];
1390 ref0 = h->map_col_to_list0[0][ref0];
1392 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1395 dist_scale_factor = h->dist_scale_factor[ref0];
1397 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1398 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1399 if(IS_SUB_8X8(sub_mb_type)){
1400 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1401 int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1402 int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1403 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1404 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1406 for(i4=0; i4<4; i4++){
1407 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1408 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1409 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1410 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1411 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1412 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Writes the cached MVs, reference indices (and, for CABAC, MV deltas and
 * direct flags) of the current macroblock back into the picture-wide
 * motion_val/ref_index tables used for neighbour prediction. */
1419 static inline void write_back_motion(H264Context *h, int mb_type){
1420 MpegEncContext * const s = &h->s;
1421 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1422 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1425 for(list=0; list<2; list++){
1427 if(!USES_LIST(mb_type, list)){
/* list unused: clear MVs/deltas and mark references as unused */
1428 if(1){ //FIXME skip or never read if mb_type doesn't use it
1430 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1431 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1433 if( h->pps.cabac ) {
1434 /* FIXME needed ? */
1436 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1437 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1441 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1442 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list used: copy 4 MVs per row (two 64-bit stores) from the cache */
1449 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1450 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1452 if( h->pps.cabac ) {
1454 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1455 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1459 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1460 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
/* direct flags per 8x8 block, needed by the CABAC context of later MBs */
1464 if(h->slice_type == B_TYPE && h->pps.cabac){
1465 if(IS_8X8(mb_type)){
1466 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1467 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1468 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1474 * Decodes a network abstraction layer unit.
1475 * @param consumed is the number of bytes used as input
1476 * @param length is the length of the array
1477 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1478 * @returns decoded bytes, might be src+1 if no escapes
1480 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
/* parse the 1-byte NAL header */
1484 // src[0]&0x80; //forbidden bit
1485 h->nal_ref_idc= src[0]>>5;
1486 h->nal_unit_type= src[0]&0x1F;
1490 for(i=0; i<length; i++)
1491 printf("%2X ", src[i]);
/* scan for 00 00 0x (x<=3), i.e. an emulation-prevention sequence or a
 * start code; step is 2 because two consecutive zeros are required */
1493 for(i=0; i+1<length; i+=2){
1494 if(src[i]) continue;
1495 if(i>0 && src[i-1]==0) i--;
1496 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1498 /* startcode, so we must be past the end */
/* no escape sequences found: return the source buffer directly */
1505 if(i>=length-1){ //no escaped 0
1506 *dst_length= length;
1507 *consumed= length+1; //+1 for the header
/* escapes present: copy into rbsp_buffer with 00 00 03 -> 00 00 */
1511 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1512 dst= h->rbsp_buffer;
1514 //printf("decoding esc\n");
1517 //remove escapes (very rare 1:2^22)
1518 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1519 if(src[si+2]==3){ //escape
1524 }else //next start code
1528 dst[di++]= src[si++];
1532 *consumed= si + 1;//+1 for the header
1533 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1539 * @param src the data which should be escaped
1540 * @param dst the target buffer, dst+1 == src is allowed as a special case
1541 * @param length the length of the src data
1542 * @param dst_length the length of the dst array
1543 * @returns length of escaped data in bytes or -1 if an error occurred
1545 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1546 int i, escape_count, si, di;
1550 assert(dst_length>0);
/* write the 1-byte NAL header */
1552 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1554 if(length==0) return 1;
/* first pass: count positions that need an emulation-prevention byte */
1557 for(i=0; i<length; i+=2){
1558 if(src[i]) continue;
1559 if(i>0 && src[i-1]==0)
1561 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
/* common case: nothing to escape, copy payload as-is */
1567 if(escape_count==0){
1569 memcpy(dst+1, src, length);
1573 if(length + escape_count + 1> dst_length)
1576 //this should be damn rare (hopefully)
1578 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1579 temp= h->rbsp_buffer;
1580 //printf("encoding esc\n");
/* second pass: copy, inserting 03 after each 00 00 before a byte <= 3 */
1585 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1586 temp[di++]= 0; si++;
1587 temp[di++]= 0; si++;
1589 temp[di++]= src[si++];
1592 temp[di++]= src[si++];
1594 memcpy(dst+1, temp, length+escape_count);
1596 assert(di == length+escape_count);
1602 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of mpeg4
1604 static void encode_rbsp_trailing(PutBitContext *pb){
/* stop bit is written on an elided line; then pad with zeros to the
 * next byte boundary */
1607 length= (-put_bits_count(pb))&7;
1608 if(length) put_bits(pb, length, 0);
1613 * identifies the exact end of the bitstream
1614 * @return the length of the trailing, or 0 if damaged
1616 static int decode_rbsp_trailing(uint8_t *src){
/* body elided in this view; v presumably holds the last RBSP byte whose
 * trailing stop-bit position is located — TODO confirm against full file */
1620 tprintf("rbsp trailing %X\n", v);
1630 * idct transforms the 16 dc values and dequantizes them.
1631 * @param qp quantization parameter
1633 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* the luma DC values sit at the DC positions of the 16 4x4 sub-blocks
 * inside the macroblock-sized block[]; offsets address that layout */
1636 int temp[16]; //FIXME check if this is a good idea
1637 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1638 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1640 //memset(block, 64, 2*256);
/* horizontal pass of the 4x4 Hadamard transform -> temp[] */
1643 const int offset= y_offset[i];
1644 const int z0= block[offset+stride*0] + block[offset+stride*4];
1645 const int z1= block[offset+stride*0] - block[offset+stride*4];
1646 const int z2= block[offset+stride*1] - block[offset+stride*5];
1647 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass, then dequantize with qmul (rounded >> 8) */
1656 const int offset= x_offset[i];
1657 const int z0= temp[4*0+i] + temp[4*2+i];
1658 const int z1= temp[4*0+i] - temp[4*2+i];
1659 const int z2= temp[4*1+i] - temp[4*3+i];
1660 const int z3= temp[4*1+i] + temp[4*3+i];
1662 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1663 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1664 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1665 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1671 * dct transforms the 16 dc values.
1672 * @param qp quantization parameter ??? FIXME
1674 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1675 // const int qmul= dequant_coeff[qp][0];
1677 int temp[16]; //FIXME check if this is a good idea
/* same DC layout as h264_luma_dc_dequant_idct_c */
1678 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1679 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal pass of the forward 4x4 Hadamard transform */
1682 const int offset= y_offset[i];
1683 const int z0= block[offset+stride*0] + block[offset+stride*4];
1684 const int z1= block[offset+stride*0] - block[offset+stride*4];
1685 const int z2= block[offset+stride*1] - block[offset+stride*5];
1686 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass; result halved (>>1) */
1695 const int offset= x_offset[i];
1696 const int z0= temp[4*0+i] + temp[4*2+i];
1697 const int z1= temp[4*0+i] - temp[4*2+i];
1698 const int z2= temp[4*1+i] - temp[4*3+i];
1699 const int z3= temp[4*1+i] + temp[4*3+i];
1701 block[stride*0 +offset]= (z0 + z3)>>1;
1702 block[stride*2 +offset]= (z1 + z2)>>1;
1703 block[stride*8 +offset]= (z1 - z2)>>1;
1704 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, stored at the DC positions of the four chroma 4x4 blocks. */
1712 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1713 const int stride= 16*2;
1714 const int xStride= 16;
1717 a= block[stride*0 + xStride*0];
1718 b= block[stride*0 + xStride*1];
1719 c= block[stride*1 + xStride*0];
1720 d= block[stride*1 + xStride*1];
/* butterfly (e,f set on elided lines), then scale by qmul>>7 */
1727 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1728 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1729 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1730 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC coefficients
 * (no scaling), layout identical to chroma_dc_dequant_idct_c. */
1734 static void chroma_dc_dct_c(DCTELEM *block){
1735 const int stride= 16*2;
1736 const int xStride= 16;
1739 a= block[stride*0 + xStride*0];
1740 b= block[stride*0 + xStride*1];
1741 c= block[stride*1 + xStride*0];
1742 d= block[stride*1 + xStride*1];
/* butterfly (e,f set on elided lines) */
1749 block[stride*0 + xStride*0]= (a+c);
1750 block[stride*0 + xStride*1]= (e+b);
1751 block[stride*1 + xStride*0]= (a-c);
1752 block[stride*1 + xStride*1]= (e-b);
1757 * gets the chroma qp.
1759 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
/* clamp the offset-adjusted luma QP to [0,51] and map via the table */
1761 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Forward 4x4 H.264 integer transform of the difference src1 - src2,
 * written into block[] (row pass over pixel diffs, then column pass). */
1766 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1768 //FIXME try int temp instead of block
/* row pass: transform each row of the 4x4 difference */
1771 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1772 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1773 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1774 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1775 const int z0= d0 + d3;
1776 const int z3= d0 - d3;
1777 const int z1= d1 + d2;
1778 const int z2= d1 - d2;
1780 block[0 + 4*i]= z0 + z1;
1781 block[1 + 4*i]= 2*z3 + z2;
1782 block[2 + 4*i]= z0 - z1;
1783 block[3 + 4*i]= z3 - 2*z2;
/* column pass: same butterfly applied down each column */
1787 const int z0= block[0*4 + i] + block[3*4 + i];
1788 const int z3= block[0*4 + i] - block[3*4 + i];
1789 const int z1= block[1*4 + i] + block[2*4 + i];
1790 const int z2= block[1*4 + i] - block[2*4 + i];
1792 block[0*4 + i]= z0 + z1;
1793 block[1*4 + i]= 2*z3 + z2;
1794 block[2*4 + i]= z0 - z1;
1795 block[3*4 + i]= z3 - 2*z2;
1800 //FIXME need to check that this doesn't overflow signed 32 bit for low qp; I am not sure, it's very close
1801 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/* Quantizes a 4x4 block in scantable order; the bias depends on
 * intra/inter, and seperate_dc selects a special DC quantizer.
 * Returns the index of the last non-zero coefficient. */
1802 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1804 const int * const quant_table= quant_coeff[qscale];
/* intra rounds with 1/3, inter with 1/6 of the quantization step */
1805 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1806 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1807 const unsigned int threshold2= (threshold1<<1);
/* DC handled separately: luma DC uses a coarser shift (QUANT_SHIFT-2) */
1813 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1814 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1815 const unsigned int dc_threshold2= (dc_threshold1<<1);
1817 int level= block[0]*quant_coeff[qscale+18][0];
/* single unsigned compare tests both +/- thresholds at once */
1818 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1820 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1823 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1826 // last_non_zero = i;
/* other DC path: finer shift (QUANT_SHIFT+1) */
1831 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1832 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1833 const unsigned int dc_threshold2= (dc_threshold1<<1);
1835 int level= block[0]*quant_table[0];
1836 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1838 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1841 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1844 // last_non_zero = i;
/* AC coefficients in scan order */
1857 const int j= scantable[i];
1858 int level= block[j]*quant_table[j];
1860 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1861 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1862 if(((unsigned)(level+threshold1))>threshold2){
1864 level= (bias + level)>>QUANT_SHIFT;
1867 level= (bias - level)>>QUANT_SHIFT;
1876 return last_non_zero;
/* 4x4 vertical prediction: replicate the row above into all four rows. */
1879 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
1880 const uint32_t a= ((uint32_t*)(src-stride))[0];
1881 ((uint32_t*)(src+0*stride))[0]= a;
1882 ((uint32_t*)(src+1*stride))[0]= a;
1883 ((uint32_t*)(src+2*stride))[0]= a;
1884 ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal prediction: replicate each left-neighbour pixel across
 * its row (x * 0x01010101 broadcasts a byte into 4 bytes). */
1887 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1888 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1889 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1890 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1891 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC prediction: fill with the rounded mean of the 4 top and 4 left
 * neighbour pixels. */
1894 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1895 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1896 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
1898 ((uint32_t*)(src+0*stride))[0]=
1899 ((uint32_t*)(src+1*stride))[0]=
1900 ((uint32_t*)(src+2*stride))[0]=
1901 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction from the left edge only (top unavailable). */
1904 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1905 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1907 ((uint32_t*)(src+0*stride))[0]=
1908 ((uint32_t*)(src+1*stride))[0]=
1909 ((uint32_t*)(src+2*stride))[0]=
1910 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction from the top edge only (left unavailable). */
1913 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1914 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1916 ((uint32_t*)(src+0*stride))[0]=
1917 ((uint32_t*)(src+1*stride))[0]=
1918 ((uint32_t*)(src+2*stride))[0]=
1919 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction with no neighbours available: fill with 128. */
1922 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1923 ((uint32_t*)(src+0*stride))[0]=
1924 ((uint32_t*)(src+1*stride))[0]=
1925 ((uint32_t*)(src+2*stride))[0]=
1926 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros for the directional 4x4 predictors below: load the four
 * neighbouring edge pixels (top-right t4..t7, left l0..l3, top t0..t3)
 * into local const ints. */
1930 #define LOAD_TOP_RIGHT_EDGE\
1931 const int t4= topright[0];\
1932 const int t5= topright[1];\
1933 const int t6= topright[2];\
1934 const int t7= topright[3];\
1936 #define LOAD_LEFT_EDGE\
1937 const int l0= src[-1+0*stride];\
1938 const int l1= src[-1+1*stride];\
1939 const int l2= src[-1+2*stride];\
1940 const int l3= src[-1+3*stride];\
1942 #define LOAD_TOP_EDGE\
1943 const int t0= src[ 0-1*stride];\
1944 const int t1= src[ 1-1*stride];\
1945 const int t2= src[ 2-1*stride];\
1946 const int t3= src[ 3-1*stride];\
/* 4x4 diagonal down-right prediction: each diagonal is a 3-tap
 * (1,2,1)/4 filter over the left/top-left/top neighbours. */
1948 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1949 const int lt= src[-1-1*stride];
1953 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1955 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1958 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1962 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1965 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1967 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1968 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal down-left prediction: (1,2,1)/4 filter over the top and
 * top-right neighbours along each anti-diagonal. */
1971 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1976 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1978 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1981 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1985 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1988 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1990 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1991 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right prediction: 2-tap averages for the even diagonals,
 * (1,2,1)/4 filters for the odd ones. */
1994 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1995 const int lt= src[-1-1*stride];
1998 const __attribute__((unused)) int unu= l3;
2001 src[1+2*stride]=(lt + t0 + 1)>>1;
2003 src[2+2*stride]=(t0 + t1 + 1)>>1;
2005 src[3+2*stride]=(t1 + t2 + 1)>>1;
2006 src[3+0*stride]=(t2 + t3 + 1)>>1;
2008 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2010 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2012 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2013 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2014 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2015 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left prediction from the top/top-right edges: 2-tap
 * averages for even rows, (1,2,1)/4 filters for odd rows. */
2018 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2021 const __attribute__((unused)) int unu= t7;
2023 src[0+0*stride]=(t0 + t1 + 1)>>1;
2025 src[0+2*stride]=(t1 + t2 + 1)>>1;
2027 src[1+2*stride]=(t2 + t3 + 1)>>1;
2029 src[2+2*stride]=(t3 + t4+ 1)>>1;
2030 src[3+2*stride]=(t4 + t5+ 1)>>1;
2031 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2033 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2035 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2037 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2038 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up prediction from the left edge; remaining pixels
 * (set on elided lines) are clamped to l3. */
2041 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2044 src[0+0*stride]=(l0 + l1 + 1)>>1;
2045 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2047 src[0+1*stride]=(l1 + l2 + 1)>>1;
2049 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2051 src[0+2*stride]=(l2 + l3 + 1)>>1;
2053 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down prediction: mixes 2-tap averages of the left/
 * top-left pixels with (1,2,1)/4 filtered values. */
2062 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2063 const int lt= src[-1-1*stride];
2066 const __attribute__((unused)) int unu= t3;
2069 src[2+1*stride]=(lt + l0 + 1)>>1;
2071 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2072 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2073 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2075 src[2+2*stride]=(l0 + l1 + 1)>>1;
2077 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2079 src[2+3*stride]=(l1 + l2+ 1)>>1;
2081 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2082 src[0+3*stride]=(l2 + l3 + 1)>>1;
2083 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical prediction: replicate the 16-pixel row above
 * (read as four 32-bit words) into all 16 rows. */
2086 static void pred16x16_vertical_c(uint8_t *src, int stride){
2088 const uint32_t a= ((uint32_t*)(src-stride))[0];
2089 const uint32_t b= ((uint32_t*)(src-stride))[1];
2090 const uint32_t c= ((uint32_t*)(src-stride))[2];
2091 const uint32_t d= ((uint32_t*)(src-stride))[3];
2093 for(i=0; i<16; i++){
2094 ((uint32_t*)(src+i*stride))[0]= a;
2095 ((uint32_t*)(src+i*stride))[1]= b;
2096 ((uint32_t*)(src+i*stride))[2]= c;
2097 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal prediction: broadcast each left-edge pixel across
 * its 16-pixel row. */
2101 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2104 for(i=0; i<16; i++){
2105 ((uint32_t*)(src+i*stride))[0]=
2106 ((uint32_t*)(src+i*stride))[1]=
2107 ((uint32_t*)(src+i*stride))[2]=
2108 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC prediction: fill with the rounded mean of the 16 left and
 * 16 top neighbour pixels (top sum accumulated on elided lines). */
2112 static void pred16x16_dc_c(uint8_t *src, int stride){
2116 dc+= src[-1+i*stride];
2123 dc= 0x01010101*((dc + 16)>>5);
2125 for(i=0; i<16; i++){
2126 ((uint32_t*)(src+i*stride))[0]=
2127 ((uint32_t*)(src+i*stride))[1]=
2128 ((uint32_t*)(src+i*stride))[2]=
2129 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the left edge only. */
2133 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2137 dc+= src[-1+i*stride];
2140 dc= 0x01010101*((dc + 8)>>4);
2142 for(i=0; i<16; i++){
2143 ((uint32_t*)(src+i*stride))[0]=
2144 ((uint32_t*)(src+i*stride))[1]=
2145 ((uint32_t*)(src+i*stride))[2]=
2146 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction from the top edge only (sum on elided lines). */
2150 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2156 dc= 0x01010101*((dc + 8)>>4);
2158 for(i=0; i<16; i++){
2159 ((uint32_t*)(src+i*stride))[0]=
2160 ((uint32_t*)(src+i*stride))[1]=
2161 ((uint32_t*)(src+i*stride))[2]=
2162 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours available: fill with 128. */
2166 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2169 for(i=0; i<16; i++){
2170 ((uint32_t*)(src+i*stride))[0]=
2171 ((uint32_t*)(src+i*stride))[1]=
2172 ((uint32_t*)(src+i*stride))[2]=
2173 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/* 16x16 plane prediction shared by H.264 and SVQ3: fits a linear
 * gradient to the top/left edges and fills the block with it.
 * The svq3 flag selects SVQ3's different rounding and swaps H/V. */
2177 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2180 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2181 const uint8_t * const src0 = src+7-stride;
2182 const uint8_t *src1 = src+8*stride-1;
2183 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
/* H/V: weighted sums of pixel differences along the top/left edges */
2184 int H = src0[1] - src0[-1];
2185 int V = src1[0] - src2[ 0];
2186 for(k=2; k<=8; ++k) {
2187 src1 += stride; src2 -= stride;
2188 H += k*(src0[k] - src0[-k]);
2189 V += k*(src1[0] - src2[ 0]);
/* SVQ3 uses truncating arithmetic and swapped gradients */
2192 H = ( 5*(H/4) ) / 16;
2193 V = ( 5*(V/4) ) / 16;
2195 /* required for 100% accuracy */
2196 i = H; H = V; V = i;
2198 H = ( 5*H+32 ) >> 6;
2199 V = ( 5*V+32 ) >> 6;
/* a = plane value at the bottom-right corner minus 7 gradient steps */
2202 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2203 for(j=16; j>0; --j) {
2206 for(i=-16; i<0; i+=4) {
2207 src[16+i] = cm[ (b ) >> 5 ];
2208 src[17+i] = cm[ (b+ H) >> 5 ];
2209 src[18+i] = cm[ (b+2*H) >> 5 ];
2210 src[19+i] = cm[ (b+3*H) >> 5 ];
/* H.264 plane prediction: the compat helper with svq3 disabled. */
2217 static void pred16x16_plane_c(uint8_t *src, int stride){
2218 pred16x16_plane_compat_c(src, stride, 0);
2221 static void pred8x8_vertical_c(uint8_t *src, int stride){
2223 const uint32_t a= ((uint32_t*)(src-stride))[0];
2224 const uint32_t b= ((uint32_t*)(src-stride))[1];
2227 ((uint32_t*)(src+i*stride))[0]= a;
2228 ((uint32_t*)(src+i*stride))[1]= b;
2232 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2236 ((uint32_t*)(src+i*stride))[0]=
2237 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
2241 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2245 ((uint32_t*)(src+i*stride))[0]=
2246 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
2250 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2256 dc0+= src[-1+i*stride];
2257 dc2+= src[-1+(i+4)*stride];
2259 dc0= 0x01010101*((dc0 + 2)>>2);
2260 dc2= 0x01010101*((dc2 + 2)>>2);
2263 ((uint32_t*)(src+i*stride))[0]=
2264 ((uint32_t*)(src+i*stride))[1]= dc0;
2267 ((uint32_t*)(src+i*stride))[0]=
2268 ((uint32_t*)(src+i*stride))[1]= dc2;
2272 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2278 dc0+= src[i-stride];
2279 dc1+= src[4+i-stride];
2281 dc0= 0x01010101*((dc0 + 2)>>2);
2282 dc1= 0x01010101*((dc1 + 2)>>2);
2285 ((uint32_t*)(src+i*stride))[0]= dc0;
2286 ((uint32_t*)(src+i*stride))[1]= dc1;
2289 ((uint32_t*)(src+i*stride))[0]= dc0;
2290 ((uint32_t*)(src+i*stride))[1]= dc1;
2295 static void pred8x8_dc_c(uint8_t *src, int stride){
2297 int dc0, dc1, dc2, dc3;
2301 dc0+= src[-1+i*stride] + src[i-stride];
2302 dc1+= src[4+i-stride];
2303 dc2+= src[-1+(i+4)*stride];
2305 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2306 dc0= 0x01010101*((dc0 + 4)>>3);
2307 dc1= 0x01010101*((dc1 + 2)>>2);
2308 dc2= 0x01010101*((dc2 + 2)>>2);
2311 ((uint32_t*)(src+i*stride))[0]= dc0;
2312 ((uint32_t*)(src+i*stride))[1]= dc1;
2315 ((uint32_t*)(src+i*stride))[0]= dc2;
2316 ((uint32_t*)(src+i*stride))[1]= dc3;
2320 static void pred8x8_plane_c(uint8_t *src, int stride){
2323 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2324 const uint8_t * const src0 = src+3-stride;
2325 const uint8_t *src1 = src+4*stride-1;
2326 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2327 int H = src0[1] - src0[-1];
2328 int V = src1[0] - src2[ 0];
2329 for(k=2; k<=4; ++k) {
2330 src1 += stride; src2 -= stride;
2331 H += k*(src0[k] - src0[-k]);
2332 V += k*(src1[0] - src2[ 0]);
2334 H = ( 17*H+16 ) >> 5;
2335 V = ( 17*V+16 ) >> 5;
2337 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2338 for(j=8; j>0; --j) {
2341 src[0] = cm[ (b ) >> 5 ];
2342 src[1] = cm[ (b+ H) >> 5 ];
2343 src[2] = cm[ (b+2*H) >> 5 ];
2344 src[3] = cm[ (b+3*H) >> 5 ];
2345 src[4] = cm[ (b+4*H) >> 5 ];
2346 src[5] = cm[ (b+5*H) >> 5 ];
2347 src[6] = cm[ (b+6*H) >> 5 ];
2348 src[7] = cm[ (b+7*H) >> 5 ];
2353 #define SRC(x,y) src[(x)+(y)*stride]
2355 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2356 #define PREDICT_8x8_LOAD_LEFT \
2357 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2358 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2359 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2360 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2363 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2364 #define PREDICT_8x8_LOAD_TOP \
2365 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2366 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2367 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2368 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2369 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2372 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2373 #define PREDICT_8x8_LOAD_TOPRIGHT \
2374 int t8, t9, t10, t11, t12, t13, t14, t15; \
2375 if(has_topright) { \
2376 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2377 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2378 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2380 #define PREDICT_8x8_LOAD_TOPLEFT \
2381 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2383 #define PREDICT_8x8_DC(v) \
2385 for( y = 0; y < 8; y++ ) { \
2386 ((uint32_t*)src)[0] = \
2387 ((uint32_t*)src)[1] = v; \
2391 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_DC(0x80808080);
2395 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2397 PREDICT_8x8_LOAD_LEFT;
2398 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2401 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2403 PREDICT_8x8_LOAD_TOP;
2404 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2407 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2409 PREDICT_8x8_LOAD_LEFT;
2410 PREDICT_8x8_LOAD_TOP;
2411 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2412 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2415 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2417 PREDICT_8x8_LOAD_LEFT;
2418 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2419 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2420 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2423 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2426 PREDICT_8x8_LOAD_TOP;
2435 for( y = 1; y < 8; y++ )
2436 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2438 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2440 PREDICT_8x8_LOAD_TOP;
2441 PREDICT_8x8_LOAD_TOPRIGHT;
2442 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2443 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2444 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2445 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2446 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2447 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2448 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2449 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2450 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2451 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2452 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2453 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2454 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2455 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2456 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2458 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2460 PREDICT_8x8_LOAD_TOP;
2461 PREDICT_8x8_LOAD_LEFT;
2462 PREDICT_8x8_LOAD_TOPLEFT;
2463 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2464 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2465 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2466 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2467 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2468 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2469 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2470 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2471 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2472 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2473 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2474 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2475 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2476 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2477 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2480 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2482 PREDICT_8x8_LOAD_TOP;
2483 PREDICT_8x8_LOAD_LEFT;
2484 PREDICT_8x8_LOAD_TOPLEFT;
2485 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2486 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2487 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2488 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2489 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2490 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2491 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2492 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2493 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2494 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2495 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2496 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2497 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2498 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2499 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2500 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2501 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2502 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2503 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2504 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2505 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2506 SRC(7,0)= (t6 + t7 + 1) >> 1;
2508 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2510 PREDICT_8x8_LOAD_TOP;
2511 PREDICT_8x8_LOAD_LEFT;
2512 PREDICT_8x8_LOAD_TOPLEFT;
2513 SRC(0,7)= (l6 + l7 + 1) >> 1;
2514 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2515 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2516 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2517 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2518 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2519 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2520 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2521 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2522 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2523 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2524 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2525 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2526 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2527 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2528 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2529 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2530 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2531 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2532 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2533 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2534 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2536 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2538 PREDICT_8x8_LOAD_TOP;
2539 PREDICT_8x8_LOAD_TOPRIGHT;
2540 SRC(0,0)= (t0 + t1 + 1) >> 1;
2541 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2542 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2543 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2544 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2545 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2546 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2547 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2548 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2549 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2550 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2551 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2552 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2553 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2554 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2555 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2556 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2557 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2558 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2559 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2560 SRC(7,6)= (t10 + t11 + 1) >> 1;
2561 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2563 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2565 PREDICT_8x8_LOAD_LEFT;
2566 SRC(0,0)= (l0 + l1 + 1) >> 1;
2567 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2568 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2569 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2570 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2571 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2572 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2573 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2574 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2575 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2576 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2577 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2578 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2579 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2580 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2581 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2582 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2583 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2585 #undef PREDICT_8x8_LOAD_LEFT
2586 #undef PREDICT_8x8_LOAD_TOP
2587 #undef PREDICT_8x8_LOAD_TOPLEFT
2588 #undef PREDICT_8x8_LOAD_TOPRIGHT
2589 #undef PREDICT_8x8_DC
2595 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2596 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2597 int src_x_offset, int src_y_offset,
2598 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2599 MpegEncContext * const s = &h->s;
2600 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2601 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2602 const int luma_xy= (mx&3) + ((my&3)<<2);
2603 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2604 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2605 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2606 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2607 int extra_height= extra_width;
2609 const int full_mx= mx>>2;
2610 const int full_my= my>>2;
2611 const int pic_width = 16*s->mb_width;
2612 const int pic_height = 16*s->mb_height;
2617 if(mx&7) extra_width -= 3;
2618 if(my&7) extra_height -= 3;
2620 if( full_mx < 0-extra_width
2621 || full_my < 0-extra_height
2622 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2623 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2624 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2625 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2629 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2631 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2634 if(s->flags&CODEC_FLAG_GRAY) return;
2637 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2638 src_cb= s->edge_emu_buffer;
2640 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2643 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2644 src_cr= s->edge_emu_buffer;
2646 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
2649 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2650 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2651 int x_offset, int y_offset,
2652 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2653 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2654 int list0, int list1){
2655 MpegEncContext * const s = &h->s;
2656 qpel_mc_func *qpix_op= qpix_put;
2657 h264_chroma_mc_func chroma_op= chroma_put;
2659 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2660 dest_cb += x_offset + y_offset*s->uvlinesize;
2661 dest_cr += x_offset + y_offset*s->uvlinesize;
2662 x_offset += 8*s->mb_x;
2663 y_offset += 8*s->mb_y;
2666 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2667 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2668 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2669 qpix_op, chroma_op);
2672 chroma_op= chroma_avg;
2676 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2677 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2678 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2679 qpix_op, chroma_op);
2683 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2684 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2685 int x_offset, int y_offset,
2686 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2687 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2688 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2689 int list0, int list1){
2690 MpegEncContext * const s = &h->s;
2692 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2693 dest_cb += x_offset + y_offset*s->uvlinesize;
2694 dest_cr += x_offset + y_offset*s->uvlinesize;
2695 x_offset += 8*s->mb_x;
2696 y_offset += 8*s->mb_y;
2699 /* don't optimize for luma-only case, since B-frames usually
2700 * use implicit weights => chroma too. */
2701 uint8_t *tmp_cb = s->obmc_scratchpad;
2702 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2703 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2704 int refn0 = h->ref_cache[0][ scan8[n] ];
2705 int refn1 = h->ref_cache[1][ scan8[n] ];
2707 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2708 dest_y, dest_cb, dest_cr,
2709 x_offset, y_offset, qpix_put, chroma_put);
2710 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2711 tmp_y, tmp_cb, tmp_cr,
2712 x_offset, y_offset, qpix_put, chroma_put);
2714 if(h->use_weight == 2){
2715 int weight0 = h->implicit_weight[refn0][refn1];
2716 int weight1 = 64 - weight0;
2717 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0);
2718 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
2719 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
2721 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2722 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2723 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2724 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2725 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2726 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2727 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2728 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2729 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2732 int list = list1 ? 1 : 0;
2733 int refn = h->ref_cache[list][ scan8[n] ];
2734 Picture *ref= &h->ref_list[list][refn];
2735 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2736 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2737 qpix_put, chroma_put);
2739 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2740 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2741 if(h->use_weight_chroma){
2742 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2743 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2744 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2745 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2750 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2751 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2752 int x_offset, int y_offset,
2753 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2754 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2755 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2756 int list0, int list1){
2757 if((h->use_weight==2 && list0 && list1
2758 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2759 || h->use_weight==1)
2760 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2761 x_offset, y_offset, qpix_put, chroma_put,
2762 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2764 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2765 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2768 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2769 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2770 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2771 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2772 MpegEncContext * const s = &h->s;
2773 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2774 const int mb_type= s->current_picture.mb_type[mb_xy];
2776 assert(IS_INTER(mb_type));
2778 if(IS_16X16(mb_type)){
2779 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2780 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2781 &weight_op[0], &weight_avg[0],
2782 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2783 }else if(IS_16X8(mb_type)){
2784 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2785 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2786 &weight_op[1], &weight_avg[1],
2787 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2788 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2789 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2790 &weight_op[1], &weight_avg[1],
2791 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2792 }else if(IS_8X16(mb_type)){
2793 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2794 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2795 &weight_op[2], &weight_avg[2],
2796 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2797 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2798 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2799 &weight_op[2], &weight_avg[2],
2800 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2804 assert(IS_8X8(mb_type));
2807 const int sub_mb_type= h->sub_mb_type[i];
2809 int x_offset= (i&1)<<2;
2810 int y_offset= (i&2)<<1;
2812 if(IS_SUB_8X8(sub_mb_type)){
2813 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2814 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2815 &weight_op[3], &weight_avg[3],
2816 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2817 }else if(IS_SUB_8X4(sub_mb_type)){
2818 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2819 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2820 &weight_op[4], &weight_avg[4],
2821 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2822 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2823 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2824 &weight_op[4], &weight_avg[4],
2825 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2826 }else if(IS_SUB_4X8(sub_mb_type)){
2827 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2828 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2829 &weight_op[5], &weight_avg[5],
2830 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2831 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2832 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2833 &weight_op[5], &weight_avg[5],
2834 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2837 assert(IS_SUB_4X4(sub_mb_type));
2839 int sub_x_offset= x_offset + 2*(j&1);
2840 int sub_y_offset= y_offset + (j&2);
2841 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2842 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2843 &weight_op[6], &weight_avg[6],
2844 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2851 static void decode_init_vlc(H264Context *h){
2852 static int done = 0;
2858 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2859 &chroma_dc_coeff_token_len [0], 1, 1,
2860 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2863 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2864 &coeff_token_len [i][0], 1, 1,
2865 &coeff_token_bits[i][0], 1, 1, 1);
2869 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2870 &chroma_dc_total_zeros_len [i][0], 1, 1,
2871 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2873 for(i=0; i<15; i++){
2874 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2875 &total_zeros_len [i][0], 1, 1,
2876 &total_zeros_bits[i][0], 1, 1, 1);
2880 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2881 &run_len [i][0], 1, 1,
2882 &run_bits[i][0], 1, 1, 1);
2884 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2885 &run_len [6][0], 1, 1,
2886 &run_bits[6][0], 1, 1, 1);
2891 * Sets the intra prediction function pointers.
2893 static void init_pred_ptrs(H264Context *h){
2894 // MpegEncContext * const s = &h->s;
2896 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2897 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2898 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2899 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2900 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2901 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2902 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2903 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2904 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2905 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2906 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2907 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2909 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2910 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2911 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2912 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2913 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2914 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2915 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2916 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2917 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2918 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2919 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2920 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2922 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2923 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2924 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2925 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2926 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2927 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2928 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2930 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2931 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2932 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2933 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2934 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2935 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2936 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2939 static void free_tables(H264Context *h){
2940 av_freep(&h->intra4x4_pred_mode);
2941 av_freep(&h->chroma_pred_mode_table);
2942 av_freep(&h->cbp_table);
2943 av_freep(&h->mvd_table[0]);
2944 av_freep(&h->mvd_table[1]);
2945 av_freep(&h->direct_table);
2946 av_freep(&h->non_zero_count);
2947 av_freep(&h->slice_table_base);
2948 av_freep(&h->top_borders[1]);
2949 av_freep(&h->top_borders[0]);
2950 h->slice_table= NULL;
2952 av_freep(&h->mb2b_xy);
2953 av_freep(&h->mb2b8_xy);
2955 av_freep(&h->s.obmc_scratchpad);
2958 static void init_dequant8_coeff_table(H264Context *h){
2960 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2961 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2962 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2964 for(i=0; i<2; i++ ){
2965 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2966 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2970 for(q=0; q<52; q++){
2971 int shift = div6[q];
2974 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2975 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2976 h->pps.scaling_matrix8[i][x]) << shift;
2981 static void init_dequant4_coeff_table(H264Context *h){
2983 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2984 for(i=0; i<6; i++ ){
2985 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2987 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2988 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2995 for(q=0; q<52; q++){
2996 int shift = div6[q] + 2;
2999 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3000 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3001 h->pps.scaling_matrix4[i][x]) << shift;
3006 static void init_dequant_tables(H264Context *h){
3008 init_dequant4_coeff_table(h);
3009 if(h->pps.transform_8x8_mode)
3010 init_dequant8_coeff_table(h);
3011 if(h->sps.transform_bypass){
3014 h->dequant4_coeff[i][0][x] = 1<<6;
3015 if(h->pps.transform_8x8_mode)
3018 h->dequant8_coeff[i][0][x] = 1<<6;
3025 * needs width/height
3027 static int alloc_tables(H264Context *h){
3028 MpegEncContext * const s = &h->s;
3029 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3032 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3034 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3035 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3036 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3037 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3038 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3040 if( h->pps.cabac ) {
3041 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3042 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3043 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3044 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
3047 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3048 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3050 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3051 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3052 for(y=0; y<s->mb_height; y++){
3053 for(x=0; x<s->mb_width; x++){
3054 const int mb_xy= x + y*s->mb_stride;
3055 const int b_xy = 4*x + 4*y*h->b_stride;
3056 const int b8_xy= 2*x + 2*y*h->b8_stride;
3058 h->mb2b_xy [mb_xy]= b_xy;
3059 h->mb2b8_xy[mb_xy]= b8_xy;
3063 s->obmc_scratchpad = NULL;
3065 if(!h->dequant4_coeff[0])
3066 init_dequant_tables(h);
3074 static void common_init(H264Context *h){
3075 MpegEncContext * const s = &h->s;
3077 s->width = s->avctx->width;
3078 s->height = s->avctx->height;
3079 s->codec_id= s->avctx->codec->id;
3083 h->dequant_coeff_pps= -1;
3084 s->unrestricted_mv=1;
3085 s->decode=1; //FIXME
3087 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3088 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
3091 static int decode_init(AVCodecContext *avctx){
3092 H264Context *h= avctx->priv_data;
3093 MpegEncContext * const s = &h->s;
3095 MPV_decode_defaults(s);
3100 s->out_format = FMT_H264;
3101 s->workaround_bugs= avctx->workaround_bugs;
3104 // s->decode_mb= ff_h263_decode_mb;
3106 avctx->pix_fmt= PIX_FMT_YUV420P;
3110 if(avctx->extradata_size > 0 && avctx->extradata &&
3111 *(char *)avctx->extradata == 1){
/**
 * Per-frame initialization: starts the MPV frame and error-resilience
 * tracking, fills the block_offset lookup tables (indices 0..23 for frame
 * coding, 24..47 for field/MBAFF coding, derived from the scan8 order),
 * and lazily allocates the bi-pred/weighting scratch buffer which needs
 * linesize and therefore cannot be allocated in alloc_tables().
 */
3121 static int frame_start(H264Context *h){
3122 MpegEncContext * const s = &h->s;
3125 if(MPV_frame_start(s, s->avctx) < 0)
3127 ff_er_frame_start(s);
3129 assert(s->linesize && s->uvlinesize);
3131 for(i=0; i<16; i++){
// luma offsets; the 24+ variants use doubled row stride for field decoding
3132 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3133 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// chroma (cb at 16+, cr at 20+) share the same offsets per 4x4 block
3136 h->block_offset[16+i]=
3137 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3138 h->block_offset[24+16+i]=
3139 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3142 /* can't be in alloc_tables because linesize isn't known there.
3143 * FIXME: redo bipred weight to not require extra buffer? */
3144 if(!s->obmc_scratchpad)
3145 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3147 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the right column (into left_border) and bottom row (into
 * top_borders) of the just-decoded macroblock so the deblocking filter /
 * intra prediction of neighbouring MBs can use the unfiltered samples.
 * Frame-coded (non-pair) variant; see backup_pair_border for MBAFF.
 */
3151 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3152 MpegEncContext * const s = &h->s;
3156 src_cb -= uvlinesize;
3157 src_cr -= uvlinesize;
3159 // There are two lines saved, the line above the top macroblock of a pair,
3160 // and the line above the bottom macroblock
3161 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3162 for(i=1; i<17; i++){
3163 h->left_border[i]= src_y[15+i* linesize];
// copy the 16-pixel bottom luma row in two 8-byte chunks
// NOTE(review): uint64_t type punning on pixel rows is a historical idiom
// here; alignment is assumed from the picture buffer allocator
3166 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3167 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3169 if(!(s->flags&CODEC_FLAG_GRAY)){
3170 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3171 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3173 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3174 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3176 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3177 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchanges (xchg!=0) or restores the saved borders with the current
 * macroblock's edge pixels, so intra prediction sees unfiltered neighbour
 * samples while the deblocked picture keeps the filtered ones.
 * Called with xchg=1 before intra prediction and xchg=0 afterwards.
 */
3181 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3182 MpegEncContext * const s = &h->s;
// no left/top neighbours exist at the picture edge
3185 int deblock_left = (s->mb_x > 0);
3186 int deblock_top = (s->mb_y > 0);
3188 src_y -= linesize + 1;
3189 src_cb -= uvlinesize + 1;
3190 src_cr -= uvlinesize + 1;
3192 #define XCHG(a,b,t,xchg)\
// skip row 0 when there is no top neighbour to exchange with
3199 for(i = !deblock_top; i<17; i++){
3200 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3205 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3206 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// top-right neighbour samples, needed by diagonal intra prediction modes
3207 if(s->mb_x+1 < s->mb_width){
3208 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3212 if(!(s->flags&CODEC_FLAG_GRAY)){
3214 for(i = !deblock_top; i<9; i++){
3215 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3216 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3220 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3221 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border: saves borders for a macroblock PAIR
 * (32 luma rows), keeping two bottom rows — one per field line — in
 * top_borders[0] and top_borders[1].
 */
3226 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3227 MpegEncContext * const s = &h->s;
3230 src_y -= 2 * linesize;
3231 src_cb -= 2 * uvlinesize;
3232 src_cr -= 2 * uvlinesize;
3234 // There are two lines saved, the line above the top macroblock of a pair,
3235 // and the line above the bottom macroblock
3236 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3237 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3238 for(i=2; i<34; i++){
3239 h->left_border[i]= src_y[15+i* linesize];
3242 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3243 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3244 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3245 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3247 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma left borders live after the 34 luma entries; cb then cr (+18)
3248 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3249 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3250 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3251 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3252 for(i=2; i<18; i++){
3253 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3254 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3256 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3257 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3258 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3259 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border: exchanges/restores saved borders for a
 * macroblock pair (two field rows of top borders, 34 luma left-border
 * entries). xchg=1 before intra prediction, xchg=0 to restore.
 */
3263 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3264 MpegEncContext * const s = &h->s;
3267 int deblock_left = (s->mb_x > 0);
3268 int deblock_top = (s->mb_y > 0);
3270 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3272 src_y -= 2 * linesize + 1;
3273 src_cb -= 2 * uvlinesize + 1;
3274 src_cr -= 2 * uvlinesize + 1;
3276 #define XCHG(a,b,t,xchg)\
// skip the two top rows (one per field) when there is no top neighbour
3283 for(i = (!deblock_top)<<1; i<34; i++){
3284 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3289 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3290 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3291 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3292 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3295 if(!(s->flags&CODEC_FLAG_GRAY)){
3297 for(i = (!deblock_top) << 1; i<18; i++){
3298 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3299 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3303 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3304 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3305 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3306 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * High-level decode of one macroblock: computes destination pointers,
 * selects IDCT functions, then handles one of three macroblock kinds —
 * I_PCM (raw sample copy), intra (spatial prediction + residual), or
 * inter (motion compensation + residual) — followed by chroma residuals
 * and deblocking. Also used by the SVQ3 decoder (codec_id checks below).
 * NOTE(review): many original lines (else branches, closing braces) are
 * missing from this extract; comments describe only the visible code.
 */
3311 static void hl_decode_mb(H264Context *h){
3312 MpegEncContext * const s = &h->s;
3313 const int mb_x= s->mb_x;
3314 const int mb_y= s->mb_y;
3315 const int mb_xy= mb_x + mb_y*s->mb_stride;
3316 const int mb_type= s->current_picture.mb_type[mb_xy];
3317 uint8_t *dest_y, *dest_cb, *dest_cr;
3318 int linesize, uvlinesize /*dct_offset*/;
3320 int *block_offset = &h->block_offset[0];
3321 const unsigned int bottom = mb_y & 1;
// lossless mode: qp==0 with the SPS transform-bypass flag set
3322 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3323 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3324 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3329 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3330 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3331 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// field decoding: double the strides and use the field block_offset table
3333 if (h->mb_field_decoding_flag) {
3334 linesize = s->linesize * 2;
3335 uvlinesize = s->uvlinesize * 2;
3336 block_offset = &h->block_offset[24];
3337 if(mb_y&1){ //FIXME move out of this func?
3338 dest_y -= s->linesize*15;
3339 dest_cb-= s->uvlinesize*7;
3340 dest_cr-= s->uvlinesize*7;
3343 linesize = s->linesize;
3344 uvlinesize = s->uvlinesize;
3345 // dct_offset = s->linesize * 16;
// choose residual-add functions: bypass -> plain pixel add, else 8x8 or 4x4 IDCT
3348 if(transform_bypass){
3350 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3351 }else if(IS_8x8DCT(mb_type)){
3352 idct_dc_add = s->dsp.h264_idct8_dc_add;
3353 idct_add = s->dsp.h264_idct8_add;
3355 idct_dc_add = s->dsp.h264_idct_dc_add;
3356 idct_add = s->dsp.h264_idct_add;
// --- I_PCM: raw samples were parsed into h->mb; copy them straight out ---
3359 if (IS_INTRA_PCM(mb_type)) {
3362 // The pixels are stored in h->mb array in the same order as levels,
3363 // copy them in output in the correct order.
3364 for(i=0; i<16; i++) {
3365 for (y=0; y<4; y++) {
3366 for (x=0; x<4; x++) {
3367 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3371 for(i=16; i<16+4; i++) {
3372 for (y=0; y<4; y++) {
3373 for (x=0; x<4; x++) {
3374 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3378 for(i=20; i<20+4; i++) {
3379 for (y=0; y<4; y++) {
3380 for (x=0; x<4; x++) {
3381 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// --- intra: swap in unfiltered borders, predict, then restore ---
3386 if(IS_INTRA(mb_type)){
3387 if(h->deblocking_filter) {
3388 if (h->mb_aff_frame) {
3390 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3392 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3396 if(!(s->flags&CODEC_FLAG_GRAY)){
3397 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3398 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3401 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: predict and add residual per 8x8 block (i steps by 4)
3403 if(IS_8x8DCT(mb_type)){
3404 for(i=0; i<16; i+=4){
3405 uint8_t * const ptr= dest_y + block_offset[i];
3406 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3407 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3408 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3409 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// nnz==1 with only the DC coeff set: cheaper DC-only add
3411 if(nnz == 1 && h->mb[i*16])
3412 idct_dc_add(ptr, h->mb + i*16, linesize);
3414 idct_add(ptr, h->mb + i*16, linesize);
3418 for(i=0; i<16; i++){
3419 uint8_t * const ptr= dest_y + block_offset[i];
3421 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// diagonal modes need top-right samples; if unavailable, replicate
// the last available top pixel across all 4 (the *0x01010101 trick)
3424 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3425 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3426 assert(mb_y || linesize <= block_offset[i]);
3427 if(!topright_avail){
3428 tr= ptr[3 - linesize]*0x01010101;
3429 topright= (uint8_t*) &tr;
3431 topright= ptr + 4 - linesize;
3435 h->pred4x4[ dir ](ptr, topright, linesize);
3436 nnz = h->non_zero_count_cache[ scan8[i] ];
3438 if(s->codec_id == CODEC_ID_H264){
3439 if(nnz == 1 && h->mb[i*16])
3440 idct_dc_add(ptr, h->mb + i*16, linesize);
3442 idct_add(ptr, h->mb + i*16, linesize);
3444 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra16x16: full-MB prediction, then dequant/IDCT of the luma DC plane
3449 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3450 if(s->codec_id == CODEC_ID_H264){
3451 if(!transform_bypass)
3452 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3454 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3456 if(h->deblocking_filter) {
3457 if (h->mb_aff_frame) {
3459 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3460 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3461 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3463 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3467 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// --- inter (H.264 only): motion compensation with optional weighting ---
3470 }else if(s->codec_id == CODEC_ID_H264){
3471 hl_motion(h, dest_y, dest_cb, dest_cr,
3472 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3473 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3474 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// --- luma residual for non-intra4x4 MBs ---
3478 if(!IS_INTRA4x4(mb_type)){
3479 if(s->codec_id == CODEC_ID_H264){
3480 if(IS_INTRA16x16(mb_type)){
3481 for(i=0; i<16; i++){
3482 if(h->non_zero_count_cache[ scan8[i] ])
3483 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// AC all zero but DC (from the separate DC transform) may still be set
3484 else if(h->mb[i*16])
3485 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3488 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3489 for(i=0; i<16; i+=di){
3490 int nnz = h->non_zero_count_cache[ scan8[i] ];
3492 if(nnz==1 && h->mb[i*16])
3493 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3495 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3500 for(i=0; i<16; i++){
3501 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3502 uint8_t * const ptr= dest_y + block_offset[i];
3503 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// --- chroma residual: dequant chroma DC then add per-block residuals ---
3509 if(!(s->flags&CODEC_FLAG_GRAY)){
3510 uint8_t *dest[2] = {dest_cb, dest_cr};
3511 if(transform_bypass){
3512 idct_add = idct_dc_add = s->dsp.add_pixels4;
3514 idct_add = s->dsp.h264_idct_add;
3515 idct_dc_add = s->dsp.h264_idct_dc_add;
3516 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3517 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3519 if(s->codec_id == CODEC_ID_H264){
3520 for(i=16; i<16+8; i++){
// (i&4)>>2 selects cb (blocks 16..19) vs cr (blocks 20..23)
3521 if(h->non_zero_count_cache[ scan8[i] ])
3522 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3523 else if(h->mb[i*16])
3524 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3527 for(i=16; i<16+8; i++){
3528 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3529 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3530 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// --- deblocking: for MBAFF, filter the whole pair once the bottom MB is done ---
3536 if(h->deblocking_filter) {
3537 if (h->mb_aff_frame) {
3538 const int mb_y = s->mb_y - 1;
3539 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3540 const int mb_xy= mb_x + mb_y*s->mb_stride;
3541 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3542 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// tmp is a debug canary pixel; the tprintf checks below report if the
// filter touched it
3543 uint8_t tmp = s->current_picture.data[1][384];
3544 if (!bottom) return;
3545 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3546 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3547 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3549 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3550 // TODO deblock a pair
3553 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3554 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3555 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3556 if (tmp != s->current_picture.data[1][384]) {
3557 tprintf("modified pixel 8,1 (1)\n");
3561 tprintf("call mbaff filter_mb\n");
3562 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3563 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3564 if (tmp != s->current_picture.data[1][384]) {
3565 tprintf("modified pixel 8,1 (2)\n");
3568 tprintf("call filter_mb\n");
3569 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3570 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3571 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * fills the default_ref_list.
 * For B slices: sorts short-term refs by POC relative to the current
 * picture (list0 forward, list1 backward from
 * smallest_poc_greater_than_current), then appends long-term refs; the
 * first two L1 entries are swapped when L1 would equal L0, as the spec
 * requires. For P slices: short-term refs in decode order, then long-term.
 * NOTE(review): some original lines are missing from this extract.
 */
3579 static int fill_default_ref_list(H264Context *h){
3580 MpegEncContext * const s = &h->s;
3582 int smallest_poc_greater_than_current = -1;
3583 Picture sorted_short_ref[32];
3585 if(h->slice_type==B_TYPE){
3589 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest poc above the last limit
3590 for(out_i=0; out_i<h->short_ref_count; out_i++){
3592 int best_poc=INT_MAX;
3594 for(i=0; i<h->short_ref_count; i++){
3595 const int poc= h->short_ref[i]->poc;
3596 if(poc > limit && poc < best_poc){
3602 assert(best_i != INT_MIN);
3605 sorted_short_ref[out_i]= *h->short_ref[best_i];
3606 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3607 if (-1 == smallest_poc_greater_than_current) {
3608 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3609 smallest_poc_greater_than_current = out_i;
3615 if(s->picture_structure == PICT_FRAME){
3616 if(h->slice_type==B_TYPE){
3618 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3620 // find the largest poc
3621 for(list=0; list<2; list++){
// list0 walks forward (past pictures first), list1 walks backward
3624 int step= list ? -1 : 1;
3626 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3627 while(j<0 || j>= h->short_ref_count){
3628 if(j != -99 && step == (list ? -1 : 1))
3631 j= smallest_poc_greater_than_current + (step>>1);
// reference==3 means both fields are reference (frame reference)
3633 if(sorted_short_ref[j].reference != 3) continue;
3634 h->default_ref_list[list][index ]= sorted_short_ref[j];
3635 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3638 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3639 if(h->long_ref[i] == NULL) continue;
3640 if(h->long_ref[i]->reference != 3) continue;
3642 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3643 h->default_ref_list[ list ][index++].pic_id= i;;
3646 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3647 // swap the two first elements of L1 when
3648 // L0 and L1 are identical
3649 Picture temp= h->default_ref_list[1][0];
3650 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3651 h->default_ref_list[1][1] = temp;
// zero out unused list entries so stale pictures cannot be referenced
3654 if(index < h->ref_count[ list ])
3655 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slices: short-term refs in decoding order, then long-term refs
3659 for(i=0; i<h->short_ref_count; i++){
3660 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3661 h->default_ref_list[0][index ]= *h->short_ref[i];
3662 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3664 for(i = 0; i < 16; i++){
3665 if(h->long_ref[i] == NULL) continue;
3666 if(h->long_ref[i]->reference != 3) continue;
3667 h->default_ref_list[0][index ]= *h->long_ref[i];
3668 h->default_ref_list[0][index++].pic_id= i;;
3670 if(index < h->ref_count[0])
3671 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3674 if(h->slice_type==B_TYPE){
3676 //FIXME second field balh
3680 for (i=0; i<h->ref_count[0]; i++) {
3681 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3683 if(h->slice_type==B_TYPE){
3684 for (i=0; i<h->ref_count[1]; i++) {
3685 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3692 static void print_short_term(H264Context *h);
3693 static void print_long_term(H264Context *h);
/**
 * Parses ref_pic_list_reordering() from the slice header and applies the
 * reordering commands to ref_list[0]/[1], starting from the default lists.
 * Implements the spec's "shift remaining entries down and insert at index"
 * procedure. Returns 0 on success, negative on bitstream errors.
 * NOTE(review): some original lines are missing from this extract.
 */
3695 static int decode_ref_pic_list_reordering(H264Context *h){
3696 MpegEncContext * const s = &h->s;
3699 print_short_term(h);
3701 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3703 for(list=0; list<2; list++){
3704 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_lX
3706 if(get_bits1(&s->gb)){
// pred tracks picNumLXPred, the running predictor for short-term pic nums
3707 int pred= h->curr_pic_num;
3709 for(index=0; ; index++){
3710 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3713 Picture *ref = NULL;
// idc 3 terminates the reordering command list
3715 if(reordering_of_pic_nums_idc==3)
3718 if(index >= h->ref_count[list]){
3719 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3723 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term pic, addressed by +/- abs_diff_pic_num from pred
3724 if(reordering_of_pic_nums_idc<2){
3725 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3727 if(abs_diff_pic_num >= h->max_pic_num){
3728 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3732 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3733 else pred+= abs_diff_pic_num;
// modular wrap within [0, max_pic_num); max_pic_num is a power of two
3734 pred &= h->max_pic_num - 1;
3736 for(i= h->short_ref_count-1; i>=0; i--){
3737 ref = h->short_ref[i];
3738 assert(ref->reference == 3);
3739 assert(!ref->long_ref);
3740 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3744 ref->pic_id= ref->frame_num;
// idc 2: long-term pic, addressed directly by long_term_pic_idx
3746 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3747 ref = h->long_ref[pic_id];
3748 ref->pic_id= pic_id;
3749 assert(ref->reference == 3);
3750 assert(ref->long_ref);
3755 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3756 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// find any existing occurrence of ref beyond index, then shift
// entries down by one and place ref at index
3758 for(i=index; i+1<h->ref_count[list]; i++){
3759 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3762 for(; i > index; i--){
3763 h->ref_list[list][i]= h->ref_list[list][i-1];
3765 h->ref_list[list][index]= *ref;
3768 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3774 if(h->slice_type!=B_TYPE) break;
// replace "non existing" (gap) entries with the current picture
3776 for(list=0; list<2; list++){
3777 for(index= 0; index < h->ref_count[list]; index++){
3778 if(!h->ref_list[list][index].data[0])
3779 h->ref_list[list][index]= s->current_picture;
3781 if(h->slice_type!=B_TYPE) break;
3784 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3785 direct_dist_scale_factor(h);
3786 direct_ref_list_init(h);
/**
 * Parses pred_weight_table() from the slice header (explicit weighted
 * prediction): per-list, per-reference luma and chroma weights/offsets.
 * References without an explicit weight get the default weight
 * (1 << log2_weight_denom) and zero offset. Sets use_weight /
 * use_weight_chroma when any non-default value is seen.
 * NOTE(review): some original lines are missing from this extract.
 */
3790 static int pred_weight_table(H264Context *h){
3791 MpegEncContext * const s = &h->s;
3793 int luma_def, chroma_def;
3796 h->use_weight_chroma= 0;
3797 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3798 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3799 luma_def = 1<<h->luma_log2_weight_denom;
3800 chroma_def = 1<<h->chroma_log2_weight_denom;
3802 for(list=0; list<2; list++){
3803 for(i=0; i<h->ref_count[list]; i++){
3804 int luma_weight_flag, chroma_weight_flag;
3806 luma_weight_flag= get_bits1(&s->gb);
3807 if(luma_weight_flag){
3808 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3809 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3810 if( h->luma_weight[list][i] != luma_def
3811 || h->luma_offset[list][i] != 0)
3814 h->luma_weight[list][i]= luma_def;
3815 h->luma_offset[list][i]= 0;
3818 chroma_weight_flag= get_bits1(&s->gb);
3819 if(chroma_weight_flag){
// j indexes the two chroma planes (cb, cr)
3822 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3823 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3824 if( h->chroma_weight[list][i][j] != chroma_def
3825 || h->chroma_offset[list][i][j] != 0)
3826 h->use_weight_chroma= 1;
3831 h->chroma_weight[list][i][j]= chroma_def;
3832 h->chroma_offset[list][i][j]= 0;
// only B slices have a list 1 weight table
3836 if(h->slice_type != B_TYPE) break;
3838 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Builds the implicit bi-prediction weight table for B slices from POC
 * distances (spec 8.4.2.3.2): w1 = dist_scale_factor, w0 = 64 - w1, with
 * a fallback to equal weights (32/32) on degenerate distances.
 * NOTE(review): some original lines (the td==0 / long-ref fallback branch)
 * are missing from this extract.
 */
3842 static void implicit_weight_table(H264Context *h){
3843 MpegEncContext * const s = &h->s;
3845 int cur_poc = s->current_picture_ptr->poc;
// single symmetric reference pair: implicit weighting degenerates to none
3847 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3848 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3850 h->use_weight_chroma= 0;
// use_weight==2 signals implicit mode to the motion compensation code
3855 h->use_weight_chroma= 2;
3856 h->luma_log2_weight_denom= 5;
3857 h->chroma_log2_weight_denom= 5;
3860 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3861 int poc0 = h->ref_list[0][ref0].poc;
3862 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3863 int poc1 = h->ref_list[1][ref1].poc;
// td/tb are the clipped POC distances from the spec's derivation
3864 int td = clip(poc1 - poc0, -128, 127);
3866 int tb = clip(cur_poc - poc0, -128, 127);
3867 int tx = (16384 + (ABS(td) >> 1)) / td;
3868 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3869 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3870 h->implicit_weight[ref0][ref1] = 32;
3872 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3874 h->implicit_weight[ref0][ref1] = 32;
/**
 * Drops a picture's reference status unless it is still pending output
 * (delayed_output_pic or in delayed_pic[]), in which case releasing must
 * wait until it has been output.
 * NOTE(review): the function body is truncated in this extract — the
 * actual reference-clearing statements are not visible here.
 */
3879 static inline void unreference_pic(H264Context *h, Picture *pic){
3882 if(pic == h->delayed_output_pic)
3885 for(i = 0; h->delayed_pic[i]; i++)
3886 if(pic == h->delayed_pic[i]){
/**
 * instantaneous decoder refresh.
 * Clears all long-term and short-term reference pictures, as required at
 * an IDR access point: nothing before the IDR may be referenced after it.
 */
3896 static void idr(H264Context *h){
3899 for(i=0; i<16; i++){
3900 if (h->long_ref[i] != NULL) {
3901 unreference_pic(h, h->long_ref[i]);
3902 h->long_ref[i]= NULL;
3905 h->long_ref_count=0;
3907 for(i=0; i<h->short_ref_count; i++){
3908 unreference_pic(h, h->short_ref[i]);
3909 h->short_ref[i]= NULL;
3911 h->short_ref_count=0;
/* forget old pics after a seek */
/**
 * AVCodec flush callback: drops every delayed (reordering) picture and the
 * current picture's reference status so decoding can restart cleanly.
 * NOTE(review): the call that clears the short/long reference lists is in
 * lines missing from this extract.
 */
3915 static void flush_dpb(AVCodecContext *avctx){
3916 H264Context *h= avctx->priv_data;
3918 for(i=0; i<16; i++) {
3919 if(h->delayed_pic[i])
3920 h->delayed_pic[i]->reference= 0;
3921 h->delayed_pic[i]= NULL;
3923 if(h->delayed_output_pic)
3924 h->delayed_output_pic->reference= 0;
3925 h->delayed_output_pic= NULL;
3927 if(h->s.current_picture_ptr)
3928 h->s.current_picture_ptr->reference= 0;
/**
 * Removes the short-term reference with the given frame_num from the
 * short_ref list, compacting the list.
 * @return the removed picture or NULL if an error occurs
 */
3935 static Picture * remove_short(H264Context *h, int frame_num){
3936 MpegEncContext * const s = &h->s;
3939 if(s->avctx->debug&FF_DEBUG_MMCO)
3940 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3942 for(i=0; i<h->short_ref_count; i++){
3943 Picture *pic= h->short_ref[i];
3944 if(s->avctx->debug&FF_DEBUG_MMCO)
3945 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3946 if(pic->frame_num == frame_num){
3947 h->short_ref[i]= NULL;
// close the gap left by the removed entry
3948 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3949 h->short_ref_count--;
/**
 * Removes (and returns) the long-term reference stored at index i.
 * @return the removed picture or NULL if an error occurs
 */
3960 static Picture * remove_long(H264Context *h, int i){
3963 pic= h->long_ref[i];
3964 h->long_ref[i]= NULL;
// slot may legitimately be empty; only count down when it was occupied
3965 if(pic) h->long_ref_count--;
/**
 * print short term list
 * Debug helper: only emits output when FF_DEBUG_MMCO is enabled.
 */
3973 static void print_short_term(H264Context *h) {
3975 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3976 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3977 for(i=0; i<h->short_ref_count; i++){
3978 Picture *pic= h->short_ref[i];
3979 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
 * print long term list
 * Debug helper: only emits output when FF_DEBUG_MMCO is enabled; empty
 * long_ref slots are skipped (guard on a line missing from this extract).
 */
3987 static void print_long_term(H264Context *h) {
3989 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3990 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3991 for(i = 0; i < 16; i++){
3992 Picture *pic= h->long_ref[i];
3994 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
 * Executes the reference picture marking (memory management control operations).
 * Applies the parsed MMCO commands (spec 8.2.5.4) to the short/long
 * reference lists, then — unless the current picture was marked long-term
 * — inserts it at the head of the short-term list (sliding window).
 * NOTE(review): some original lines (e.g. the MMCO_LONG case label and the
 * statements setting current_is_long) are missing from this extract.
 */
4003 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4004 MpegEncContext * const s = &h->s;
4006 int current_is_long=0;
4009 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4010 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4012 for(i=0; i<mmco_count; i++){
4013 if(s->avctx->debug&FF_DEBUG_MMCO)
4014 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4016 switch(mmco[i].opcode){
4017 case MMCO_SHORT2UNUSED:
4018 pic= remove_short(h, mmco[i].short_frame_num);
4020 unreference_pic(h, pic);
4021 else if(s->avctx->debug&FF_DEBUG_MMCO)
4022 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4024 case MMCO_SHORT2LONG:
// free any picture already occupying the target long-term slot
4025 pic= remove_long(h, mmco[i].long_index);
4026 if(pic) unreference_pic(h, pic);
4028 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4029 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4030 h->long_ref_count++;
4032 case MMCO_LONG2UNUSED:
4033 pic= remove_long(h, mmco[i].long_index);
4035 unreference_pic(h, pic);
4036 else if(s->avctx->debug&FF_DEBUG_MMCO)
4037 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// (MMCO_LONG case: case label is in lines missing from this extract)
// mark the CURRENT picture as long-term at long_index
4040 pic= remove_long(h, mmco[i].long_index);
4041 if(pic) unreference_pic(h, pic);
4043 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4044 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4045 h->long_ref_count++;
4049 case MMCO_SET_MAX_LONG:
4050 assert(mmco[i].long_index <= 16);
4051 // just remove the long term which index is greater than new max
4052 for(j = mmco[i].long_index; j<16; j++){
4053 pic = remove_long(h, j);
4054 if (pic) unreference_pic(h, pic);
// (MMCO_RESET: drop every short- and long-term reference)
4058 while(h->short_ref_count){
4059 pic= remove_short(h, h->short_ref[0]->frame_num);
4060 unreference_pic(h, pic);
4062 for(j = 0; j < 16; j++) {
4063 pic= remove_long(h, j);
4064 if(pic) unreference_pic(h, pic);
// sliding window: current picture becomes the newest short-term ref
4071 if(!current_is_long){
4072 pic= remove_short(h, s->current_picture_ptr->frame_num);
4074 unreference_pic(h, pic);
4075 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4078 if(h->short_ref_count)
4079 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4081 h->short_ref[0]= s->current_picture_ptr;
4082 h->short_ref[0]->long_ref=0;
4083 h->short_ref_count++;
4086 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices carry no_output_of_prior_pics / long_term_reference_flag;
 * other slices optionally carry an adaptive MMCO command list, otherwise a
 * sliding-window MMCO_SHORT2UNUSED is synthesized when the DPB is full.
 * NOTE(review): some original lines are missing from this extract.
 */
4091 static int decode_ref_pic_marking(H264Context *h){
4092 MpegEncContext * const s = &h->s;
4095 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// broken_link is derived from no_output_of_prior_pics_flag here
4096 s->broken_link= get_bits1(&s->gb) -1;
4097 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4098 if(h->mmco[0].long_index == -1)
4101 h->mmco[0].opcode= MMCO_LONG;
4105 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4106 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4107 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4109 h->mmco[i].opcode= opcode;
4110 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1, wrapped modulo max_frame_num
4111 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4112 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4113 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4117 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4118 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4119 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4120 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4125 if(opcode > MMCO_LONG){
4126 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4129 if(opcode == MMCO_END)
4134 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// sliding window: evict the oldest short-term ref when the DPB is full
4136 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4137 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4138 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/**
 * Computes the picture order count (POC) of the current picture per spec
 * 8.2.1, handling all three poc_type modes:
 *   0 - explicit poc_lsb with msb wrap tracking,
 *   1 - derived from frame_num via the SPS offset_for_ref_frame cycle,
 *   2 - POC proportional to decoding order (frame_num).
 * Stores the field POCs and the frame POC on the current picture.
 * NOTE(review): some original lines are missing from this extract.
 */
4148 static int init_poc(H264Context *h){
4149 MpegEncContext * const s = &h->s;
4150 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4153 if(h->nal_unit_type == NAL_IDR_SLICE){
4154 h->frame_num_offset= 0;
// frame_num wrapped -> advance the offset by one full frame_num period
4156 if(h->frame_num < h->prev_frame_num)
4157 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4159 h->frame_num_offset= h->prev_frame_num_offset;
4162 if(h->sps.poc_type==0){
4163 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4165 if(h->nal_unit_type == NAL_IDR_SLICE){
// detect poc_lsb wrap-around in either direction to derive poc_msb
4170 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4171 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4172 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4173 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4175 h->poc_msb = h->prev_poc_msb;
4176 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4178 field_poc[1] = h->poc_msb + h->poc_lsb;
4179 if(s->picture_structure == PICT_FRAME)
4180 field_poc[1] += h->delta_poc_bottom;
4181 }else if(h->sps.poc_type==1){
4182 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4185 if(h->sps.poc_cycle_length != 0)
4186 abs_frame_num = h->frame_num_offset + h->frame_num;
4190 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4193 expected_delta_per_poc_cycle = 0;
4194 for(i=0; i < h->sps.poc_cycle_length; i++)
4195 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4197 if(abs_frame_num > 0){
4198 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4199 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4201 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4202 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4203 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4207 if(h->nal_ref_idc == 0)
4208 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4210 field_poc[0] = expectedpoc + h->delta_poc[0];
4211 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4213 if(s->picture_structure == PICT_FRAME)
4214 field_poc[1] += h->delta_poc[1];
// poc_type==2: POC tracks decoding order; non-ref pics get odd values
4217 if(h->nal_unit_type == NAL_IDR_SLICE){
4220 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4221 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4227 if(s->picture_structure != PICT_BOTTOM_FIELD)
4228 s->current_picture_ptr->field_poc[0]= field_poc[0];
4229 if(s->picture_structure != PICT_TOP_FIELD)
4230 s->current_picture_ptr->field_poc[1]= field_poc[1];
4231 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4232 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4238 * decodes a slice header.
4239 * this will also call MPV_common_init() and frame_start() as needed
/**
 * decode_slice_header: parses one H.264 slice header, activating the
 * referenced PPS/SPS, (re)initializing the MPEG context and frame on the
 * first slice, and reading POC/ref-count/weight/deblocking parameters.
 * @return 0 on success (not visible in this sampled excerpt), negative on error
 * NOTE(review): this listing is a sampled excerpt -- many original lines
 * (error returns, else branches, braces) are not visible here.
 */
4241 static int decode_slice_header(H264Context *h){
4242 MpegEncContext * const s = &h->s;
4243 int first_mb_in_slice, pps_id;
4244 int num_ref_idx_active_override_flag;
4245 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4247 int default_ref_list_done = 0;
4249 s->current_picture.reference= h->nal_ref_idc != 0;
4250 s->dropable= h->nal_ref_idc == 0;
4252 first_mb_in_slice= get_ue_golomb(&s->gb);
4254 slice_type= get_ue_golomb(&s->gb);
4256 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4261 h->slice_type_fixed=1; // slice_type >= 5 means "same type for whole picture"
4263 h->slice_type_fixed=0;
4265 slice_type= slice_type_map[ slice_type ];
4266 if (slice_type == I_TYPE
4267 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4268 default_ref_list_done = 1; // I slices / repeated type: default list needs no rebuild
4270 h->slice_type= slice_type;
4272 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4274 pps_id= get_ue_golomb(&s->gb);
4276 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4279 h->pps= h->pps_buffer[pps_id]; // activate PPS by value-copy
4280 if(h->pps.slice_group_count == 0){ // 0 marks a never-filled PPS slot
4281 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4285 h->sps= h->sps_buffer[ h->pps.sps_id ];
4286 if(h->sps.log2_max_frame_num == 0){ // 0 marks a never-filled SPS slot
4287 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4291 if(h->dequant_coeff_pps != pps_id){ // PPS changed: rebuild dequant tables
4292 h->dequant_coeff_pps = pps_id;
4293 init_dequant_tables(h);
4296 s->mb_width= h->sps.mb_width;
4297 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4299 h->b_stride= s->mb_width*4;
4300 h->b8_stride= s->mb_width*2;
4302 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4303 if(h->sps.frame_mbs_only_flag)
4304 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4306 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4308 if (s->context_initialized
4309 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4313 if (!s->context_initialized) {
4314 if (MPV_common_init(s) < 0)
4317 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4318 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4319 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4322 for(i=0; i<16; i++){
4323 #define T(x) (x>>2) | ((x<<2) & 0xF)
4324 h->zigzag_scan[i] = T(zigzag_scan[i]); // permute scan for the non-C IDCT layout
4325 h-> field_scan[i] = T( field_scan[i]);
4329 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4330 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4331 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4334 for(i=0; i<64; i++){
4335 #define T(x) (x>>3) | ((x&7)<<3)
4336 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4337 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4341 if(h->sps.transform_bypass){ //FIXME same ugly
4342 h->zigzag_scan_q0 = zigzag_scan; // qp==0 bypass uses the unpermuted tables
4343 h->field_scan_q0 = field_scan;
4344 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4345 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4347 h->zigzag_scan_q0 = h->zigzag_scan;
4348 h->field_scan_q0 = h->field_scan;
4349 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4350 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4355 s->avctx->width = s->width;
4356 s->avctx->height = s->height;
4357 s->avctx->sample_aspect_ratio= h->sps.sar;
4358 if(!s->avctx->sample_aspect_ratio.den)
4359 s->avctx->sample_aspect_ratio.den = 1;
4361 if(h->sps.timing_info_present_flag){
4362 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4363 if(h->x264_build > 0 && h->x264_build < 44) // workaround for old x264 timing bug
4364 s->avctx->time_base.den *= 2;
4365 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4366 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4370 if(h->slice_num == 0){ // first slice of the picture starts the frame
4371 if(frame_start(h) < 0)
4375 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4376 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4378 h->mb_aff_frame = 0;
4379 if(h->sps.frame_mbs_only_flag){
4380 s->picture_structure= PICT_FRAME;
4382 if(get_bits1(&s->gb)) { //field_pic_flag
4383 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4385 s->picture_structure= PICT_FRAME;
4386 first_mb_in_slice <<= h->sps.mb_aff; // MBAFF addresses count MB pairs
4387 h->mb_aff_frame = h->sps.mb_aff;
4391 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4392 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4393 if(s->mb_y >= s->mb_height){
4397 if(s->picture_structure==PICT_FRAME){
4398 h->curr_pic_num= h->frame_num;
4399 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4401 h->curr_pic_num= 2*h->frame_num; // field coding doubles the pic-num space
4402 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4405 if(h->nal_unit_type == NAL_IDR_SLICE){
4406 get_ue_golomb(&s->gb); /* idr_pic_id */
4409 if(h->sps.poc_type==0){
4410 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4412 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4413 h->delta_poc_bottom= get_se_golomb(&s->gb);
4417 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4418 h->delta_poc[0]= get_se_golomb(&s->gb);
4420 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4421 h->delta_poc[1]= get_se_golomb(&s->gb);
4426 if(h->pps.redundant_pic_cnt_present){
4427 h->redundant_pic_count= get_ue_golomb(&s->gb);
4430 //set defaults, might be overridden a few lines later
4431 h->ref_count[0]= h->pps.ref_count[0];
4432 h->ref_count[1]= h->pps.ref_count[1];
4434 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4435 if(h->slice_type == B_TYPE){
4436 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4438 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4440 if(num_ref_idx_active_override_flag){
4441 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4442 if(h->slice_type==B_TYPE)
4443 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4445 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4446 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4452 if(!default_ref_list_done){
4453 fill_default_ref_list(h);
4456 if(decode_ref_pic_list_reordering(h) < 0)
4459 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4460 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4461 pred_weight_table(h); // explicit weighted prediction tables in the bitstream
4462 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4463 implicit_weight_table(h); // weights derived from POC distances
4467 if(s->current_picture.reference)
4468 decode_ref_pic_marking(h);
4470 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4471 h->cabac_init_idc = get_ue_golomb(&s->gb);
4473 h->last_qscale_diff = 0;
4474 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4475 if(s->qscale<0 || s->qscale>51){
4476 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4479 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4480 //FIXME qscale / qp ... stuff
4481 if(h->slice_type == SP_TYPE){
4482 get_bits1(&s->gb); /* sp_for_switch_flag */
4484 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4485 get_se_golomb(&s->gb); /* slice_qs_delta */
4488 h->deblocking_filter = 1;
4489 h->slice_alpha_c0_offset = 0;
4490 h->slice_beta_offset = 0;
4491 if( h->pps.deblocking_filter_parameters_present ) {
4492 h->deblocking_filter= get_ue_golomb(&s->gb);
4493 if(h->deblocking_filter < 2)
4494 h->deblocking_filter^= 1; // 1<->0 (bitstream 0 = filter on, 1 = off)
4496 if( h->deblocking_filter ) {
4497 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4498 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4501 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4502 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4503 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4504 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4505 h->deblocking_filter= 0; // user-requested loop-filter skipping
4508 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5){
// slice_group_change_cycle is u(v) whose length depends on
// pps.slice_group_change_rate (Rec. ITU-T H.264, 7.4.3).  The original
// code read an undetermined number of bits here ("get_bits(&s->gb, ?)"),
// which could not compile and would desync the bitstream; FMO slice
// groups are not otherwise supported by this decoder, so reject the
// stream explicitly instead.
av_log(h->s.avctx, AV_LOG_ERROR, "FMO (slice_group_change_cycle) not supported\n");
return -1;
}
4514 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4515 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4517 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4519 av_get_pict_type_char(h->slice_type),
4520 pps_id, h->frame_num,
4521 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4522 h->ref_count[0], h->ref_count[1],
4524 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4526 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * get_level_prefix: reads the CAVLC level_prefix -- the count of leading
 * zero bits before a 1 -- using the raw bitstream-reader macros.
 * NOTE(review): sampled excerpt; variable declarations and the return
 * statement (presumably log-1) are not visible here.
 */
4536 static inline int get_level_prefix(GetBitContext *gb){
4540 OPEN_READER(re, gb);
4541 UPDATE_CACHE(re, gb);
4542 buf=GET_CACHE(re, gb);
4544 log= 32 - av_log2(buf); // position of the first set bit from the MSB
4546 print_bin(buf>>(32-log), log);
4547 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4550 LAST_SKIP_BITS(re, gb, log); // consume the prefix including the terminating 1
4551 CLOSE_READER(re, gb);
/**
 * get_dct8x8_allowed: checks whether the 8x8 transform may be used for the
 * current macroblock's sub-partitions.  A sub-block disallows it when it is
 * not an 8x8 partition, or when it is DIRECT-predicted without the SPS
 * direct_8x8_inference_flag.  NOTE(review): sampled excerpt; the enclosing
 * loop over i and the return statements are not visible here.
 */
4556 static inline int get_dct8x8_allowed(H264Context *h){
4559 if(!IS_SUB_8X8(h->sub_mb_type[i])
4560 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4567 * decodes a residual block.
4568 * @param n block index
4569 * @param scantable scantable
4570 * @param max_coeff number of coefficients in the block
4571 * @return <0 if an error occurred
/**
 * decode_residual: CAVLC residual block decoder.  Reads coeff_token
 * (trailing ones + total coefficients), the level values, total_zeros and
 * run_before, then scatters the levels into the block via the scantable,
 * optionally dequantizing with qmul.  NOTE(review): sampled excerpt --
 * else keywords, braces and several declarations (level[], prefix in the
 * second loop, return statements) are not visible here.
 */
4573 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4574 MpegEncContext * const s = &h->s;
4575 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4577 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4579 //FIXME put trailing_onex into the context
4581 if(n == CHROMA_DC_BLOCK_INDEX){ // chroma DC uses its own small VLC table
4582 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4583 total_coeff= coeff_token>>2;
4585 if(n == LUMA_DC_BLOCK_INDEX){ // luma DC: table chosen from neighbour nz-count
4586 total_coeff= pred_non_zero_count(h, 0);
4587 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4588 total_coeff= coeff_token>>2;
4590 total_coeff= pred_non_zero_count(h, n);
4591 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4592 total_coeff= coeff_token>>2;
4593 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4597 //FIXME set last_non_zero?
4602 trailing_ones= coeff_token&3; // low 2 bits of coeff_token = trailing +/-1 count
4603 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4604 assert(total_coeff<=16);
4606 for(i=0; i<trailing_ones; i++){
4607 level[i]= 1 - 2*get_bits1(gb); // single sign bit per trailing one
4611 int level_code, mask;
4612 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4613 int prefix= get_level_prefix(gb);
4615 //first coefficient has suffix_length equal to 0 or 1
4616 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4618 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4620 level_code= (prefix<<suffix_length); //part
4621 }else if(prefix==14){
4623 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4625 level_code= prefix + get_bits(gb, 4); //part
4626 }else if(prefix==15){
4627 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4628 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4630 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4634 if(trailing_ones < 3) level_code += 2; // levels +/-1 already used by trailing ones
4639 mask= -(level_code&1);
4640 level[i]= (((2+level_code)>>1) ^ mask) - mask; // map even/odd code to +/- magnitude
4643 //remaining coefficients have suffix_length > 0
4644 for(;i<total_coeff;i++) {
4645 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4646 prefix = get_level_prefix(gb);
4648 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4649 }else if(prefix==15){
4650 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4652 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4655 mask= -(level_code&1);
4656 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4657 if(level_code > suffix_limit[suffix_length]) // adaptively grow the suffix length
4662 if(total_coeff == max_coeff) // block full: no zeros can remain
4665 if(n == CHROMA_DC_BLOCK_INDEX)
4666 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4668 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4671 coeff_num = zeros_left + total_coeff - 1; // position of the last coefficient in scan order
4672 j = scantable[coeff_num];
4674 block[j] = level[0]; // branch without dequantization (qmul unused)
4675 for(i=1;i<total_coeff;i++) {
4678 else if(zeros_left < 7){
4679 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4681 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4683 zeros_left -= run_before;
4684 coeff_num -= 1 + run_before;
4685 j= scantable[ coeff_num ];
4690 block[j] = (level[0] * qmul[j] + 32)>>6; // branch with per-position dequant
4691 for(i=1;i<total_coeff;i++) {
4694 else if(zeros_left < 7){
4695 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4697 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4699 zeros_left -= run_before;
4700 coeff_num -= 1 + run_before;
4701 j= scantable[ coeff_num ];
4703 block[j]= (level[i] * qmul[j] + 32)>>6;
4708 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4716 * decodes a P_SKIP or B_SKIP macroblock
/**
 * decode_mb_skip: reconstructs a skipped macroblock (P_SKIP or B_SKIP):
 * clears coefficient counts, derives the skip motion (direct prediction
 * for B, pskip prediction for P) and writes the result back.
 * NOTE(review): sampled excerpt; mb_type initialization and some braces
 * are not visible here.
 */
4718 static void decode_mb_skip(H264Context *h){
4719 MpegEncContext * const s = &h->s;
4720 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4723 memset(h->non_zero_count[mb_xy], 0, 16); // skipped MBs carry no residual
4724 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4726 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4727 h->mb_field_decoding_flag= get_bits1(&s->gb); // MBAFF: field flag on top MB of pair
4729 if(h->mb_field_decoding_flag)
4730 mb_type|= MB_TYPE_INTERLACED;
4732 if( h->slice_type == B_TYPE )
4734 // just for fill_caches. pred_direct_motion will set the real mb_type
4735 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4737 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4738 pred_direct_motion(h, &mb_type);
4740 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); // skipped MBs have zero mv deltas
4741 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
4747 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; // P_SKIP path
4749 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4750 pred_pskip_motion(h, &mx, &my);
4751 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); // ref index 0 for the whole MB
4752 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4754 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4757 write_back_motion(h, mb_type);
4758 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4759 s->current_picture.qscale_table[mb_xy]= s->qscale;
4760 h->slice_table[ mb_xy ]= h->slice_num;
4761 h->prev_mb_skipped= 1;
4765 * decodes a macroblock
4766 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * decode_mb_cavlc: decodes one macroblock in CAVLC entropy mode -- skip
 * runs, mb_type, intra prediction modes or inter motion/reference data,
 * coded block pattern, delta-QP and all residual blocks.
 * NOTE(review): this listing is a sampled excerpt -- many original lines
 * (else branches, braces, several declarations, return statements) are
 * not visible here; the control flow below is therefore incomplete.
 */
4768 static int decode_mb_cavlc(H264Context *h){
4769 MpegEncContext * const s = &h->s;
4770 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4771 int mb_type, partition_count, cbp;
4772 int dct8x8_allowed= h->pps.transform_8x8_mode;
4774 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4776 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4777 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4779 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){ // P/B slices may carry skip runs
4780 if(s->mb_skip_run==-1)
4781 s->mb_skip_run= get_ue_golomb(&s->gb);
4783 if (s->mb_skip_run--) { // still inside a skip run: no more syntax for this MB
4788 if(h->mb_aff_frame){
4789 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4790 h->mb_field_decoding_flag = get_bits1(&s->gb); // MBAFF field flag
4792 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4794 h->prev_mb_skipped= 0;
4796 mb_type= get_ue_golomb(&s->gb);
4797 if(h->slice_type == B_TYPE){ // map raw mb_type through the per-slice-type table
4799 partition_count= b_mb_type_info[mb_type].partition_count;
4800 mb_type= b_mb_type_info[mb_type].type;
4803 goto decode_intra_mb; // large B mb_type values fall through to intra
4805 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4807 partition_count= p_mb_type_info[mb_type].partition_count;
4808 mb_type= p_mb_type_info[mb_type].type;
4811 goto decode_intra_mb;
4814 assert(h->slice_type == I_TYPE);
4817 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4821 cbp= i_mb_type_info[mb_type].cbp; // intra16x16 types imply a fixed cbp
4822 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4823 mb_type= i_mb_type_info[mb_type].type;
4826 if(h->mb_field_decoding_flag)
4827 mb_type |= MB_TYPE_INTERLACED;
4829 h->slice_table[ mb_xy ]= h->slice_num;
4831 if(IS_INTRA_PCM(mb_type)){ // raw uncompressed samples follow
4834 // we assume these blocks are very rare so we don't optimize it
4835 align_get_bits(&s->gb); // PCM data is byte-aligned
4837 // The pixels are stored in the same order as levels in h->mb array.
4838 for(y=0; y<16; y++){
4839 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4840 for(x=0; x<16; x++){
4841 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4842 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4846 const int index= 256 + 4*(y&3) + 32*(y>>2); // chroma U plane offset in h->mb
4848 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4849 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4853 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2); // chroma V plane offset
4855 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4856 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4860 // In deblocking, the quantizer is 0
4861 s->current_picture.qscale_table[mb_xy]= 0;
4862 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4863 // All coeffs are present
4864 memset(h->non_zero_count[mb_xy], 16, 16);
4866 s->current_picture.mb_type[mb_xy]= mb_type;
4870 fill_caches(h, mb_type, 0);
4873 if(IS_INTRA(mb_type)){
4874 // init_top_left_availability(h);
4875 if(IS_INTRA4x4(mb_type)){
4878 if(dct8x8_allowed && get_bits1(&s->gb)){ // transform_size_8x8_flag
4879 mb_type |= MB_TYPE_8x8DCT;
4883 // fill_intra4x4_pred_table(h);
4884 for(i=0; i<16; i+=di){ // di presumably 1 (4x4) or 4 (8x8) -- declaration not visible
4885 const int mode_coded= !get_bits1(&s->gb); // prev_intra4x4_pred_mode_flag
4886 const int predicted_mode= pred_intra_mode(h, i);
4890 const int rem_mode= get_bits(&s->gb, 3);
4891 if(rem_mode<predicted_mode)
4896 mode= predicted_mode;
4900 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4902 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4904 write_back_intra_pred_mode(h);
4905 if( check_intra4x4_pred_mode(h) < 0)
4908 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4909 if(h->intra16x16_pred_mode < 0)
4912 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4914 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4915 if(h->chroma_pred_mode < 0)
4917 }else if(partition_count==4){ // 8x8 partitions with sub_mb_types
4918 int i, j, sub_partition_count[4], list, ref[2][4];
4920 if(h->slice_type == B_TYPE){
4922 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4923 if(h->sub_mb_type[i] >=13){
4924 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4927 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4928 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4930 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4931 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4932 pred_direct_motion(h, &mb_type);
4933 h->ref_cache[0][scan8[4]] = // mark mid-row cache entries unavailable
4934 h->ref_cache[1][scan8[4]] =
4935 h->ref_cache[0][scan8[12]] =
4936 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4939 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4941 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4942 if(h->sub_mb_type[i] >=4){
4943 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4946 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4947 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4951 for(list=0; list<2; list++){ // first pass: read reference indices
4952 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4953 if(ref_count == 0) continue;
4954 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4958 if(IS_DIRECT(h->sub_mb_type[i])) continue; // direct sub-blocks carry no ref index
4959 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4960 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4969 dct8x8_allowed = get_dct8x8_allowed(h);
4971 for(list=0; list<2; list++){ // second pass: fill caches and read mv deltas
4972 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4973 if(ref_count == 0) continue;
4976 if(IS_DIRECT(h->sub_mb_type[i])) {
4977 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4980 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4981 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4983 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4984 const int sub_mb_type= h->sub_mb_type[i];
4985 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4986 for(j=0; j<sub_partition_count[i]; j++){
4988 const int index= 4*i + block_width*j;
4989 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4990 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4991 mx += get_se_golomb(&s->gb); // mvd added to the predicted motion vector
4992 my += get_se_golomb(&s->gb);
4993 tprintf("final mv:%d %d\n", mx, my);
4995 if(IS_SUB_8X8(sub_mb_type)){ // replicate mv to all covered 4x4 cells
4996 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4997 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4998 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4999 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5000 }else if(IS_SUB_8X4(sub_mb_type)){
5001 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5002 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5003 }else if(IS_SUB_4X8(sub_mb_type)){
5004 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5005 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5007 assert(IS_SUB_4X4(sub_mb_type));
5008 mv_cache[ 0 ][0]= mx;
5009 mv_cache[ 0 ][1]= my;
5013 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5019 }else if(IS_DIRECT(mb_type)){
5020 pred_direct_motion(h, &mb_type);
5021 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5023 int list, mx, my, i; // 16x16 / 16x8 / 8x16 inter partitions
5024 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5025 if(IS_16X16(mb_type)){
5026 for(list=0; list<2; list++){
5027 if(h->ref_count[list]>0){
5028 if(IS_DIR(mb_type, 0, list)){
5029 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5030 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5032 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5035 for(list=0; list<2; list++){
5036 if(IS_DIR(mb_type, 0, list)){
5037 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5038 mx += get_se_golomb(&s->gb);
5039 my += get_se_golomb(&s->gb);
5040 tprintf("final mv:%d %d\n", mx, my);
5042 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5044 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5047 else if(IS_16X8(mb_type)){
5048 for(list=0; list<2; list++){
5049 if(h->ref_count[list]>0){
5051 if(IS_DIR(mb_type, i, list)){
5052 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5053 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5055 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5059 for(list=0; list<2; list++){
5061 if(IS_DIR(mb_type, i, list)){
5062 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5063 mx += get_se_golomb(&s->gb);
5064 my += get_se_golomb(&s->gb);
5065 tprintf("final mv:%d %d\n", mx, my);
5067 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5069 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5073 assert(IS_8X16(mb_type));
5074 for(list=0; list<2; list++){
5075 if(h->ref_count[list]>0){
5077 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5078 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5079 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5081 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5085 for(list=0; list<2; list++){
5087 if(IS_DIR(mb_type, i, list)){
5088 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5089 mx += get_se_golomb(&s->gb);
5090 my += get_se_golomb(&s->gb);
5091 tprintf("final mv:%d %d\n", mx, my);
5093 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5095 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5101 if(IS_INTER(mb_type))
5102 write_back_motion(h, mb_type);
5104 if(!IS_INTRA16x16(mb_type)){ // intra16x16 has cbp encoded in mb_type
5105 cbp= get_ue_golomb(&s->gb);
5107 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5111 if(IS_INTRA4x4(mb_type))
5112 cbp= golomb_to_intra4x4_cbp[cbp];
5114 cbp= golomb_to_inter_cbp[cbp];
5117 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5118 if(get_bits1(&s->gb)) // transform_size_8x8_flag for inter MBs
5119 mb_type |= MB_TYPE_8x8DCT;
5121 s->current_picture.mb_type[mb_xy]= mb_type;
5123 if(cbp || IS_INTRA16x16(mb_type)){ // residual data present
5124 int i8x8, i4x4, chroma_idx;
5125 int chroma_qp, dquant;
5126 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5127 const uint8_t *scan, *scan8x8, *dc_scan;
5129 // fill_non_zero_count_cache(h);
5131 if(IS_INTERLACED(mb_type)){
5132 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5133 dc_scan= luma_dc_field_scan;
5135 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5136 dc_scan= luma_dc_zigzag_scan;
5138 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5140 dquant= get_se_golomb(&s->gb);
5142 if( dquant > 25 || dquant < -26 ){
5143 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5147 s->qscale += dquant;
5148 if(((unsigned)s->qscale) > 51){ // wrap qscale back into [0,51]
5149 if(s->qscale<0) s->qscale+= 52;
5150 else s->qscale-= 52;
5153 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5154 if(IS_INTRA16x16(mb_type)){
5155 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5156 return -1; //FIXME continue if partitioned and other return -1 too
5159 assert((cbp&15) == 0 || (cbp&15) == 15);
5162 for(i8x8=0; i8x8<4; i8x8++){
5163 for(i4x4=0; i4x4<4; i4x4++){
5164 const int index= i4x4 + 4*i8x8;
5165 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ // scan+1: AC only, DC decoded above
5171 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5174 for(i8x8=0; i8x8<4; i8x8++){
5175 if(cbp & (1<<i8x8)){
5176 if(IS_8x8DCT(mb_type)){
5177 DCTELEM *buf = &h->mb[64*i8x8];
5179 for(i4x4=0; i4x4<4; i4x4++){
5180 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5181 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5184 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5185 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5187 for(i4x4=0; i4x4<4; i4x4++){
5188 const int index= i4x4 + 4*i8x8;
5190 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5196 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5197 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5203 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5204 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ // NULL qmul: chroma DC dequantized later
5210 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5211 for(i4x4=0; i4x4<4; i4x4++){
5212 const int index= 16 + 4*chroma_idx + i4x4;
5213 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5219 uint8_t * const nnz= &h->non_zero_count_cache[0];
5220 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5221 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5224 uint8_t * const nnz= &h->non_zero_count_cache[0]; // no residual: clear all counts
5225 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5226 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5227 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5229 s->current_picture.qscale_table[mb_xy]= s->qscale;
5230 write_back_non_zero_count(h);
/**
 * decode_cabac_field_decoding_flag: CABAC-decodes mb_field_decoding_flag
 * for an MBAFF macroblock pair; the context index (70..72) is derived from
 * whether the left and above pairs in the same slice are field-coded.
 * NOTE(review): sampled excerpt; the ctx increments inside the two if
 * bodies are not visible here.
 */
5235 static int decode_cabac_field_decoding_flag(H264Context *h) {
5236 MpegEncContext * const s = &h->s;
5237 const int mb_x = s->mb_x;
5238 const int mb_y = s->mb_y & ~1; // address of the top MB of the pair
5239 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride; // left neighbour pair
5240 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride; // above neighbour pair
5242 unsigned int ctx = 0;
5244 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5247 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5251 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * decode_cabac_intra_mb_type: CABAC-decodes an intra mb_type starting at
 * context ctx_base.  Returns 0 for I_4x4, 25 for I_PCM, or 1..24 for the
 * I_16x16 variants (encoding cbp_luma, cbp_chroma and the prediction mode).
 * @param intra_slice selects the context offsets used in I slices
 * NOTE(review): sampled excerpt; ctx declaration/increments and the final
 * return are not visible here.
 */
5254 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5255 uint8_t *state= &h->cabac_state[ctx_base];
5259 MpegEncContext * const s = &h->s;
5260 const int mba_xy = h->left_mb_xy[0];
5261 const int mbb_xy = h->top_mb_xy;
5263 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5265 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5267 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5268 return 0; /* I4x4 */
5271 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5272 return 0; /* I4x4 */
5275 if( get_cabac( &h->cabac, &state[1] ) ) // NOTE(review): sampled; original index not visible beyond this listing
5275 if( get_cabac_terminate( &h->cabac ) )
5276 return 25; /* PCM */
5278 mb_type = 1; /* I16x16 */
5279 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5280 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5281 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5282 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5283 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
/**
 * decode_cabac_mb_type: CABAC-decodes mb_type for the current slice type.
 * I slices delegate to decode_cabac_intra_mb_type; P and B slices decode
 * their inter type trees (with intra types appended after an escape).
 * NOTE(review): sampled excerpt; several declarations (bits, ctx), braces
 * and the trailing return are not visible here.
 */
5287 static int decode_cabac_mb_type( H264Context *h ) {
5288 MpegEncContext * const s = &h->s;
5290 if( h->slice_type == I_TYPE ) {
5291 return decode_cabac_intra_mb_type(h, 3, 1);
5292 } else if( h->slice_type == P_TYPE ) {
5293 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { // 0 => inter type
5295 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5296 /* P_L0_D16x16, P_8x8 */
5297 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5299 /* P_L0_D8x16, P_L0_D16x8 */
5300 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
5303 return decode_cabac_intra_mb_type(h, 17, 0) + 5; // intra types start at 5 in P slices
5305 } else if( h->slice_type == B_TYPE ) {
5306 const int mba_xy = h->left_mb_xy[0];
5307 const int mbb_xy = h->top_mb_xy;
5311 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5313 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5316 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5317 return 0; /* B_Direct_16x16 */
5319 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5320 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5323 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; // 4-bit suffix tree
5324 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5325 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5326 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5328 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5329 else if( bits == 13 ) { // escape to intra types
5330 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5331 } else if( bits == 14 )
5332 return 11; /* B_L1_L0_8x16 */
5333 else if( bits == 15 )
5334 return 22; /* B_8x8 */
5336 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); // one more bit
5337 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5339 /* TODO SI/SP frames? */
/**
 * Decodes the CABAC mb_skip_flag for the current macroblock.
 * Context (ctx) depends on whether the left and top neighbours are
 * available, in the same slice, and themselves not skipped
 * (the ctx increment lines are elided in this excerpt).
 * B slices use a different context base than P slices.
 * @return 1 if the macroblock is skipped, 0 otherwise
 */
5344 static int decode_cabac_mb_skip( H264Context *h) {
5345 MpegEncContext * const s = &h->s;
5346 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5347 const int mba_xy = mb_xy - 1;
5348 const int mbb_xy = mb_xy - s->mb_stride;
5351 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5353 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5356 if( h->slice_type == B_TYPE )
/* ctx base 11 for P; B slices presumably add an offset above — elided here */
5358 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one CABAC-coded intra 4x4 prediction mode.
 * First bin (state 68) signals "use the predicted mode"; otherwise a
 * 3-bit remainder (state 69, LSB first) encodes rem_intra4x4_pred_mode.
 * Modes >= the predicted mode are shifted up by one (standard
 * rem_intra4x4_pred_mode mapping).
 * @param pred_mode the most-probable (predicted) mode for this block
 */
5361 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5364 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5367 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5368 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5369 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5371 if( mode >= pred_mode )
/**
 * Decodes the CABAC intra chroma prediction mode (0..3).
 * Context from left/top neighbours: a neighbour contributes only if it
 * is in the same slice and its stored chroma_pred_mode is non-zero.
 * Decoded as a truncated unary code: first bin uses the derived ctx,
 * subsequent bins use state 64+3.
 */
5377 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5378 const int mba_xy = h->left_mb_xy[0];
5379 const int mbb_xy = h->top_mb_xy;
5383 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5384 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5387 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5390 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5393 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5395 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping the 4x4 luma block scan index (0..15, 8x8-major
 * order) to its x/y coordinate within the macroblock, and the inverse
 * (x,y) -> block index table. */
5401 static const uint8_t block_idx_x[16] = {
5402 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5404 static const uint8_t block_idx_y[16] = {
5405 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5407 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the CABAC-coded luma coded_block_pattern (4 bits, one per
 * 8x8 block). For each 8x8 block the context (ctx) is derived from the
 * cbp bits of the 8x8 neighbours to the left and above, taken either
 * from this macroblock's bits decoded so far or from left_cbp/top_cbp
 * of the neighbouring macroblock when on an edge.
 * NOTE(review): several lines of the loop body are elided here.
 */
5414 static int decode_cabac_mb_cbp_luma( H264Context *h) {
/* seed cbp_b from the macroblock above, if it belongs to this slice */
5419 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5421 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5424 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
/* top-left 4x4 coordinate of this 8x8 block */
5429 x = block_idx_x[4*i8x8];
5430 y = block_idx_y[4*i8x8];
5434 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5435 cbp_a = h->left_cbp;
5436 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5442 /* No need to test for skip as we put 0 for skip block */
5443 /* No need to test for IPCM as we put 1 for IPCM block */
/* left neighbour's 8x8 cbp bit (x-1 wraps to the neighbour MB's column) */
5445 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5446 if( ((cbp_a >> i8x8a)&0x01) == 0 )
/* top neighbour's 8x8 cbp bit */
5451 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5452 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5456 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the CABAC-coded chroma coded_block_pattern (0, 1 or 2).
 * 0 = no chroma coefficients, 1 = DC only, 2 = DC + AC.
 * Two bins from context group 77; each bin's ctx is derived from the
 * chroma cbp (bits 4-5 of the stored cbp) of the left/top neighbours.
 */
5462 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5466 cbp_a = (h->left_cbp>>4)&0x03;
5467 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: "any chroma coefficients?" — ctx from neighbours being non-zero */
5470 if( cbp_a > 0 ) ctx++;
5471 if( cbp_b > 0 ) ctx += 2;
5472 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: "AC too?" — ctx from neighbours having cbp==2 (ctx reset elided) */
5476 if( cbp_a == 2 ) ctx++;
5477 if( cbp_b == 2 ) ctx += 2;
5478 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta as a CABAC unary code.
 * The initial context depends on whether the previous macroblock (in
 * decode order, wrapping to the previous row) changed qp and had coded
 * coefficients or was intra 16x16. The unary value is then mapped to a
 * signed delta: even -> positive, odd -> negative.
 * @return the qp delta, or (per the guard below) an error sentinel if
 *         the unary code runs away — presumably INT_MIN, see caller.
 */
5480 static int decode_cabac_mb_dqp( H264Context *h) {
5481 MpegEncContext * const s = &h->s;
5487 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
/* at the left edge: use the last macroblock of the previous row */
5489 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5491 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5494 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5500 if(val > 102) //prevent infinite loop
/* odd unary values map to negative deltas */
5507 return -(val + 1)/2;
/**
 * Decodes the CABAC sub_mb_type for one 8x8 partition of a P macroblock.
 * Binary tree over context states 21..23; returns an index into
 * p_sub_mb_type_info (return statements between bins are elided here).
 */
5509 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5510 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5512 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5514 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes the CABAC sub_mb_type for one 8x8 partition of a B macroblock.
 * Binary tree over context states 36..39; returns an index into
 * b_sub_mb_type_info. State 39 is reused for all suffix bins.
 */
5518 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5520 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5521 return 0; /* B_Direct_8x8 */
5522 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5523 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5525 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5526 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5527 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* two suffix bits select among the remaining partitioned types */
5530 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5531 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag (one CABAC bin).
 * Context 399 + neighbor_transform_size, where neighbor_transform_size
 * counts neighbouring macroblocks that use the 8x8 transform.
 * @return 1 if this macroblock uses the 8x8 DCT, 0 for 4x4
 */
5535 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5536 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a CABAC reference index for partition n of the given list.
 * Context from the left/top cached ref indices; in B slices a neighbour
 * predicted by direct mode does not contribute to the context.
 * Decoded as a unary code over context group 54 (ctx update lines elided).
 * @param list 0 or 1 (L0/L1)
 * @param n    scan8-relative partition index
 */
5539 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5540 int refa = h->ref_cache[list][scan8[n] - 1];
5541 int refb = h->ref_cache[list][scan8[n] - 8];
5545 if( h->slice_type == B_TYPE) {
5546 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5548 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5557 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one CABAC motion vector difference component.
 * The initial context depends on amvd, the sum of the absolute mvd of
 * the left and top neighbours (thresholds 32 and, presumably, 2 — the
 * lower branch is elided here). Magnitude is unary up to 9, then
 * Exp-Golomb-style bypass bins; a final bypass bin gives the sign.
 * @param l 0 = horizontal component (ctx base 40), 1 = vertical (47)
 * @return the signed mvd value
 */
5567 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5568 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5569 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5570 int ctxbase = (l == 0) ? 40 : 47;
5575 else if( amvd > 32 )
5580 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, capped at 9 before switching to bypass suffix */
5585 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exponential (Golomb-like) suffix in bypass mode */
5593 while( get_cabac_bypass( &h->cabac ) ) {
5598 if( get_cabac_bypass( &h->cabac ) )
/* sign bit */
5602 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Computes the coded_block_flag context for a residual block.
 * nza/nzb are the "neighbour has non-zero coefficients" flags taken
 * from the appropriate cache depending on the block category:
 *   cat 0 (luma DC):   bit 8 of left_cbp/top_cbp
 *   cat 1/2 (luma AC/4x4): non_zero_count_cache neighbours
 *   cat 3 (chroma DC): per-component bit 6+idx of left_cbp/top_cbp
 *   cat 4 (chroma AC): non_zero_count_cache neighbours (16+idx)
 * Final context is ctx (0..3, derivation lines elided) + 4*cat.
 */
5606 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5611 nza = h->left_cbp&0x100;
5612 nzb = h-> top_cbp&0x100;
5613 } else if( cat == 1 || cat == 2 ) {
5614 nza = h->non_zero_count_cache[scan8[idx] - 1];
5615 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5616 } else if( cat == 3 ) {
5617 nza = (h->left_cbp>>(6+idx))&0x01;
5618 nzb = (h-> top_cbp>>(6+idx))&0x01;
5621 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5622 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5631 return ctx + 4 * cat;
/**
 * Decodes one CABAC-coded residual block into @p block.
 * Steps: coded_block_flag (skipped for cat 5, per the 8x8 path below),
 * significance map (significant/last flags), then coefficient levels
 * (unary-coded magnitude with an Exp-Golomb bypass escape at >= 15)
 * and bypass-coded signs, written at scantable positions, optionally
 * dequantized through @p qmul.
 * @param cat       block category, see the table in the comment below
 * @param n         block index within the macroblock (meaning per cat)
 * @param scantable zigzag/field scan for this block size
 * @param qmul      dequant table, or NULL to store raw levels (DC paths)
 * @param max_coeff number of coefficients in the block (4/15/16/64)
 * @return 0 on "no coefficients" early exit; other returns elided here
 */
5634 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5635 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context offsets differ between frame (0) and field (1) decoding */
5636 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5637 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5638 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5639 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5640 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* per-position context maps for the 8x8 significance coding */
5641 static const int significant_coeff_flag_offset_8x8[63] = {
5642 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5643 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5644 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5645 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5647 static const int last_coeff_flag_offset_8x8[63] = {
5648 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5649 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5650 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5651 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5657 int coeff_count = 0;
5660 int abslevelgt1 = 0;
5662 uint8_t *significant_coeff_ctx_base;
5663 uint8_t *last_coeff_ctx_base;
5664 uint8_t *abs_level_m1_ctx_base;
5666 /* cat: 0-> DC 16x16 n = 0
5667 * 1-> AC 16x16 n = luma4x4idx
5668 * 2-> Luma4x4 n = luma4x4idx
5669 * 3-> DC Chroma n = iCbCr
5670 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5671 * 5-> Luma8x8 n = 4 * luma8x8idx
5674 /* read coded block flag */
5676 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* block is all-zero: record it in the appropriate nnz cache and bail out */
5677 if( cat == 1 || cat == 2 )
5678 h->non_zero_count_cache[scan8[n]] = 0;
5680 h->non_zero_count_cache[scan8[16+n]] = 0;
5686 significant_coeff_ctx_base = h->cabac_state
5687 + significant_coeff_flag_offset[cat]
5688 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5689 last_coeff_ctx_base = h->cabac_state
5690 + last_significant_coeff_flag_offset[cat]
5691 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5692 abs_level_m1_ctx_base = h->cabac_state
5693 + coeff_abs_level_m1_offset[cat];
/* significance map: record positions of non-zero coeffs, stop at "last" */
5696 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5697 for(last= 0; last < coefs; last++) { \
5698 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5699 if( get_cabac( &h->cabac, sig_ctx )) { \
5700 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5701 index[coeff_count++] = last; \
5702 if( get_cabac( &h->cabac, last_ctx ) ) { \
5708 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5709 last_coeff_flag_offset_8x8[last] );
5711 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* the final coefficient position has no significance flag of its own */
5713 if( last == max_coeff -1 ) {
5714 index[coeff_count++] = last;
5716 assert(coeff_count > 0);
/* record non-zero status per category (cbp_table bits / nnz caches) */
5719 h->cbp_table[mb_xy] |= 0x100;
5720 else if( cat == 1 || cat == 2 )
5721 h->non_zero_count_cache[scan8[n]] = coeff_count;
5723 h->cbp_table[mb_xy] |= 0x40 << n;
5725 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
/* cat 5 (8x8): spread the count over the 4 covered 4x4 positions */
5728 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode levels in reverse scan order */
5731 for( i = coeff_count - 1; i >= 0; i-- ) {
5732 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5733 int j= scantable[index[i]];
5735 if( get_cabac( &h->cabac, ctx ) == 0 ) {
/* |level| == 1: just the sign (qmul==NULL means raw DC storage) */
5737 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5740 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5741 else block[j] = ( qmul[j] + 32) >> 6;
/* |level| >= 2: unary continuation, then Exp-Golomb escape at 15 */
5747 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5748 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5752 if( coeff_abs >= 15 ) {
5754 while( get_cabac_bypass( &h->cabac ) ) {
5755 coeff_abs += 1 << j;
5760 if( get_cabac_bypass( &h->cabac ) )
5761 coeff_abs += 1 << j ;
5766 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5767 else block[j] = coeff_abs;
5769 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5770 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Plain frame coding uses the simple -stride / -1 offsets; in MBAFF
 * frames the neighbour indices are adjusted when the field/frame coding
 * mode of the neighbouring MB pair differs from the current one.
 */
5779 static void inline compute_mb_neighbors(H264Context *h)
5781 MpegEncContext * const s = &h->s;
5782 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5783 h->top_mb_xy = mb_xy - s->mb_stride;
5784 h->left_mb_xy[0] = mb_xy - 1;
5785 if(h->mb_aff_frame){
5786 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5787 const int top_pair_xy = pair_xy - s->mb_stride;
5788 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5789 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5790 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5791 const int bottom = (s->mb_y & 1);
5793 ? !curr_mb_frame_flag // bottom macroblock
5794 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
/* move the top neighbour one MB-pair row up when coding modes require it */
5796 h->top_mb_xy -= s->mb_stride;
5798 if (left_mb_frame_flag != curr_mb_frame_flag) {
/* left neighbour comes from the top MB of the left pair */
5799 h->left_mb_xy[0] = pair_xy - 1;
5806 * decodes a macroblock
5807 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5809 static int decode_mb_cabac(H264Context *h) {
5810 MpegEncContext * const s = &h->s;
5811 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5812 int mb_type, partition_count, cbp = 0;
5813 int dct8x8_allowed= h->pps.transform_8x8_mode;
5815 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5817 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* ---- mb_skip_flag (P/B slices only) ---- */
5818 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5819 /* read skip flags */
5820 if( decode_cabac_mb_skip( h ) ) {
5823 h->cbp_table[mb_xy] = 0;
5824 h->chroma_pred_mode_table[mb_xy] = 0;
5825 h->last_qscale_diff = 0;
/* ---- MBAFF: field/frame decoding flag for the MB pair ---- */
5831 if(h->mb_aff_frame){
5832 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5833 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5835 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5837 h->prev_mb_skipped = 0;
/* ---- macroblock type ---- */
5839 compute_mb_neighbors(h);
5840 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5841 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* map the decoded index through the per-slice-type info tables */
5845 if( h->slice_type == B_TYPE ) {
5847 partition_count= b_mb_type_info[mb_type].partition_count;
5848 mb_type= b_mb_type_info[mb_type].type;
5851 goto decode_intra_mb;
5853 } else if( h->slice_type == P_TYPE ) {
5855 partition_count= p_mb_type_info[mb_type].partition_count;
5856 mb_type= p_mb_type_info[mb_type].type;
5859 goto decode_intra_mb;
5862 assert(h->slice_type == I_TYPE);
5864 partition_count = 0;
5865 cbp= i_mb_type_info[mb_type].cbp;
5866 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5867 mb_type= i_mb_type_info[mb_type].type;
5869 if(h->mb_field_decoding_flag)
5870 mb_type |= MB_TYPE_INTERLACED;
5872 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- PCM macroblock: raw samples follow, bypassing CABAC ---- */
5874 if(IS_INTRA_PCM(mb_type)) {
5878 // We assume these blocks are very rare so we dont optimize it.
5879 // FIXME The two following lines get the bitstream position in the cabac
5880 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5881 ptr= h->cabac.bytestream;
5882 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5884 // The pixels are stored in the same order as levels in h->mb array.
5885 for(y=0; y<16; y++){
5886 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5887 for(x=0; x<16; x++){
5888 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5889 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5893 const int index= 256 + 4*(y&3) + 32*(y>>2);
5895 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5896 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5900 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5902 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5903 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw PCM bytes */
5907 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5909 // All blocks are present
5910 h->cbp_table[mb_xy] = 0x1ef;
5911 h->chroma_pred_mode_table[mb_xy] = 0;
5912 // In deblocking, the quantizer is 0
5913 s->current_picture.qscale_table[mb_xy]= 0;
5914 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5915 // All coeffs are present
5916 memset(h->non_zero_count[mb_xy], 16, 16);
5917 s->current_picture.mb_type[mb_xy]= mb_type;
5921 fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
5923 if( IS_INTRA( mb_type ) ) {
5925 if( IS_INTRA4x4( mb_type ) ) {
5926 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
/* 8x8 transform: one pred mode per 8x8 block, replicated to the 4x4 cache */
5927 mb_type |= MB_TYPE_8x8DCT;
5928 for( i = 0; i < 16; i+=4 ) {
5929 int pred = pred_intra_mode( h, i );
5930 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5931 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5934 for( i = 0; i < 16; i++ ) {
5935 int pred = pred_intra_mode( h, i );
5936 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5938 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5941 write_back_intra_pred_mode(h);
5942 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5944 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5945 if( h->intra16x16_pred_mode < 0 ) return -1;
5947 h->chroma_pred_mode_table[mb_xy] =
5948 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5950 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5951 if( h->chroma_pred_mode < 0 ) return -1;
/* ---- 8x8 partitioned inter macroblock ---- */
5952 } else if( partition_count == 4 ) {
5953 int i, j, sub_partition_count[4], list, ref[2][4];
5955 if( h->slice_type == B_TYPE ) {
5956 for( i = 0; i < 4; i++ ) {
5957 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5958 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5959 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5961 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5962 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5963 pred_direct_motion(h, &mb_type);
5964 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
/* mark direct 8x8 blocks so ref-index context derivation skips them */
5965 for( i = 0; i < 4; i++ )
5966 if( IS_DIRECT(h->sub_mb_type[i]) )
5967 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5971 for( i = 0; i < 4; i++ ) {
5972 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5973 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5974 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 block */
5978 for( list = 0; list < 2; list++ ) {
5979 if( h->ref_count[list] > 0 ) {
5980 for( i = 0; i < 4; i++ ) {
5981 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5982 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5983 if( h->ref_count[list] > 1 )
5984 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5990 h->ref_cache[list][ scan8[4*i]+1 ]=
5991 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5997 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors per sub-partition */
5999 for(list=0; list<2; list++){
6001 if(IS_DIRECT(h->sub_mb_type[i])){
6002 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6005 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6007 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6008 const int sub_mb_type= h->sub_mb_type[i];
6009 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6010 for(j=0; j<sub_partition_count[i]; j++){
6013 const int index= 4*i + block_width*j;
6014 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6015 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6016 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6018 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6019 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6020 tprintf("final mv:%d %d\n", mx, my);
/* replicate mv/mvd into all cache cells the sub-partition covers */
6022 if(IS_SUB_8X8(sub_mb_type)){
6023 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6024 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6025 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6026 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6028 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6029 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6030 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6031 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6032 }else if(IS_SUB_8X4(sub_mb_type)){
6033 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6034 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6036 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6037 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6038 }else if(IS_SUB_4X8(sub_mb_type)){
6039 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6040 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6042 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6043 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6045 assert(IS_SUB_4X4(sub_mb_type));
6046 mv_cache[ 0 ][0]= mx;
6047 mv_cache[ 0 ][1]= my;
6049 mvd_cache[ 0 ][0]= mx - mpx;
6050 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this 8x8 block: zero its mv/mvd cache */
6054 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6055 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6056 p[0] = p[1] = p[8] = p[9] = 0;
6057 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- B_Direct_16x16 ---- */
6061 } else if( IS_DIRECT(mb_type) ) {
6062 pred_direct_motion(h, &mb_type);
6063 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6064 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6065 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter macroblock ---- */
6067 int list, mx, my, i, mpx, mpy;
6068 if(IS_16X16(mb_type)){
6069 for(list=0; list<2; list++){
6070 if(IS_DIR(mb_type, 0, list)){
6071 if(h->ref_count[list] > 0 ){
6072 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6073 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6076 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6078 for(list=0; list<2; list++){
6079 if(IS_DIR(mb_type, 0, list)){
6080 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6082 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6083 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6084 tprintf("final mv:%d %d\n", mx, my);
6086 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6087 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6089 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6092 else if(IS_16X8(mb_type)){
6093 for(list=0; list<2; list++){
6094 if(h->ref_count[list]>0){
6096 if(IS_DIR(mb_type, i, list)){
6097 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6098 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6100 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6104 for(list=0; list<2; list++){
6106 if(IS_DIR(mb_type, i, list)){
6107 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6108 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6109 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6110 tprintf("final mv:%d %d\n", mx, my);
6112 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6113 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6115 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6116 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6121 assert(IS_8X16(mb_type));
6122 for(list=0; list<2; list++){
6123 if(h->ref_count[list]>0){
6125 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6126 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6127 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6129 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6133 for(list=0; list<2; list++){
6135 if(IS_DIR(mb_type, i, list)){
6136 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6137 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6138 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6140 tprintf("final mv:%d %d\n", mx, my);
6141 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6142 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6144 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6145 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6152 if( IS_INTER( mb_type ) ) {
6153 h->chroma_pred_mode_table[mb_xy] = 0;
6154 write_back_motion( h, mb_type );
/* ---- coded block pattern (intra16x16 carries it in mb_type) ---- */
6157 if( !IS_INTRA16x16( mb_type ) ) {
6158 cbp = decode_cabac_mb_cbp_luma( h );
6159 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6162 h->cbp_table[mb_xy] = cbp;
6164 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6165 if( decode_cabac_mb_transform_size( h ) )
6166 mb_type |= MB_TYPE_8x8DCT;
6168 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual decoding ---- */
6170 if( cbp || IS_INTRA16x16( mb_type ) ) {
6171 const uint8_t *scan, *scan8x8, *dc_scan;
6174 if(IS_INTERLACED(mb_type)){
6175 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6176 dc_scan= luma_dc_field_scan;
6178 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6179 dc_scan= luma_dc_zigzag_scan;
6181 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6183 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6184 if( dqp == INT_MIN ){
6185 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale into the legal 0..51 range (mod-52 arithmetic) */
6189 if(((unsigned)s->qscale) > 51){
6190 if(s->qscale<0) s->qscale+= 52;
6191 else s->qscale-= 52;
6193 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6195 if( IS_INTRA16x16( mb_type ) ) {
6197 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6198 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6201 for( i = 0; i < 16; i++ ) {
6202 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6203 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6207 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6211 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6212 if( cbp & (1<<i8x8) ) {
6213 if( IS_8x8DCT(mb_type) ) {
6214 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6215 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6218 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6219 const int index = 4*i8x8 + i4x4;
6220 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6221 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
/* 8x8 block not coded: clear its nnz cache entries */
6225 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6226 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6233 for( c = 0; c < 2; c++ ) {
6234 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6235 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6242 for( c = 0; c < 2; c++ ) {
6243 for( i = 0; i < 4; i++ ) {
6244 const int index = 16 + 4 * c + i;
6245 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6246 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
/* no chroma AC coded: clear the chroma nnz cache */
6251 uint8_t * const nnz= &h->non_zero_count_cache[0];
6252 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6253 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp == 0 and not intra16x16: no residual at all */
6256 uint8_t * const nnz= &h->non_zero_count_cache[0];
6257 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6258 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6259 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6262 s->current_picture.qscale_table[mb_xy]= s->qscale;
6263 write_back_non_zero_count(h);
/**
 * Deblocks one vertical luma edge (16 pixels tall).
 * bS < 4: per-edge tc0-clipped filtering, delegated to the DSP routine.
 * bS == 4 (intra MB edge, all four bS equal): strong filter applied
 * inline below, choosing the 3-tap or 5-tap variant per the standard
 * thresholds on |p0-q0|, |p2-p0| and |q2-q0|.
 * @param qp average qp of the two macroblocks sharing the edge
 */
6269 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6271 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6272 const int alpha = alpha_table[index_a];
6273 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6278 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6279 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6281 /* 16px edge length, because bS=4 is triggered by being at
6282 * the edge of an intra MB, so all 4 bS are the same */
6283 for( d = 0; d < 16; d++ ) {
6284 const int p0 = pix[-1];
6285 const int p1 = pix[-2];
6286 const int p2 = pix[-3];
6288 const int q0 = pix[0];
6289 const int q1 = pix[1];
6290 const int q2 = pix[2];
6292 if( ABS( p0 - q0 ) < alpha &&
6293 ABS( p1 - p0 ) < beta &&
6294 ABS( q1 - q0 ) < beta ) {
/* strong filtering allowed only for small p0/q0 gradients */
6296 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6297 if( ABS( p2 - p0 ) < beta)
6299 const int p3 = pix[-4];
6300 /* p0', p1', p2' */
6301 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6302 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6303 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6306 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6308 if( ABS( q2 - q0 ) < beta)
6310 const int q3 = pix[3];
6311 /* q0', q1', q2' */
6312 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6313 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6314 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6317 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak (3-tap) fallback on both sides */
6321 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6322 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6324 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge.
 * bS < 4: tc-clipped chroma filter (note tc0+1, per the chroma rule);
 * bS == 4: strong intra chroma filter. Both delegated to DSP routines.
 */
6330 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6332 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6333 const int alpha = alpha_table[index_a];
6334 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6339 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6340 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6342 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one vertical luma edge between an MBAFF macroblock pair.
 * Works row by row (16 rows) because bS and qp can differ per row:
 * bS is indexed per field/frame mode, and qp[0]/qp[1] are selected per
 * row via qp_index. Normal (bS<4) and strong (bS==4) paths mirror
 * filter_mb_edgev but are done in scalar code here.
 * @param qp two quantizers, one per field/MB of the pair
 */
6346 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6348 for( i = 0; i < 16; i++, pix += stride) {
6354 int bS_index = (i >> 1);
6355 if (h->mb_field_decoding_flag) {
/* field mode: odd/even rows belong to different fields */
6357 bS_index |= (i & 1);
6360 if( bS[bS_index] == 0 ) {
6364 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6365 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6366 alpha = alpha_table[index_a];
6367 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6370 if( bS[bS_index] < 4 ) {
6371 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6372 /* 4px edge length */
6373 const int p0 = pix[-1];
6374 const int p1 = pix[-2];
6375 const int p2 = pix[-3];
6376 const int q0 = pix[0];
6377 const int q1 = pix[1];
6378 const int q2 = pix[2];
6380 if( ABS( p0 - q0 ) < alpha &&
6381 ABS( p1 - p0 ) < beta &&
6382 ABS( q1 - q0 ) < beta ) {
/* optionally filter p1/q1, extending the clip range (tc) each time */
6386 if( ABS( p2 - p0 ) < beta ) {
6387 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6390 if( ABS( q2 - q0 ) < beta ) {
6391 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6395 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6396 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6397 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6398 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter, same structure as filter_mb_edgev */
6401 /* 4px edge length */
6402 const int p0 = pix[-1];
6403 const int p1 = pix[-2];
6404 const int p2 = pix[-3];
6406 const int q0 = pix[0];
6407 const int q1 = pix[1];
6408 const int q2 = pix[2];
6410 if( ABS( p0 - q0 ) < alpha &&
6411 ABS( p1 - p0 ) < beta &&
6412 ABS( q1 - q0 ) < beta ) {
6414 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6415 if( ABS( p2 - p0 ) < beta)
6417 const int p3 = pix[-4];
6419 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6420 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6421 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6424 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6426 if( ABS( q2 - q0 ) < beta)
6428 const int q3 = pix[3];
6430 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6431 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6432 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6435 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6439 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6440 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6442 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one vertical chroma edge between an MBAFF macroblock pair.
 * Row-by-row (8 rows) version mirroring filter_mb_mbaff_edgev; chroma
 * uses only p1/p0/q0/q1 and the tc0+1 clipping rule for bS < 4, and
 * the simple 3-tap strong filter for bS == 4.
 * @param qp two quantizers, one per field/MB of the pair
 */
6447 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6449 for( i = 0; i < 8; i++, pix += stride) {
6457 if( bS[bS_index] == 0 ) {
6461 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6462 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6463 alpha = alpha_table[index_a];
6464 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6465 if( bS[bS_index] < 4 ) {
6466 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6467 /* 2px edge length (because we use same bS than the one for luma) */
6468 const int p0 = pix[-1];
6469 const int p1 = pix[-2];
6470 const int q0 = pix[0];
6471 const int q1 = pix[1];
6473 if( ABS( p0 - q0 ) < alpha &&
6474 ABS( p1 - p0 ) < beta &&
6475 ABS( q1 - q0 ) < beta ) {
6476 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6478 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6479 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6480 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter */
6483 const int p0 = pix[-1];
6484 const int p1 = pix[-2];
6485 const int q0 = pix[0];
6486 const int q1 = pix[1];
6488 if( ABS( p0 - q0 ) < alpha &&
6489 ABS( p1 - p0 ) < beta &&
6490 ABS( q1 - q0 ) < beta ) {
6492 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6493 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6494 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6500 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6502 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6503 const int alpha = alpha_table[index_a];
6504 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6505 const int pix_next = stride;
6510 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6511 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6513 /* 16px edge length, see filter_mb_edgev */
6514 for( d = 0; d < 16; d++ ) {
6515 const int p0 = pix[-1*pix_next];
6516 const int p1 = pix[-2*pix_next];
6517 const int p2 = pix[-3*pix_next];
6518 const int q0 = pix[0];
6519 const int q1 = pix[1*pix_next];
6520 const int q2 = pix[2*pix_next];
6522 if( ABS( p0 - q0 ) < alpha &&
6523 ABS( p1 - p0 ) < beta &&
6524 ABS( q1 - q0 ) < beta ) {
6526 const int p3 = pix[-4*pix_next];
6527 const int q3 = pix[ 3*pix_next];
6529 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6530 if( ABS( p2 - p0 ) < beta) {
6532 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6533 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6534 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6537 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6539 if( ABS( q2 - q0 ) < beta) {
6541 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6542 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6543 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6546 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6550 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6551 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6553 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6560 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6562 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6563 const int alpha = alpha_table[index_a];
6564 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6569 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6570 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6572 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Deblocking-filter driver for one macroblock (mb_x, mb_y).
 * Computes the boundary strength bS for every internal and cross-macroblock
 * edge, derives the averaged luma/chroma quantizers, and dispatches to the
 * per-edge filter helpers (filter_mb_edgev/edgeh/edgecv/edgech and the
 * MBAFF variants).  img_y/img_cb/img_cr point at the top-left sample of
 * this macroblock in each plane; linesize/uvlinesize are the plane strides.
 * NOTE(review): this block is missing some lines in the extract; comments
 * describe only what the visible code shows.
 */
6576 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6577 MpegEncContext * const s = &h->s;
6578 const int mb_xy= mb_x + mb_y*s->mb_stride;
6579 int first_vertical_edge_done = 0;
6581 /* FIXME: A given frame may occupy more than one position in
6582 * the reference list. So ref2frm should be populated with
6583 * frame numbers, not indices. */
6584 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6586 //for sufficiently low qp, filtering wouldn't do anything
6587 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6588 if(!h->mb_aff_frame){
6589 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
6590 int qp = s->current_picture.qscale_table[mb_xy];
/* skip the whole macroblock when this qp and both neighbours' averaged qps
 * are at or below the threshold */
6592 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6593 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6599 // left mb is in picture
6600 && h->slice_table[mb_xy-1] != 255
6601 // and current and left pair do not have the same interlaced type
6602 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6603 // and left mb is in the same slice if deblocking_filter == 2
6604 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6605 /* First vertical edge is different in MBAFF frames
6606 * There are 8 different bS to compute and 2 different Qp
6613 first_vertical_edge_done = 1;
/* one boundary strength per row-pair of the mixed-field left edge */
6614 for( i = 0; i < 8; i++ ) {
6616 int b_idx= 8 + 4 + 8*y;
6617 int bn_idx= b_idx - 1;
6619 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6621 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6622 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6624 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6625 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6626 h->non_zero_count_cache[bn_idx] != 0 ) {
/* otherwise compare references and motion vectors of both sides */
6631 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6632 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6633 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6634 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6641 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6642 // Do not use s->qscale as luma quantizer because it has not the same
6643 // value in IPCM macroblocks.
6644 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6645 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6646 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6647 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6648 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6649 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6652 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6653 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6654 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6655 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6656 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6659 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6660 for( dir = 0; dir < 2; dir++ )
6663 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6664 const int mb_type = s->current_picture.mb_type[mb_xy];
6665 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* start at edge 1 when the neighbour macroblock is outside any slice */
6666 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6668 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6669 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6670 // how often to recheck mv-based bS when iterating between edges
6671 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6672 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6673 // how often to recheck mv-based bS when iterating along each edge
6674 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6676 if (first_vertical_edge_done) {
6678 first_vertical_edge_done = 0;
6681 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6685 for( edge = start; edge < edges; edge++ ) {
6686 /* mbn_xy: neighbor macroblock */
6687 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6688 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with an 8x8 transform only every second internal edge exists */
6692 if( (edge&1) && IS_8x8DCT(mb_type) )
6695 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6696 && !IS_INTERLACED(mb_type)
6697 && IS_INTERLACED(mbn_type)
6699 // This is a special case in the norm where the filtering must
6700 // be done twice (one each of the field) even if we are in a
6701 // frame macroblock.
6703 unsigned int tmp_linesize = 2 * linesize;
6704 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6705 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* first field: filter against the top field of the interlaced pair above */
6709 if( IS_INTRA(mb_type) ||
6710 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6711 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6714 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6717 // Do not use s->qscale as luma quantizer because it has not the same
6718 // value in IPCM macroblocks.
6719 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6720 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6721 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6722 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6723 chroma_qp = ( h->chroma_qp +
6724 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6725 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6726 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field: one row further down, against the bottom field */
6729 mbn_xy += s->mb_stride;
6730 if( IS_INTRA(mb_type) ||
6731 IS_INTRA(mbn_type) ) {
6732 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6735 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6738 // Do not use s->qscale as luma quantizer because it has not the same
6739 // value in IPCM macroblocks.
6740 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6741 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6742 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6743 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6744 chroma_qp = ( h->chroma_qp +
6745 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6746 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6747 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* regular boundary-strength computation for this edge */
6750 if( IS_INTRA(mb_type) ||
6751 IS_INTRA(mbn_type) ) {
6754 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6755 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6764 bS[0] = bS[1] = bS[2] = bS[3] = value;
6769 if( edge & mask_edge ) {
6770 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6773 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6774 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6775 int bn_idx= b_idx - (dir ? 8:1);
6777 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6778 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6779 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6780 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6782 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4-group strength: nnz first, then ref/mv comparison */
6788 for( i = 0; i < 4; i++ ) {
6789 int x = dir == 0 ? edge : i;
6790 int y = dir == 0 ? i : edge;
6791 int b_idx= 8 + 4 + x + 8*y;
6792 int bn_idx= b_idx - (dir ? 8:1);
6794 if( h->non_zero_count_cache[b_idx] != 0 ||
6795 h->non_zero_count_cache[bn_idx] != 0 ) {
6801 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6802 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6803 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6804 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6812 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6817 // Do not use s->qscale as luma quantizer because it has not the same
6818 // value in IPCM macroblocks.
6819 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6820 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6821 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6822 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* dispatch: vertical edges; chroma only exists on even luma edges */
6824 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6825 if( (edge&1) == 0 ) {
6826 int chroma_qp = ( h->chroma_qp +
6827 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6828 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6829 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* dispatch: horizontal edges */
6832 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6833 if( (edge&1) == 0 ) {
6834 int chroma_qp = ( h->chroma_qp +
6835 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6836 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6837 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/*
 * Decodes all macroblocks of the current slice, choosing the CABAC or CAVLC
 * macroblock decoder according to pps.cabac, and reports decoded/errored
 * regions to the error-resilience layer via ff_er_add_slice().
 * Fix: line 7001 contained garbled characters ("s->?gb" / "s->gb?.");
 * restored to match the sibling tests on lines 6989/7002.
 */
6844 static int decode_slice(H264Context *h){
6845 MpegEncContext * const s = &h->s;
6846 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6850 if( h->pps.cabac ) {
/* CABAC path: byte-align, then initialise the arithmetic decoder over the
 * remaining slice bytes */
6854 align_get_bits( &s->gb );
6857 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6858 ff_init_cabac_decoder( &h->cabac,
6859 s->gb.buffer + get_bits_count(&s->gb)/8,
6860 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6861 /* calculate pre-state */
6862 for( i= 0; i < 460; i++ ) {
6864 if( h->slice_type == I_TYPE )
6865 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6867 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6870 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6872 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6876 int ret = decode_mb_cabac(h);
6879 if(ret>=0) hl_decode_mb(h);
6881 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6882 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6885 if(ret>=0) ret = decode_mb_cabac(h);
6887 if(ret>=0) hl_decode_mb(h);
6890 eos = get_cabac_terminate( &h->cabac );
6892 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6893 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6894 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6898 if( ++s->mb_x >= s->mb_width ) {
6900 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6902 if(h->mb_aff_frame) {
/* end-of-slice marker from the arithmetic coder, or ran out of rows */
6907 if( eos || s->mb_y >= s->mb_height ) {
6908 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6909 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6916 int ret = decode_mb_cavlc(h);
6918 if(ret>=0) hl_decode_mb(h);
6920 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6922 ret = decode_mb_cavlc(h);
6924 if(ret>=0) hl_decode_mb(h);
6929 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6930 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6935 if(++s->mb_x >= s->mb_width){
6937 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6939 if(h->mb_aff_frame) {
6942 if(s->mb_y >= s->mb_height){
6943 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exactly at the end of the bitstream -> clean slice end, else error */
6945 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6946 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6950 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6957 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6958 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6959 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6960 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6964 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* disabled alternative decode loop */
6973 for(;s->mb_y < s->mb_height; s->mb_y++){
6974 for(;s->mb_x < s->mb_width; s->mb_x++){
6975 int ret= decode_mb(h);
6980 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6981 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6986 if(++s->mb_x >= s->mb_width){
6988 if(++s->mb_y >= s->mb_height){
6989 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6990 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6994 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7001 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7002 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7003 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7007 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7014 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7017 return -1; //not reached
7020 static int decode_unregistered_user_data(H264Context *h, int size){
7021 MpegEncContext * const s = &h->s;
7022 uint8_t user_data[16+256];
7028 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7029 user_data[i]= get_bits(&s->gb, 8);
7033 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7034 if(e==1 && build>=0)
7035 h->x264_build= build;
7037 if(s->avctx->debug & FF_DEBUG_BUGS)
7038 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
7041 skip_bits(&s->gb, 8);
7046 static int decode_sei(H264Context *h){
7047 MpegEncContext * const s = &h->s;
7049 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7054 type+= show_bits(&s->gb, 8);
7055 }while(get_bits(&s->gb, 8) == 255);
7059 size+= show_bits(&s->gb, 8);
7060 }while(get_bits(&s->gb, 8) == 255);
7064 if(decode_unregistered_user_data(h, size) < 0);
7068 skip_bits(&s->gb, 8*size);
7071 //FIXME check bits here
7072 align_get_bits(&s->gb);
7078 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7079 MpegEncContext * const s = &h->s;
7081 cpb_count = get_ue_golomb(&s->gb) + 1;
7082 get_bits(&s->gb, 4); /* bit_rate_scale */
7083 get_bits(&s->gb, 4); /* cpb_size_scale */
7084 for(i=0; i<cpb_count; i++){
7085 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7086 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7087 get_bits1(&s->gb); /* cbr_flag */
7089 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7090 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7091 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7092 get_bits(&s->gb, 5); /* time_offset_length */
7095 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7096 MpegEncContext * const s = &h->s;
7097 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7098 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7100 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7102 if( aspect_ratio_info_present_flag ) {
7103 aspect_ratio_idc= get_bits(&s->gb, 8);
7104 if( aspect_ratio_idc == EXTENDED_SAR ) {
7105 sps->sar.num= get_bits(&s->gb, 16);
7106 sps->sar.den= get_bits(&s->gb, 16);
7107 }else if(aspect_ratio_idc < 16){
7108 sps->sar= pixel_aspect[aspect_ratio_idc];
7110 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7117 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7119 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7120 get_bits1(&s->gb); /* overscan_appropriate_flag */
7123 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7124 get_bits(&s->gb, 3); /* video_format */
7125 get_bits1(&s->gb); /* video_full_range_flag */
7126 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7127 get_bits(&s->gb, 8); /* colour_primaries */
7128 get_bits(&s->gb, 8); /* transfer_characteristics */
7129 get_bits(&s->gb, 8); /* matrix_coefficients */
7133 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7134 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7135 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7138 sps->timing_info_present_flag = get_bits1(&s->gb);
7139 if(sps->timing_info_present_flag){
7140 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7141 sps->time_scale = get_bits_long(&s->gb, 32);
7142 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7145 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7146 if(nal_hrd_parameters_present_flag)
7147 decode_hrd_parameters(h, sps);
7148 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7149 if(vcl_hrd_parameters_present_flag)
7150 decode_hrd_parameters(h, sps);
7151 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7152 get_bits1(&s->gb); /* low_delay_hrd_flag */
7153 get_bits1(&s->gb); /* pic_struct_present_flag */
7155 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7156 if(sps->bitstream_restriction_flag){
7157 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7158 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7159 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7160 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7161 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7162 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7163 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
7169 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7170 const uint8_t *jvt_list, const uint8_t *fallback_list){
7171 MpegEncContext * const s = &h->s;
7172 int i, last = 8, next = 8;
7173 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7174 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7175 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7177 for(i=0;i<size;i++){
7179 next = (last + get_se_golomb(&s->gb)) & 0xff;
7180 if(!i && !next){ /* matrix not written, we use the preset one */
7181 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7184 last = factors[scan[i]] = next ? next : last;
7188 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7189 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7190 MpegEncContext * const s = &h->s;
7191 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7192 const uint8_t *fallback[4] = {
7193 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7194 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7195 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7196 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7198 if(get_bits1(&s->gb)){
7199 sps->scaling_matrix_present |= is_sps;
7200 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7201 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7202 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7203 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7204 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7205 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7206 if(is_sps || pps->transform_8x8_mode){
7207 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7208 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7210 } else if(fallback_sps) {
7211 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7212 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7216 static inline int decode_seq_parameter_set(H264Context *h){
7217 MpegEncContext * const s = &h->s;
7218 int profile_idc, level_idc;
7222 profile_idc= get_bits(&s->gb, 8);
7223 get_bits1(&s->gb); //constraint_set0_flag
7224 get_bits1(&s->gb); //constraint_set1_flag
7225 get_bits1(&s->gb); //constraint_set2_flag
7226 get_bits1(&s->gb); //constraint_set3_flag
7227 get_bits(&s->gb, 4); // reserved
7228 level_idc= get_bits(&s->gb, 8);
7229 sps_id= get_ue_golomb(&s->gb);
7231 sps= &h->sps_buffer[ sps_id ];
7232 sps->profile_idc= profile_idc;
7233 sps->level_idc= level_idc;
7235 if(sps->profile_idc >= 100){ //high profile
7236 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7237 get_bits1(&s->gb); //residual_color_transform_flag
7238 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7239 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7240 sps->transform_bypass = get_bits1(&s->gb);
7241 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7243 sps->scaling_matrix_present = 0;
7245 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7246 sps->poc_type= get_ue_golomb(&s->gb);
7248 if(sps->poc_type == 0){ //FIXME #define
7249 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7250 } else if(sps->poc_type == 1){//FIXME #define
7251 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7252 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7253 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7254 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7256 for(i=0; i<sps->poc_cycle_length; i++)
7257 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7259 if(sps->poc_type > 2){
7260 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7264 sps->ref_frame_count= get_ue_golomb(&s->gb);
7265 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7266 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7268 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7269 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7270 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7271 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7272 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7275 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7276 if(!sps->frame_mbs_only_flag)
7277 sps->mb_aff= get_bits1(&s->gb);
7281 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7283 sps->crop= get_bits1(&s->gb);
7285 sps->crop_left = get_ue_golomb(&s->gb);
7286 sps->crop_right = get_ue_golomb(&s->gb);
7287 sps->crop_top = get_ue_golomb(&s->gb);
7288 sps->crop_bottom= get_ue_golomb(&s->gb);
7289 if(sps->crop_left || sps->crop_top){
7290 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7296 sps->crop_bottom= 0;
7299 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7300 if( sps->vui_parameters_present_flag )
7301 decode_vui_parameters(h, sps);
7303 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7304 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7305 sps_id, sps->profile_idc, sps->level_idc,
7307 sps->ref_frame_count,
7308 sps->mb_width, sps->mb_height,
7309 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7310 sps->direct_8x8_inference_flag ? "8B8" : "",
7311 sps->crop_left, sps->crop_right,
7312 sps->crop_top, sps->crop_bottom,
7313 sps->vui_parameters_present_flag ? "VUI" : ""
7319 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7320 MpegEncContext * const s = &h->s;
7321 int pps_id= get_ue_golomb(&s->gb);
7322 PPS *pps= &h->pps_buffer[pps_id];
7324 pps->sps_id= get_ue_golomb(&s->gb);
7325 pps->cabac= get_bits1(&s->gb);
7326 pps->pic_order_present= get_bits1(&s->gb);
7327 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7328 if(pps->slice_group_count > 1 ){
7329 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7330 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7331 switch(pps->mb_slice_group_map_type){
7334 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7335 | run_length[ i ] |1 |ue(v) |
7340 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7342 | top_left_mb[ i ] |1 |ue(v) |
7343 | bottom_right_mb[ i ] |1 |ue(v) |
7351 | slice_group_change_direction_flag |1 |u(1) |
7352 | slice_group_change_rate_minus1 |1 |ue(v) |
7357 | slice_group_id_cnt_minus1 |1 |ue(v) |
7358 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7360 | slice_group_id[ i ] |1 |u(v) |
7365 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7366 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7367 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7368 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7372 pps->weighted_pred= get_bits1(&s->gb);
7373 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7374 pps->init_qp= get_se_golomb(&s->gb) + 26;
7375 pps->init_qs= get_se_golomb(&s->gb) + 26;
7376 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7377 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7378 pps->constrained_intra_pred= get_bits1(&s->gb);
7379 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7381 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7382 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7384 if(get_bits_count(&s->gb) < bit_length){
7385 pps->transform_8x8_mode= get_bits1(&s->gb);
7386 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7387 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7390 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7391 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7392 pps_id, pps->sps_id,
7393 pps->cabac ? "CABAC" : "CAVLC",
7394 pps->slice_group_count,
7395 pps->ref_count[0], pps->ref_count[1],
7396 pps->weighted_pred ? "weighted" : "",
7397 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7398 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7399 pps->constrained_intra_pred ? "CONSTR" : "",
7400 pps->redundant_pic_cnt_present ? "REDU" : "",
7401 pps->transform_8x8_mode ? "8x8DCT" : ""
7409 * finds the end of the current frame in the bitstream.
7410 * @return the position of the first byte of the next frame, or -1
7412 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7415 ParseContext *pc = &(h->s.parse_context);
7416 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7417 // mb_addr= pc->mb_addr - 1;
7419 for(i=0; i<=buf_size; i++){
7420 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7421 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7422 if(pc->frame_start_found){
7423 // If there isn't one more byte in the buffer
7424 // the test on first_mb_in_slice cannot be done yet
7425 // do it at next call.
7426 if (i >= buf_size) break;
7427 if (buf[i] & 0x80) {
7428 // first_mb_in_slice is 0, probably the first nal of a new
7430 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7432 pc->frame_start_found= 0;
7436 pc->frame_start_found = 1;
7438 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7439 if(pc->frame_start_found){
7441 pc->frame_start_found= 0;
7446 state= (state<<8) | buf[i];
7450 return END_NOT_FOUND;
// AVCodecParser callback: accumulates input until find_frame_end() locates a
// frame boundary, then hands the combined frame back via *poutbuf.
// NOTE(review): elided excerpt -- the declaration of 'next', the error path
// after ff_combine_frame, and the return statement are on lines not shown.
7453 static int h264_parse(AVCodecParserContext *s,
7454 AVCodecContext *avctx,
7455 uint8_t **poutbuf, int *poutbuf_size,
7456 const uint8_t *buf, int buf_size)
7458 H264Context *h = s->priv_data;
7459 ParseContext *pc = &h->s.parse_context;
7462 next= find_frame_end(h, buf, buf_size);
// ff_combine_frame may retarget 'buf'/'buf_size' to an internally buffered,
// now-complete frame; a negative return means "frame not complete yet".
7464 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7470 *poutbuf = (uint8_t *)buf;
7471 *poutbuf_size = buf_size;
// AVCodecParser split callback: scans for the extradata portion of the stream
// (SPS and friends) so it can be separated from the first frame.
// NOTE(review): elided excerpt -- loop variable declaration, the branch
// bodies/returns and closing braces are on lines not shown.
7475 static int h264_split(AVCodecContext *avctx,
7476 const uint8_t *buf, int buf_size)
// Rolling start-code window; -1 so no false match before 4 bytes are read.
7479 uint32_t state = -1;
7482 for(i=0; i<=buf_size; i++){
// 0x107 = start code + nal_unit_type 7 (SPS): remember that headers exist.
7483 if((state&0xFFFFFF1F) == 0x107)
7485 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
// Any start code that is NOT SPS/PPS/AUD (types 7/8/9) marks the end of the
// header section.
7487 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
// Back up over any zero bytes preceding the start code so they stay with
// the headers (they belong to the next start-code prefix).
7489 while(i>4 && buf[i-5]==0) i--;
7494 state= (state<<8) | buf[i];
// Core NAL dispatch loop: walks 'buf', extracts each NAL unit (length-prefixed
// for AVC/mp4 streams, start-code delimited for Annex-B), un-escapes it via
// decode_nal(), and dispatches on nal_unit_type.
// NOTE(review): heavily elided excerpt -- variable declarations, most case
// labels of the switch, break statements and closing braces fall on lines
// not visible here; comments below describe only what is shown.
7500 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7501 MpegEncContext * const s = &h->s;
7502 AVCodecContext * const avctx= s->avctx;
// Debug hexdump of the first 50 input bytes (presumably under a debug
// conditional on an elided line -- TODO confirm).
7506 for(i=0; i<50; i++){
7507 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7511 s->current_picture_ptr= NULL;
7520 if(buf_index >= buf_size) break;
// AVC/mp4 framing: big-endian NAL size field of h->nal_length_size bytes.
7522 for(i = 0; i < h->nal_length_size; i++)
7523 nalsize = (nalsize << 8) | buf[buf_index++];
7529 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7534 // start code prefix search
7535 for(; buf_index + 3 < buf_size; buf_index++){
7536 // this should allways succeed in the first iteration
7537 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7541 if(buf_index+3 >= buf_size) break;
// Un-escape the RBSP (removes emulation-prevention bytes); 'consumed' is the
// number of escaped input bytes eaten, 'dst_length' the unescaped size.
7546 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
// Drop a trailing zero byte, then strip the rbsp_stop_one_bit to get the
// exact payload bit length.
7547 if(ptr[dst_length - 1] == 0) dst_length--;
7548 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7550 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7551 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7554 if (h->is_avc && (nalsize != consumed))
7555 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7557 buf_index += consumed;
// Skip non-reference NALs entirely when the caller asked to drop them.
7559 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7560 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7563 switch(h->nal_unit_type){
// IDR path: flush reference state before decoding the slice header.
7565 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7567 init_get_bits(&s->gb, ptr, bit_length);
7569 h->inter_gb_ptr= &s->gb;
7570 s->data_partitioning = 0;
7572 if(decode_slice_header(h) < 0){
7573 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7576 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
// Only run the slice decoder if no skip/hurry option filters this slice out.
7577 if(h->redundant_pic_count==0 && s->hurry_up < 5
7578 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7579 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7580 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7581 && avctx->skip_frame < AVDISCARD_ALL)
// Data-partitioned slices: partition A carries the slice header ...
7585 init_get_bits(&s->gb, ptr, bit_length);
7587 h->inter_gb_ptr= NULL;
7588 s->data_partitioning = 1;
7590 if(decode_slice_header(h) < 0){
7591 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
// ... partition B feeds the intra bit reader ...
7595 init_get_bits(&h->intra_gb, ptr, bit_length);
7596 h->intra_gb_ptr= &h->intra_gb;
// ... and partition C the inter bit reader; decode once all are present.
7599 init_get_bits(&h->inter_gb, ptr, bit_length);
7600 h->inter_gb_ptr= &h->inter_gb;
7602 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7604 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7605 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7606 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7607 && avctx->skip_frame < AVDISCARD_ALL)
7611 init_get_bits(&s->gb, ptr, bit_length);
// Sequence parameter set.
7615 init_get_bits(&s->gb, ptr, bit_length);
7616 decode_seq_parameter_set(h);
7618 if(s->flags& CODEC_FLAG_LOW_DELAY)
7621 if(avctx->has_b_frames < 2)
7622 avctx->has_b_frames= !s->low_delay;
// Picture parameter set.
7625 init_get_bits(&s->gb, ptr, bit_length);
7627 decode_picture_parameter_set(h, bit_length);
7631 case NAL_END_SEQUENCE:
7632 case NAL_END_STREAM:
7633 case NAL_FILLER_DATA:
7635 case NAL_AUXILIARY_SLICE:
7638 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7642 if(!s->current_picture_ptr) return buf_index; //no frame
// Post-frame bookkeeping: tag the picture and carry POC/frame_num state
// forward for the next picture's POC computation.
7644 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7645 s->current_picture_ptr->pict_type= s->pict_type;
7647 h->prev_frame_num_offset= h->frame_num_offset;
7648 h->prev_frame_num= h->frame_num;
7649 if(s->current_picture_ptr->reference){
7650 h->prev_poc_msb= h->poc_msb;
7651 h->prev_poc_lsb= h->poc_lsb;
// Apply memory-management control operations only for reference pictures.
7653 if(s->current_picture_ptr->reference)
7654 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7664 * returns the number of bytes consumed for building the current frame
// NOTE(review): elided excerpt -- the else branch and the final 'return pos;'
// are on lines not visible here.
7666 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7667 if(s->flags&CODEC_FLAG_TRUNCATED){
// In truncated mode part of the frame came from the parse-context buffer,
// so subtract what was already accounted for last call.
7668 pos -= s->parse_context.last_index;
7669 if(pos<0) pos=0; // FIXME remove (unneeded?)
7673 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7674 if(pos+10>buf_size) pos=buf_size; // oops ;)
// Top-level AVCodec decode callback: handles truncated-stream reassembly,
// one-time avcC extradata parsing, NAL decoding, and B-frame reordering of
// the output picture.
// NOTE(review): heavily elided excerpt -- many declarations, returns, braces
// and some conditionals are on lines not visible here.
7680 static int decode_frame(AVCodecContext *avctx,
7681 void *data, int *data_size,
7682 uint8_t *buf, int buf_size)
7684 H264Context *h = avctx->priv_data;
7685 MpegEncContext *s = &h->s;
7686 AVFrame *pict = data;
7689 s->flags= avctx->flags;
7690 s->flags2= avctx->flags2;
7692 /* no supplementary picture */
7693 if (buf_size == 0) {
// CODEC_FLAG_TRUNCATED: caller may pass partial frames; buffer until
// find_frame_end locates a full one.
7697 if(s->flags&CODEC_FLAG_TRUNCATED){
7698 int next= find_frame_end(h, buf, buf_size);
7700 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7702 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
// One-time parse of the avcC extradata box (mp4/mov style streams):
// version byte, then length-prefixed SPS and PPS sets.
7705 if(h->is_avc && !h->got_avcC) {
7706 int i, cnt, nalsize;
7707 unsigned char *p = avctx->extradata;
7708 if(avctx->extradata_size < 7) {
7709 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7713 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7716 /* sps and pps in the avcC always have length coded with 2 bytes,
7717 so put a fake nal_length_size = 2 while parsing them */
7718 h->nal_length_size = 2;
7719 // Decode sps from avcC
7720 cnt = *(p+5) & 0x1f; // Number of sps
7722 for (i = 0; i < cnt; i++) {
// Each set is a 16-bit big-endian length followed by the NAL itself.
7723 nalsize = BE_16(p) + 2;
7724 if(decode_nal_units(h, p, nalsize) < 0) {
7725 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7730 // Decode pps from avcC
7731 cnt = *(p++); // Number of pps
7732 for (i = 0; i < cnt; i++) {
7733 nalsize = BE_16(p) + 2;
7734 if(decode_nal_units(h, p, nalsize) != nalsize) {
7735 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7740 // Now store right nal length size, that will be use to parse all other nals
7741 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7742 // Do not reparse avcC
// Annex-B extradata (raw SPS/PPS) is decoded once before the first frame.
7746 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7747 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7751 buf_index=decode_nal_units(h, buf, buf_size);
7755 //FIXME do something with unavailable reference frames
7757 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7758 if(!s->current_picture_ptr){
7759 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7764 Picture *out = s->current_picture_ptr;
7765 #if 0 //decode order
7766 *data_size = sizeof(AVFrame);
7768 /* Sort B-frames into display order */
7769 Picture *cur = s->current_picture_ptr;
7770 Picture *prev = h->delayed_output_pic;
7775 int dropped_frame = 0;
// Honour the stream's declared reorder depth if larger than current.
7778 if(h->sps.bitstream_restriction_flag
7779 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7780 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Append the just-decoded picture to the delay queue.
7784 while(h->delayed_pic[pics]) pics++;
7785 h->delayed_pic[pics++] = cur;
7786 if(cur->reference == 0)
7789 for(i=0; h->delayed_pic[i]; i++)
7790 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// Pick the delayed picture with the smallest POC (stop at a keyframe
// boundary) as the next display candidate.
7793 out = h->delayed_pic[0];
7794 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7795 if(h->delayed_pic[i]->poc < out->poc){
7796 out = h->delayed_pic[i];
// Heuristics to grow the delay when pictures arrive out of display order.
7800 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7801 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7803 else if(prev && pics <= s->avctx->has_b_frames)
7805 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7807 ((!cross_idr && prev && out->poc > prev->poc + 2)
7808 || cur->pict_type == B_TYPE)))
7811 s->avctx->has_b_frames++;
7814 else if(out_of_order)
// Emit (or drop) the chosen picture and compact the delay queue.
7817 if(out_of_order || pics > s->avctx->has_b_frames){
7818 dropped_frame = (out != h->delayed_pic[out_idx]);
7819 for(i=out_idx; h->delayed_pic[i]; i++)
7820 h->delayed_pic[i] = h->delayed_pic[i+1];
7823 if(prev == out && !dropped_frame)
7826 *data_size = sizeof(AVFrame);
// Release the reference held by the previously output picture.
7827 if(prev && prev != out && prev->reference == 1)
7828 prev->reference = 0;
7829 h->delayed_output_pic = out;
7833 *pict= *(AVFrame*)out;
7835 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7838 assert(pict->data[0] || !*data_size);
7839 ff_print_debug_info(s, pict);
7840 //printf("out %d\n", (int)pict->data[0]);
7843 /* Return the Picture timestamp as the frame number */
7844 /* we substract 1 because it is added on utils.c */
7845 avctx->frame_number = s->picture_number - 1;
7847 return get_consumed_bytes(s, buf_index, buf_size);
// Fills h->mb_avail[0..5] with the availability of the current macroblock's
// neighbours: a neighbour counts as available only if it exists inside the
// picture AND belongs to the same slice (same slice_num in slice_table).
// Layout per the indices below: 0..2 = top-left/top/top-right row,
// 3 = left, 4/5 = constants (see FIXMEs).
// NOTE(review): elided excerpt -- the guard around the top-row checks
// (presumably an s->mb_y test) is on lines not visible here; TODO confirm.
7850 static inline void fill_mb_avail(H264Context *h){
7851 MpegEncContext * const s = &h->s;
7852 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7855 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7856 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7857 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7863 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7864 h->mb_avail[4]= 1; //FIXME move out
7865 h->mb_avail[5]= 0; //FIXME move out
// ---------------------------------------------------------------------------
// Self-test harness (presumably compiled under a TEST conditional -- the
// enclosing function header and the #ifdef are on lines not visible here).
// Exercises, in order: unsigned/signed Exp-Golomb round-trips, the 4x4
// (I)DCT error, the quantizer, and NAL escape/un-escape round-trips.
// ---------------------------------------------------------------------------
7871 #define SIZE (COUNT*40)
7877 // int int_temp[10000];
7879 AVCodecContext avctx;
7881 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT codes, read them back, compare ---
7883 init_put_bits(&pb, temp, SIZE);
7884 printf("testing unsigned exp golomb\n");
7885 for(i=0; i<COUNT; i++){
7887 set_ue_golomb(&pb, i);
7888 STOP_TIMER("set_ue_golomb");
7890 flush_put_bits(&pb);
7892 init_get_bits(&gb, temp, 8*SIZE);
7893 for(i=0; i<COUNT; i++){
7896 s= show_bits(&gb, 24);
7899 j= get_ue_golomb(&gb);
7901 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7904 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb: same round-trip, centered around zero ---
7908 init_put_bits(&pb, temp, SIZE);
7909 printf("testing signed exp golomb\n");
7910 for(i=0; i<COUNT; i++){
7912 set_se_golomb(&pb, i - COUNT/2);
7913 STOP_TIMER("set_se_golomb");
7915 flush_put_bits(&pb);
7917 init_get_bits(&gb, temp, 8*SIZE);
7918 for(i=0; i<COUNT; i++){
7921 s= show_bits(&gb, 24);
7924 j= get_se_golomb(&gb);
7925 if(j != i - COUNT/2){
7926 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7929 STOP_TIMER("get_se_golomb");
// --- 4x4 forward/inverse DCT: random blocks, accumulate reconstruction
// error against the source ---
7932 printf("testing 4x4 (I)DCT\n");
7935 uint8_t src[16], ref[16];
7936 uint64_t error= 0, max_error=0;
7938 for(i=0; i<COUNT; i++){
7940 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7941 for(j=0; j<16; j++){
7942 ref[j]= random()%255;
7943 src[j]= random()%255;
7946 h264_diff_dct_c(block, src, ref, 4);
// Crude dequant approximation applied per row/column (see *4 then /5).
7949 for(j=0; j<16; j++){
7950 // printf("%d ", block[j]);
7951 block[j]= block[j]*4;
7952 if(j&1) block[j]= (block[j]*4 + 2)/5;
7953 if(j&4) block[j]= (block[j]*4 + 2)/5;
7957 s->dsp.h264_idct_add(ref, block, 4);
7958 /* for(j=0; j<16; j++){
7959 printf("%d ", ref[j]);
7963 for(j=0; j<16; j++){
7964 int diff= ABS(src[j] - ref[j]);
7967 max_error= FFMAX(max_error, diff);
7970 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer sweep over all 52 QP values on random blocks ---
7972 printf("testing quantizer\n");
7973 for(qp=0; qp<52; qp++){
7975 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: encode a random bitstream (with forced zero runs) into a
// NAL, decode it back, and verify length, consumed bytes and content ---
7979 printf("Testing NAL layer\n");
7981 uint8_t bitstream[COUNT];
7982 uint8_t nal[COUNT*2];
7984 memset(&h, 0, sizeof(H264Context));
7986 for(i=0; i<COUNT; i++){
7994 for(j=0; j<COUNT; j++){
7995 bitstream[j]= (random() % 255) + 1;
// Inject zero bytes at random positions to exercise emulation prevention.
7998 for(j=0; j<zeros; j++){
7999 int pos= random() % COUNT;
8000 while(bitstream[pos] == 0){
8009 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8011 printf("encoding failed\n");
8015 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8019 if(out_length != COUNT){
8020 printf("incorrect length %d %d\n", out_length, COUNT);
8024 if(consumed != nal_length){
8025 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8029 if(memcmp(bitstream, out, COUNT)){
8030 printf("missmatch\n");
8035 printf("Testing RBSP\n");
// AVCodec close callback: releases the RBSP un-escape buffer and the
// per-context tables allocated during decoding.
// NOTE(review): elided excerpt -- the MPV_common_end()/return lines
// presumably between these are not visible; TODO confirm.
8043 static int decode_end(AVCodecContext *avctx)
8045 H264Context *h = avctx->priv_data;
8046 MpegEncContext *s = &h->s;
8048 av_freep(&h->rbsp_buffer);
8049 free_tables(h); //FIXME cleanup init stuff perhaps
8052 // memset(h, 0, sizeof(H264Context));
// Public decoder registration. Elided fields (name, type, codec id, the
// init/close/decode callbacks) fall on lines not visible in this excerpt.
8058 AVCodec h264_decoder = {
8062 sizeof(H264Context),
// DRAW_HORIZ_BAND is deliberately disabled; DR1 + truncated-input support +
// delayed output (needed for B-frame reordering) are advertised.
8067 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8071 AVCodecParser h264_parser = {
8073 sizeof(H264Context),