2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
99 * Picture parameter set
103 int cabac; ///< entropy_coding_mode_flag
104 int pic_order_present; ///< pic_order_present_flag
105 int slice_group_count; ///< num_slice_groups_minus1 + 1
106 int mb_slice_group_map_type;
107 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
108 int weighted_pred; ///< weighted_pred_flag
109 int weighted_bipred_idc;
110 int init_qp; ///< pic_init_qp_minus26 + 26
111 int init_qs; ///< pic_init_qs_minus26 + 26
112 int chroma_qp_index_offset;
113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
114 int constrained_intra_pred; ///< constrained_intra_pred_flag
115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
122 * Memory management control operation opcode.
124 typedef enum MMCOOpcode{
135 * Memory management control operation.
146 typedef struct H264Context{
154 #define NAL_IDR_SLICE 5
159 #define NAL_END_SEQUENCE 10
160 #define NAL_END_STREAM 11
161 #define NAL_FILLER_DATA 12
162 #define NAL_SPS_EXT 13
163 #define NAL_AUXILIARY_SLICE 19
164 uint8_t *rbsp_buffer;
165 unsigned int rbsp_buffer_size;
168 * Used to parse AVC variant of h264
170 int is_avc; ///< this flag is != 0 if codec is avc1
171 int got_avcC; ///< flag used to parse avcC data only once
172 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
176 int prev_mb_skipped; //FIXME remove (IMHO not used)
179 int chroma_pred_mode;
180 int intra16x16_pred_mode;
185 int8_t intra4x4_pred_mode_cache[5*8];
186 int8_t (*intra4x4_pred_mode)[8];
187 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
188 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
189 void (*pred8x8 [4+3])(uint8_t *src, int stride);
190 void (*pred16x16[4+3])(uint8_t *src, int stride);
191 unsigned int topleft_samples_available;
192 unsigned int top_samples_available;
193 unsigned int topright_samples_available;
194 unsigned int left_samples_available;
195 uint8_t (*top_borders[2])[16+2*8];
196 uint8_t left_border[2*(17+2*9)];
199 * non zero coeff count cache.
200 * is 64 if not available.
202 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
203 uint8_t (*non_zero_count)[16];
206 * Motion vector cache.
208 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
209 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
210 #define LIST_NOT_USED -1 //FIXME rename?
211 #define PART_NOT_AVAILABLE -2
214 * is 1 if the specific list MV&references are set to 0,0,-2.
216 int mv_cache_clean[2];
219 * number of neighbors (top and/or left) that used 8x8 dct
221 int neighbor_transform_size;
224 * block_offset[ 0..23] for frame macroblocks
225 * block_offset[24..47] for field macroblocks
227 int block_offset[2*(16+8)];
229 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
231 int b_stride; //FIXME use s->b4_stride
237 int unknown_svq3_flag;
238 int next_slice_index;
240 SPS sps_buffer[MAX_SPS_COUNT];
241 SPS sps; ///< current sps
243 PPS pps_buffer[MAX_PPS_COUNT];
247 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
249 uint32_t dequant4_buffer[6][52][16];
250 uint32_t dequant8_buffer[2][52][64];
251 uint32_t (*dequant4_coeff[6])[16];
252 uint32_t (*dequant8_coeff[2])[64];
253 int dequant_coeff_pps; ///< reinit tables when pps changes
256 uint8_t *slice_table_base;
257 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
259 int slice_type_fixed;
261 //interlacing specific flags
263 int mb_field_decoding_flag;
270 int delta_poc_bottom;
273 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
274 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
275 int frame_num_offset; ///< for POC type 2
276 int prev_frame_num_offset; ///< for POC type 2
277 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
280 * frame_num for frames or 2*frame_num for field pics.
285 * max_frame_num or 2*max_frame_num for field pics.
289 //Weighted pred stuff
291 int use_weight_chroma;
292 int luma_log2_weight_denom;
293 int chroma_log2_weight_denom;
294 int luma_weight[2][16];
295 int luma_offset[2][16];
296 int chroma_weight[2][16][2];
297 int chroma_offset[2][16][2];
298 int implicit_weight[16][16];
301 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
302 int slice_alpha_c0_offset;
303 int slice_beta_offset;
305 int redundant_pic_count;
307 int direct_spatial_mv_pred;
308 int dist_scale_factor[16];
309 int map_col_to_list0[2][16];
312 * num_ref_idx_l0/1_active_minus1 + 1
314 int ref_count[2];// FIXME split for AFF
315 Picture *short_ref[32];
316 Picture *long_ref[32];
317 Picture default_ref_list[2][32];
318 Picture ref_list[2][32]; //FIXME size?
319 Picture field_ref_list[2][32]; //FIXME size?
320 Picture *delayed_pic[16]; //FIXME size?
321 Picture *delayed_output_pic;
324 * memory management control operations buffer.
326 MMCO mmco[MAX_MMCO_COUNT];
329 int long_ref_count; ///< number of actual long term references
330 int short_ref_count; ///< number of actual short term references
333 GetBitContext intra_gb;
334 GetBitContext inter_gb;
335 GetBitContext *intra_gb_ptr;
336 GetBitContext *inter_gb_ptr;
338 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
344 uint8_t cabac_state[460];
347 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
351 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
352 uint8_t *chroma_pred_mode_table;
353 int last_qscale_diff;
354 int16_t (*mvd_table[2])[2];
355 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8];
359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16];
361 uint8_t zigzag_scan8x8[64];
362 uint8_t zigzag_scan8x8_cavlc[64];
363 const uint8_t *zigzag_scan_q0;
364 const uint8_t *field_scan_q0;
365 const uint8_t *zigzag_scan8x8_q0;
366 const uint8_t *zigzag_scan8x8_cavlc_q0;
371 static VLC coeff_token_vlc[4];
372 static VLC chroma_dc_coeff_token_vlc;
374 static VLC total_zeros_vlc[15];
375 static VLC chroma_dc_total_zeros_vlc[3];
377 static VLC run_vlc[6];
380 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
381 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
382 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values (typically an MV x/y pair) into one 32-bit word.
 * On big-endian hosts the operands are swapped so the in-memory byte
 * layout matches the little-endian case, allowing 32-bit MV copies.
 * NOTE(review): the #else/#endif and closing brace of this function
 * (orig lines 387, 389-390) are not present in this excerpt.
 */
384 static always_inline uint32_t pack16to32(int a, int b){
385 #ifdef WORDS_BIGENDIAN
386 return (b&0xFFFF) + (a<<16);
388 return (a&0xFFFF) + (b<<16);
394 * @param h height of the rectangle, should be a constant
395 * @param w width of the rectangle, should be a constant
396 * @param size the size of val (1 or 4), should be a constant
/**
 * Fills a w x h rectangle with val, specialized per constant (w,h) pair
 * so the compiler can emit straight-line 16/32/64-bit stores.
 * @param vp     top-left corner of the rectangle
 * @param w      width of the rectangle, should be a constant
 * @param h      height of the rectangle, should be a constant
 * @param stride distance in bytes between consecutive rows
 * @param val    fill value; replicated into every byte when size==1
 * @param size   the size of val (1 or 4), should be a constant
 * NOTE(review): a few original lines (401-404, 408-409, 427) are absent
 * from this excerpt, including the opening `if(w==2 && h==1){` branch
 * that the first `}else if` below pairs with.
 */
398 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
399 uint8_t *p= (uint8_t*)vp;
400 assert(size==1 || size==4);
/* alignment/stride preconditions: rows must be naturally aligned so the
   wide stores below are legal */
405 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
406 assert((stride&(w-1))==0);
407 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
/* 2-wide rectangles: 16-bit stores, val*0x0101 replicates a byte value */
410 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
411 }else if(w==2 && h==4){
412 *(uint16_t*)(p + 0*stride)=
413 *(uint16_t*)(p + 1*stride)=
414 *(uint16_t*)(p + 2*stride)=
415 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
/* 4-wide rectangles: one 32-bit store per row */
416 }else if(w==4 && h==1){
417 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
418 }else if(w==4 && h==2){
419 *(uint32_t*)(p + 0*stride)=
420 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
421 }else if(w==4 && h==4){
422 *(uint32_t*)(p + 0*stride)=
423 *(uint32_t*)(p + 1*stride)=
424 *(uint32_t*)(p + 2*stride)=
425 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
/* 8-wide rectangles: h==1/2 use paired 32-bit stores, h==4 uses 64-bit */
426 }else if(w==8 && h==1){
428 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
429 }else if(w==8 && h==2){
430 *(uint32_t*)(p + 0 + 0*stride)=
431 *(uint32_t*)(p + 4 + 0*stride)=
432 *(uint32_t*)(p + 0 + 1*stride)=
433 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
434 }else if(w==8 && h==4){
/* 0x0100000001ULL duplicates a 32-bit val into both halves of 64 bits */
435 *(uint64_t*)(p + 0*stride)=
436 *(uint64_t*)(p + 1*stride)=
437 *(uint64_t*)(p + 2*stride)=
438 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/* 16-wide rectangles: two 64-bit stores per row */
439 }else if(w==16 && h==2){
440 *(uint64_t*)(p + 0+0*stride)=
441 *(uint64_t*)(p + 8+0*stride)=
442 *(uint64_t*)(p + 0+1*stride)=
443 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
444 }else if(w==16 && h==4){
445 *(uint64_t*)(p + 0+0*stride)=
446 *(uint64_t*)(p + 8+0*stride)=
447 *(uint64_t*)(p + 0+1*stride)=
448 *(uint64_t*)(p + 8+1*stride)=
449 *(uint64_t*)(p + 0+2*stride)=
450 *(uint64_t*)(p + 8+2*stride)=
451 *(uint64_t*)(p + 0+3*stride)=
452 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
457 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
458 MpegEncContext * const s = &h->s;
459 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
460 int topleft_xy, top_xy, topright_xy, left_xy[2];
461 int topleft_type, top_type, topright_type, left_type[2];
465 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
466 // the actual condition is whether we're on the edge of a slice,
467 // and even then the intra and nnz parts are unnecessary.
468 if(for_deblock && h->slice_num == 1)
471 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
473 top_xy = mb_xy - s->mb_stride;
474 topleft_xy = top_xy - 1;
475 topright_xy= top_xy + 1;
476 left_xy[1] = left_xy[0] = mb_xy-1;
486 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
487 const int top_pair_xy = pair_xy - s->mb_stride;
488 const int topleft_pair_xy = top_pair_xy - 1;
489 const int topright_pair_xy = top_pair_xy + 1;
490 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
491 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
492 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
493 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
494 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
495 const int bottom = (s->mb_y & 1);
496 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
498 ? !curr_mb_frame_flag // bottom macroblock
499 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
501 top_xy -= s->mb_stride;
504 ? !curr_mb_frame_flag // bottom macroblock
505 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
507 topleft_xy -= s->mb_stride;
510 ? !curr_mb_frame_flag // bottom macroblock
511 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
513 topright_xy -= s->mb_stride;
515 if (left_mb_frame_flag != curr_mb_frame_flag) {
516 left_xy[1] = left_xy[0] = pair_xy - 1;
517 if (curr_mb_frame_flag) {
538 left_xy[1] += s->mb_stride;
551 h->top_mb_xy = top_xy;
552 h->left_mb_xy[0] = left_xy[0];
553 h->left_mb_xy[1] = left_xy[1];
555 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
556 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
557 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
558 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
559 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
561 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
562 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
563 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
564 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
565 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
568 if(IS_INTRA(mb_type)){
569 h->topleft_samples_available=
570 h->top_samples_available=
571 h->left_samples_available= 0xFFFF;
572 h->topright_samples_available= 0xEEEA;
574 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
575 h->topleft_samples_available= 0xB3FF;
576 h->top_samples_available= 0x33FF;
577 h->topright_samples_available= 0x26EA;
580 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
581 h->topleft_samples_available&= 0xDF5F;
582 h->left_samples_available&= 0x5F5F;
586 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
587 h->topleft_samples_available&= 0x7FFF;
589 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
590 h->topright_samples_available&= 0xFBFF;
592 if(IS_INTRA4x4(mb_type)){
593 if(IS_INTRA4x4(top_type)){
594 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
595 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
596 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
597 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
600 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
605 h->intra4x4_pred_mode_cache[4+8*0]=
606 h->intra4x4_pred_mode_cache[5+8*0]=
607 h->intra4x4_pred_mode_cache[6+8*0]=
608 h->intra4x4_pred_mode_cache[7+8*0]= pred;
611 if(IS_INTRA4x4(left_type[i])){
612 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
613 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
616 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
621 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
622 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
637 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
639 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
640 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
641 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
642 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
644 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
645 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
647 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
648 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
651 h->non_zero_count_cache[4+8*0]=
652 h->non_zero_count_cache[5+8*0]=
653 h->non_zero_count_cache[6+8*0]=
654 h->non_zero_count_cache[7+8*0]=
656 h->non_zero_count_cache[1+8*0]=
657 h->non_zero_count_cache[2+8*0]=
659 h->non_zero_count_cache[1+8*3]=
660 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
664 for (i=0; i<2; i++) {
666 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
667 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
668 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
669 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
671 h->non_zero_count_cache[3+8*1 + 2*8*i]=
672 h->non_zero_count_cache[3+8*2 + 2*8*i]=
673 h->non_zero_count_cache[0+8*1 + 8*i]=
674 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
681 h->top_cbp = h->cbp_table[top_xy];
682 } else if(IS_INTRA(mb_type)) {
689 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
690 } else if(IS_INTRA(mb_type)) {
696 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
699 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
704 //FIXME direct mb can skip much of this
705 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
707 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
708 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
709 /*if(!h->mv_cache_clean[list]){
710 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
711 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
712 h->mv_cache_clean[list]= 1;
716 h->mv_cache_clean[list]= 0;
718 if(USES_LIST(top_type, list)){
719 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
720 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
721 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
722 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
723 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
724 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
725 h->ref_cache[list][scan8[0] + 0 - 1*8]=
726 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
727 h->ref_cache[list][scan8[0] + 2 - 1*8]=
728 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
730 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
731 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
732 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
733 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
734 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
737 //FIXME unify cleanup or sth
738 if(USES_LIST(left_type[0], list)){
739 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
740 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
742 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
743 h->ref_cache[list][scan8[0] - 1 + 0*8]=
744 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
746 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
747 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
748 h->ref_cache[list][scan8[0] - 1 + 0*8]=
749 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
752 if(USES_LIST(left_type[1], list)){
753 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
754 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
756 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
757 h->ref_cache[list][scan8[0] - 1 + 2*8]=
758 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
760 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
761 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
762 h->ref_cache[list][scan8[0] - 1 + 2*8]=
763 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
764 assert((!left_type[0]) == (!left_type[1]));
767 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
770 if(USES_LIST(topleft_type, list)){
771 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
772 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
773 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
774 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
776 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
777 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
780 if(USES_LIST(topright_type, list)){
781 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
782 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
783 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
784 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
786 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
787 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
791 h->ref_cache[list][scan8[5 ]+1] =
792 h->ref_cache[list][scan8[7 ]+1] =
793 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
794 h->ref_cache[list][scan8[4 ]] =
795 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
796 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
797 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
798 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
799 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
800 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
803 /* XXX beurk, Load mvd */
804 if(USES_LIST(topleft_type, list)){
805 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
806 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
811 if(USES_LIST(top_type, list)){
812 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
813 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
814 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
815 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
816 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
818 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
819 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
820 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
821 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
823 if(USES_LIST(left_type[0], list)){
824 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
825 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
826 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
828 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
829 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
831 if(USES_LIST(left_type[1], list)){
832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
833 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
834 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
836 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
837 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
839 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
840 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
841 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
842 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
843 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
845 if(h->slice_type == B_TYPE){
846 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
848 if(IS_DIRECT(top_type)){
849 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
850 }else if(IS_8X8(top_type)){
851 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
852 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
853 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
855 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
859 if(IS_DIRECT(left_type[0])){
860 h->direct_cache[scan8[0] - 1 + 0*8]=
861 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
862 }else if(IS_8X8(left_type[0])){
863 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
864 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
865 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
867 h->direct_cache[scan8[0] - 1 + 0*8]=
868 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
876 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the decoded 4x4 intra prediction modes of the current MB from
 * the 8-wide scan8-layout cache back into the per-frame
 * intra4x4_pred_mode array (right column entries 0-3, bottom row 4-6).
 * NOTE(review): orig line 882 and the lines after 889 (closing brace)
 * are not visible in this excerpt.
 */
879 static inline void write_back_intra_pred_mode(H264Context *h){
880 MpegEncContext * const s = &h->s;
881 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* right column of the MB, needed by the left neighbor of the next MB */
883 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
884 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
885 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
886 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
/* bottom row of the MB, needed by the MB below */
887 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
888 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
889 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
893 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks if the top & left blocks are available if needed & changes the
 * dc mode so it only uses the available blocks; modes that require
 * unavailable samples trigger an error log (and presumably an error
 * return on the missing lines -- confirm against full source).
 * NOTE(review): loop headers, the status<0 checks and the return
 * statements fall in gaps of this excerpt (orig 899-900, 902, 904,
 * 906-912, 914, 916, 918-925).
 */
895 static inline int check_intra4x4_pred_mode(H264Context *h){
896 MpegEncContext * const s = &h->s;
/* remap tables indexed by the requested mode; entries substitute an
   available DC variant -- exact meaning of -1/negative entries is on
   lines not visible here, hedge accordingly */
897 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
898 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* top row of samples unavailable: remap each mode in the top cache row */
901 if(!(h->top_samples_available&0x8000)){
903 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
905 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
908 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* left column of samples unavailable: remap the left cache column */
913 if(!(h->left_samples_available&0x8000)){
915 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
917 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
920 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
926 } //FIXME cleanup like next
929 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks if the top & left blocks are available if needed & changes the
 * dc mode so it only uses the available blocks (16x16 luma / chroma
 * prediction modes, valid range 0..6).
 * NOTE(review): the remap lookups, error returns and closing brace fall
 * in gaps of this excerpt (orig 935, 938-940, 942-948, 950-951, 953+).
 */
931 static inline int check_intra_pred_mode(H264Context *h, int mode){
932 MpegEncContext * const s = &h->s;
/* substitution tables analogous to check_intra4x4_pred_mode above */
933 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
934 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
/* reject bitstream values outside the legal mode range */
936 if(mode < 0 || mode > 6) {
937 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
941 if(!(h->top_samples_available&0x8000)){
944 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
949 if(!(h->left_samples_available&0x8000)){
952 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
961 * gets the predicted intra4x4 prediction mode.
/**
 * Gets the predicted intra4x4 prediction mode for block n: the minimum
 * of the left and top neighbor modes, falling back to DC_PRED when a
 * neighbor is unavailable (negative cache entry).
 * NOTE(review): the `return min;` path and closing brace (orig 972-973)
 * are not visible in this excerpt.
 */
963 static inline int pred_intra_mode(H264Context *h, int n){
964 const int index8= scan8[n];
965 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
966 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
967 const int min= FFMIN(left, top);
969 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* negative means a neighbor was unavailable -> spec mandates DC */
971 if(min<0) return DC_PRED;
/**
 * Writes the non-zero coefficient counts of the current MB from the
 * 8-wide scan8-layout cache back into the per-frame non_zero_count
 * array (luma right column / bottom row, then the chroma entries).
 * NOTE(review): orig lines 978, 986, 990 and the closing lines after
 * 993 are not visible in this excerpt.
 */
975 static inline void write_back_non_zero_count(H264Context *h){
976 MpegEncContext * const s = &h->s;
977 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: right column (0-3) and bottom row (4-6) of the MB */
979 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
980 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
981 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
982 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
983 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
984 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
985 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* first chroma plane entries (cache columns 1-2) */
987 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
988 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
989 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* second chroma plane entries */
991 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
992 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
993 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
997 * gets the predicted number of non zero coefficients.
998 * @param n block index
/**
 * Gets the predicted number of non zero coefficients for block n from
 * the left and top neighbor counts (64 marks "not available" per the
 * cache convention documented at its declaration).
 * NOTE(review): the computation of `i` (orig lines 1004-1005, presumably
 * combining left and top) and the return (orig 1009-1010) are not
 * visible in this excerpt -- confirm against full source.
 * @param n block index
 */
1000 static inline int pred_non_zero_count(H264Context *h, int n){
1001 const int index8= scan8[n];
1002 const int left= h->non_zero_count_cache[index8 - 1];
1003 const int top = h->non_zero_count_cache[index8 - 8];
/* i<64 means both neighbors were available: round-to-nearest average */
1006 if(i<64) i= (i+1)>>1;
1008 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the motion vector and reference index of the top-right
 * diagonal neighbor for MV prediction; when it is unavailable, falls
 * back to the top-left neighbor (i - 8 - 1).
 * @param C          receives a pointer to the chosen neighbor MV
 * @param i          scan8 index of the current partition
 * @param part_width partition width in 4x4 units
 * @return the reference index of the chosen neighbor
 * NOTE(review): orig lines 1015, 1019, 1021 and the closing lines are
 * not visible in this excerpt.
 */
1013 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1014 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1016 if(topright_ref != PART_NOT_AVAILABLE){
1017 *C= h->mv_cache[list][ i - 8 + part_width ];
1018 return topright_ref;
1020 tprintf("topright MV not available\n");
/* fallback: use the top-left neighbor instead */
1022 *C= h->mv_cache[list][ i - 8 - 1 ];
1023 return h->ref_cache[list][ i - 8 - 1 ];
1028 * gets the predicted MV.
1029 * @param n the block index
1030 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1031 * @param mx the x component of the predicted motion vector
1032 * @param my the y component of the predicted motion vector
/**
 * Median MV prediction (H.264 8.4.1.3): predicts (mx,my) for block n of
 * width part_width from neighbours A (left), B (top) and C (top-right,
 * via fetch_diagonal_mv). If exactly one neighbour uses the same ref,
 * its MV is used directly; otherwise the component-wise median is taken.
 * NOTE(review): this listing is elided — the declaration of C and the
 * single-match / left-only branch bodies are missing here.
 */
1034 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1035 const int index8= scan8[n];
1036 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1037 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1038 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1039 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1041 int diagonal_ref, match_count;
1043 assert(part_width==1 || part_width==2 || part_width==4);
1053 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
1054 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1055 tprintf("pred_motion match_count=%d\n", match_count);
1056 if(match_count > 1){ //most common
1057 *mx= mid_pred(A[0], B[0], C[0]);
1058 *my= mid_pred(A[1], B[1], C[1]);
1059 }else if(match_count==1){
1063 }else if(top_ref==ref){
/* no match: only the left neighbour available -> use A, else median */
1071 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1075 *mx= mid_pred(A[0], B[0], C[0]);
1076 *my= mid_pred(A[1], B[1], C[1]);
1080 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1084 * gets the directionally predicted 16x8 MV.
1085 * @param n the block index
1086 * @param mx the x component of the predicted motion vector
1087 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 16x8 partitions (H.264 8.4.1.3.1):
 * the top partition prefers the top neighbour B, the bottom partition
 * prefers the left neighbour A, when that neighbour uses the same ref;
 * otherwise it falls back to the median predictor.
 * NOTE(review): listing is elided — the n==0 branch body is missing here.
 */
1089 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top partition: check the top neighbour first */
1091 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1092 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1094 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom partition: check the left neighbour first */
1102 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1103 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1105 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1107 if(left_ref == ref){
/* fallback: standard median prediction */
1115 pred_motion(h, n, 4, list, ref, mx, my);
1119 * gets the directionally predicted 8x16 MV.
1120 * @param n the block index
1121 * @param mx the x component of the predicted motion vector
1122 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 8x16 partitions (H.264 8.4.1.3.1):
 * the left partition prefers the left neighbour A, the right partition
 * prefers the diagonal (top-right) neighbour C, when that neighbour
 * uses the same ref; otherwise falls back to the median predictor.
 * NOTE(review): listing is elided — branch bodies copying A/C into
 * (mx,my) are missing here.
 */
1124 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left partition: check the left neighbour first */
1126 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1127 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1129 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1131 if(left_ref == ref){
/* right partition: check the top-right (diagonal) neighbour first */
1140 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1142 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1144 if(diagonal_ref == ref){
/* fallback: standard median prediction */
1152 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P_Skip macroblocks (H.264 8.4.1.1): the MV is zero
 * when a top/left neighbour is unavailable or when a neighbour with
 * ref 0 has a zero MV; otherwise the median predictor is used.
 * NOTE(review): listing is elided — the zero-MV branch body is missing.
 */
1155 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1156 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1157 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1159 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1161 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1162 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1163 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* otherwise: standard median prediction with ref 0 over the whole MB */
1169 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factors (H.264 8.4.1.2.3):
 * for each list-0 reference, dist_scale_factor[i] = clip((tb*tx+32)>>6)
 * where tb/td are clipped POC distances and tx approximates 16384/td.
 */
1174 static inline void direct_dist_scale_factor(H264Context * const h){
1175 const int poc = h->s.current_picture_ptr->poc;
1176 const int poc1 = h->ref_list[1][0].poc;
1178 for(i=0; i<h->ref_count[0]; i++){
1179 int poc0 = h->ref_list[0][i].poc;
1180 int td = clip(poc1 - poc0, -128, 127);
1181 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* co-located distance is zero: use the neutral scale (256 == 1.0) */
1182 h->dist_scale_factor[i] = 256;
1184 int tb = clip(poc - poc0, -128, 127);
1185 int tx = (16384 + (ABS(td) >> 1)) / td;
1186 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Stores the current picture's ref counts/POCs (needed later when this
 * picture is the co-located one) and, for temporal direct mode, builds
 * map_col_to_list0[]: for each reference of the co-located picture,
 * the index of the list-0 reference with the same POC.
 */
1190 static inline void direct_ref_list_init(H264Context * const h){
1191 MpegEncContext * const s = &h->s;
1192 Picture * const ref1 = &h->ref_list[1][0];
1193 Picture * const cur = s->current_picture_ptr;
1195 if(cur->pict_type == I_TYPE)
1196 cur->ref_count[0] = 0;
1197 if(cur->pict_type != B_TYPE)
1198 cur->ref_count[1] = 0;
1199 for(list=0; list<2; list++){
1200 cur->ref_count[list] = h->ref_count[list];
1201 for(j=0; j<h->ref_count[list]; j++)
1202 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the mapping below is only needed for temporal direct prediction */
1204 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1206 for(list=0; list<2; list++){
1207 for(i=0; i<ref1->ref_count[list]; i++){
1208 const int poc = ref1->ref_poc[list][i];
1209 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1210 for(j=0; j<h->ref_count[list]; j++)
1211 if(h->ref_list[list][j].poc == poc){
1212 h->map_col_to_list0[list][i] = j;
/**
 * Fills the MV/ref caches for a B-slice direct-mode macroblock
 * (H.264 8.4.1.2), in either spatial or temporal direct mode, using
 * the co-located MB of ref_list[1][0] (l1mv0/l1mv1, l1ref0/l1ref1).
 * Also derives *mb_type / sub_mb_type from the co-located MB type.
 * NOTE(review): this listing is elided (original line numbers skip) —
 * several branch bodies and closing braces are missing here; do not
 * infer control flow beyond what is visible.
 */
1219 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1220 MpegEncContext * const s = &h->s;
1221 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1222 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1223 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1224 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1225 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1226 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1227 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1228 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1229 const int is_b8x8 = IS_8X8(*mb_type);
/* derive mb_type/sub_mb_type from the co-located MB's partitioning */
1233 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1234 /* FIXME save sub mb types from previous frames (or derive from MVs)
1235 * so we know exactly what block size to use */
1236 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1237 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1238 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1239 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1240 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1242 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1243 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1246 *mb_type |= MB_TYPE_DIRECT2;
1248 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1250 if(h->direct_spatial_mv_pred){
/* spatial direct: refs are the minimum of the neighbours' refs */
1255 /* ref = min(neighbors) */
1256 for(list=0; list<2; list++){
1257 int refa = h->ref_cache[list][scan8[0] - 1];
1258 int refb = h->ref_cache[list][scan8[0] - 8];
1259 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1261 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1263 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1265 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no usable neighbour refs at all: direct MVs are (0,0) with ref 0 */
1271 if(ref[0] < 0 && ref[1] < 0){
1272 ref[0] = ref[1] = 0;
1273 mv[0][0] = mv[0][1] =
1274 mv[1][0] = mv[1][1] = 0;
1276 for(list=0; list<2; list++){
1278 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1280 mv[list][0] = mv[list][1] = 0;
/* a list without a valid ref contributes no prediction */
1285 *mb_type &= ~MB_TYPE_P0L1;
1286 sub_mb_type &= ~MB_TYPE_P0L1;
1287 }else if(ref[0] < 0){
1288 *mb_type &= ~MB_TYPE_P0L0;
1289 sub_mb_type &= ~MB_TYPE_P0L0;
1292 if(IS_16X16(*mb_type)){
1293 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1294 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* co-located block is "stationary" (ref 0, |mv|<=1): force zero MV */
1295 if(!IS_INTRA(mb_type_col)
1296 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1297 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1298 && (h->x264_build>33 || !h->x264_build)))){
1300 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1302 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1304 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1306 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1308 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1309 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* 8x8 (or mixed) partitioning: handle each 8x8 block separately */
1312 for(i8=0; i8<4; i8++){
1313 const int x8 = i8&1;
1314 const int y8 = i8>>1;
1316 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1318 h->sub_mb_type[i8] = sub_mb_type;
1320 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1321 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1322 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1323 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero out MVs where the co-located 8x8/4x4 block is stationary */
1326 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1327 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1328 && (h->x264_build>33 || !h->x264_build)))){
1329 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1330 if(IS_SUB_8X8(sub_mb_type)){
1331 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1332 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1334 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1336 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1339 for(i4=0; i4<4; i4++){
1340 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1341 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1343 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1345 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1351 }else{ /* direct temporal mv pred */
1352 if(IS_16X16(*mb_type)){
1353 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1354 if(IS_INTRA(mb_type_col)){
/* intra co-located MB: both MVs zero, both refs 0 */
1355 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1356 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1357 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1359 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1360 : h->map_col_to_list0[1][l1ref1[0]];
1361 const int dist_scale_factor = h->dist_scale_factor[ref0];
1362 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* scale the co-located MV; the L1 MV is the residual mv_l0 - mv_col */
1364 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1365 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1366 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1367 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1368 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1371 for(i8=0; i8<4; i8++){
1372 const int x8 = i8&1;
1373 const int y8 = i8>>1;
1374 int ref0, dist_scale_factor;
1375 const int16_t (*l1mv)[2]= l1mv0;
1377 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1379 h->sub_mb_type[i8] = sub_mb_type;
1380 if(IS_INTRA(mb_type_col)){
1381 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1382 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1383 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1384 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located ref to the current list 0 */
1388 ref0 = l1ref0[x8 + y8*h->b8_stride];
1390 ref0 = h->map_col_to_list0[0][ref0];
1392 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1395 dist_scale_factor = h->dist_scale_factor[ref0];
1397 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1398 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1399 if(IS_SUB_8X8(sub_mb_type)){
1400 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1401 int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1402 int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1403 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1404 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1406 for(i4=0; i4<4; i4++){
1407 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1408 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1409 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1410 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1411 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1412 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the decoded MVs, reference indices, (CABAC) MV deltas and
 * direct-mode flags for the current MB from the scan8-ordered caches
 * into the picture-wide tables used for neighbour prediction.
 * NOTE(review): this listing is elided — inner row loops (over y) and
 * some braces are missing here.
 */
1419 static inline void write_back_motion(H264Context *h, int mb_type){
1420 MpegEncContext * const s = &h->s;
1421 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1422 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1425 if(!USES_LIST(mb_type, 0))
1426 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1428 for(list=0; list<2; list++){
1430 if(!USES_LIST(mb_type, list))
/* two 64-bit stores cover one row of four 16-bit MV pairs */
1434 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1435 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1437 if( h->pps.cabac ) {
1439 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1440 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one ref index per 8x8 block */
1445 uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1446 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1447 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1448 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1449 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* direct flags are only needed for CABAC B slices */
1453 if(h->slice_type == B_TYPE && h->pps.cabac){
1454 if(IS_8X8(mb_type)){
1455 uint8_t *direct_table = &h->direct_table[b8_xy];
1456 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1457 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1458 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1464 * Decodes a network abstraction layer unit.
1465 * @param consumed is the number of bytes used as input
1466 * @param length is the length of the array
1467 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1468 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a NAL unit: parses the header byte (nal_ref_idc /
 * nal_unit_type), then removes the 00 00 03 emulation-prevention
 * escapes (H.264 Annex B). Returns src+1 directly when no escape is
 * present, otherwise a de-escaped copy in h->rbsp_buffer.
 * NOTE(review): this listing is elided — the while-loop header and
 * several braces/returns are missing here.
 */
1470 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1474 // src[0]&0x80; //forbidden bit
1475 h->nal_ref_idc= src[0]>>5;
1476 h->nal_unit_type= src[0]&0x1F;
1480 for(i=0; i<length; i++)
1481 printf("%2X ", src[i]);
/* scan for the first 00 00 (01..03) sequence; stepping by 2 is safe
 * because a 3-byte pattern always covers an even index */
1483 for(i=0; i+1<length; i+=2){
1484 if(src[i]) continue;
1485 if(i>0 && src[i-1]==0) i--;
1486 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1488 /* startcode, so we must be past the end */
1495 if(i>=length-1){ //no escaped 0
1496 *dst_length= length;
1497 *consumed= length+1; //+1 for the header
1501 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length)
;
1502 dst= h->rbsp_buffer;
1504 //printf("decoding esc\n");
1507 //remove escapes (very rare 1:2^22)
1508 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1509 if(src[si+2]==3){ //escape
1514 }else //next start code
1518 dst[di++]= src[si++];
1522 *consumed= si + 1;//+1 for the header
1523 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1529 * @param src the data which should be escaped
1530 * @param dst the target buffer, dst+1 == src is allowed as a special case
1531 * @param length the length of the src data
1532 * @param dst_length the length of the dst array
1533 * @returns length of escaped data in bytes or -1 if an error occured
/**
 * Encodes a NAL unit: writes the header byte, then copies src to dst+1
 * inserting 03 emulation-prevention bytes after every 00 00 that would
 * otherwise form a (pseudo) start code. Returns the total size written
 * or -1 if dst_length is too small.
 * NOTE(review): this listing is elided — loop headers, the escape
 * counter updates and some returns are missing here.
 */
1535 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1536 int i, escape_count, si, di;
1540 assert(dst_length>0);
1542 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1544 if(length==0) return 1;
/* first pass: count how many escapes will be needed */
1547 for(i=0; i<length; i+=2){
1548 if(src[i]) continue;
1549 if(i>0 && src[i-1]==0)
1551 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1557 if(escape_count==0){
1559 memcpy(dst+1, src, length);
1563 if(length + escape_count + 1> dst_length)
1566 //this should be damn rare (hopefully)
1568 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1569 temp= h->rbsp_buffer;
1570 //printf("encoding esc\n");
/* second pass: copy, inserting the 03 escape byte */
1575 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1576 temp[di++]= 0; si++;
1577 temp[di++]= 0; si++;
1579 temp[di++]= src[si++];
1582 temp[di++]= src[si++];
1584 memcpy(dst+1, temp, length+escape_count);
1586 assert(di == length+escape_count);
1592 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
/**
 * Writes the RBSP trailing bits: a stop bit followed by zero bits up to
 * the next byte boundary (the stop-bit write is elided in this listing).
 */
1594 static void encode_rbsp_trailing(PutBitContext *pb){
1597 length= (-put_bits_count(pb))&7;
1598 if(length) put_bits(pb, length, 0);
1603 * identifies the exact end of the bitstream
1604 * @return the length of the trailing, or 0 if damaged
/**
 * Locates the rbsp_stop_one_bit in the last byte to find the exact end
 * of the bitstream (body elided in this listing).
 */
1606 static int decode_rbsp_trailing(uint8_t *src){
1610 tprintf("rbsp trailing %X\n", v);
1620 * idct tranforms the 16 dc values and dequantize them.
1621 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequant of the 16 luma DC values
 * (Intra_16x16 mode): rows into temp[], then columns, scaling each
 * result by qmul with rounding. The DC values sit at the DC positions
 * of the 16 4x4 blocks inside `block` (stride 16), per x/y_offset.
 * NOTE(review): the loop headers and temp[] stores are elided in this
 * listing.
 */
1623 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1626 int temp[16]; //FIXME check if this is a good idea
1627 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1628 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1630 //memset(block, 64, 2*256);
/* horizontal pass (butterflies over rows) */
1633 const int offset= y_offset[i];
1634 const int z0= block[offset+stride*0] + block[offset+stride*4];
1635 const int z1= block[offset+stride*0] - block[offset+stride*4];
1636 const int z2= block[offset+stride*1] - block[offset+stride*5];
1637 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass + dequant with rounding */
1646 const int offset= x_offset[i];
1647 const int z0= temp[4*0+i] + temp[4*2+i];
1648 const int z1= temp[4*0+i] - temp[4*2+i];
1649 const int z2= temp[4*1+i] - temp[4*3+i];
1650 const int z3= temp[4*1+i] + temp[4*3+i];
1652 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1653 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1654 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1655 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1661 * dct tranforms the 16 dc values.
1662 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side,
 * Intra_16x16): same butterfly structure as the inverse, with a final
 * >>1 instead of dequantization.
 * NOTE(review): loop headers and temp[] stores are elided in this listing.
 */
1664 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1665 // const int qmul= dequant_coeff[qp][0];
1667 int temp[16]; //FIXME check if this is a good idea
1668 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1669 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal pass */
1672 const int offset= y_offset[i];
1673 const int z0= block[offset+stride*0] + block[offset+stride*4];
1674 const int z1= block[offset+stride*0] - block[offset+stride*4];
1675 const int z2= block[offset+stride*1] - block[offset+stride*5];
1676 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass, halving the result */
1685 const int offset= x_offset[i];
1686 const int z0= temp[4*0+i] + temp[4*2+i];
1687 const int z1= temp[4*0+i] - temp[4*2+i];
1688 const int z2= temp[4*1+i] - temp[4*3+i];
1689 const int z3= temp[4*1+i] + temp[4*3+i];
1691 block[stride*0 +offset]= (z0 + z3)>>1;
1692 block[stride*2 +offset]= (z1 + z2)>>1;
1693 block[stride*8 +offset]= (z1 - z2)>>1;
1694 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequant of the 4 chroma DC values
 * (positions are the DC slots of the four chroma 4x4 blocks).
 * NOTE(review): the declarations of a..f and the intermediate butterfly
 * (defining e) are elided in this listing.
 */
1702 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1703 const int stride= 16*2;
1704 const int xStride= 16;
1707 a= block[stride*0 + xStride*0];
1708 b= block[stride*0 + xStride*1];
1709 c= block[stride*1 + xStride*0];
1710 d= block[stride*1 + xStride*1];
/* 2x2 butterfly outputs scaled by qmul, >>7 */
1717 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1718 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1719 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1720 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder
 * side); no scaling is applied.
 * NOTE(review): the declarations of a..f and the intermediate butterfly
 * (defining e) are elided in this listing.
 */
1724 static void chroma_dc_dct_c(DCTELEM *block){
1725 const int stride= 16*2;
1726 const int xStride= 16;
1729 a= block[stride*0 + xStride*0];
1730 b= block[stride*0 + xStride*1];
1731 c= block[stride*1 + xStride*0];
1732 d= block[stride*1 + xStride*1];
1739 block[stride*0 + xStride*0]= (a+c);
1740 block[stride*0 + xStride*1]= (e+b);
1741 block[stride*1 + xStride*0]= (a-c);
1742 block[stride*1 + xStride*1]= (e-b);
1747 * gets the chroma qp.
1749 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1751 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Computes the 4x4 H.264 forward transform of the difference between
 * two image blocks (src1 - src2): row pass on the differences, then
 * column pass in place.
 * NOTE(review): the two `for(i=0;i<4;i++)` loop headers are elided in
 * this listing.
 */
1756 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1758 //FIXME try int temp instead of block
/* row pass over the pixel differences */
1761 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1762 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1763 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1764 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1765 const int z0= d0 + d3;
1766 const int z3= d0 - d3;
1767 const int z1= d1 + d2;
1768 const int z2= d1 - d2;
1770 block[0 + 4*i]= z0 + z1;
1771 block[1 + 4*i]= 2*z3 + z2;
1772 block[2 + 4*i]= z0 - z1;
1773 block[3 + 4*i]= z3 - 2*z2;
/* column pass, in place */
1777 const int z0= block[0*4 + i] + block[3*4 + i];
1778 const int z3= block[0*4 + i] - block[3*4 + i];
1779 const int z1= block[1*4 + i] + block[2*4 + i];
1780 const int z2= block[1*4 + i] - block[2*4 + i];
1782 block[0*4 + i]= z0 + z1;
1783 block[1*4 + i]= 2*z3 + z2;
1784 block[2*4 + i]= z0 - z1;
1785 block[3*4 + i]= z3 - 2*z2;
1790 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1791 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Encoder-side quantization of a 4x4 block in scan order. Uses a
 * threshold trick: |level*quant| below threshold quantizes to 0 without
 * a branch per sign. DC is handled specially (different shift) when
 * seperate_dc is set: QUANT_SHIFT-2 for chroma-DC-style scaling,
 * QUANT_SHIFT+1 otherwise. Returns the index of the last non-zero
 * coefficient.
 * NOTE(review): this listing is elided — the if(seperate_dc)/else
 * structure, loop headers, sign branches and zero-store paths are
 * missing here.
 */
1792 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1794 const int * const quant_table= quant_coeff[qscale];
/* intra gets a larger dead-zone bias (1/3) than inter (1/6) */
1795 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1796 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1797 const unsigned int threshold2= (threshold1<<1);
/* separate DC, variant with QUANT_SHIFT-2 */
1803 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1804 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1805 const unsigned int dc_threshold2= (dc_threshold1<<1);
1807 int level= block[0]*quant_coeff[qscale+18][0];
1808 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1810 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1813 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1816 // last_non_zero = i;
/* separate DC, variant with QUANT_SHIFT+1 */
1821 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1822 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1823 const unsigned int dc_threshold2= (dc_threshold1<<1);
1825 int level= block[0]*quant_table[0];
1826 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1828 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1831 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1834 // last_non_zero = i;
/* AC coefficients in scan order */
1847 const int j= scantable[i];
1848 int level= block[j]*quant_table[j];
1850 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1851 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1852 if(((unsigned)(level+threshold1))>threshold2){
1854 level= (bias + level)>>QUANT_SHIFT;
1857 level= (bias - level)>>QUANT_SHIFT;
1866 return last_non_zero;
/**
 * 4x4 vertical intra prediction: the row of four pixels directly above
 * the block is replicated into all four rows (one 32-bit store per row).
 * topright is unused.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top= ((uint32_t*)(src-stride))[0];
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= top;
}
/**
 * 4x4 horizontal intra prediction: each row is filled with its left
 * neighbour pixel, splatted into a 32-bit word via *0x01010101.
 * topright is unused.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= src[-1+y*stride]*0x01010101;
}
/**
 * 4x4 DC intra prediction: the block is filled with the rounded average
 * of the four top and four left neighbour pixels. topright is unused.
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                  + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    const uint32_t fill= dc*0x01010101;
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 left-DC intra prediction (top neighbours unavailable): fill with
 * the rounded average of the four left neighbour pixels.
 * topright is unused.
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    const uint32_t fill= dc*0x01010101;
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 top-DC intra prediction (left neighbours unavailable): fill with
 * the rounded average of the four top neighbour pixels.
 * topright is unused.
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    const uint32_t fill= dc*0x01010101;
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= fill;
}
/**
 * 4x4 DC intra prediction when no neighbours are available: fill the
 * block with the mid-range value 128. topright is unused.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int y;
    for(y=0; y<4; y++)
        ((uint32_t*)(src+y*stride))[0]= 128U*0x01010101U;
}
1920 #define LOAD_TOP_RIGHT_EDGE\
1921 const int t4= topright[0];\
1922 const int t5= topright[1];\
1923 const int t6= topright[2];\
1924 const int t7= topright[3];\
1926 #define LOAD_LEFT_EDGE\
1927 const int l0= src[-1+0*stride];\
1928 const int l1= src[-1+1*stride];\
1929 const int l2= src[-1+2*stride];\
1930 const int l3= src[-1+3*stride];\
1932 #define LOAD_TOP_EDGE\
1933 const int t0= src[ 0-1*stride];\
1934 const int t1= src[ 1-1*stride];\
1935 const int t2= src[ 2-1*stride];\
1936 const int t3= src[ 3-1*stride];\
/**
 * 4x4 diagonal-down-right intra prediction: diagonals are filled with
 * 3-tap (1,2,1) smoothed values of the left/top/corner neighbours.
 * NOTE(review): the LOAD_LEFT_EDGE / LOAD_TOP_EDGE invocations that
 * define l0..l3 / t0..t3 are elided in this listing, as are the stores
 * on the remaining diagonals.
 */
1938 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1939 const int lt= src[-1-1*stride];
1943 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1945 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1948 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1952 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1955 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1957 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1958 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/**
 * 4x4 diagonal-down-left intra prediction: anti-diagonals are filled
 * with (1,2,1) smoothed values of the top and top-right neighbours.
 * NOTE(review): the LOAD_TOP_EDGE / LOAD_TOP_RIGHT_EDGE invocations
 * that define t0..t7 are elided in this listing, as are some stores.
 */
1961 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1966 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1968 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1971 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1975 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1978 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1980 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1981 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/**
 * 4x4 vertical-right intra prediction: mixes 2-tap averages and
 * (1,2,1) smoothed values of the top/left/corner neighbours.
 * NOTE(review): the LOAD_LEFT_EDGE / LOAD_TOP_EDGE invocations that
 * define l0..l3 / t0..t3 are elided in this listing, as are some stores.
 */
1984 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1985 const int lt= src[-1-1*stride];
1988 const __attribute__((unused)) int unu= l3;
/* even rows: 2-tap averages of the top edge */
1991 src[1+2*stride]=(lt + t0 + 1)>>1;
1993 src[2+2*stride]=(t0 + t1 + 1)>>1;
1995 src[3+2*stride]=(t1 + t2 + 1)>>1;
1996 src[3+0*stride]=(t2 + t3 + 1)>>1;
/* odd rows: (1,2,1) smoothed values */
1998 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2000 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2002 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2003 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2004 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2005 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/**
 * 4x4 vertical-left intra prediction: mixes 2-tap averages and (1,2,1)
 * smoothed values of the top and top-right neighbours.
 * NOTE(review): the LOAD_TOP_EDGE / LOAD_TOP_RIGHT_EDGE invocations
 * that define t0..t7 are elided in this listing, as are some stores.
 */
2008 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2011 const __attribute__((unused)) int unu= t7;
/* even rows: 2-tap averages */
2013 src[0+0*stride]=(t0 + t1 + 1)>>1;
2015 src[0+2*stride]=(t1 + t2 + 1)>>1;
2017 src[1+2*stride]=(t2 + t3 + 1)>>1;
2019 src[2+2*stride]=(t3 + t4+ 1)>>1;
2020 src[3+2*stride]=(t4 + t5+ 1)>>1;
/* odd rows: (1,2,1) smoothed values */
2021 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2023 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2025 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2027 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2028 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/**
 * 4x4 horizontal-up intra prediction from the left neighbours l0..l3.
 * NOTE(review): the LOAD_LEFT_EDGE invocation and the trailing stores
 * (bottom-right samples replicated from l3) are elided in this listing.
 */
2031 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2034 src[0+0*stride]=(l0 + l1 + 1)>>1;
2035 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2037 src[0+1*stride]=(l1 + l2 + 1)>>1;
2039 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2041 src[0+2*stride]=(l2 + l3 + 1)>>1;
2043 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/**
 * 4x4 horizontal-down intra prediction: mixes 2-tap averages and
 * (1,2,1) smoothed values of the left/top/corner neighbours.
 * NOTE(review): the LOAD_LEFT_EDGE / LOAD_TOP_EDGE invocations that
 * define l0..l3 / t0..t2 are elided in this listing, as are some stores.
 */
2052 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2053 const int lt= src[-1-1*stride];
2056 const __attribute__((unused)) int unu= t3;
2059 src[2+1*stride]=(lt + l0 + 1)>>1;
2061 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2062 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2063 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2065 src[2+2*stride]=(l0 + l1 + 1)>>1;
2067 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2069 src[2+3*stride]=(l1 + l2+ 1)>>1;
2071 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2072 src[0+3*stride]=(l2 + l3 + 1)>>1;
2073 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: the 16-pixel row above the block
 * (read as four 32-bit words) is replicated into all 16 rows.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *above= (const uint32_t*)(src-stride);
    const uint32_t w0= above[0];
    const uint32_t w1= above[1];
    const uint32_t w2= above[2];
    const uint32_t w3= above[3];
    int i;
    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= w0;
        row[1]= w1;
        row[2]= w2;
        row[3]= w3;
    }
}
/**
 * 16x16 horizontal intra prediction: each row is filled with its left
 * neighbour pixel, splatted into four 32-bit words. Rows are processed
 * top to bottom, reading each row's left pixel just before the fill.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;
    for(i=0; i<16; i++){
        const uint32_t v= src[-1+i*stride]*0x01010101;
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= v;
    }
}
/**
 * 16x16 DC intra prediction: fills the block with the rounded average
 * of the 16 top and 16 left neighbour pixels.
 * NOTE(review): the declarations and the two summation loop headers
 * (left column and top row) are elided in this listing.
 */
2102 static void pred16x16_dc_c(uint8_t *src, int stride){
2106 dc+= src[-1+i*stride];
/* 32 samples total: round with +16, shift by 5, splat to a 32-bit word */
2113 dc= 0x01010101*((dc + 16)>>5);
2115 for(i=0; i<16; i++){
2116 ((uint32_t*)(src+i*stride))[0]=
2117 ((uint32_t*)(src+i*stride))[1]=
2118 ((uint32_t*)(src+i*stride))[2]=
2119 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 left-DC intra prediction (top unavailable): fills the block
 * with the rounded average of the 16 left neighbour pixels.
 * NOTE(review): declarations and the summation loop header are elided
 * in this listing.
 */
2123 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2127 dc+= src[-1+i*stride];
2130 dc= 0x01010101*((dc + 8)>>4);
2132 for(i=0; i<16; i++){
2133 ((uint32_t*)(src+i*stride))[0]=
2134 ((uint32_t*)(src+i*stride))[1]=
2135 ((uint32_t*)(src+i*stride))[2]=
2136 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 top-DC intra prediction (left unavailable): fills the block
 * with the rounded average of the 16 top neighbour pixels.
 * NOTE(review): declarations and the summation loop over src[i-stride]
 * are elided in this listing.
 */
2140 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2146 dc= 0x01010101*((dc + 8)>>4);
2148 for(i=0; i<16; i++){
2149 ((uint32_t*)(src+i*stride))[0]=
2150 ((uint32_t*)(src+i*stride))[1]=
2151 ((uint32_t*)(src+i*stride))[2]=
2152 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 DC intra prediction when no neighbours are available: every
 * pixel is set to the mid-range value 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;
    for(i=0; i<16; i++){
        uint32_t *row= (uint32_t*)(src+i*stride);
        row[0]= row[1]= row[2]= row[3]= 0x01010101U*128U;
    }
}
/**
 * 16x16 plane intra prediction shared between H.264 and SVQ3: fits a
 * gradient (H horizontal, V vertical) from the edge pixels, then fills
 * the block with the clipped linear surface. With svq3 set, the H/V
 * scaling matches SVQ3's (swapped and differently rounded) variant.
 * NOTE(review): declarations of i/j/a/b, the svq3 branch structure and
 * the loop closing braces / src advance are elided in this listing.
 */
2167 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2170 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2171 const uint8_t * const src0 = src+7-stride;
2172 const uint8_t *src1 = src+8*stride-1;
2173 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2174 int H = src0[1] - src0[-1];
2175 int V = src1[0] - src2[ 0];
/* weighted sums of edge differences, weights 2..8 */
2176 for(k=2; k<=8; ++k) {
2177 src1 += stride; src2 -= stride;
2178 H += k*(src0[k] - src0[-k]);
2179 V += k*(src1[0] - src2[ 0]);
/* svq3 variant of the gradient scaling */
2182 H = ( 5*(H/4) ) / 16;
2183 V = ( 5*(V/4) ) / 16;
2185 /* required for 100% accuracy */
2186 i = H; H = V; V = i;
/* H.264 variant of the gradient scaling */
2188 H = ( 5*H+32 ) >> 6;
2189 V = ( 5*V+32 ) >> 6;
2192 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2193 for(j=16; j>0; --j) {
2196 for(i=-16; i<0; i+=4) {
2197 src[16+i] = cm[ (b ) >> 5 ];
2198 src[17+i] = cm[ (b+ H) >> 5 ];
2199 src[18+i] = cm[ (b+2*H) >> 5 ];
2200 src[19+i] = cm[ (b+3*H) >> 5 ];
/** 16x16 plane prediction, H.264 variant (svq3 flag cleared). */
2207 static void pred16x16_plane_c(uint8_t *src, int stride){
2208 pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 (chroma) vertical intra prediction: the 8-pixel row above the
 * block is replicated into all 8 rows.
 * NOTE(review): the declaration of i and the for-loop header are elided
 * in this listing.
 */
2211 static void pred8x8_vertical_c(uint8_t *src, int stride){
2213 const uint32_t a= ((uint32_t*)(src-stride))[0];
2214 const uint32_t b= ((uint32_t*)(src-stride))[1];
2217 ((uint32_t*)(src+i*stride))[0]= a;
2218 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 chroma horizontal prediction: replicate each row's left neighbour
 * pixel across the row (byte-replicated via *0x01010101). */
2222 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2226 ((uint32_t*)(src+i*stride))[0]=
2227 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 chroma DC prediction with no neighbours: fill with mid-grey 128. */
2231 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2235 ((uint32_t*)(src+i*stride))[0]=
2236 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 chroma DC prediction from the left column only.
 * dc0 averages rows 0..3, dc2 averages rows 4..7; the top half of the
 * block is filled with dc0 and the bottom half with dc2.
 * NOTE(review): fragmentary copy -- declarations and loop headers elided. */
2240 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2246 dc0+= src[-1+i*stride];
2247 dc2+= src[-1+(i+4)*stride];
2249 dc0= 0x01010101*((dc0 + 2)>>2);
2250 dc2= 0x01010101*((dc2 + 2)>>2);
/* top 4 rows get dc0 */
2253 ((uint32_t*)(src+i*stride))[0]=
2254 ((uint32_t*)(src+i*stride))[1]= dc0;
/* bottom 4 rows get dc2 */
2257 ((uint32_t*)(src+i*stride))[0]=
2258 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 chroma DC prediction from the top row only.
 * dc0 averages top columns 0..3, dc1 averages columns 4..7; the left
 * 4x8 half is filled with dc0 and the right half with dc1.
 * NOTE(review): fragmentary copy -- declarations and loop headers elided. */
2262 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2268 dc0+= src[i-stride];
2269 dc1+= src[4+i-stride];
2271 dc0= 0x01010101*((dc0 + 2)>>2);
2272 dc1= 0x01010101*((dc1 + 2)>>2);
2275 ((uint32_t*)(src+i*stride))[0]= dc0;
2276 ((uint32_t*)(src+i*stride))[1]= dc1;
2279 ((uint32_t*)(src+i*stride))[0]= dc0;
2280 ((uint32_t*)(src+i*stride))[1]= dc1;
/* Full 8x8 chroma DC prediction using both top and left neighbours.
 * Per the H.264 spec, each 4x4 quadrant gets its own DC:
 *   top-left: avg of left(0..3)+top(0..3); top-right: avg of top(4..7);
 *   bottom-left: avg of left(4..7); bottom-right: avg of the latter two.
 * NOTE(review): fragmentary copy -- declarations and loop headers elided. */
2285 static void pred8x8_dc_c(uint8_t *src, int stride){
2287 int dc0, dc1, dc2, dc3;
2291 dc0+= src[-1+i*stride] + src[i-stride];
2292 dc1+= src[4+i-stride];
2293 dc2+= src[-1+(i+4)*stride];
2295 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2296 dc0= 0x01010101*((dc0 + 4)>>3);
2297 dc1= 0x01010101*((dc1 + 2)>>2);
2298 dc2= 0x01010101*((dc2 + 2)>>2);
/* top half: dc0 | dc1 */
2301 ((uint32_t*)(src+i*stride))[0]= dc0;
2302 ((uint32_t*)(src+i*stride))[1]= dc1;
/* bottom half: dc2 | dc3 */
2305 ((uint32_t*)(src+i*stride))[0]= dc2;
2306 ((uint32_t*)(src+i*stride))[1]= dc3;
/* 8x8 chroma plane (gradient) prediction: same scheme as the 16x16 plane
 * mode but over an 8x8 block, with the H.264-specified (17*x+16)>>5 scaling.
 * NOTE(review): fragmentary copy -- declarations and some braces elided. */
2310 static void pred8x8_plane_c(uint8_t *src, int stride){
2313 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2314 const uint8_t * const src0 = src+3-stride;
2315 const uint8_t *src1 = src+4*stride-1;
2316 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2317 int H = src0[1] - src0[-1];
2318 int V = src1[0] - src2[ 0];
2319 for(k=2; k<=4; ++k) {
2320 src1 += stride; src2 -= stride;
2321 H += k*(src0[k] - src0[-k]);
2322 V += k*(src1[0] - src2[ 0]);
2324 H = ( 17*H+16 ) >> 5;
2325 V = ( 17*V+16 ) >> 5;
2327 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2328 for(j=8; j>0; --j) {
/* emit one predicted row, clipping each sample through the crop table */
2331 src[0] = cm[ (b ) >> 5 ];
2332 src[1] = cm[ (b+ H) >> 5 ];
2333 src[2] = cm[ (b+2*H) >> 5 ];
2334 src[3] = cm[ (b+3*H) >> 5 ];
2335 src[4] = cm[ (b+4*H) >> 5 ];
2336 src[5] = cm[ (b+5*H) >> 5 ];
2337 src[6] = cm[ (b+6*H) >> 5 ];
2338 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) prediction functions.
 * They load low-pass-filtered neighbour samples into locals l0..l7 (left
 * column), t0..t15 (top row + top-right), and lt (top-left corner),
 * handling unavailable neighbours via has_topleft / has_topright.
 * NOTE(review): fragmentary copy -- the "#define PL(y) \", "#define PT(x) \"
 * and "#define PTR(x) \" header lines are elided; only their bodies remain. */
2343 #define SRC(x,y) src[(x)+(y)*stride]
/* body of the elided PL(y) macro: 1-2-1 filtered left-column sample */
2345 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2346 #define PREDICT_8x8_LOAD_LEFT \
2347 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2348 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2349 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2350 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* body of the elided PT(x) macro: 1-2-1 filtered top-row sample */
2353 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2354 #define PREDICT_8x8_LOAD_TOP \
2355 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2356 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2357 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2358 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2359 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* body of the elided PTR(x) macro: filtered top-right sample (assignment form) */
2362 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2363 #define PREDICT_8x8_LOAD_TOPRIGHT \
2364 int t8, t9, t10, t11, t12, t13, t14, t15; \
2365 if(has_topright) { \
2366 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2367 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2368 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2370 #define PREDICT_8x8_LOAD_TOPLEFT \
2371 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* fill the 8x8 block with the DC word v, 4 bytes at a time */
2373 #define PREDICT_8x8_DC(v) \
2375 for( y = 0; y < 8; y++ ) { \
2376 ((uint32_t*)src)[0] = \
2377 ((uint32_t*)src)[1] = v; \
2381 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
/* 8x8 luma DC prediction with no neighbours: fill with mid-grey 128 */
2383 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the (filtered) left column: average of l0..l7.
 * NOTE(review): the PREDICT_8x8_DC(dc) store call appears elided in this copy. */
2385 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2387 PREDICT_8x8_LOAD_LEFT;
2388 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the (filtered) top row: average of t0..t7.
 * NOTE(review): the PREDICT_8x8_DC(dc) store call appears elided in this copy. */
2391 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2393 PREDICT_8x8_LOAD_TOP;
2394 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both neighbours: average of the 16 filtered
 * samples l0..l7 and t0..t7.
 * NOTE(review): the PREDICT_8x8_DC(dc) store call appears elided in this copy. */
2397 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_LEFT;
2400 PREDICT_8x8_LOAD_TOP;
2401 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2402 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: each row y is filled with the filtered
 * left-column sample l(y), byte-replicated via 0x01010101. */
2405 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2407 PREDICT_8x8_LOAD_LEFT;
2408 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2409 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2410 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: row 0 is built from the filtered top
 * samples t0..t7, then copied into rows 1..7 as a 64-bit word.
 * NOTE(review): fragmentary copy -- the stores of t0..t7 into row 0 are elided. */
2413 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2416 PREDICT_8x8_LOAD_TOP;
2425 for( y = 1; y < 8; y++ )
2426 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: each anti-diagonal of the block
 * is a 1-2-1 filtered combination of three consecutive top / top-right
 * samples (t0..t15), per the H.264 8x8 intra prediction rules. */
2428 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2430 PREDICT_8x8_LOAD_TOP;
2431 PREDICT_8x8_LOAD_TOPRIGHT;
2432 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2433 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2434 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2435 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2436 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2437 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2438 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2439 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2440 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2441 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2442 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2443 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2444 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2445 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2446 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: diagonals are filtered
 * combinations running from the left column (l7..l0) through the
 * top-left corner (lt) into the top row (t0..t7). */
2448 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2450 PREDICT_8x8_LOAD_TOP;
2451 PREDICT_8x8_LOAD_LEFT;
2452 PREDICT_8x8_LOAD_TOPLEFT;
2453 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2454 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2455 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2456 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2457 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2458 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2459 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2460 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2461 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2462 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2463 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2464 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2465 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2466 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2467 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: mixes 2-tap ((a+b+1)>>1) and 3-tap
 * ((a+2b+c+2)>>2) filtered neighbour samples along a half-pel-slanted
 * direction, per the H.264 8x8 intra prediction rules. */
2470 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2472 PREDICT_8x8_LOAD_TOP;
2473 PREDICT_8x8_LOAD_LEFT;
2474 PREDICT_8x8_LOAD_TOPLEFT;
2475 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2476 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2477 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2478 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2479 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2480 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2481 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2482 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2483 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2484 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2485 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2486 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2487 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2488 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2489 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2490 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2491 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2492 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2493 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2494 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2495 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2496 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: 2-tap and 3-tap filtered samples
 * from the left column, top-left corner and top row, laid out along a
 * shallow downward-left direction. */
2498 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2500 PREDICT_8x8_LOAD_TOP;
2501 PREDICT_8x8_LOAD_LEFT;
2502 PREDICT_8x8_LOAD_TOPLEFT;
2503 SRC(0,7)= (l6 + l7 + 1) >> 1;
2504 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2505 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2506 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2507 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2508 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2509 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2510 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2511 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2512 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2513 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2514 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2515 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2516 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2517 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2518 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2519 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2520 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2521 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2522 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2523 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2524 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: built entirely from the top row and
 * top-right samples (t0..t12), alternating 2-tap and 3-tap filters. */
2526 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2528 PREDICT_8x8_LOAD_TOP;
2529 PREDICT_8x8_LOAD_TOPRIGHT;
2530 SRC(0,0)= (t0 + t1 + 1) >> 1;
2531 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2532 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2533 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2534 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2535 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2536 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2537 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2538 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2539 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2540 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2541 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2542 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2543 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2544 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2545 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2546 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2547 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2548 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2549 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2550 SRC(7,6)= (t10 + t11 + 1) >> 1;
2551 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: built only from the left column
 * (l0..l7); past the last usable diagonal, the remaining bottom-right
 * samples are all set to l7 (the last left sample). */
2553 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2555 PREDICT_8x8_LOAD_LEFT;
2556 SRC(0,0)= (l0 + l1 + 1) >> 1;
2557 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2558 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2559 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2560 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2561 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2562 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2563 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2564 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2565 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2566 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2567 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2568 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2569 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2570 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2571 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2572 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2573 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2575 #undef PREDICT_8x8_LOAD_LEFT
2576 #undef PREDICT_8x8_LOAD_TOP
2577 #undef PREDICT_8x8_LOAD_TOPLEFT
2578 #undef PREDICT_8x8_LOAD_TOPRIGHT
2579 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in one prediction direction.
 * Reads the cached MV for block n from mv_cache, derives the quarter-pel
 * luma position (luma_xy) and eighth-pel chroma position, spills to
 * ff_emulated_edge_mc when the reference area crosses the padded picture
 * edge, then runs the qpel luma op and the chroma op into dest_*.
 * NOTE(review): fragmentary copy -- several braces and the emu-edge
 * condition bodies for chroma are elided. */
2585 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2586 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2587 int src_x_offset, int src_y_offset,
2588 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2589 MpegEncContext * const s = &h->s;
/* motion vector in quarter-pel units, offset to this partition's position */
2590 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2591 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2592 const int luma_xy= (mx&3) + ((my&3)<<2);
2593 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2594 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2595 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2596 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2597 int extra_height= extra_width;
2599 const int full_mx= mx>>2;
2600 const int full_my= my>>2;
2601 const int pic_width = 16*s->mb_width;
2602 const int pic_height = 16*s->mb_height;
/* sub-pel interpolation needs 3 extra pixels of margin on each side */
2607 if(mx&7) extra_width -= 3;
2608 if(my&7) extra_height -= 3;
2610 if( full_mx < 0-extra_width
2611 || full_my < 0-extra_height
2612 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2613 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2614 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2615 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2619 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2621 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2624 if(s->flags&CODEC_FLAG_GRAY) return;
/* chroma: emulate edges if needed, then apply the chroma MC op */
2627 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2628 src_cb= s->edge_emu_buffer;
2630 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2633 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2634 src_cr= s->edge_emu_buffer;
2636 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/* Standard (unweighted) MC for one partition: runs list0 prediction with
 * the "put" ops, then optionally averages in list1 by switching to the
 * "avg" ops for the second mc_dir_part call.
 * NOTE(review): fragmentary copy -- the if(list0)/if(list1) lines and the
 * qpix_op=qpix_avg switch appear elided. */
2639 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2640 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2641 int x_offset, int y_offset,
2642 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2643 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2644 int list0, int list1){
2645 MpegEncContext * const s = &h->s;
2646 qpel_mc_func *qpix_op= qpix_put;
2647 h264_chroma_mc_func chroma_op= chroma_put;
/* move dest pointers to this partition; x/y_offset are in chroma pixels */
2649 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2650 dest_cb += x_offset + y_offset*s->uvlinesize;
2651 dest_cr += x_offset + y_offset*s->uvlinesize;
2652 x_offset += 8*s->mb_x;
2653 y_offset += 8*s->mb_y;
2656 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2657 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2658 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2659 qpix_op, chroma_op);
/* second direction averages into the first's result */
2662 chroma_op= chroma_avg;
2666 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2667 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2668 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2669 qpix_op, chroma_op);
/* Weighted-prediction MC for one partition.
 * Bipred path: predict list0 into dest and list1 into the obmc_scratchpad
 * temp planes, then combine with implicit (use_weight==2) or explicit
 * per-ref biweights. Unipred path: predict the single list then apply
 * explicit luma/chroma weights in place.
 * NOTE(review): fragmentary copy -- the if(list0 && list1)/else lines and
 * some closing braces are elided. */
2673 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2674 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2675 int x_offset, int y_offset,
2676 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2677 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2678 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2679 int list0, int list1){
2680 MpegEncContext * const s = &h->s;
2682 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2683 dest_cb += x_offset + y_offset*s->uvlinesize;
2684 dest_cr += x_offset + y_offset*s->uvlinesize;
2685 x_offset += 8*s->mb_x;
2686 y_offset += 8*s->mb_y;
2689 /* don't optimize for luma-only case, since B-frames usually
2690 * use implicit weights => chroma too. */
2691 uint8_t *tmp_cb = s->obmc_scratchpad;
2692 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2693 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2694 int refn0 = h->ref_cache[0][ scan8[n] ];
2695 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 into the real destination, list1 into the scratch planes */
2697 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2698 dest_y, dest_cb, dest_cr,
2699 x_offset, y_offset, qpix_put, chroma_put);
2700 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2701 tmp_y, tmp_cb, tmp_cr,
2702 x_offset, y_offset, qpix_put, chroma_put);
2704 if(h->use_weight == 2){
/* implicit weights: w0 + w1 == 64, denom 5, offset 0 */
2705 int weight0 = h->implicit_weight[refn0][refn1];
2706 int weight1 = 64 - weight0;
2707 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0);
2708 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
2709 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
/* explicit biweights (presumably the else branch -- its line is elided) */
2711 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2712 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2713 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2714 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2715 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2716 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2717 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2718 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2719 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* unidirectional path: predict then weight in place */
2722 int list = list1 ? 1 : 0;
2723 int refn = h->ref_cache[list][ scan8[n] ];
2724 Picture *ref= &h->ref_list[list][refn];
2725 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2726 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2727 qpix_put, chroma_put);
2729 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2730 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2731 if(h->use_weight_chroma){
2732 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2733 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2734 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2735 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* MC dispatcher for one partition: choose the weighted path when explicit
 * weighting is on (use_weight==1), or when implicit weighting is active on
 * a bipred partition and the implicit weight differs from the trivial
 * 32/32 split; otherwise fall through to the standard put/avg path. */
2740 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2741 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2742 int x_offset, int y_offset,
2743 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2744 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2745 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2746 int list0, int list1){
2747 if((h->use_weight==2 && list0 && list1
2748 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2749 || h->use_weight==1)
2750 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2751 x_offset, y_offset, qpix_put, chroma_put,
2752 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2754 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2755 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Prefetch reference pixels for the MV of block 0, roughly 4 macroblocks
 * ahead of the current decode position, to warm the cache before MC runs. */
2758 static inline void prefetch_motion(H264Context *h, int list){
2759 /* fetch pixels for estimated mv 4 macroblocks ahead
2760 * optimized for 64byte cache lines */
2761 MpegEncContext * const s = &h->s;
2762 const int refn = h->ref_cache[list][scan8[0]];
2764 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2765 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2766 uint8_t **src= h->ref_list[list][refn].data;
2767 int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
2768 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are interleaved in memory: prefetch cb with cr's distance as stride */
2769 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2770 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Top-level inter MC for one macroblock: dispatches per partition shape
 * (16x16, 16x8, 8x16, or per-8x8-block sub-partitions down to 4x4),
 * selecting the matching qpel/chroma op tables and weight function slots,
 * with a reference prefetch before and after.
 * NOTE(review): fragmentary copy -- the 8x8 sub-partition loop header and
 * several closing braces are elided. */
2774 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2775 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2776 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2777 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2778 MpegEncContext * const s = &h->s;
2779 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2780 const int mb_type= s->current_picture.mb_type[mb_xy];
2782 assert(IS_INTER(mb_type));
2784 prefetch_motion(h, 0);
2786 if(IS_16X16(mb_type)){
2787 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2788 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2789 &weight_op[0], &weight_avg[0],
2790 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2791 }else if(IS_16X8(mb_type)){
/* two 16x8 halves: top (n=0) and bottom (n=8, y_offset 4) */
2792 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2793 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2794 &weight_op[1], &weight_avg[1],
2795 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2796 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2797 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2798 &weight_op[1], &weight_avg[1],
2799 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2800 }else if(IS_8X16(mb_type)){
/* two 8x16 halves: left (n=0) and right (n=4, x_offset 4) */
2801 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2802 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2803 &weight_op[2], &weight_avg[2],
2804 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2805 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2806 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2807 &weight_op[2], &weight_avg[2],
2808 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2812 assert(IS_8X8(mb_type));
/* per-8x8-block dispatch using sub_mb_type (loop header elided in this copy) */
2815 const int sub_mb_type= h->sub_mb_type[i];
2817 int x_offset= (i&1)<<2;
2818 int y_offset= (i&2)<<1;
2820 if(IS_SUB_8X8(sub_mb_type)){
2821 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2822 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2823 &weight_op[3], &weight_avg[3],
2824 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2825 }else if(IS_SUB_8X4(sub_mb_type)){
2826 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2827 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2828 &weight_op[4], &weight_avg[4],
2829 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2830 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2831 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2832 &weight_op[4], &weight_avg[4],
2833 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2834 }else if(IS_SUB_4X8(sub_mb_type)){
2835 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2836 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2837 &weight_op[5], &weight_avg[5],
2838 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2839 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2840 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2841 &weight_op[5], &weight_avg[5],
2842 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2845 assert(IS_SUB_4X4(sub_mb_type));
2847 int sub_x_offset= x_offset + 2*(j&1);
2848 int sub_y_offset= y_offset + (j&2);
2849 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2850 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2851 &weight_op[6], &weight_avg[6],
2852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2858 prefetch_motion(h, 1);
/* One-time initialization of the CAVLC tables: coeff_token, total_zeros,
 * chroma-DC variants, and the run-before tables (run_vlc[0..5] plus the
 * shared run7 table for runs >= 7), guarded by a static "done" flag.
 * NOTE(review): fragmentary copy -- the if(!done) guard and loop headers
 * are elided. */
2861 static void decode_init_vlc(H264Context *h){
2862 static int done = 0;
2868 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2869 &chroma_dc_coeff_token_len [0], 1, 1,
2870 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2873 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2874 &coeff_token_len [i][0], 1, 1,
2875 &coeff_token_bits[i][0], 1, 1, 1);
2879 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2880 &chroma_dc_total_zeros_len [i][0], 1, 1,
2881 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2883 for(i=0; i<15; i++){
2884 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2885 &total_zeros_len [i][0], 1, 1,
2886 &total_zeros_bits[i][0], 1, 1, 1);
2890 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2891 &run_len [i][0], 1, 1,
2892 &run_bits[i][0], 1, 1, 1);
2894 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2895 &run_len [6][0], 1, 1,
2896 &run_bits[6][0], 1, 1, 1);
2901 * Sets the intra prediction function pointers.
/* Wires the C reference implementations of the intra prediction modes
 * into the per-context function pointer tables: pred4x4 (9 modes + DC
 * fallbacks), pred8x8l (8x8 luma), pred8x8 (chroma) and pred16x16. */
2903 static void init_pred_ptrs(H264Context *h){
2904 // MpegEncContext * const s = &h->s;
2906 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2907 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2908 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2909 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2910 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2911 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2912 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2913 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2914 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2915 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2916 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2917 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2919 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2920 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2921 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2922 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2923 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2924 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2925 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2926 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2927 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2928 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2929 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2930 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2932 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2933 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2934 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2935 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2936 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2937 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2938 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2940 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2941 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2942 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2943 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2944 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2945 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2946 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/* Frees all per-context tables allocated by alloc_tables (and the MC
 * scratchpad); av_freep NULLs each pointer so a double call is safe. */
2949 static void free_tables(H264Context *h){
2950 av_freep(&h->intra4x4_pred_mode);
2951 av_freep(&h->chroma_pred_mode_table);
2952 av_freep(&h->cbp_table);
2953 av_freep(&h->mvd_table[0]);
2954 av_freep(&h->mvd_table[1]);
2955 av_freep(&h->direct_table);
2956 av_freep(&h->non_zero_count);
2957 av_freep(&h->slice_table_base);
2958 av_freep(&h->top_borders[1]);
2959 av_freep(&h->top_borders[0]);
/* slice_table points into slice_table_base -- clear the alias too */
2960 h->slice_table= NULL;
2962 av_freep(&h->mb2b_xy);
2963 av_freep(&h->mb2b8_xy);
2965 av_freep(&h->s.obmc_scratchpad);
/* Builds the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices; when both 8x8 matrices are identical, table 1 aliases
 * table 0. Entries are transposed when the active IDCT is not the C one.
 * NOTE(review): fragmentary copy -- inner loop headers and braces elided. */
2968 static void init_dequant8_coeff_table(H264Context *h){
2970 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2971 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2972 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2974 for(i=0; i<2; i++ ){
2975 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2976 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2980 for(q=0; q<52; q++){
2981 int shift = div6[q];
2984 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2985 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2986 h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the 4x4 dequantization tables for the 6 scaling-matrix slots and
 * all 52 QP values; a slot whose matrix equals an earlier one aliases that
 * earlier buffer. Entries are transposed when the IDCT is not the C one.
 * NOTE(review): fragmentary copy -- the inner j loop header and some
 * braces are elided. */
2991 static void init_dequant4_coeff_table(H264Context *h){
2993 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2994 for(i=0; i<6; i++ ){
2995 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2997 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2998 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3005 for(q=0; q<52; q++){
3006 int shift = div6[q] + 2;
3009 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3010 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3011 h->pps.scaling_matrix4[i][x]) << shift;
/* Initializes the 4x4 (and, when 8x8 transform mode is on, 8x8) dequant
 * tables; with transform bypass, the QP==0 rows are forced to the identity
 * scale 1<<6 so lossless blocks pass through unscaled.
 * NOTE(review): fragmentary copy -- loop headers for the bypass fill elided. */
3016 static void init_dequant_tables(H264Context *h){
3018 init_dequant4_coeff_table(h);
3019 if(h->pps.transform_8x8_mode)
3020 init_dequant8_coeff_table(h);
3021 if(h->sps.transform_bypass){
3024 h->dequant4_coeff[i][0][x] = 1<<6;
3025 if(h->pps.transform_8x8_mode)
3028 h->dequant8_coeff[i][0][x] = 1<<6;
3035 * needs width/height
/* Allocates all per-context decode tables sized from mb_stride/mb_height
 * (CABAC tables only when the PPS enables CABAC), initializes the
 * slice_table to -1, and precomputes the mb -> b/b8 index maps.
 * @return 0 on success; presumably the CHECKED_ALLOCZ fail path returns
 *         an error (elided in this copy). */
3037 static int alloc_tables(H264Context *h){
3038 MpegEncContext * const s = &h->s;
/* +1 row of padding above the picture for the neighbour context */
3039 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3042 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3044 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3045 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
3046 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3047 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3048 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3050 if( h->pps.cabac ) {
3051 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3052 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3053 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3054 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table skips the padding row/column */
3057 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
3058 h->slice_table= h->slice_table_base + s->mb_stride + 1;
3060 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3061 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3062 for(y=0; y<s->mb_height; y++){
3063 for(x=0; x<s->mb_width; x++){
3064 const int mb_xy= x + y*s->mb_stride;
3065 const int b_xy = 4*x + 4*y*h->b_stride;
3066 const int b8_xy= 2*x + 2*y*h->b8_stride;
3068 h->mb2b_xy [mb_xy]= b_xy;
3069 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start, once linesize is known */
3073 s->obmc_scratchpad = NULL;
3075 if(!h->dequant4_coeff[0])
3076 init_dequant_tables(h);
/* Context setup shared by decoder (and encoder) init: copies dimensions
 * and codec id from the AVCodecContext, and seeds the PPS scaling
 * matrices with the flat default value 16. */
3084 static void common_init(H264Context *h){
3085 MpegEncContext * const s = &h->s;
3087 s->width = s->avctx->width;
3088 s->height = s->avctx->height;
3089 s->codec_id= s->avctx->codec->id;
/* -1 == "no PPS applied yet" for the lazy dequant-table rebuild */
3093 h->dequant_coeff_pps= -1;
3094 s->unrestricted_mv=1;
3095 s->decode=1; //FIXME
3097 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3098 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets MPV defaults, output format and pixel
 * format, and detects AVC-style ("avcC", extradata starting with byte 1)
 * streams versus raw Annex-B.
 * NOTE(review): fragmentary copy -- the common_init/decode_init_vlc calls
 * and the avcC-handling body are elided. */
3101 static int decode_init(AVCodecContext *avctx){
3102 H264Context *h= avctx->priv_data;
3103 MpegEncContext * const s = &h->s;
3105 MPV_decode_defaults(s);
3110 s->out_format = FMT_H264;
3111 s->workaround_bugs= avctx->workaround_bugs;
3114 // s->decode_mb= ff_h263_decode_mb;
3116 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with 1 == avcC (MP4-style) configuration record */
3120 if(avctx->extradata_size > 0 && avctx->extradata &&
3121 *(char *)avctx->extradata == 1){
/* Per-frame setup: starts the MPV frame and error resilience, then fills
 * h->block_offset with the pixel offset of each 4x4 block relative to the
 * macroblock origin. Entries [0..23] use the frame linesize, entries
 * [24..47] use doubled (field/MBAFF) vertical spacing.
 * NOTE(review): extract is missing some original lines (closing braces,
 * the chroma loop header around line 3145). */
3131 static int frame_start(H264Context *h){
3132 MpegEncContext * const s = &h->s;
3135 if(MPV_frame_start(s, s->avctx) < 0)
3137 ff_er_frame_start(s);
3139 assert(s->linesize && s->uvlinesize);
3141 for(i=0; i<16; i++){
// scan8[] maps block index to a cache position; &7 gives x, >>3 gives y
3142 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3143 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3146 h->block_offset[16+i]=
3147 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3148 h->block_offset[24+16+i]=
3149 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3152 /* can't be in alloc_tables because linesize isn't known there.
3153 * FIXME: redo bipred weight to not require extra buffer? */
3154 if(!s->obmc_scratchpad)
3155 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3157 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves the borders of the just-decoded macroblock (rightmost luma/chroma
 * column into h->left_border, bottom row into h->top_borders[0]) so the
 * deblocking filter of the neighbouring macroblocks can use the
 * pre-filter pixels. */
3161 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3162 MpegEncContext * const s = &h->s;
3166 src_cb -= uvlinesize;
3167 src_cr -= uvlinesize;
3169 // There are two lines saved, the line above the top macroblock of a pair,
3170 // and the line above the bottom macroblock
3171 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3172 for(i=1; i<17; i++){
3173 h->left_border[i]= src_y[15+i* linesize];
// copy bottom luma row (16 pixels) as two 64-bit stores
3176 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3177 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3179 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma borders live at offsets 17/17+9 (left) and 16/24 (top)
3180 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3181 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3183 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3184 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3186 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3187 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Exchanges (xchg != 0 restores, depending on call site) the macroblock
 * border pixels with the saved copies in h->left_border/h->top_borders,
 * so intra prediction sees unfiltered neighbours while deblocking is on.
 * NOTE(review): the XCHG macro body (original lines 3203-3208) is missing
 * from this extract. */
3191 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3192 MpegEncContext * const s = &h->s;
3195 int deblock_left = (s->mb_x > 0);
3196 int deblock_top = (s->mb_y > 0);
// step back one row and one column to address the border pixels
3198 src_y -= linesize + 1;
3199 src_cb -= uvlinesize + 1;
3200 src_cr -= uvlinesize + 1;
3202 #define XCHG(a,b,t,xchg)\
3209 for(i = !deblock_top; i<17; i++){
3210 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3215 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3216 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3217 if(s->mb_x+1 < s->mb_width){
// also exchange the top-right 8 pixels of the next macroblock column
3218 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3222 if(!(s->flags&CODEC_FLAG_GRAY)){
3224 for(i = !deblock_top; i<9; i++){
3225 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3226 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3230 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3231 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border: saves the borders of a macroblock
 * pair (two stacked 16x16 MBs). Two bottom rows are saved, one in
 * h->top_borders[0] and one in h->top_borders[1]; the left border holds
 * 34 luma rows (2 saved + 32 pixel rows). */
3236 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3237 MpegEncContext * const s = &h->s;
3240 src_y -= 2 * linesize;
3241 src_cb -= 2 * uvlinesize;
3242 src_cr -= 2 * uvlinesize;
3244 // There are two lines saved, the line above the top macroblock of a pair,
3245 // and the line above the bottom macroblock
3246 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3247 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3248 for(i=2; i<34; i++){
3249 h->left_border[i]= src_y[15+i* linesize];
3252 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3253 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3254 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3255 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3257 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma left borders start at 34 (Cb) and 34+18 (Cr)
3258 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3259 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3260 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3261 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3262 for(i=2; i<18; i++){
3263 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3264 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3266 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3267 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3268 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3269 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border: exchanges the border pixels of a
 * macroblock pair with the saved copies (both top_borders rows).
 * NOTE(review): the XCHG macro body (original lines 3287-3292) is missing
 * from this extract. */
3273 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3274 MpegEncContext * const s = &h->s;
3277 int deblock_left = (s->mb_x > 0);
3278 int deblock_top = (s->mb_y > 0);
3280 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// two rows and one column back: pair borders are two lines deep
3282 src_y -= 2 * linesize + 1;
3283 src_cb -= 2 * uvlinesize + 1;
3284 src_cr -= 2 * uvlinesize + 1;
3286 #define XCHG(a,b,t,xchg)\
// skip the first two left-border rows when there is no top neighbour
3293 for(i = (!deblock_top)<<1; i<34; i++){
3294 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3299 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3300 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3301 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3302 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3305 if(!(s->flags&CODEC_FLAG_GRAY)){
3307 for(i = (!deblock_top) << 1; i<18; i++){
3308 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3309 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3313 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3314 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3315 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3316 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* High-level macroblock decode: reconstructs one macroblock into the
 * current picture — PCM copy, intra prediction + IDCT, or motion
 * compensation — then chroma reconstruction and deblocking (with MBAFF
 * pair handling). Also shared with the SVQ3 decoder (s->codec_id checks).
 * NOTE(review): this extract is missing many original lines (else
 * branches, closing braces, some declarations); the visible body is
 * incomplete and kept byte-identical. */
3321 static void hl_decode_mb(H264Context *h){
3322 MpegEncContext * const s = &h->s;
3323 const int mb_x= s->mb_x;
3324 const int mb_y= s->mb_y;
3325 const int mb_xy= mb_x + mb_y*s->mb_stride;
3326 const int mb_type= s->current_picture.mb_type[mb_xy];
3327 uint8_t *dest_y, *dest_cb, *dest_cr;
3328 int linesize, uvlinesize /*dct_offset*/;
3330 int *block_offset = &h->block_offset[0];
3331 const unsigned int bottom = mb_y & 1;
// lossless path: qscale 0 with SPS qpprime_y_zero_transform_bypass_flag
3332 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3333 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3334 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3339 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3340 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3341 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// field macroblocks use doubled strides and the field block_offset table
3343 if (h->mb_field_decoding_flag) {
3344 linesize = s->linesize * 2;
3345 uvlinesize = s->uvlinesize * 2;
3346 block_offset = &h->block_offset[24];
3347 if(mb_y&1){ //FIXME move out of this func?
3348 dest_y -= s->linesize*15;
3349 dest_cb-= s->uvlinesize*7;
3350 dest_cr-= s->uvlinesize*7;
3353 linesize = s->linesize;
3354 uvlinesize = s->uvlinesize;
3355 // dct_offset = s->linesize * 16;
// pick the IDCT/add functions matching the transform size and bypass mode
3358 if(transform_bypass){
3360 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3361 }else if(IS_8x8DCT(mb_type)){
3362 idct_dc_add = s->dsp.h264_idct8_dc_add;
3363 idct_add = s->dsp.h264_idct8_add;
3365 idct_dc_add = s->dsp.h264_idct_dc_add;
3366 idct_add = s->dsp.h264_idct_add;
3369 if (IS_INTRA_PCM(mb_type)) {
3372 // The pixels are stored in h->mb array in the same order as levels,
3373 // copy them in output in the correct order.
3374 for(i=0; i<16; i++) {
3375 for (y=0; y<4; y++) {
3376 for (x=0; x<4; x++) {
3377 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3381 for(i=16; i<16+4; i++) {
3382 for (y=0; y<4; y++) {
3383 for (x=0; x<4; x++) {
3384 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3388 for(i=20; i<20+4; i++) {
3389 for (y=0; y<4; y++) {
3390 for (x=0; x<4; x++) {
3391 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3396 if(IS_INTRA(mb_type)){
// intra prediction must see unfiltered neighbours: swap in saved borders
3397 if(h->deblocking_filter) {
3398 if (h->mb_aff_frame) {
3400 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3402 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3406 if(!(s->flags&CODEC_FLAG_GRAY)){
3407 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3408 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3411 if(IS_INTRA4x4(mb_type)){
3413 if(IS_8x8DCT(mb_type)){
3414 for(i=0; i<16; i+=4){
3415 uint8_t * const ptr= dest_y + block_offset[i];
3416 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3417 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3418 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3419 (h->topright_samples_available<<(i+1))&0x8000, linesize);
// DC-only block: cheaper dc_add path
3421 if(nnz == 1 && h->mb[i*16])
3422 idct_dc_add(ptr, h->mb + i*16, linesize);
3424 idct_add(ptr, h->mb + i*16, linesize);
3428 for(i=0; i<16; i++){
3429 uint8_t * const ptr= dest_y + block_offset[i];
3431 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes need the top-right samples; synthesize them if unavailable
3434 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3435 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3436 assert(mb_y || linesize <= block_offset[i]);
3437 if(!topright_avail){
3438 tr= ptr[3 - linesize]*0x01010101;
3439 topright= (uint8_t*) &tr;
3441 topright= ptr + 4 - linesize;
3445 h->pred4x4[ dir ](ptr, topright, linesize);
3446 nnz = h->non_zero_count_cache[ scan8[i] ];
3448 if(s->codec_id == CODEC_ID_H264){
3449 if(nnz == 1 && h->mb[i*16])
3450 idct_dc_add(ptr, h->mb + i*16, linesize);
3452 idct_add(ptr, h->mb + i*16, linesize);
3454 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
3459 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3460 if(s->codec_id == CODEC_ID_H264){
3461 if(!transform_bypass)
3462 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3464 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3466 if(h->deblocking_filter) {
3467 if (h->mb_aff_frame) {
// restore the pair borders relative to the top MB of the pair
3469 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3470 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3471 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3473 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3477 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3480 }else if(s->codec_id == CODEC_ID_H264){
// inter macroblock: motion compensation
3481 hl_motion(h, dest_y, dest_cb, dest_cr,
3482 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3483 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3484 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
3488 if(!IS_INTRA4x4(mb_type)){
3489 if(s->codec_id == CODEC_ID_H264){
3490 if(IS_INTRA16x16(mb_type)){
3491 for(i=0; i<16; i++){
3492 if(h->non_zero_count_cache[ scan8[i] ])
3493 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3494 else if(h->mb[i*16])
3495 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
// stride 4 for 8x8 transform, 1 for 4x4
3498 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3499 for(i=0; i<16; i+=di){
3500 int nnz = h->non_zero_count_cache[ scan8[i] ];
3502 if(nnz==1 && h->mb[i*16])
3503 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3505 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3510 for(i=0; i<16; i++){
3511 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3512 uint8_t * const ptr= dest_y + block_offset[i];
3513 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
3519 if(!(s->flags&CODEC_FLAG_GRAY)){
3520 uint8_t *dest[2] = {dest_cb, dest_cr};
3521 if(transform_bypass){
3522 idct_add = idct_dc_add = s->dsp.add_pixels4;
3524 idct_add = s->dsp.h264_idct_add;
3525 idct_dc_add = s->dsp.h264_idct_dc_add;
// chroma DC uses separate dequant tables 1/2 (intra) or 4/5 (inter)
3526 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3527 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3529 if(s->codec_id == CODEC_ID_H264){
3530 for(i=16; i<16+8; i++){
3531 if(h->non_zero_count_cache[ scan8[i] ])
3532 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3533 else if(h->mb[i*16])
3534 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3537 for(i=16; i<16+8; i++){
3538 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3539 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3540 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3546 if(h->deblocking_filter) {
3547 if (h->mb_aff_frame) {
// MBAFF: filter the whole pair once the bottom MB has been decoded
3548 const int mb_y = s->mb_y - 1;
3549 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3550 const int mb_xy= mb_x + mb_y*s->mb_stride;
3551 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3552 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// tmp is a debug canary pixel, checked against modification below
3553 uint8_t tmp = s->current_picture.data[1][384];
3554 if (!bottom) return;
3555 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3556 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3557 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3559 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3560 // TODO deblock a pair
3563 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3564 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3565 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3566 if (tmp != s->current_picture.data[1][384]) {
3567 tprintf("modified pixel 8,1 (1)\n");
3571 tprintf("call mbaff filter_mb\n");
3572 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3573 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3574 if (tmp != s->current_picture.data[1][384]) {
3575 tprintf("modified pixel 8,1 (2)\n");
3578 tprintf("call filter_mb\n");
3579 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3580 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3581 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* Builds the default reference picture lists: for B slices the short-term
 * refs are sorted by POC around the current picture (list0 descending
 * below, list1 ascending above); for P slices, short-term refs in decode
 * order followed by long-term refs. See H.264 spec 8.2.4.2.
 * NOTE(review): this extract is missing several original lines; the
 * visible body is incomplete and kept byte-identical. */
3587 * fills the default_ref_list.
3589 static int fill_default_ref_list(H264Context *h){
3590 MpegEncContext * const s = &h->s;
3592 int smallest_poc_greater_than_current = -1;
3593 Picture sorted_short_ref[32];
3595 if(h->slice_type==B_TYPE){
3599 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest poc greater than `limit`
3600 for(out_i=0; out_i<h->short_ref_count; out_i++){
3602 int best_poc=INT_MAX;
3604 for(i=0; i<h->short_ref_count; i++){
3605 const int poc= h->short_ref[i]->poc;
3606 if(poc > limit && poc < best_poc){
3612 assert(best_i != INT_MIN);
3615 sorted_short_ref[out_i]= *h->short_ref[best_i];
3616 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the first future (>= current poc) reference lands
3617 if (-1 == smallest_poc_greater_than_current) {
3618 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3619 smallest_poc_greater_than_current = out_i;
3625 if(s->picture_structure == PICT_FRAME){
3626 if(h->slice_type==B_TYPE){
3628 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3630 // find the largest poc
3631 for(list=0; list<2; list++){
// list0 walks forward from past refs, list1 backward from future refs
3634 int step= list ? -1 : 1;
3636 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3637 while(j<0 || j>= h->short_ref_count){
3638 if(j != -99 && step == (list ? -1 : 1))
3641 j= smallest_poc_greater_than_current + (step>>1);
3643 if(sorted_short_ref[j].reference != 3) continue;
3644 h->default_ref_list[list][index ]= sorted_short_ref[j];
3645 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3648 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3649 if(h->long_ref[i] == NULL) continue;
3650 if(h->long_ref[i]->reference != 3) continue;
3652 h->default_ref_list[ list ][index ]= *h->long_ref[i];
// NOTE(review): stray double semicolon below (harmless)
3653 h->default_ref_list[ list ][index++].pic_id= i;;
3656 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3657 // swap the two first elements of L1 when
3658 // L0 and L1 are identical
3659 Picture temp= h->default_ref_list[1][0];
3660 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3661 h->default_ref_list[1][1] = temp;
3664 if(index < h->ref_count[ list ])
3665 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slices: short-term refs in buffer order, then long-term refs
3669 for(i=0; i<h->short_ref_count; i++){
3670 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3671 h->default_ref_list[0][index ]= *h->short_ref[i];
3672 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3674 for(i = 0; i < 16; i++){
3675 if(h->long_ref[i] == NULL) continue;
3676 if(h->long_ref[i]->reference != 3) continue;
3677 h->default_ref_list[0][index ]= *h->long_ref[i];
3678 h->default_ref_list[0][index++].pic_id= i;;
3680 if(index < h->ref_count[0])
3681 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3684 if(h->slice_type==B_TYPE){
3686 //FIXME second field balh
3690 for (i=0; i<h->ref_count[0]; i++) {
3691 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3693 if(h->slice_type==B_TYPE){
3694 for (i=0; i<h->ref_count[1]; i++) {
3695 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3702 static void print_short_term(H264Context *h);
3703 static void print_long_term(H264Context *h);
/* Parses ref_pic_list_reordering() from the slice header and applies the
 * reordering commands to h->ref_list (H.264 spec 7.3.3.1 / 8.2.4.3).
 * Starts from the default lists; idc 0/1 select short-term refs by
 * frame_num difference, idc 2 selects a long-term ref, idc 3 ends.
 * NOTE(review): many original lines (else branches, braces) are missing
 * from this extract. */
3705 static int decode_ref_pic_list_reordering(H264Context *h){
3706 MpegEncContext * const s = &h->s;
3709 print_short_term(h);
3711 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3713 for(list=0; list<2; list++){
3714 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// ref_pic_list_reordering_flag_l0/l1
3716 if(get_bits1(&s->gb)){
3717 int pred= h->curr_pic_num;
3719 for(index=0; ; index++){
3720 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3723 Picture *ref = NULL;
3725 if(reordering_of_pic_nums_idc==3)
3728 if(index >= h->ref_count[list]){
3729 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3733 if(reordering_of_pic_nums_idc<3){
3734 if(reordering_of_pic_nums_idc<2){
3735 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3737 if(abs_diff_pic_num >= h->max_pic_num){
3738 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num
3742 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3743 else pred+= abs_diff_pic_num;
3744 pred &= h->max_pic_num - 1;
3746 for(i= h->short_ref_count-1; i>=0; i--){
3747 ref = h->short_ref[i];
3748 assert(ref->reference == 3);
3749 assert(!ref->long_ref);
3750 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3754 ref->pic_id= ref->frame_num;
3756 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3757 ref = h->long_ref[pic_id];
3758 ref->pic_id= pic_id;
3759 assert(ref->reference == 3);
3760 assert(ref->long_ref);
3765 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3766 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// shift the list down to `index` and insert the selected reference
3768 for(i=index; i+1<h->ref_count[list]; i++){
3769 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3772 for(; i > index; i--){
3773 h->ref_list[list][i]= h->ref_list[list][i-1];
3775 h->ref_list[list][index]= *ref;
3778 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3784 if(h->slice_type!=B_TYPE) break;
// fill any remaining empty entries with the current picture (error path)
3786 for(list=0; list<2; list++){
3787 for(index= 0; index < h->ref_count[list]; index++){
3788 if(!h->ref_list[list][index].data[0])
3789 h->ref_list[list][index]= s->current_picture;
3791 if(h->slice_type!=B_TYPE) break;
3794 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3795 direct_dist_scale_factor(h);
3796 direct_ref_list_init(h);
/* Parses pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference (H.264 spec 7.3.3.2). Sets
 * h->use_weight / h->use_weight_chroma when any non-default weight is
 * read. List1 is parsed only for B slices.
 * NOTE(review): some original lines (closing braces, the use_weight
 * assignment, inner chroma loop header) are missing from this extract. */
3800 static int pred_weight_table(H264Context *h){
3801 MpegEncContext * const s = &h->s;
3803 int luma_def, chroma_def;
3806 h->use_weight_chroma= 0;
3807 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3808 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weight is 1.0 in fixed point, i.e. 1 << log2_weight_denom
3809 luma_def = 1<<h->luma_log2_weight_denom;
3810 chroma_def = 1<<h->chroma_log2_weight_denom;
3812 for(list=0; list<2; list++){
3813 for(i=0; i<h->ref_count[list]; i++){
3814 int luma_weight_flag, chroma_weight_flag;
3816 luma_weight_flag= get_bits1(&s->gb);
3817 if(luma_weight_flag){
3818 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3819 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3820 if( h->luma_weight[list][i] != luma_def
3821 || h->luma_offset[list][i] != 0)
3824 h->luma_weight[list][i]= luma_def;
3825 h->luma_offset[list][i]= 0;
3828 chroma_weight_flag= get_bits1(&s->gb);
3829 if(chroma_weight_flag){
// j loops over the two chroma planes (Cb, Cr)
3832 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3833 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3834 if( h->chroma_weight[list][i][j] != chroma_def
3835 || h->chroma_offset[list][i][j] != 0)
3836 h->use_weight_chroma= 1;
3841 h->chroma_weight[list][i][j]= chroma_def;
3842 h->chroma_offset[list][i][j]= 0;
3846 if(h->slice_type != B_TYPE) break;
3848 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Computes the implicit bi-prediction weight table for B slices from POC
 * distances (H.264 spec 8.4.2.3.2): weight = 64 - dist_scale_factor,
 * falling back to equal weights (32/32) when the scale factor is out of
 * the [-64, 128] range or when the refs are symmetric around cur_poc. */
3852 static void implicit_weight_table(H264Context *h){
3853 MpegEncContext * const s = &h->s;
3855 int cur_poc = s->current_picture_ptr->poc;
// single symmetric ref pair: implicit weighting is a no-op, disable it
3857 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3858 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3860 h->use_weight_chroma= 0;
// use_weight==2 selects the implicit weighting mode
3865 h->use_weight_chroma= 2;
3866 h->luma_log2_weight_denom= 5;
3867 h->chroma_log2_weight_denom= 5;
3870 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3871 int poc0 = h->ref_list[0][ref0].poc;
3872 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3873 int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipped to [-128,127] per spec; tx approximates 16384/td
3874 int td = clip(poc1 - poc0, -128, 127);
3876 int tb = clip(cur_poc - poc0, -128, 127);
3877 int tx = (16384 + (ABS(td) >> 1)) / td;
3878 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3879 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3880 h->implicit_weight[ref0][ref1] = 32;
3882 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3884 h->implicit_weight[ref0][ref1] = 32;
/* Drops the reference flag of a picture unless it is still pending output
 * (delayed_output_pic or in the delayed_pic queue).
 * NOTE(review): the branch bodies are missing from this extract. */
3889 static inline void unreference_pic(H264Context *h, Picture *pic){
3892 if(pic == h->delayed_output_pic)
3895 for(i = 0; h->delayed_pic[i]; i++)
3896 if(pic == h->delayed_pic[i]){
/* Instantaneous decoder refresh: releases and clears all long-term and
 * short-term reference pictures (H.264 spec 8.2.5.1 for IDR pictures). */
3904 * instantaneous decoder refresh.
3906 static void idr(H264Context *h){
3909 for(i=0; i<16; i++){
3910 if (h->long_ref[i] != NULL) {
3911 unreference_pic(h, h->long_ref[i]);
3912 h->long_ref[i]= NULL;
3915 h->long_ref_count=0;
3917 for(i=0; i<h->short_ref_count; i++){
3918 unreference_pic(h, h->short_ref[i]);
3919 h->short_ref[i]= NULL;
3921 h->short_ref_count=0;
/* AVCodec flush callback: after a seek, drop the delayed-output queue and
 * the reference flag of the current picture so stale frames are not
 * emitted. (The idr() call clearing the reference lists is presumably on
 * a line missing from this extract — TODO confirm.) */
3924 /* forget old pics after a seek */
3925 static void flush_dpb(AVCodecContext *avctx){
3926 H264Context *h= avctx->priv_data;
3928 for(i=0; i<16; i++) {
3929 if(h->delayed_pic[i])
3930 h->delayed_pic[i]->reference= 0;
3931 h->delayed_pic[i]= NULL;
3933 if(h->delayed_output_pic)
3934 h->delayed_output_pic->reference= 0;
3935 h->delayed_output_pic= NULL;
3937 if(h->s.current_picture_ptr)
3938 h->s.current_picture_ptr->reference= 0;
/* Removes the short-term reference with the given frame_num from
 * h->short_ref, compacting the array.
 * @return the removed picture or NULL if it was not found */
3943 * @return the removed picture or NULL if an error occurs
3945 static Picture * remove_short(H264Context *h, int frame_num){
3946 MpegEncContext * const s = &h->s;
3949 if(s->avctx->debug&FF_DEBUG_MMCO)
3950 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3952 for(i=0; i<h->short_ref_count; i++){
3953 Picture *pic= h->short_ref[i];
3954 if(s->avctx->debug&FF_DEBUG_MMCO)
3955 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3956 if(pic->frame_num == frame_num){
3957 h->short_ref[i]= NULL;
// close the gap left by the removed entry (array of pointers)
3958 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3959 h->short_ref_count--;
/* Removes the long-term reference at index i from h->long_ref.
 * @return the removed picture or NULL if the slot was empty */
3968 * @return the removed picture or NULL if an error occurs
3970 static Picture * remove_long(H264Context *h, int i){
3973 pic= h->long_ref[i];
3974 h->long_ref[i]= NULL;
3975 if(pic) h->long_ref_count--;
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled. */
3981 * print short term list
3983 static void print_short_term(H264Context *h) {
3985 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3986 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3987 for(i=0; i<h->short_ref_count; i++){
3988 Picture *pic= h->short_ref[i];
3989 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/* Debug helper: dumps the long-term reference list when FF_DEBUG_MMCO
 * is enabled. (The NULL-slot check is presumably on a missing line
 * before the av_log — TODO confirm.) */
3995 * print long term list
3997 static void print_long_term(H264Context *h) {
3999 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4000 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4001 for(i = 0; i < 16; i++){
4002 Picture *pic= h->long_ref[i];
4004 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/* Applies the decoded memory management control operations (MMCO) to the
 * reference picture buffers (H.264 spec 8.2.5.4), then — in sliding-window
 * mode — inserts the current picture as a new short-term reference.
 * NOTE(review): many original lines (case labels, breaks, braces) are
 * missing from this extract; the visible body is incomplete. */
4011 * Executes the reference picture marking (memory management control operations).
4013 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4014 MpegEncContext * const s = &h->s;
4016 int current_is_long=0;
4019 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4020 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4022 for(i=0; i<mmco_count; i++){
4023 if(s->avctx->debug&FF_DEBUG_MMCO)
4024 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4026 switch(mmco[i].opcode){
// MMCO 1: mark a short-term ref as unused
4027 case MMCO_SHORT2UNUSED:
4028 pic= remove_short(h, mmco[i].short_frame_num);
4030 unreference_pic(h, pic);
4031 else if(s->avctx->debug&FF_DEBUG_MMCO)
4032 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// MMCO 3: move a short-term ref to a long-term slot
4034 case MMCO_SHORT2LONG:
4035 pic= remove_long(h, mmco[i].long_index);
4036 if(pic) unreference_pic(h, pic);
4038 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4039 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4040 h->long_ref_count++;
// MMCO 2: mark a long-term ref as unused
4042 case MMCO_LONG2UNUSED:
4043 pic= remove_long(h, mmco[i].long_index);
4045 unreference_pic(h, pic);
4046 else if(s->avctx->debug&FF_DEBUG_MMCO)
4047 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// MMCO 6 (case label missing): mark current picture as long-term
4050 pic= remove_long(h, mmco[i].long_index);
4051 if(pic) unreference_pic(h, pic);
4053 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4054 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4055 h->long_ref_count++;
// MMCO 4: set max long-term index, dropping higher-indexed refs
4059 case MMCO_SET_MAX_LONG:
4060 assert(mmco[i].long_index <= 16);
4061 // just remove the long term which index is greater than new max
4062 for(j = mmco[i].long_index; j<16; j++){
4063 pic = remove_long(h, j);
4064 if (pic) unreference_pic(h, pic);
// MMCO 5 (case label missing): reset — clear all references
4068 while(h->short_ref_count){
4069 pic= remove_short(h, h->short_ref[0]->frame_num);
4070 unreference_pic(h, pic);
4072 for(j = 0; j < 16; j++) {
4073 pic= remove_long(h, j);
4074 if(pic) unreference_pic(h, pic);
// sliding window: prepend the current picture as short-term ref
4081 if(!current_is_long){
4082 pic= remove_short(h, s->current_picture_ptr->frame_num);
4084 unreference_pic(h, pic);
4085 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4088 if(h->short_ref_count)
4089 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4091 h->short_ref[0]= s->current_picture_ptr;
4092 h->short_ref[0]->long_ref=0;
4093 h->short_ref_count++;
4096 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header (H.264 spec 7.3.3.3)
 * into h->mmco[]. For IDR slices reads no_output_of_prior_pics_flag and
 * long_term_reference_flag; otherwise either parses explicit MMCO ops or
 * synthesizes a sliding-window MMCO_SHORT2UNUSED when the buffer is full.
 * NOTE(review): several original lines are missing from this extract. */
4101 static int decode_ref_pic_marking(H264Context *h){
4102 MpegEncContext * const s = &h->s;
4105 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// get_bits1() - 1 maps flag {0,1} to {-1,0}
4106 s->broken_link= get_bits1(&s->gb) -1;
4107 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4108 if(h->mmco[0].long_index == -1)
4111 h->mmco[0].opcode= MMCO_LONG;
4115 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4116 for(i= 0; i<MAX_MMCO_COUNT; i++) {
// NOTE(review): stray double semicolon below (harmless)
4117 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4119 h->mmco[i].opcode= opcode;
4120 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1, wrapped modulo max_frame_num
4121 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4122 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4123 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4127 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4128 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4129 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4130 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4135 if(opcode > MMCO_LONG){
4136 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4139 if(opcode == MMCO_END)
4144 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// sliding window: evict the oldest short-term ref when the DPB is full
4146 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4147 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4148 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Derives the picture order count (POC) of the current picture per H.264
 * spec 8.2.1: type 0 uses poc_lsb/poc_msb wrap logic, type 1 uses the
 * SPS offset_for_ref_frame cycle, type 2 derives POC from frame_num.
 * Stores the field POCs and frame POC into the current picture.
 * NOTE(review): some original lines (else branches, field_poc[0]
 * assignment for type 0) are missing from this extract. */
4158 static int init_poc(H264Context *h){
4159 MpegEncContext * const s = &h->s;
4160 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4163 if(h->nal_unit_type == NAL_IDR_SLICE){
4164 h->frame_num_offset= 0;
// frame_num wrapped, so advance the offset by one full period
4166 if(h->frame_num < h->prev_frame_num)
4167 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4169 h->frame_num_offset= h->prev_frame_num_offset;
4172 if(h->sps.poc_type==0){
4173 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4175 if(h->nal_unit_type == NAL_IDR_SLICE){
// detect poc_lsb wrap-around in either direction (spec 8.2.1.1)
4180 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4181 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4182 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4183 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4185 h->poc_msb = h->prev_poc_msb;
4186 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4188 field_poc[1] = h->poc_msb + h->poc_lsb;
4189 if(s->picture_structure == PICT_FRAME)
4190 field_poc[1] += h->delta_poc_bottom;
4191 }else if(h->sps.poc_type==1){
4192 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4195 if(h->sps.poc_cycle_length != 0)
4196 abs_frame_num = h->frame_num_offset + h->frame_num;
4200 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4203 expected_delta_per_poc_cycle = 0;
4204 for(i=0; i < h->sps.poc_cycle_length; i++)
4205 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4207 if(abs_frame_num > 0){
4208 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4209 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4211 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4212 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4213 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4217 if(h->nal_ref_idc == 0)
4218 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4220 field_poc[0] = expectedpoc + h->delta_poc[0];
4221 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4223 if(s->picture_structure == PICT_FRAME)
4224 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decoding order; non-refs get odd values
4227 if(h->nal_unit_type == NAL_IDR_SLICE){
4230 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4231 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4237 if(s->picture_structure != PICT_BOTTOM_FIELD)
4238 s->current_picture_ptr->field_poc[0]= field_poc[0];
4239 if(s->picture_structure != PICT_TOP_FIELD)
4240 s->current_picture_ptr->field_poc[1]= field_poc[1];
4241 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4242 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4248 * decodes a slice header.
4249 * this will also call MPV_common_init() and frame_start() as needed,
4251 static int decode_slice_header(H264Context *h){
4252 MpegEncContext * const s = &h->s;
4253 int first_mb_in_slice, pps_id;
4254 int num_ref_idx_active_override_flag;
4255 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4257 int default_ref_list_done = 0;
// pictures with nal_ref_idc==0 are never used as references and may be dropped
4259 s->current_picture.reference= h->nal_ref_idc != 0;
4260 s->dropable= h->nal_ref_idc == 0;
4262 first_mb_in_slice= get_ue_golomb(&s->gb);
// slice_type values 5..9 mean "same type for all slices of this picture"
4264 slice_type= get_ue_golomb(&s->gb);
4266 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4271 h->slice_type_fixed=1;
4273 h->slice_type_fixed=0;
4275 slice_type= slice_type_map[ slice_type ];
// the default reference list does not depend on slice-local data for I slices
// or for slices repeating the picture's first slice type, so build it early
4276 if (slice_type == I_TYPE
4277 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4278 default_ref_list_done = 1;
4280 h->slice_type= slice_type;
4282 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
// look up the active PPS, then the SPS it references; both are validated
// by checking a field that is never 0 in a parsed parameter set
4284 pps_id= get_ue_golomb(&s->gb);
4286 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4289 h->pps= h->pps_buffer[pps_id];
4290 if(h->pps.slice_group_count == 0){
4291 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4295 h->sps= h->sps_buffer[ h->pps.sps_id ];
4296 if(h->sps.log2_max_frame_num == 0){
4297 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
// dequant tables depend on the PPS; rebuild them only on a PPS change
4301 if(h->dequant_coeff_pps != pps_id){
4302 h->dequant_coeff_pps = pps_id;
4303 init_dequant_tables(h);
// picture dimensions in macroblocks; height doubles for field coding
4306 s->mb_width= h->sps.mb_width;
4307 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4309 h->b_stride= s->mb_width*4;
4310 h->b8_stride= s->mb_width*2;
// apply the SPS cropping rectangle to get the display dimensions
4312 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4313 if(h->sps.frame_mbs_only_flag)
4314 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4316 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4318 if (s->context_initialized
4319 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4323 if (!s->context_initialized) {
4324 if (MPV_common_init(s) < 0)
// scan tables: if the IDCT is the plain C one the canonical zigzag order is
// used; otherwise the order is permuted to match the IDCT's coefficient layout
4327 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4328 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4329 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4332 for(i=0; i<16; i++){
4333 #define T(x) (x>>2) | ((x<<2) & 0xF)
4334 h->zigzag_scan[i] = T(zigzag_scan[i]);
4335 h-> field_scan[i] = T( field_scan[i]);
4339 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4340 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4341 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4344 for(i=0; i<64; i++){
4345 #define T(x) (x>>3) | ((x&7)<<3)
4346 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4347 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
// lossless (transform-bypass) blocks always use the unpermuted scan tables,
// selected at runtime via the *_q0 pointers when qscale==0
4351 if(h->sps.transform_bypass){ //FIXME same ugly
4352 h->zigzag_scan_q0 = zigzag_scan;
4353 h->field_scan_q0 = field_scan;
4354 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4355 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4357 h->zigzag_scan_q0 = h->zigzag_scan;
4358 h->field_scan_q0 = h->field_scan;
4359 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4360 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4365 s->avctx->width = s->width;
4366 s->avctx->height = s->height;
4367 s->avctx->sample_aspect_ratio= h->sps.sar;
4368 if(!s->avctx->sample_aspect_ratio.den)
4369 s->avctx->sample_aspect_ratio.den = 1;
4371 if(h->sps.timing_info_present_flag){
4372 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// workaround: old x264 builds (< 44) wrote timestamps at half the tick rate
4373 if(h->x264_build > 0 && h->x264_build < 44)
4374 s->avctx->time_base.den *= 2;
4375 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4376 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
// first slice of the picture triggers frame-level setup
4380 if(h->slice_num == 0){
4381 if(frame_start(h) < 0)
4385 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4386 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// picture structure: frame / top field / bottom field; MBAFF only in frames
4388 h->mb_aff_frame = 0;
4389 if(h->sps.frame_mbs_only_flag){
4390 s->picture_structure= PICT_FRAME;
4392 if(get_bits1(&s->gb)) { //field_pic_flag
4393 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4395 s->picture_structure= PICT_FRAME;
4396 first_mb_in_slice <<= h->sps.mb_aff;
4397 h->mb_aff_frame = h->sps.mb_aff;
4401 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4402 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4403 if(s->mb_y >= s->mb_height){
// curr_pic_num/max_pic_num use frame numbering for frames and
// field numbering (doubled) for field pictures
4407 if(s->picture_structure==PICT_FRAME){
4408 h->curr_pic_num= h->frame_num;
4409 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4411 h->curr_pic_num= 2*h->frame_num;
4412 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4415 if(h->nal_unit_type == NAL_IDR_SLICE){
4416 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC-related syntax elements, depending on poc_type (see init_poc())
4419 if(h->sps.poc_type==0){
4420 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4422 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4423 h->delta_poc_bottom= get_se_golomb(&s->gb);
4427 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4428 h->delta_poc[0]= get_se_golomb(&s->gb);
4430 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4431 h->delta_poc[1]= get_se_golomb(&s->gb);
4436 if(h->pps.redundant_pic_cnt_present){
4437 h->redundant_pic_count= get_ue_golomb(&s->gb);
4440 //set defaults, might be overriden a few line later
4441 h->ref_count[0]= h->pps.ref_count[0];
4442 h->ref_count[1]= h->pps.ref_count[1];
4444 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4445 if(h->slice_type == B_TYPE){
4446 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4448 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4450 if(num_ref_idx_active_override_flag){
4451 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4452 if(h->slice_type==B_TYPE)
4453 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4455 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4456 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4462 if(!default_ref_list_done){
4463 fill_default_ref_list(h);
4466 if(decode_ref_pic_list_reordering(h) < 0)
// explicit weighted prediction (P/SP, or B with weighted_bipred_idc==1),
// otherwise implicit weights for B slices with weighted_bipred_idc==2
4469 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4470 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4471 pred_weight_table(h);
4472 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4473 implicit_weight_table(h);
4477 if(s->current_picture.reference)
4478 decode_ref_pic_marking(h);
4480 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4481 h->cabac_init_idc = get_ue_golomb(&s->gb);
4483 h->last_qscale_diff = 0;
4484 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4485 if(s->qscale<0 || s->qscale>51){
4486 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4489 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4490 //FIXME qscale / qp ... stuff
4491 if(h->slice_type == SP_TYPE){
4492 get_bits1(&s->gb); /* sp_for_switch_flag */
4494 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4495 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking filter control; note disable_deblocking_filter_idc uses
// inverted semantics for values 0/1, hence the xor below
4498 h->deblocking_filter = 1;
4499 h->slice_alpha_c0_offset = 0;
4500 h->slice_beta_offset = 0;
4501 if( h->pps.deblocking_filter_parameters_present ) {
4502 h->deblocking_filter= get_ue_golomb(&s->gb);
4503 if(h->deblocking_filter < 2)
4504 h->deblocking_filter^= 1; // 1<->0
4506 if( h->deblocking_filter ) {
4507 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4508 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4511 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4512 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4513 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4514 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4515 h->deblocking_filter= 0;
// NOTE(review): literal '?' placeholder — this line cannot compile if
// reached, so it is presumably disabled (FMO slice groups unsupported);
// the field width should be ceil(log2(PicSizeInMapUnits/SliceGroupChangeRate + 1)).
// TODO confirm against the surrounding (not visible) preprocessor guards.
4518 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4519 slice_group_change_cycle= get_bits(&s->gb, ?);
4524 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4525 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4527 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4529 av_get_pict_type_char(h->slice_type),
4530 pps_id, h->frame_num,
4531 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4532 h->ref_count[0], h->ref_count[1],
4534 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4536 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads the unary level_prefix code used by CAVLC level decoding:
 * the number of leading zero bits before the first 1 bit.
 * Uses the raw bitstream-reader macros for speed.
 */
4546 static inline int get_level_prefix(GetBitContext *gb){
4550 OPEN_READER(re, gb);
4551 UPDATE_CACHE(re, gb);
4552 buf=GET_CACHE(re, gb);
// position of the first set bit in the 32-bit cache determines the prefix
4554 log= 32 - av_log2(buf);
4556 print_bin(buf>>(32-log), log);
4557 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume the prefix zeros plus the terminating 1 bit
4560 LAST_SKIP_BITS(re, gb, log);
4561 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock.
 * It is disallowed if any sub-macroblock partition is smaller than 8x8,
 * or if a direct sub-partition is present without direct_8x8_inference.
 */
4566 static inline int get_dct8x8_allowed(H264Context *h){
4569 if(!IS_SUB_8X8(h->sub_mb_type[i])
4570 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4577 * decodes a residual block (CAVLC, H.264 spec subclause 9.2).
4578 * @param n block index (LUMA_DC_BLOCK_INDEX / CHROMA_DC_BLOCK_INDEX for DC blocks)
4579 * @param scantable scantable
4580 * @param qmul dequantization table, or NULL to store raw levels (DC blocks)
4580 * @param max_coeff number of coefficients in the block
4581 * @return <0 if an error occured
4583 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4584 MpegEncContext * const s = &h->s;
4585 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4587 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4589 //FIXME put trailing_onex into the context
// coeff_token jointly codes (total_coeff, trailing_ones); the VLC table is
// selected from the predicted non-zero count of neighbouring blocks
4591 if(n == CHROMA_DC_BLOCK_INDEX){
4592 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4593 total_coeff= coeff_token>>2;
4595 if(n == LUMA_DC_BLOCK_INDEX){
4596 total_coeff= pred_non_zero_count(h, 0);
4597 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4598 total_coeff= coeff_token>>2;
4600 total_coeff= pred_non_zero_count(h, n);
4601 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4602 total_coeff= coeff_token>>2;
4603 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4607 //FIXME set last_non_zero?
// trailing ones are coded as sign bits only (value +/-1)
4612 trailing_ones= coeff_token&3;
4613 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4614 assert(total_coeff<=16);
4616 for(i=0; i<trailing_ones; i++){
4617 level[i]= 1 - 2*get_bits1(gb);
// first non-trailing-one level: suffix_length starts at 0 or 1 depending on
// total_coeff/trailing_ones, with special escape handling for prefix 14/15
4621 int level_code, mask;
4622 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4623 int prefix= get_level_prefix(gb);
4625 //first coefficient has suffix_length equal to 0 or 1
4626 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4628 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4630 level_code= (prefix<<suffix_length); //part
4631 }else if(prefix==14){
4633 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4635 level_code= prefix + get_bits(gb, 4); //part
4636 }else if(prefix==15){
4637 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4638 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4640 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4644 if(trailing_ones < 3) level_code += 2;
// map the unsigned level_code to a signed level (even->positive, odd->negative)
4649 mask= -(level_code&1);
4650 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4653 //remaining coefficients have suffix_length > 0
4654 for(;i<total_coeff;i++) {
4655 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4656 prefix = get_level_prefix(gb);
4658 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4659 }else if(prefix==15){
4660 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4662 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4665 mask= -(level_code&1);
4666 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// suffix_length grows adaptively as level magnitudes increase
4667 if(level_code > suffix_limit[suffix_length])
// total_zeros: if the block is full there is nothing left to code
4672 if(total_coeff == max_coeff)
4675 if(n == CHROMA_DC_BLOCK_INDEX)
4676 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4678 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// place levels from highest scan position downwards, consuming run_before
// codes; the qmul==NULL path stores raw levels (DC blocks dequantized later)
4681 coeff_num = zeros_left + total_coeff - 1;
4682 j = scantable[coeff_num];
4684 block[j] = level[0];
4685 for(i=1;i<total_coeff;i++) {
4688 else if(zeros_left < 7){
4689 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4691 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4693 zeros_left -= run_before;
4694 coeff_num -= 1 + run_before;
4695 j= scantable[ coeff_num ];
// same placement loop, but with dequantization applied ((level*qmul+32)>>6)
4700 block[j] = (level[0] * qmul[j] + 32)>>6;
4701 for(i=1;i<total_coeff;i++) {
4704 else if(zeros_left < 7){
4705 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4707 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4709 zeros_left -= run_before;
4710 coeff_num -= 1 + run_before;
4711 j= scantable[ coeff_num ];
4713 block[j]= (level[i] * qmul[j] + 32)>>6;
// a negative zeros_left means the run_before codes overran the block
4718 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4726 * decodes a P_SKIP or B_SKIP macroblock: no residual and no motion data
4726 * are coded, motion is inferred (pskip prediction or direct prediction).
4728 static void decode_mb_skip(H264Context *h){
4729 MpegEncContext * const s = &h->s;
4730 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// a skipped MB has no residual: clear all non-zero-count bookkeeping
4733 memset(h->non_zero_count[mb_xy], 0, 16);
4734 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// in MBAFF, the field flag of a skipped top MB of a pair is still coded
4736 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4737 h->mb_field_decoding_flag= get_bits1(&s->gb);
4739 if(h->mb_field_decoding_flag)
4740 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion comes from direct prediction
4742 if( h->slice_type == B_TYPE )
4744 // just for fill_caches. pred_direct_motion will set the real mb_type
4745 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4747 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4748 pred_direct_motion(h, &mb_type);
4750 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4751 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP: 16x16 list-0 prediction with the inferred pskip motion vector
4757 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4759 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4760 pred_pskip_motion(h, &mx, &my);
4761 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4762 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4764 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// commit motion and per-MB state for deblocking / neighbour prediction
4767 write_back_motion(h, mb_type);
4768 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4769 s->current_picture.qscale_table[mb_xy]= s->qscale;
4770 h->slice_table[ mb_xy ]= h->slice_num;
4771 h->prev_mb_skipped= 1;
4775 * decodes a macroblock using CAVLC entropy coding
4776 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4778 static int decode_mb_cavlc(H264Context *h){
4779 MpegEncContext * const s = &h->s;
4780 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4781 int mb_type, partition_count, cbp;
4782 int dct8x8_allowed= h->pps.transform_8x8_mode;
4784 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4786 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4787 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// mb_skip_run counts skipped MBs in P/B slices; consume one per MB
4789 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4790 if(s->mb_skip_run==-1)
4791 s->mb_skip_run= get_ue_golomb(&s->gb);
4793 if (s->mb_skip_run--) {
// MBAFF: field flag is coded on the top MB of a pair (or after a skip)
4798 if(h->mb_aff_frame){
4799 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4800 h->mb_field_decoding_flag = get_bits1(&s->gb);
4802 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4804 h->prev_mb_skipped= 0;
// map the coded mb_type through the per-slice-type info table; large values
// in P/B slices fall through to intra MB types
4806 mb_type= get_ue_golomb(&s->gb);
4807 if(h->slice_type == B_TYPE){
4809 partition_count= b_mb_type_info[mb_type].partition_count;
4810 mb_type= b_mb_type_info[mb_type].type;
4813 goto decode_intra_mb;
4815 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4817 partition_count= p_mb_type_info[mb_type].partition_count;
4818 mb_type= p_mb_type_info[mb_type].type;
4821 goto decode_intra_mb;
4824 assert(h->slice_type == I_TYPE);
4827 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// intra MB types carry an implied cbp and I16x16 prediction mode
4831 cbp= i_mb_type_info[mb_type].cbp;
4832 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4833 mb_type= i_mb_type_info[mb_type].type;
4836 if(h->mb_field_decoding_flag)
4837 mb_type |= MB_TYPE_INTERLACED;
4839 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw byte-aligned samples, no prediction/transform
4841 if(IS_INTRA_PCM(mb_type)){
4844 // we assume these blocks are very rare so we dont optimize it
4845 align_get_bits(&s->gb);
4847 // The pixels are stored in the same order as levels in h->mb array.
4848 for(y=0; y<16; y++){
4849 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4850 for(x=0; x<16; x++){
4851 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4852 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4856 const int index= 256 + 4*(y&3) + 32*(y>>2);
4858 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4859 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4863 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4865 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4866 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4870 // In deblocking, the quantizer is 0
4871 s->current_picture.qscale_table[mb_xy]= 0;
4872 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4873 // All coeffs are present
4874 memset(h->non_zero_count[mb_xy], 16, 16);
4876 s->current_picture.mb_type[mb_xy]= mb_type;
4880 fill_caches(h, mb_type, 0);
// intra MBs: read the per-4x4 prediction modes (or validate the 16x16 mode)
// plus the chroma prediction mode
4883 if(IS_INTRA(mb_type)){
4884 // init_top_left_availability(h);
4885 if(IS_INTRA4x4(mb_type)){
4888 if(dct8x8_allowed && get_bits1(&s->gb)){
4889 mb_type |= MB_TYPE_8x8DCT;
4893 // fill_intra4x4_pred_table(h);
// each 4x4 mode is either the predicted mode (1 bit) or a 3-bit remainder,
// adjusted to skip the predicted value
4894 for(i=0; i<16; i+=di){
4895 const int mode_coded= !get_bits1(&s->gb);
4896 const int predicted_mode= pred_intra_mode(h, i);
4900 const int rem_mode= get_bits(&s->gb, 3);
4901 if(rem_mode<predicted_mode)
4906 mode= predicted_mode;
4910 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4912 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4914 write_back_intra_pred_mode(h);
4915 if( check_intra4x4_pred_mode(h) < 0)
4918 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4919 if(h->intra16x16_pred_mode < 0)
4922 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4924 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4925 if(h->chroma_pred_mode < 0)
// 8x8 partitioned MB: read the four sub_mb_types, then per-sub-block
// reference indices and motion vectors
4927 }else if(partition_count==4){
4928 int i, j, sub_partition_count[4], list, ref[2][4];
4930 if(h->slice_type == B_TYPE){
4932 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4933 if(h->sub_mb_type[i] >=13){
4934 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4937 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4938 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4940 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4941 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4942 pred_direct_motion(h, &mb_type);
// mark the inner corners unavailable so median MV prediction of the
// remaining partitions does not use direct-filled values
4943 h->ref_cache[0][scan8[4]] =
4944 h->ref_cache[1][scan8[4]] =
4945 h->ref_cache[0][scan8[12]] =
4946 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4949 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4951 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4952 if(h->sub_mb_type[i] >=4){
4953 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4956 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4957 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices, one per 8x8 sub-block and list (direct blocks excluded)
4961 for(list=0; list<2; list++){
4962 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4963 if(ref_count == 0) continue;
4964 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4968 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4969 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4970 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4979 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors: for each used sub-partition predict, add the coded mvd,
// and replicate into the mv_cache according to the sub-partition shape
4981 for(list=0; list<2; list++){
4982 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4983 if(ref_count == 0) continue;
4986 if(IS_DIRECT(h->sub_mb_type[i])) {
4987 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4990 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4991 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4993 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4994 const int sub_mb_type= h->sub_mb_type[i];
4995 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4996 for(j=0; j<sub_partition_count[i]; j++){
4998 const int index= 4*i + block_width*j;
4999 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5000 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5001 mx += get_se_golomb(&s->gb);
5002 my += get_se_golomb(&s->gb);
5003 tprintf("final mv:%d %d\n", mx, my);
5005 if(IS_SUB_8X8(sub_mb_type)){
5006 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5007 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5008 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5009 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5010 }else if(IS_SUB_8X4(sub_mb_type)){
5011 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5012 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5013 }else if(IS_SUB_4X8(sub_mb_type)){
5014 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5015 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5017 assert(IS_SUB_4X4(sub_mb_type));
5018 mv_cache[ 0 ][0]= mx;
5019 mv_cache[ 0 ][1]= my;
5023 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// B_Direct_16x16: all motion comes from direct prediction
5029 }else if(IS_DIRECT(mb_type)){
5030 pred_direct_motion(h, &mb_type);
5031 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// 16x16 / 16x8 / 8x16 inter MBs: ref idx and one mvd per partition and list
5033 int list, mx, my, i;
5034 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5035 if(IS_16X16(mb_type)){
5036 for(list=0; list<2; list++){
5037 if(h->ref_count[list]>0){
5038 if(IS_DIR(mb_type, 0, list)){
5039 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5040 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5042 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5045 for(list=0; list<2; list++){
5046 if(IS_DIR(mb_type, 0, list)){
5047 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5048 mx += get_se_golomb(&s->gb);
5049 my += get_se_golomb(&s->gb);
5050 tprintf("final mv:%d %d\n", mx, my);
5052 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5054 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5057 else if(IS_16X8(mb_type)){
5058 for(list=0; list<2; list++){
5059 if(h->ref_count[list]>0){
5061 if(IS_DIR(mb_type, i, list)){
5062 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5063 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5065 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5069 for(list=0; list<2; list++){
5071 if(IS_DIR(mb_type, i, list)){
5072 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5073 mx += get_se_golomb(&s->gb);
5074 my += get_se_golomb(&s->gb);
5075 tprintf("final mv:%d %d\n", mx, my);
5077 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5079 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5083 assert(IS_8X16(mb_type));
5084 for(list=0; list<2; list++){
5085 if(h->ref_count[list]>0){
5087 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5088 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5089 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5091 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5095 for(list=0; list<2; list++){
5097 if(IS_DIR(mb_type, i, list)){
5098 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5099 mx += get_se_golomb(&s->gb);
5100 my += get_se_golomb(&s->gb);
5101 tprintf("final mv:%d %d\n", mx, my);
5103 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5105 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5111 if(IS_INTER(mb_type))
5112 write_back_motion(h, mb_type);
// coded_block_pattern (coded explicitly except for I16x16 which implies it)
5114 if(!IS_INTRA16x16(mb_type)){
5115 cbp= get_ue_golomb(&s->gb);
5117 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5121 if(IS_INTRA4x4(mb_type))
5122 cbp= golomb_to_intra4x4_cbp[cbp];
5124 cbp= golomb_to_inter_cbp[cbp];
// transform_size_8x8_flag for inter MBs with coded luma
5127 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5128 if(get_bits1(&s->gb))
5129 mb_type |= MB_TYPE_8x8DCT;
5131 s->current_picture.mb_type[mb_xy]= mb_type;
// residual data: dquant, then luma (DC+AC for I16x16, per-8x8 otherwise)
// and chroma (DC then AC) via decode_residual()
5133 if(cbp || IS_INTRA16x16(mb_type)){
5134 int i8x8, i4x4, chroma_idx;
5135 int chroma_qp, dquant;
5136 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5137 const uint8_t *scan, *scan8x8, *dc_scan;
5139 // fill_non_zero_count_cache(h);
// scan order depends on field/frame coding; the *_q0 tables are the
// unpermuted variants used when qscale==0 (lossless)
5141 if(IS_INTERLACED(mb_type)){
5142 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5143 dc_scan= luma_dc_field_scan;
5145 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5146 dc_scan= luma_dc_zigzag_scan;
5148 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5150 dquant= get_se_golomb(&s->gb);
5152 if( dquant > 25 || dquant < -26 ){
5153 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics
5157 s->qscale += dquant;
5158 if(((unsigned)s->qscale) > 51){
5159 if(s->qscale<0) s->qscale+= 52;
5160 else s->qscale-= 52;
5163 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5164 if(IS_INTRA16x16(mb_type)){
5165 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5166 return -1; //FIXME continue if partitioned and other return -1 too
5169 assert((cbp&15) == 0 || (cbp&15) == 15);
// I16x16 AC blocks skip the DC coefficient (scan+1, 15 coeffs)
5172 for(i8x8=0; i8x8<4; i8x8++){
5173 for(i4x4=0; i4x4<4; i4x4++){
5174 const int index= i4x4 + 4*i8x8;
5175 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5181 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5184 for(i8x8=0; i8x8<4; i8x8++){
5185 if(cbp & (1<<i8x8)){
// 8x8 transform: four interleaved 4x4 CAVLC scans fill one 8x8 block
5186 if(IS_8x8DCT(mb_type)){
5187 DCTELEM *buf = &h->mb[64*i8x8];
5189 for(i4x4=0; i4x4<4; i4x4++){
5190 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5191 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5194 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5195 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5197 for(i4x4=0; i4x4<4; i4x4++){
5198 const int index= i4x4 + 4*i8x8;
5200 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5206 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5207 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (2x2, no dequant table at this point)
5213 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5214 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// chroma AC blocks skip DC (scan+1, 15 coeffs), dequantized at chroma_qp
5220 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5221 for(i4x4=0; i4x4<4; i4x4++){
5222 const int index= 16 + 4*chroma_idx + i4x4;
5223 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5229 uint8_t * const nnz= &h->non_zero_count_cache[0];
5230 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5231 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5234 uint8_t * const nnz= &h->non_zero_count_cache[0];
5235 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5236 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5237 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5239 s->current_picture.qscale_table[mb_xy]= s->qscale;
5240 write_back_non_zero_count(h);
/**
 * Decodes the CABAC mb_field_decoding_flag for an MBAFF macroblock pair.
 * The context (0..2) counts how many of the left and above MB pairs,
 * within the same slice, are field-coded.
 */
5245 static int decode_cabac_field_decoding_flag(H264Context *h) {
5246 MpegEncContext * const s = &h->s;
5247 const int mb_x = s->mb_x;
// address the top MB of the current pair; neighbours are the pairs to the
// left and two rows above
5248 const int mb_y = s->mb_y & ~1;
5249 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5250 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5252 unsigned int ctx = 0;
5254 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5257 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5261 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type with CABAC.
 * @param ctx_base index of the first CABAC context for this slice type
 * @param intra_slice nonzero in I slices, where the first bin's context
 *                    additionally depends on the left/top neighbour types
 * @return 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16 variant
 *         (cbp_luma, cbp_chroma and the 16x16 prediction mode)
 */
5264 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5265 uint8_t *state= &h->cabac_state[ctx_base];
5269 MpegEncContext * const s = &h->s;
5270 const int mba_xy = h->left_mb_xy[0];
5271 const int mbb_xy = h->top_mb_xy;
// I-slice: context of the first bin counts non-I4x4 neighbours in this slice
5273 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5275 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5277 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5278 return 0; /* I4x4 */
5281 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5282 return 0; /* I4x4 */
// the terminate bin distinguishes I_PCM from I_16x16
5285 if( get_cabac_terminate( &h->cabac ) )
5286 return 25; /* PCM */
// remaining bins assemble the I_16x16 sub-type
5288 mb_type = 1; /* I16x16 */
5289 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5290 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5291 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5292 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5293 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes the CABAC mb_type syntax element for the current slice type.
 * I slices delegate entirely to decode_cabac_intra_mb_type(); P and B
 * slices first decode the inter sub-tree and fall back to the intra
 * sub-tree (offset so the caller can map through the mb_type tables).
 */
5297 static int decode_cabac_mb_type( H264Context *h ) {
5298 MpegEncContext * const s = &h->s;
5300 if( h->slice_type == I_TYPE ) {
5301 return decode_cabac_intra_mb_type(h, 3, 1);
5302 } else if( h->slice_type == P_TYPE ) {
// first bin: inter (0) vs intra (1)
5303 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5305 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5306 /* P_L0_D16x16, P_8x8 */
5307 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5309 /* P_L0_D8x16, P_L0_D16x8 */
5310 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
5313 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5315 } else if( h->slice_type == B_TYPE ) {
5316 const int mba_xy = h->left_mb_xy[0];
5317 const int mbb_xy = h->top_mb_xy;
// context of the first bin counts non-direct neighbours in this slice
5321 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5323 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5326 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5327 return 0; /* B_Direct_16x16 */
5329 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5330 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix selects among the remaining B types; 13..15 are escapes
5333 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5334 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5335 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5336 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5338 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5339 else if( bits == 13 ) {
5340 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5341 } else if( bits == 14 )
5342 return 11; /* B_L1_L0_8x16 */
5343 else if( bits == 15 )
5344 return 22; /* B_8x8 */
// one extra bin extends 8..12 to the two-list partition types
5346 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5347 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5349 /* TODO SI/SP frames? */
/* Decodes the mb_skip_flag. Context (11+ctx for P, +13 more for B) is
 * derived from whether the left/top neighbour macroblocks are non-skip.
 * Returns the decoded bin (1 = skipped).
 * NOTE(review): the ctx++ lines under each neighbour test appear elided. */
5354 static int decode_cabac_mb_skip( H264Context *h) {
5355 MpegEncContext * const s = &h->s;
5356 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5357 const int mba_xy = mb_xy - 1;
5358 const int mbb_xy = mb_xy - s->mb_stride;
5361 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5363 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5366 if( h->slice_type == B_TYPE )
5368 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes one intra4x4 prediction mode. First bin (state 68) signals "use
 * the predicted mode"; otherwise three bins (state 69) build a 3-bit mode,
 * adjusted by +1 if it is >= the predicted mode (rem_intra4x4_pred_mode). */
5371 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5374 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5377 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5378 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5379 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5381 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode (truncated unary, max 3) with CABAC.
 * ctx for the first bin comes from neighbours having a nonzero chroma
 * prediction mode; subsequent bins use context 64+3. */
5387 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5388 const int mba_xy = h->left_mb_xy[0];
5389 const int mbb_xy = h->top_mb_xy;
5393 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5394 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5397 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5400 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5403 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5405 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4-block scan index to its (x, y) position
 * inside the macroblock, and the inverse (x, y) -> scan index table.
 * NOTE(review): the initializer rows of block_idx_xy appear elided. */
5411 static const uint8_t block_idx_x[16] = {
5412 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5414 static const uint8_t block_idx_y[16] = {
5415 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5417 static const uint8_t block_idx_xy[4][4] = {
/* Decodes the 4-bit luma coded_block_pattern, one bin per 8x8 block.
 * ctx for each bin depends on whether the left/top neighbouring 8x8 blocks
 * (possibly in the adjacent macroblock, via left_cbp/top_cbp) are coded.
 * NOTE(review): the cbp_a/cbp_b setup and accumulation lines are partly
 * elided; kept verbatim. */
5424 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5429 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5431 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5434 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5439 x = block_idx_x[4*i8x8];
5440 y = block_idx_y[4*i8x8];
5444 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5445 cbp_a = h->left_cbp;
5446 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5452 /* No need to test for skip as we put 0 for skip block */
5453 /* No need to test for IPCM as we put 1 for IPCM block */
/* left neighbour 8x8 block: wrap x-1 with &3 to index the neighbour MB */
5455 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5456 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5461 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5462 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5466 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/* Decodes the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * using contexts 77+ctx; ctx derived from the neighbours' chroma cbp bits
 * stored in the top nibble of left_cbp/top_cbp. */
5472 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5476 cbp_a = (h->left_cbp>>4)&0x03;
5477 cbp_b = (h-> top_cbp>>4)&0x03;
5480 if( cbp_a > 0 ) ctx++;
5481 if( cbp_b > 0 ) ctx += 2;
5482 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: distinguish DC-only (1) from DC+AC (2); ctx from ==2 tests */
5486 if( cbp_a == 2 ) ctx++;
5487 if( cbp_b == 2 ) ctx += 2;
5488 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta (unary with contexts 60+ctx) and maps the unary
 * value to a signed delta: even -> positive, odd -> negative.
 * The val > 102 guard prevents an infinite loop on corrupt streams
 * (presumably returning INT_MIN as the error value — elided here). */
5490 static int decode_cabac_mb_dqp( H264Context *h) {
5491 MpegEncContext * const s = &h->s;
5497 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5499 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5501 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5504 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5510 if(val > 102) //prevent infinite loop
5517 return -(val + 1)/2;
/* Decodes a P-slice sub_mb_type (8x8 partition shape) with contexts 21-23.
 * NOTE(review): the return statements for each branch appear elided. */
5519 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5520 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5522 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5524 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes a B-slice sub_mb_type with contexts 36-39; the returned index
 * selects an entry of b_sub_mb_type_info (0 = B_Direct_8x8). */
5528 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5530 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5531 return 0; /* B_Direct_8x8 */
5532 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5533 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5535 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5536 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5537 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: accumulate two more bins into the type index */
5540 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5541 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; ctx from how many neighbours use 8x8. */
5545 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5546 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes a reference index (unary, contexts 54+ctx) for partition n of
 * the given list. In B slices a neighbour's ref is ignored when that
 * neighbour is direct-predicted (direct_cache).
 * NOTE(review): ctx derivation and the loop body are partly elided. */
5549 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5550 int refa = h->ref_cache[list][scan8[n] - 1];
5551 int refb = h->ref_cache[list][scan8[n] - 8];
5555 if( h->slice_type == B_TYPE) {
5556 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5558 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5567 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* Decodes one motion-vector-difference component (l==0: x, contexts 40+;
 * l==1: y, contexts 47+). First-bin ctx from the sum of neighbour |mvd|;
 * magnitude is unary up to 9 then 3rd-order exp-Golomb bypass bins, with
 * a final bypass sign bin. */
5577 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5578 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5579 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5580 int ctxbase = (l == 0) ? 40 : 47;
5585 else if( amvd > 32 )
5590 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5595 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* escape: exp-Golomb suffix decoded entirely in bypass mode */
5603 while( get_cabac_bypass( &h->cabac ) ) {
5608 if( get_cabac_bypass( &h->cabac ) )
5612 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/* Computes the coded_block_flag context for a residual category:
 * cat 0 = luma DC (left/top flags live in bit 8 of left_cbp/top_cbp),
 * cat 1/2 = luma AC / 4x4 (non_zero_count_cache neighbours),
 * cat 3 = chroma DC (bits 6+idx of the cbp words),
 * else chroma AC. Returns ctx + 4*cat, indexing cabac_state[85 + ...]. */
5616 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5621 nza = h->left_cbp&0x100;
5622 nzb = h-> top_cbp&0x100;
5623 } else if( cat == 1 || cat == 2 ) {
5624 nza = h->non_zero_count_cache[scan8[idx] - 1];
5625 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5626 } else if( cat == 3 ) {
5627 nza = (h->left_cbp>>(6+idx))&0x01;
5628 nzb = (h-> top_cbp>>(6+idx))&0x01;
5631 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5632 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5641 return ctx + 4 * cat;
/* Decodes one residual block with CABAC: coded_block_flag, the
 * significance map (with separate 8x8 context tables and field/frame
 * context offsets), then coefficient magnitudes (unary + exp-Golomb
 * bypass escape at 15) and signs, written in reverse scan order with
 * optional dequantization via qmul (NULL for DC blocks).
 * Updates non_zero_count_cache / cbp_table bookkeeping per category.
 * NOTE(review): numerous lines (declarations, else branches, closing
 * braces) are elided in this view; kept verbatim. */
5644 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5645 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5646 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5647 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5648 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5649 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5650 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
5651 static const int significant_coeff_flag_offset_8x8[63] = {
5652 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5653 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5654 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5655 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5657 static const int last_coeff_flag_offset_8x8[63] = {
5658 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5659 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5660 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5661 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5667 int coeff_count = 0;
5670 int abslevelgt1 = 0;
5672 uint8_t *significant_coeff_ctx_base;
5673 uint8_t *last_coeff_ctx_base;
5674 uint8_t *abs_level_m1_ctx_base;
5676 /* cat: 0-> DC 16x16 n = 0
5677 * 1-> AC 16x16 n = luma4x4idx
5678 * 2-> Luma4x4 n = luma4x4idx
5679 * 3-> DC Chroma n = iCbCr
5680 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5681 * 5-> Luma8x8 n = 4 * luma8x8idx
5684 /* read coded block flag */
5686 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5687 if( cat == 1 || cat == 2 )
5688 h->non_zero_count_cache[scan8[n]] = 0;
5690 h->non_zero_count_cache[scan8[16+n]] = 0;
/* select context tables; field offsets switch frame/field CABAC contexts */
5696 significant_coeff_ctx_base = h->cabac_state
5697 + significant_coeff_flag_offset[cat]
5698 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5699 last_coeff_ctx_base = h->cabac_state
5700 + last_significant_coeff_flag_offset[cat]
5701 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5702 abs_level_m1_ctx_base = h->cabac_state
5703 + coeff_abs_level_m1_offset[cat];
/* significance map: records scan positions of nonzero coefficients */
5706 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5707 for(last= 0; last < coefs; last++) { \
5708 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5709 if( get_cabac( &h->cabac, sig_ctx )) { \
5710 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5711 index[coeff_count++] = last; \
5712 if( get_cabac( &h->cabac, last_ctx ) ) { \
5718 DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5719 last_coeff_flag_offset_8x8[last] );
5721 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5723 if( last == max_coeff -1 ) {
5724 index[coeff_count++] = last;
5726 assert(coeff_count > 0);
5729 h->cbp_table[mb_xy] |= 0x100;
5730 else if( cat == 1 || cat == 2 )
5731 h->non_zero_count_cache[scan8[n]] = coeff_count;
5733 h->cbp_table[mb_xy] |= 0x40 << n;
5735 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5738 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode levels in reverse scan order; ctx adapts to previous levels */
5741 for( i = coeff_count - 1; i >= 0; i-- ) {
5742 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5743 int j= scantable[index[i]];
5745 if( get_cabac( &h->cabac, ctx ) == 0 ) {
5747 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5750 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5751 else block[j] = ( qmul[j] + 32) >> 6;
5757 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5758 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
5762 if( coeff_abs >= 15 ) {
/* exp-Golomb escape in bypass mode for large magnitudes */
5764 while( get_cabac_bypass( &h->cabac ) ) {
5765 coeff_abs += 1 << j;
5770 if( get_cabac_bypass( &h->cabac ) )
5771 coeff_abs += 1 << j ;
5776 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5777 else block[j] = coeff_abs;
5779 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5780 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* Computes top_mb_xy / left_mb_xy[0] for the current macroblock. In
 * MBAFF frames the neighbour indices are adjusted according to the
 * field/frame coding of the current MB pair and its neighbours. */
5789 static void inline compute_mb_neighbors(H264Context *h)
5791 MpegEncContext * const s = &h->s;
5792 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5793 h->top_mb_xy = mb_xy - s->mb_stride;
5794 h->left_mb_xy[0] = mb_xy - 1;
5795 if(h->mb_aff_frame){
5796 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5797 const int top_pair_xy = pair_xy - s->mb_stride;
5798 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5799 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5800 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5801 const int bottom = (s->mb_y & 1);
5803 ? !curr_mb_frame_flag // bottom macroblock
5804 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5806 h->top_mb_xy -= s->mb_stride;
5808 if (left_mb_frame_flag != curr_mb_frame_flag) {
5809 h->left_mb_xy[0] = pair_xy - 1;
5816 * decodes a macroblock
5817 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CABAC macroblock decode: skip flag, MBAFF field flag, mb_type,
 * then per-type decoding — PCM passthrough, intra prediction modes,
 * sub-8x8 partitions, or 16x16/16x8/8x16 inter motion — followed by cbp,
 * mb_qp_delta and the residual blocks.
 * NOTE(review): many lines (locals, else branches, braces, returns) are
 * elided in this view; code kept verbatim. */
5819 static int decode_mb_cabac(H264Context *h) {
5820 MpegEncContext * const s = &h->s;
5821 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5822 int mb_type, partition_count, cbp = 0;
5823 int dct8x8_allowed= h->pps.transform_8x8_mode;
5825 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5827 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5828 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5829 /* read skip flags */
5830 if( decode_cabac_mb_skip( h ) ) {
5833 h->cbp_table[mb_xy] = 0;
5834 h->chroma_pred_mode_table[mb_xy] = 0;
5835 h->last_qscale_diff = 0;
5841 if(h->mb_aff_frame){
/* field flag is only coded for the top MB of a pair (or after a skip) */
5842 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5843 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5845 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5847 h->prev_mb_skipped = 0;
5849 compute_mb_neighbors(h);
5850 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5851 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the raw type index through the per-slice-type info tables */
5855 if( h->slice_type == B_TYPE ) {
5857 partition_count= b_mb_type_info[mb_type].partition_count;
5858 mb_type= b_mb_type_info[mb_type].type;
5861 goto decode_intra_mb;
5863 } else if( h->slice_type == P_TYPE ) {
5865 partition_count= p_mb_type_info[mb_type].partition_count;
5866 mb_type= p_mb_type_info[mb_type].type;
5869 goto decode_intra_mb;
5872 assert(h->slice_type == I_TYPE);
5874 partition_count = 0;
5875 cbp= i_mb_type_info[mb_type].cbp;
5876 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5877 mb_type= i_mb_type_info[mb_type].type;
5879 if(h->mb_field_decoding_flag)
5880 mb_type |= MB_TYPE_INTERLACED;
5882 h->slice_table[ mb_xy ]= h->slice_num;
5884 if(IS_INTRA_PCM(mb_type)) {
5888 // We assume these blocks are very rare so we dont optimize it.
5889 // FIXME The two following lines get the bitstream position in the cabac
5890 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5891 ptr= h->cabac.bytestream;
5892 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5894 // The pixels are stored in the same order as levels in h->mb array.
5895 for(y=0; y<16; y++){
5896 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5897 for(x=0; x<16; x++){
5898 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5899 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5903 const int index= 256 + 4*(y&3) + 32*(y>>2);
5905 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5906 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5910 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5912 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5913 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw PCM bytes */
5917 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5919 // All blocks are present
5920 h->cbp_table[mb_xy] = 0x1ef;
5921 h->chroma_pred_mode_table[mb_xy] = 0;
5922 // In deblocking, the quantizer is 0
5923 s->current_picture.qscale_table[mb_xy]= 0;
5924 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5925 // All coeffs are present
5926 memset(h->non_zero_count[mb_xy], 16, 16);
5927 s->current_picture.mb_type[mb_xy]= mb_type;
5931 fill_caches(h, mb_type, 0);
5933 if( IS_INTRA( mb_type ) ) {
5935 if( IS_INTRA4x4( mb_type ) ) {
5936 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5937 mb_type |= MB_TYPE_8x8DCT;
5938 for( i = 0; i < 16; i+=4 ) {
5939 int pred = pred_intra_mode( h, i );
5940 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5941 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5944 for( i = 0; i < 16; i++ ) {
5945 int pred = pred_intra_mode( h, i );
5946 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5948 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5951 write_back_intra_pred_mode(h);
5952 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5954 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5955 if( h->intra16x16_pred_mode < 0 ) return -1;
5957 h->chroma_pred_mode_table[mb_xy] =
5958 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5960 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5961 if( h->chroma_pred_mode < 0 ) return -1;
5962 } else if( partition_count == 4 ) {
/* 8x8 sub-macroblock partitions: sub types, refs, then mvds */
5963 int i, j, sub_partition_count[4], list, ref[2][4];
5965 if( h->slice_type == B_TYPE ) {
5966 for( i = 0; i < 4; i++ ) {
5967 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5968 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5969 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5971 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5972 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5973 pred_direct_motion(h, &mb_type);
5974 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5975 for( i = 0; i < 4; i++ )
5976 if( IS_DIRECT(h->sub_mb_type[i]) )
5977 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5981 for( i = 0; i < 4; i++ ) {
5982 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5983 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5984 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5988 for( list = 0; list < 2; list++ ) {
5989 if( h->ref_count[list] > 0 ) {
5990 for( i = 0; i < 4; i++ ) {
5991 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5992 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5993 if( h->ref_count[list] > 1 )
5994 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6000 h->ref_cache[list][ scan8[4*i]+1 ]=
6001 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6007 dct8x8_allowed = get_dct8x8_allowed(h);
6009 for(list=0; list<2; list++){
6011 if(IS_DIRECT(h->sub_mb_type[i])){
6012 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6015 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6017 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6018 const int sub_mb_type= h->sub_mb_type[i];
6019 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6020 for(j=0; j<sub_partition_count[i]; j++){
6023 const int index= 4*i + block_width*j;
6024 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6025 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6026 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6028 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6029 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6030 tprintf("final mv:%d %d\n", mx, my);
/* replicate mv/mvd into the cache cells covered by this sub-partition */
6032 if(IS_SUB_8X8(sub_mb_type)){
6033 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6034 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6035 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6036 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6038 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6039 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6040 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6041 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6042 }else if(IS_SUB_8X4(sub_mb_type)){
6043 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6044 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6046 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6047 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6048 }else if(IS_SUB_4X8(sub_mb_type)){
6049 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6050 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6052 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6053 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6055 assert(IS_SUB_4X4(sub_mb_type));
6056 mv_cache[ 0 ][0]= mx;
6057 mv_cache[ 0 ][1]= my;
6059 mvd_cache[ 0 ][0]= mx - mpx;
6060 mvd_cache[ 0 ][1]= my - mpy;
6064 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6065 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6066 p[0] = p[1] = p[8] = p[9] = 0;
6067 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6071 } else if( IS_DIRECT(mb_type) ) {
6072 pred_direct_motion(h, &mb_type);
6073 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6074 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6075 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6077 int list, mx, my, i, mpx, mpy;
6078 if(IS_16X16(mb_type)){
6079 for(list=0; list<2; list++){
6080 if(IS_DIR(mb_type, 0, list)){
6081 if(h->ref_count[list] > 0 ){
6082 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6083 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6086 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6088 for(list=0; list<2; list++){
6089 if(IS_DIR(mb_type, 0, list)){
6090 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6092 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6093 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6094 tprintf("final mv:%d %d\n", mx, my);
6096 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6097 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6099 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6102 else if(IS_16X8(mb_type)){
6103 for(list=0; list<2; list++){
6104 if(h->ref_count[list]>0){
6106 if(IS_DIR(mb_type, i, list)){
6107 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6108 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6110 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6114 for(list=0; list<2; list++){
6116 if(IS_DIR(mb_type, i, list)){
6117 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6118 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6119 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6120 tprintf("final mv:%d %d\n", mx, my);
6122 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6123 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6125 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6126 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6131 assert(IS_8X16(mb_type));
6132 for(list=0; list<2; list++){
6133 if(h->ref_count[list]>0){
6135 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6136 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6137 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6139 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6143 for(list=0; list<2; list++){
6145 if(IS_DIR(mb_type, i, list)){
6146 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6147 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6148 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6150 tprintf("final mv:%d %d\n", mx, my);
6151 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6152 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6154 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6155 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6162 if( IS_INTER( mb_type ) ) {
6163 h->chroma_pred_mode_table[mb_xy] = 0;
6164 write_back_motion( h, mb_type );
/* cbp: explicit for non-I16x16; I16x16 carries it in the type */
6167 if( !IS_INTRA16x16( mb_type ) ) {
6168 cbp = decode_cabac_mb_cbp_luma( h );
6169 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6172 h->cbp_table[mb_xy] = cbp;
6174 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6175 if( decode_cabac_mb_transform_size( h ) )
6176 mb_type |= MB_TYPE_8x8DCT;
6178 s->current_picture.mb_type[mb_xy]= mb_type;
6180 if( cbp || IS_INTRA16x16( mb_type ) ) {
6181 const uint8_t *scan, *scan8x8, *dc_scan;
6184 if(IS_INTERLACED(mb_type)){
6185 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6186 dc_scan= luma_dc_field_scan;
6188 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6189 dc_scan= luma_dc_zigzag_scan;
6191 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6193 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6194 if( dqp == INT_MIN ){
6195 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into [0,51] after adding the delta */
6199 if(((unsigned)s->qscale) > 51){
6200 if(s->qscale<0) s->qscale+= 52;
6201 else s->qscale-= 52;
6203 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6205 if( IS_INTRA16x16( mb_type ) ) {
6207 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6208 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6211 for( i = 0; i < 16; i++ ) {
6212 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6213 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6217 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6221 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6222 if( cbp & (1<<i8x8) ) {
6223 if( IS_8x8DCT(mb_type) ) {
6224 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6225 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6228 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6229 const int index = 4*i8x8 + i4x4;
6230 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6231 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6235 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6236 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6243 for( c = 0; c < 2; c++ ) {
6244 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6245 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6252 for( c = 0; c < 2; c++ ) {
6253 for( i = 0; i < 4; i++ ) {
6254 const int index = 16 + 4 * c + i;
6255 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6256 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6261 uint8_t * const nnz= &h->non_zero_count_cache[0];
6262 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6263 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6266 uint8_t * const nnz= &h->non_zero_count_cache[0];
6267 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6268 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6269 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6272 s->current_picture.qscale_table[mb_xy]= s->qscale;
6273 write_back_non_zero_count(h);
/* Deblocks one vertical luma edge (16 pixels). bS<4 uses the DSP
 * h264_h_loop_filter_luma with per-4px tc0 values; bS==4 (intra MB edge)
 * applies the strong filter per pixel row in C. */
6279 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6281 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6282 const int alpha = alpha_table[index_a];
6283 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6288 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6289 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6291 /* 16px edge length, because bS=4 is triggered by being at
6292 * the edge of an intra MB, so all 4 bS are the same */
6293 for( d = 0; d < 16; d++ ) {
6294 const int p0 = pix[-1];
6295 const int p1 = pix[-2];
6296 const int p2 = pix[-3];
6298 const int q0 = pix[0];
6299 const int q1 = pix[1];
6300 const int q2 = pix[2];
6302 if( ABS( p0 - q0 ) < alpha &&
6303 ABS( p1 - p0 ) < beta &&
6304 ABS( q1 - q0 ) < beta ) {
6306 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6307 if( ABS( p2 - p0 ) < beta)
6309 const int p3 = pix[-4];
/* strong filtering on the p side (3-tap smoothed values) */
6311 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6312 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6313 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6316 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6318 if( ABS( q2 - q0 ) < beta)
6320 const int q3 = pix[3];
6322 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6323 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6324 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6327 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6331 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6332 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6334 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: delegates entirely to the DSP
 * chroma loop-filter routines (normal for bS<4, intra variant for bS==4). */
6340 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6342 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6343 const int alpha = alpha_table[index_a];
6344 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6349 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6350 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6352 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical luma edge in MBAFF mode, per pixel row: bS and qp
 * are selected per row depending on field/frame coding of the MB pair,
 * then the normal (bS<4) or strong (bS==4) luma filter is applied. */
6356 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6358 for( i = 0; i < 16; i++, pix += stride) {
6364 int bS_index = (i >> 1);
6365 if (h->mb_field_decoding_flag) {
6367 bS_index |= (i & 1);
6370 if( bS[bS_index] == 0 ) {
/* choose qp of the field the row belongs to */
6374 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6375 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6376 alpha = alpha_table[index_a];
6377 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6380 if( bS[bS_index] < 4 ) {
6381 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6382 /* 4px edge length */
6383 const int p0 = pix[-1];
6384 const int p1 = pix[-2];
6385 const int p2 = pix[-3];
6386 const int q0 = pix[0];
6387 const int q1 = pix[1];
6388 const int q2 = pix[2];
6390 if( ABS( p0 - q0 ) < alpha &&
6391 ABS( p1 - p0 ) < beta &&
6392 ABS( q1 - q0 ) < beta ) {
6396 if( ABS( p2 - p0 ) < beta ) {
6397 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6400 if( ABS( q2 - q0 ) < beta ) {
6401 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6405 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6406 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6407 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6408 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6411 /* 4px edge length */
6412 const int p0 = pix[-1];
6413 const int p1 = pix[-2];
6414 const int p2 = pix[-3];
6416 const int q0 = pix[0];
6417 const int q1 = pix[1];
6418 const int q2 = pix[2];
6420 if( ABS( p0 - q0 ) < alpha &&
6421 ABS( p1 - p0 ) < beta &&
6422 ABS( q1 - q0 ) < beta ) {
6424 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6425 if( ABS( p2 - p0 ) < beta)
6427 const int p3 = pix[-4];
6429 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6430 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6431 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6434 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6436 if( ABS( q2 - q0 ) < beta)
6438 const int q3 = pix[3];
6440 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6441 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6442 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6445 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6449 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6450 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6452 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one vertical chroma edge in MBAFF mode, per pixel row (8 rows),
 * reusing the luma bS values; normal filter for bS<4, strong for bS==4. */
6457 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6459 for( i = 0; i < 8; i++, pix += stride) {
6467 if( bS[bS_index] == 0 ) {
6471 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6472 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6473 alpha = alpha_table[index_a];
6474 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6475 if( bS[bS_index] < 4 ) {
6476 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6477 /* 2px edge length (because we use same bS than the one for luma) */
6478 const int p0 = pix[-1];
6479 const int p1 = pix[-2];
6480 const int q0 = pix[0];
6481 const int q1 = pix[1];
6483 if( ABS( p0 - q0 ) < alpha &&
6484 ABS( p1 - p0 ) < beta &&
6485 ABS( q1 - q0 ) < beta ) {
6486 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6488 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6489 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6490 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6493 const int p0 = pix[-1];
6494 const int p1 = pix[-2];
6495 const int q0 = pix[0];
6496 const int q1 = pix[1];
6498 if( ABS( p0 - q0 ) < alpha &&
6499 ABS( p1 - p0 ) < beta &&
6500 ABS( q1 - q0 ) < beta ) {
6502 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6503 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6504 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 samples wide). For bS < 4 the work is
 * delegated to the DSP routine h264_v_loop_filter_luma with per-4px tc values;
 * the fallback C loop below handles the strong (intra, bS==4) case. */
6510 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6512 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6513 const int alpha = alpha_table[index_a];
6514 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* for a horizontal edge, "next pixel across the edge" is one row down */
6515 const int pix_next = stride;
/* tc==-1 signals "skip this 4px group" to the DSP filter */
6520 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6521 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6523 /* 16px edge length, see filter_mb_edgev */
6524 for( d = 0; d < 16; d++ ) {
6525 const int p0 = pix[-1*pix_next];
6526 const int p1 = pix[-2*pix_next];
6527 const int p2 = pix[-3*pix_next];
6528 const int q0 = pix[0];
6529 const int q1 = pix[1*pix_next];
6530 const int q2 = pix[2*pix_next];
6532 if( ABS( p0 - q0 ) < alpha &&
6533 ABS( p1 - p0 ) < beta &&
6534 ABS( q1 - q0 ) < beta ) {
6536 const int p3 = pix[-4*pix_next];
6537 const int q3 = pix[ 3*pix_next];
/* strong-filter threshold: only smooth deeply when the step is small */
6539 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6540 if( ABS( p2 - p0 ) < beta) {
6542 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6543 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6544 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6547 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6549 if( ABS( q2 - q0 ) < beta) {
6551 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6552 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6553 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6556 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large step: weak 3-tap fallback on p0/q0 only */
6560 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6561 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6563 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge; fully delegated to the DSP routines.
 * Non-intra edges use the tc-clipped chroma filter, intra edges (bS==4) the
 * stronger _intra variant. */
6570 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6572 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6573 const int alpha = alpha_table[index_a];
6574 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc convention here: table value + 1, 0 means "skip group" */
6579 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6580 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6582 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Apply the in-loop deblocking filter to one decoded macroblock: computes
 * boundary strengths (bS) for every internal and cross-MB edge and dispatches
 * to the edge filters above. img_y/img_cb/img_cr point at this MB's top-left
 * sample in each plane; linesize/uvlinesize are the plane strides. */
6586 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6587 MpegEncContext * const s = &h->s;
6588 const int mb_xy= mb_x + mb_y*s->mb_stride;
6589 int first_vertical_edge_done = 0;
6591 /* FIXME: A given frame may occupy more than one position in
6592 * the reference list. So ref2frm should be populated with
6593 * frame numbers, not indices. */
/* maps ref_cache index (+2 for the two "unavailable" slots) to a frame id */
6594 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6596 //for sufficiently low qp, filtering wouldn't do anything
6597 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6598 if(!h->mb_aff_frame){
6599 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
6600 int qp = s->current_picture.qscale_table[mb_xy];
6602 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6603 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6609 // left mb is in picture
6610 && h->slice_table[mb_xy-1] != 255
6611 // and current and left pair do not have the same interlaced type
6612 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6613 // and left mb is in the same slice if deblocking_filter == 2
6614 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6615 /* First vertical edge is different in MBAFF frames
6616 * There are 8 different bS to compute and 2 different Qp
6623 first_vertical_edge_done = 1;
/* derive one bS per sample-pair row of the MBAFF left edge */
6624 for( i = 0; i < 8; i++ ) {
6626 int b_idx= 8 + 4 + 8*y;
6627 int bn_idx= b_idx - 1;
6629 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6631 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6632 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6634 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6635 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6636 h->non_zero_count_cache[bn_idx] != 0 ) {
/* bS=1 when references or motion vectors differ enough across the edge */
6641 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6642 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6643 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6644 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6651 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6652 // Do not use s->qscale as luma quantizer because it has not the same
6653 // value in IPCM macroblocks.
6654 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6655 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6656 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6657 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6658 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6659 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6662 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6663 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6664 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6665 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6666 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6669 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6670 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbouring MB across the first (outer) edge of this dir */
6673 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6674 const int mb_type = s->current_picture.mb_type[mb_xy];
6675 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* 255 in slice_table marks "no MB" -> skip the outer edge (start at 1) */
6676 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6678 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6679 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6680 // how often to recheck mv-based bS when iterating between edges
6681 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6682 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6683 // how often to recheck mv-based bS when iterating along each edge
6684 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* the special MBAFF left edge was already filtered above */
6686 if (first_vertical_edge_done) {
6688 first_vertical_edge_done = 0;
6691 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6695 for( edge = start; edge < edges; edge++ ) {
6696 /* mbn_xy: neighbor macroblock */
6697 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6698 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* 8x8 transform: odd internal edges carry no coefficients -> skip */
6702 if( (edge&1) && IS_8x8DCT(mb_type) )
6705 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6706 && !IS_INTERLACED(mb_type)
6707 && IS_INTERLACED(mbn_type)
6709 // This is a special case in the norm where the filtering must
6710 // be done twice (one each of the field) even if we are in a
6711 // frame macroblock.
6713 unsigned int tmp_linesize = 2 * linesize;
6714 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6715 int mbn_xy = mb_xy - 2 * s->mb_stride;
6719 if( IS_INTRA(mb_type) ||
6720 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6721 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6724 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6727 // Do not use s->qscale as luma quantizer because it has not the same
6728 // value in IPCM macroblocks.
6729 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6730 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6731 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* first field: filter against the MB two rows up, at double stride */
6732 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6733 chroma_qp = ( h->chroma_qp +
6734 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6735 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6736 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field: same edge one line down, neighbour one MB row closer */
6739 mbn_xy += s->mb_stride;
6740 if( IS_INTRA(mb_type) ||
6741 IS_INTRA(mbn_type) ) {
6742 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6745 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6748 // Do not use s->qscale as luma quantizer because it has not the same
6749 // value in IPCM macroblocks.
6750 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6751 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6752 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6753 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6754 chroma_qp = ( h->chroma_qp +
6755 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6756 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6757 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* ordinary bS derivation: intra neighbours force strong filtering */
6760 if( IS_INTRA(mb_type) ||
6761 IS_INTRA(mbn_type) ) {
6764 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6765 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6774 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inside a partition (per mask_edge) there can be no bS>0 edge */
6779 if( edge & mask_edge ) {
6780 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6783 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6784 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6785 int bn_idx= b_idx - (dir ? 8:1);
6787 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6788 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6789 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6790 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6792 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per-4x4 bS: nnz across the edge -> 2, else mv/ref mismatch -> 1 */
6798 for( i = 0; i < 4; i++ ) {
6799 int x = dir == 0 ? edge : i;
6800 int y = dir == 0 ? i : edge;
6801 int b_idx= 8 + 4 + x + 8*y;
6802 int bn_idx= b_idx - (dir ? 8:1);
6804 if( h->non_zero_count_cache[b_idx] != 0 ||
6805 h->non_zero_count_cache[bn_idx] != 0 ) {
6811 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6812 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6813 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6814 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6822 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6827 // Do not use s->qscale as luma quantizer because it has not the same
6828 // value in IPCM macroblocks.
6829 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6830 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6831 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6832 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* dir==0: vertical edges every 4 luma / 2 chroma columns */
6834 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6835 if( (edge&1) == 0 ) {
6836 int chroma_qp = ( h->chroma_qp +
6837 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6838 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6839 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* dir==1: horizontal edges every 4 luma / 2 chroma rows */
6842 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6843 if( (edge&1) == 0 ) {
6844 int chroma_qp = ( h->chroma_qp +
6845 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6846 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6847 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode all macroblocks of the current slice.
 * Chooses the CABAC or CAVLC macroblock loop depending on pps.cabac, reports
 * decoded regions to the error concealment via ff_er_add_slice(), and draws
 * completed rows with ff_draw_horiz_band(). Returns 0 on slice end, -1 on a
 * decode error.
 * Fix: the final bit-count check contained garbled characters
 * ("s->?gb" / "s->gb?.size_in_bits"); restored to match the identical
 * checks a few lines above. */
6854 static int decode_slice(H264Context *h){
6855 MpegEncContext * const s = &h->s;
6856 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6860 if( h->pps.cabac ) {
/* CABAC slices start byte-aligned after the slice header */
6864 align_get_bits( &s->gb );
6867 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6868 ff_init_cabac_decoder( &h->cabac,
6869 s->gb.buffer + get_bits_count(&s->gb)/8,
6870 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6871 /* calculate pre-state */
6872 for( i= 0; i < 460; i++ ) {
6874 if( h->slice_type == I_TYPE )
6875 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6877 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte: even = MPS 0, odd = MPS 1 */
6880 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6882 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6886 int ret = decode_mb_cabac(h);
6889 if(ret>=0) hl_decode_mb(h);
6891 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6892 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6895 if(ret>=0) ret = decode_mb_cabac(h);
6897 if(ret>=0) hl_decode_mb(h);
6900 eos = get_cabac_terminate( &h->cabac );
/* one byte of overread is tolerated (CABAC may read ahead) */
6902 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6903 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6904 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6908 if( ++s->mb_x >= s->mb_width ) {
6910 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6912 if(h->mb_aff_frame) {
6917 if( eos || s->mb_y >= s->mb_height ) {
6918 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6919 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop */
6926 int ret = decode_mb_cavlc(h);
6928 if(ret>=0) hl_decode_mb(h);
6930 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6932 ret = decode_mb_cavlc(h);
6934 if(ret>=0) hl_decode_mb(h);
6939 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6940 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6945 if(++s->mb_x >= s->mb_width){
6947 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6949 if(h->mb_aff_frame) {
6952 if(s->mb_y >= s->mb_height){
6953 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* landing exactly on the end of the bitstream means a clean slice end */
6955 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6956 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6960 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6967 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6968 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6969 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6970 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6974 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6983 for(;s->mb_y < s->mb_height; s->mb_y++){
6984 for(;s->mb_x < s->mb_width; s->mb_x++){
6985 int ret= decode_mb(h);
6990 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6991 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6996 if(++s->mb_x >= s->mb_width){
6998 if(++s->mb_y >= s->mb_height){
6999 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7000 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7004 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7011 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7012 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7013 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7017 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7024 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7027 return -1; //not reached
/* Parse an SEI "unregistered user data" payload of 'size' bytes.
 * The only use made of it is detecting the x264 encoder version string so
 * encoder-specific bug workarounds can be keyed off h->x264_build. */
7030 static int decode_unregistered_user_data(H264Context *h, int size){
7031 MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text */
7032 uint8_t user_data[16+256];
7038 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7039 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID; only the build number is extracted */
7043 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7044 if(e==1 && build>=0)
7045 h->x264_build= build;
7047 if(s->avctx->debug & FF_DEBUG_BUGS)
7048 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip whatever part of the payload did not fit in the local buffer */
7051 skip_bits(&s->gb, 8);
/* Parse the SEI NAL unit: repeatedly read (type, size) pairs, where both
 * fields use the 0xFF-extension coding (each 255 byte adds 255 to the
 * accumulated value), then dispatch on the payload type. */
7056 static int decode_sei(H264Context *h){
7057 MpegEncContext * const s = &h->s;
7059 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7064 type+= show_bits(&s->gb, 8);
7065 }while(get_bits(&s->gb, 8) == 255);
7069 size+= show_bits(&s->gb, 8);
7070 }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): trailing ';' makes this an empty statement — the error
 * return for a failed payload parse looks lost here; verify upstream. */
7074 if(decode_unregistered_user_data(h, size) < 0);
/* unknown payload types are skipped wholesale */
7078 skip_bits(&s->gb, 8*size);
7081 //FIXME check bits here
7082 align_get_bits(&s->gb);
/* Parse and discard the hypothetical reference decoder (HRD) parameters
 * (Annex E). Nothing is stored; the bits are consumed only to keep the
 * bitstream position correct for what follows in the VUI. */
7088 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7089 MpegEncContext * const s = &h->s;
7091 cpb_count = get_ue_golomb(&s->gb) + 1;
7092 get_bits(&s->gb, 4); /* bit_rate_scale */
7093 get_bits(&s->gb, 4); /* cpb_size_scale */
7094 for(i=0; i<cpb_count; i++){
7095 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7096 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7097 get_bits1(&s->gb); /* cbr_flag */
7099 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7100 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7101 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7102 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (video usability information) section of an SPS.
 * Stores sample aspect ratio, timing info and bitstream restrictions into
 * *sps; most other fields are parsed and discarded. */
7105 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7106 MpegEncContext * const s = &h->s;
7107 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7108 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7110 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7112 if( aspect_ratio_info_present_flag ) {
7113 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; otherwise indexed table lookup */
7114 if( aspect_ratio_idc == EXTENDED_SAR ) {
7115 sps->sar.num= get_bits(&s->gb, 16);
7116 sps->sar.den= get_bits(&s->gb, 16);
7117 }else if(aspect_ratio_idc < 14){
7118 sps->sar= pixel_aspect[aspect_ratio_idc];
7120 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7127 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7129 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7130 get_bits1(&s->gb); /* overscan_appropriate_flag */
7133 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7134 get_bits(&s->gb, 3); /* video_format */
7135 get_bits1(&s->gb); /* video_full_range_flag */
7136 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7137 get_bits(&s->gb, 8); /* colour_primaries */
7138 get_bits(&s->gb, 8); /* transfer_characteristics */
7139 get_bits(&s->gb, 8); /* matrix_coefficients */
7143 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7144 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7145 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7148 sps->timing_info_present_flag = get_bits1(&s->gb);
7149 if(sps->timing_info_present_flag){
7150 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7151 sps->time_scale = get_bits_long(&s->gb, 32);
7152 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD blocks may appear twice (NAL and VCL); both are skipped */
7155 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7156 if(nal_hrd_parameters_present_flag)
7157 decode_hrd_parameters(h, sps);
7158 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7159 if(vcl_hrd_parameters_present_flag)
7160 decode_hrd_parameters(h, sps);
7161 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7162 get_bits1(&s->gb); /* low_delay_hrd_flag */
7163 get_bits1(&s->gb); /* pic_struct_present_flag */
7165 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7166 if(sps->bitstream_restriction_flag){
7167 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7168 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7169 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7170 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7171 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames is kept: needed for output-delay handling */
7172 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7173 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parse one scaling list of 'size' (16 or 64) entries into 'factors'.
 * If the list is absent in the stream, copy fallback_list; if the first
 * delta yields 0, use the JVT default list; otherwise decode delta-coded
 * values in zigzag order, repeating the last value once deltas stop. */
7179 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7180 const uint8_t *jvt_list, const uint8_t *fallback_list){
7181 MpegEncContext * const s = &h->s;
7182 int i, last = 8, next = 8;
7183 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7184 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7185 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7187 for(i=0;i<size;i++){
/* values are deltas mod 256 relative to the previous entry */
7189 next = (last + get_se_golomb(&s->gb)) & 0xff;
7190 if(!i && !next){ /* matrix not written, we use the preset one */
7191 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the rest of the list" */
7194 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices (six 4x4 lists, and two 8x8 lists
 * when 8x8 transform is possible) for an SPS or PPS. The fallback chain
 * follows the spec: PPS lists fall back to SPS lists when present, else to
 * the JVT defaults; later lists in a group fall back to the previous one. */
7198 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7199 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7200 MpegEncContext * const s = &h->s;
/* only a PPS with a previously-seen SPS matrix set uses SPS fallback */
7201 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7202 const uint8_t *fallback[4] = {
7203 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7204 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7205 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7206 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7208 if(get_bits1(&s->gb)){
7209 sps->scaling_matrix_present |= is_sps;
7210 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7211 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7212 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7213 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7214 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7215 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7216 if(is_sps || pps->transform_8x8_mode){
7217 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7218 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7220 } else if(fallback_sps) {
/* no PPS matrices signalled: inherit the SPS matrices wholesale */
7221 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7222 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parse a sequence parameter set NAL and store it in h->sps_buffer[sps_id].
 * Covers profile/level, POC configuration, reference frame count, picture
 * dimensions in MBs, MBAFF/frame flags, cropping and the optional VUI. */
7226 static inline int decode_seq_parameter_set(H264Context *h){
7227 MpegEncContext * const s = &h->s;
7228 int profile_idc, level_idc;
7232 profile_idc= get_bits(&s->gb, 8);
7233 get_bits1(&s->gb); //constraint_set0_flag
7234 get_bits1(&s->gb); //constraint_set1_flag
7235 get_bits1(&s->gb); //constraint_set2_flag
7236 get_bits1(&s->gb); //constraint_set3_flag
7237 get_bits(&s->gb, 4); // reserved
7238 level_idc= get_bits(&s->gb, 8);
7239 sps_id= get_ue_golomb(&s->gb);
7241 sps= &h->sps_buffer[ sps_id ];
7242 sps->profile_idc= profile_idc;
7243 sps->level_idc= level_idc;
/* High profile adds chroma format, bit depths and scaling matrices */
7245 if(sps->profile_idc >= 100){ //high profile
7246 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7247 get_bits1(&s->gb); //residual_color_transform_flag
7248 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7249 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7250 sps->transform_bypass = get_bits1(&s->gb);
7251 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7253 sps->scaling_matrix_present = 0;
7255 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7256 sps->poc_type= get_ue_golomb(&s->gb);
7258 if(sps->poc_type == 0){ //FIXME #define
7259 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7260 } else if(sps->poc_type == 1){//FIXME #define
7261 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7262 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7263 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7264 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7266 for(i=0; i<sps->poc_cycle_length; i++)
7267 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7269 if(sps->poc_type > 2){
7270 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7274 sps->ref_frame_count= get_ue_golomb(&s->gb);
7275 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7276 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7278 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7279 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7280 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb computations */
7281 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7282 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7285 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7286 if(!sps->frame_mbs_only_flag)
7287 sps->mb_aff= get_bits1(&s->gb);
7291 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7293 sps->crop= get_bits1(&s->gb);
7295 sps->crop_left = get_ue_golomb(&s->gb);
7296 sps->crop_right = get_ue_golomb(&s->gb);
7297 sps->crop_top = get_ue_golomb(&s->gb);
7298 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping shifts the picture origin, which is not handled */
7299 if(sps->crop_left || sps->crop_top){
7300 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7306 sps->crop_bottom= 0;
7309 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7310 if( sps->vui_parameters_present_flag )
7311 decode_vui_parameters(h, sps);
7313 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7314 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7315 sps_id, sps->profile_idc, sps->level_idc,
7317 sps->ref_frame_count,
7318 sps->mb_width, sps->mb_height,
7319 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7320 sps->direct_8x8_inference_flag ? "8B8" : "",
7321 sps->crop_left, sps->crop_right,
7322 sps->crop_top, sps->crop_bottom,
7323 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a picture parameter set NAL into h->pps_buffer[pps_id].
 * FMO (slice_group_count > 1) is not supported; its syntax is only quoted
 * from the spec in the disabled table below. */
7329 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7330 MpegEncContext * const s = &h->s;
7331 int pps_id= get_ue_golomb(&s->gb);
7332 PPS *pps= &h->pps_buffer[pps_id];
7334 pps->sps_id= get_ue_golomb(&s->gb);
7335 pps->cabac= get_bits1(&s->gb);
7336 pps->pic_order_present= get_bits1(&s->gb);
7337 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7338 if(pps->slice_group_count > 1 ){
7339 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7340 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7341 switch(pps->mb_slice_group_map_type){
7344 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7345 | run_length[ i ] |1 |ue(v) |
7350 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7352 | top_left_mb[ i ] |1 |ue(v) |
7353 | bottom_right_mb[ i ] |1 |ue(v) |
7361 | slice_group_change_direction_flag |1 |u(1) |
7362 | slice_group_change_rate_minus1 |1 |ue(v) |
7367 | slice_group_id_cnt_minus1 |1 |ue(v) |
7368 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7370 | slice_group_id[ i ] |1 |u(v) |
7375 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7376 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7377 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7378 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7382 pps->weighted_pred= get_bits1(&s->gb);
7383 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7384 pps->init_qp= get_se_golomb(&s->gb) + 26;
7385 pps->init_qs= get_se_golomb(&s->gb) + 26;
7386 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7387 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7388 pps->constrained_intra_pred= get_bits1(&s->gb);
7389 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* default: flat scaling matrices (all 16) until overridden below */
7391 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7392 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing High-profile fields, present only if bits remain */
7394 if(get_bits_count(&s->gb) < bit_length){
7395 pps->transform_8x8_mode= get_bits1(&s->gb);
7396 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7397 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7400 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7401 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7402 pps_id, pps->sps_id,
7403 pps->cabac ? "CABAC" : "CAVLC",
7404 pps->slice_group_count,
7405 pps->ref_count[0], pps->ref_count[1],
7406 pps->weighted_pred ? "weighted" : "",
7407 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7408 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7409 pps->constrained_intra_pred ? "CONSTR" : "",
7410 pps->redundant_pic_cnt_present ? "REDU" : "",
7411 pps->transform_8x8_mode ? "8x8DCT" : ""
7419 * finds the end of the current frame in the bitstream.
7420 * @return the position of the first byte of the next frame, or -1
/* Scans for NAL start codes byte by byte, keeping a rolling 32-bit 'state'.
 * Slice NALs (types 1, 2, 5) delimit frames: a new slice with
 * first_mb_in_slice == 0 (top bit of the first payload byte set, ue(v)=0)
 * marks the start of the NEXT frame. */
7422 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7425 ParseContext *pc = &(h->s.parse_context);
7426 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7427 // mb_addr= pc->mb_addr - 1;
7429 for(i=0; i<=buf_size; i++){
/* mask 0xFFFFFF1F: match "00 00 01 <nal>" ignoring nal_ref_idc bits */
7430 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7431 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7432 if(pc->frame_start_found){
7433 // If there isn't one more byte in the buffer
7434 // the test on first_mb_in_slice cannot be done yet
7435 // do it at next call.
7436 if (i >= buf_size) break;
7437 if (buf[i] & 0x80) {
7438 // first_mb_in_slice is 0, probably the first nal of a new
7440 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7442 pc->frame_start_found= 0;
7446 pc->frame_start_found = 1;
/* SPS/PPS/AU-delimiter (types 7, 8, 9) also terminate a pending frame */
7448 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7449 if(pc->frame_start_found){
7451 pc->frame_start_found= 0;
7456 state= (state<<8) | buf[i];
7460 return END_NOT_FOUND;
/* AVCodecParser callback: accumulates input via ff_combine_frame() until
 * find_frame_end() reports a complete access unit, then hands it back in
 * *poutbuf / *poutbuf_size.  (Elided fragment — comments only added.) */
7463 static int h264_parse(AVCodecParserContext *s,
7464 AVCodecContext *avctx,
7465 uint8_t **poutbuf, int *poutbuf_size,
7466 const uint8_t *buf, int buf_size)
7468 H264Context *h = s->priv_data;
7469 ParseContext *pc = &h->s.parse_context;
7472 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame() < 0 means the frame is still incomplete; the elided
 * branch presumably reports "no output yet" — TODO confirm against full file. */
7474 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7480 *poutbuf = (uint8_t *)buf;
7481 *poutbuf_size = buf_size;
/* Parser split callback: scans the buffer for the end of the in-band header
 * data.  Uses the same 4-byte state machine as find_frame_end(); 0x107 is an
 * SPS NAL (type 7), and the second test matches any start code whose NAL type
 * is not SPS/PPS/AUD (7/8/9) — presumably the first non-header NAL marks the
 * split point (return sites elided).  Comments only added. */
7485 static int h264_split(AVCodecContext *avctx,
7486 const uint8_t *buf, int buf_size)
7489 uint32_t state = -1;
7492 for(i=0; i<=buf_size; i++){
7493 if((state&0xFFFFFF1F) == 0x107)
7495 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* Back up over any zero bytes preceding the start code so they stay with
 * the following NAL rather than the header blob. */
7497 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7499 while(i>4 && buf[i-5]==0) i--;
7504 state= (state<<8) | buf[i];
/* Decodes all NAL units in buf, dispatching on nal_unit_type.
 * Returns the number of bytes consumed (see the early return at 7652).
 * NOTE(review): elided fragment — case labels, loop heads and closing braces
 * between the numbered lines are not visible; comments only are added. */
7510 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7511 MpegEncContext * const s = &h->s;
7512 AVCodecContext * const avctx= s->avctx;
/* Debug hex dump of the first 50 input bytes (enclosing guard elided —
 * note buf_size is not checked here; presumably guarded upstream). */
7516 for(i=0; i<50; i++){
7517 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7521 s->current_picture_ptr= NULL;
/* Per-NAL loop (head elided).  AVC/MP4 mode: read a big-endian length prefix
 * of h->nal_length_size bytes instead of searching for a start code. */
7530 if(buf_index >= buf_size) break;
7532 for(i = 0; i < h->nal_length_size; i++)
7533 nalsize = (nalsize << 8) | buf[buf_index++];
7539 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
/* Annex-B mode: advance to the next 00 00 01 start-code prefix. */
7544 // start code prefix search
7545 for(; buf_index + 3 < buf_size; buf_index++){
7546 // this should allways succeed in the first iteration
7547 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7551 if(buf_index+3 >= buf_size) break;
/* Unescape the NAL (remove emulation-prevention bytes), then trim the
 * trailing zero byte and the rbsp_trailing_bits to get the exact bit length. */
7556 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7557 if(ptr[dst_length - 1] == 0) dst_length--;
7558 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7560 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7561 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7564 if (h->is_avc && (nalsize != consumed))
7565 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7567 buf_index += consumed;
/* Cheap discard of non-reference NALs when the user asked to skip them. */
7569 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7570 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7573 switch(h->nal_unit_type){
/* (case labels elided) IDR slice: flush reference state, then fall into the
 * common slice path below. */
7575 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7577 init_get_bits(&s->gb, ptr, bit_length);
7579 h->inter_gb_ptr= &s->gb;
7580 s->data_partitioning = 0;
7582 if(decode_slice_header(h) < 0){
7583 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7586 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
/* Gate the actual slice decode (call elided) on skip_frame/hurry_up policy. */
7587 if(h->redundant_pic_count==0 && s->hurry_up < 5
7588 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7589 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7590 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7591 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header only; B/C partitions arrive separately. */
7595 init_get_bits(&s->gb, ptr, bit_length);
7597 h->inter_gb_ptr= NULL;
7598 s->data_partitioning = 1;
7600 if(decode_slice_header(h) < 0){
7601 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* Data partition B: intra residual bitstream. */
7605 init_get_bits(&h->intra_gb, ptr, bit_length);
7606 h->intra_gb_ptr= &h->intra_gb;
/* Data partition C: inter residual bitstream; once both are present the
 * (elided) slice decode runs under the same skip policy as above. */
7609 init_get_bits(&h->inter_gb, ptr, bit_length);
7610 h->inter_gb_ptr= &h->inter_gb;
7612 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7614 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7615 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7616 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7617 && avctx->skip_frame < AVDISCARD_ALL)
/* (case label elided — presumably SEI; TODO confirm against full file.) */
7621 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set; may switch low_delay / has_b_frames. */
7625 init_get_bits(&s->gb, ptr, bit_length);
7626 decode_seq_parameter_set(h);
7628 if(s->flags& CODEC_FLAG_LOW_DELAY)
7631 if(avctx->has_b_frames < 2)
7632 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7635 init_get_bits(&s->gb, ptr, bit_length);
7637 decode_picture_parameter_set(h, bit_length);
/* NAL types intentionally ignored by this decoder. */
7641 case NAL_END_SEQUENCE:
7642 case NAL_END_STREAM:
7643 case NAL_FILLER_DATA:
7645 case NAL_AUXILIARY_SLICE:
7648 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
/* Post-loop bookkeeping: no picture was started (e.g. only SPS/PPS seen). */
7652 if(!s->current_picture_ptr) return buf_index; //no frame
7654 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7655 s->current_picture_ptr->pict_type= s->pict_type;
/* Carry POC / frame_num state to the next picture; POC MSB/LSB are only
 * updated from reference pictures, per the POC derivation rules. */
7657 h->prev_frame_num_offset= h->frame_num_offset;
7658 h->prev_frame_num= h->frame_num;
7659 if(s->current_picture_ptr->reference){
7660 h->prev_poc_msb= h->poc_msb;
7661 h->prev_poc_lsb= h->poc_lsb;
7663 if(s->current_picture_ptr->reference)
7664 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* (Elided fragment — comments only added.) */
7674  * returns the number of bytes consumed for building the current frame
7676 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* In truncated mode part of the buffer may already have been consumed by a
 * previous call; subtract what the parse context carried over. */
7677 if(s->flags&CODEC_FLAG_TRUNCATED){
7678 pos -= s->parse_context.last_index;
7679 if(pos<0) pos=0; // FIXME remove (unneeded?)
/* Defensive clamping: never report 0 (caller could loop forever) and never
 * report more than the buffer actually held. */
7683 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7684 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec decode callback: handles extradata (avcC or Annex-B),
 * runs decode_nal_units(), then reorders decoded pictures into display order.
 * NOTE(review): elided fragment — returns, braces and several statements
 * between the numbered lines are not visible; comments only are added. */
7690 static int decode_frame(AVCodecContext *avctx,
7691 void *data, int *data_size,
7692 uint8_t *buf, int buf_size)
7694 H264Context *h = avctx->priv_data;
7695 MpegEncContext *s = &h->s;
7696 AVFrame *pict = data;
7699 s->flags= avctx->flags;
7700 s->flags2= avctx->flags2;
/* Empty input: flush path (body elided — presumably outputs delayed pics). */
7702 /* no supplementary picture */
7703 if (buf_size == 0) {
/* Truncated mode: assemble a complete frame before decoding. */
7707 if(s->flags&CODEC_FLAG_TRUNCATED){
7708 int next= find_frame_end(h, buf, buf_size);
7710 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7712 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata (AVCDecoderConfigurationRecord):
 * byte 0 version, byte 4 low 2 bits = lengthSizeMinusOne, byte 5 low 5 bits
 * = SPS count; each parameter set is prefixed by a 2-byte big-endian length
 * (hence nalsize = BE_16(p) + 2 to include the prefix itself). */
7715 if(h->is_avc && !h->got_avcC) {
7716 int i, cnt, nalsize;
7717 unsigned char *p = avctx->extradata;
7718 if(avctx->extradata_size < 7) {
7719 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7723 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7726 /* sps and pps in the avcC always have length coded with 2 bytes,
7727 so put a fake nal_length_size = 2 while parsing them */
7728 h->nal_length_size = 2;
7729 // Decode sps from avcC
7730 cnt = *(p+5) & 0x1f; // Number of sps
7732 for (i = 0; i < cnt; i++) {
7733 nalsize = BE_16(p) + 2;
/* NOTE(review): the SPS loop accepts any result >= 0 while the PPS loop
 * below requires the full nalsize to be consumed — inconsistent error
 * criteria; left untouched but worth confirming intent. */
7734 if(decode_nal_units(h, p, nalsize) < 0) {
7735 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7740 // Decode pps from avcC
7741 cnt = *(p++); // Number of pps
7742 for (i = 0; i < cnt; i++) {
7743 nalsize = BE_16(p) + 2;
7744 if(decode_nal_units(h, p, nalsize) != nalsize) {
7745 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
/* Real NAL length size for the stream proper: lengthSizeMinusOne + 1. */
7750 // Now store right nal length size, that will be use to parse all other nals
7751 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7752 // Do not reparse avcC
/* Annex-B style extradata: decoded once as ordinary NAL units. */
7756 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7757 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7761 buf_index=decode_nal_units(h, buf, buf_size);
7765 //FIXME do something with unavailable reference frames
7767 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7768 if(!s->current_picture_ptr){
7769 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
/* --- Display-order reordering of delayed (B-) pictures --- */
7774 Picture *out = s->current_picture_ptr;
7775 #if 0 //decode order
7776 *data_size = sizeof(AVFrame);
7778 /* Sort B-frames into display order */
7779 Picture *cur = s->current_picture_ptr;
7780 Picture *prev = h->delayed_output_pic;
7785 int dropped_frame = 0;
/* If the SPS advertises the reorder depth, grow has_b_frames to match. */
7788 if(h->sps.bitstream_restriction_flag
7789 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7790 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the new picture to the delayed-pic FIFO. */
7794 while(h->delayed_pic[pics]) pics++;
7795 h->delayed_pic[pics++] = cur;
7796 if(cur->reference == 0)
7799 for(i=0; h->delayed_pic[i]; i++)
7800 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the delayed picture with the smallest POC (stop at a keyframe). */
7803 out = h->delayed_pic[0];
7804 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7805 if(h->delayed_pic[i]->poc < out->poc){
7806 out = h->delayed_pic[i];
/* Heuristics: detect out-of-order output and adapt has_b_frames when the
 * stream carries no bitstream_restriction info (branch bodies elided). */
7810 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7811 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7813 else if(prev && pics <= s->avctx->has_b_frames)
7815 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7817 ((!cross_idr && prev && out->poc > prev->poc + 2)
7818 || cur->pict_type == B_TYPE)))
7821 s->avctx->has_b_frames++;
7824 else if(out_of_order)
/* Emit (and drop from the FIFO) once enough pictures are buffered. */
7827 if(out_of_order || pics > s->avctx->has_b_frames){
7828 dropped_frame = (out != h->delayed_pic[out_idx]);
7829 for(i=out_idx; h->delayed_pic[i]; i++)
7830 h->delayed_pic[i] = h->delayed_pic[i+1];
7833 if(prev == out && !dropped_frame)
7836 *data_size = sizeof(AVFrame);
/* Release the delayed-output reference of the previously emitted picture. */
7837 if(prev && prev != out && prev->reference == 1)
7838 prev->reference = 0;
7839 h->delayed_output_pic = out;
7843 *pict= *(AVFrame*)out;
7845 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7848 assert(pict->data[0] || !*data_size);
7849 ff_print_debug_info(s, pict);
7850 //printf("out %d\n", (int)pict->data[0]);
7853 /* Return the Picture timestamp as the frame number */
7854 /* we substract 1 because it is added on utils.c */
7855 avctx->frame_number = s->picture_number - 1;
7857 return get_consumed_bytes(s, buf_index, buf_size);
/* Fills the macroblock-neighbour availability flags for the current MB:
 * from the slice_table offsets, [0]=top-left, [1]=top, [2]=top-right,
 * [3]=left; a neighbour counts as available only if it belongs to the same
 * slice.  NOTE(review): the mb_xy - s->mb_stride offsets would index before
 * the table on the first MB row — presumably a s->mb_y guard sits in the
 * elided lines (7863-7864); TODO confirm.  Comments only added. */
7860 static inline void fill_mb_avail(H264Context *h){
7861 MpegEncContext * const s = &h->s;
7862 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7865 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7866 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7867 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7873 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7874 h->mb_avail[4]= 1; //FIXME move out
7875 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): fragment of the built-in self-test routine (the enclosing
 * function's signature is elided — presumably a main() under #ifdef TEST;
 * TODO confirm).  It round-trips Exp-Golomb coding, the 4x4 (I)DCT, the
 * quantizer and the NAL escaping layer.  Comments only added. */
7881 #define SIZE (COUNT*40)
7887 //    int int_temp[10000];
7889 AVCodecContext avctx;
7891 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write COUNT codes, read them back, compare --- */
7893 init_put_bits(&pb, temp, SIZE);
7894 printf("testing unsigned exp golomb\n");
7895 for(i=0; i<COUNT; i++){
7897 set_ue_golomb(&pb, i);
7898 STOP_TIMER("set_ue_golomb");
7900 flush_put_bits(&pb);
7902 init_get_bits(&gb, temp, 8*SIZE);
7903 for(i=0; i<COUNT; i++){
7906 s= show_bits(&gb, 24);
7909 j= get_ue_golomb(&gb);
7911 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7914 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb round-trip over [-COUNT/2, COUNT/2) --- */
7918 init_put_bits(&pb, temp, SIZE);
7919 printf("testing signed exp golomb\n");
7920 for(i=0; i<COUNT; i++){
7922 set_se_golomb(&pb, i - COUNT/2);
7923 STOP_TIMER("set_se_golomb");
7925 flush_put_bits(&pb);
7927 init_get_bits(&gb, temp, 8*SIZE);
7928 for(i=0; i<COUNT; i++){
7931 s= show_bits(&gb, 24);
7934 j= get_se_golomb(&gb);
7935 if(j != i - COUNT/2){
7936 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7939 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT: random blocks through forward diff-DCT, a crude
 * quantize/dequantize approximation, then IDCT; accumulate the error --- */
7942 printf("testing 4x4 (I)DCT\n");
7945 uint8_t src[16], ref[16];
7946 uint64_t error= 0, max_error=0;
7948 for(i=0; i<COUNT; i++){
7950 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7951 for(j=0; j<16; j++){
7952 ref[j]= random()%255;
7953 src[j]= random()%255;
7956 h264_diff_dct_c(block, src, ref, 4);
7959 for(j=0; j<16; j++){
7960 //            printf("%d ", block[j]);
7961 block[j]= block[j]*4;
7962 if(j&1) block[j]= (block[j]*4 + 2)/5;
7963 if(j&4) block[j]= (block[j]*4 + 2)/5;
7967 s->dsp.h264_idct_add(ref, block, 4);
7968 /*        for(j=0; j<16; j++){
7969 printf("%d ", ref[j]);
7973 for(j=0; j<16; j++){
7974 int diff= ABS(src[j] - ref[j]);
7977 max_error= FFMAX(max_error, diff);
7980 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values on random blocks --- */
7982 printf("testing quantizer\n");
7983 for(qp=0; qp<52; qp++){
7985 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: random payload with injected zero bytes, encode with
 * emulation-prevention escaping, decode back, verify length and content --- */
7989 printf("Testing NAL layer\n");
7991 uint8_t bitstream[COUNT];
7992 uint8_t nal[COUNT*2];
7994 memset(&h, 0, sizeof(H264Context));
7996 for(i=0; i<COUNT; i++){
8004 for(j=0; j<COUNT; j++){
8005 bitstream[j]= (random() % 255) + 1;
8008 for(j=0; j<zeros; j++){
8009 int pos= random() % COUNT;
8010 while(bitstream[pos] == 0){
8019 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8021 printf("encoding failed\n");
8025 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8029 if(out_length != COUNT){
8030 printf("incorrect length %d %d\n", out_length, COUNT);
8034 if(consumed != nal_length){
8035 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8039 if(memcmp(bitstream, out, COUNT)){
8040 printf("missmatch\n");
8045 printf("Testing RBSP\n");
/* AVCodec close callback: releases the per-context RBSP scratch buffer and
 * the decoder's lookup/state tables.  (Elided fragment — comments only.) */
8053 static int decode_end(AVCodecContext *avctx)
8055 H264Context *h = avctx->priv_data;
8056 MpegEncContext *s = &h->s;
8058 av_freep(&h->rbsp_buffer);
8059 free_tables(h); //FIXME cleanup init stuff perhaps
8062 //    memset(h, 0, sizeof(H264Context));
/* Decoder registration table (fields elided between the visible lines).
 * Capabilities: direct rendering, truncated input, and delayed output
 * (required by the B-frame reordering in decode_frame()). */
8068 AVCodec h264_decoder = {
8072 sizeof(H264Context),
8077 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8081 AVCodecParser h264_parser = {
8083 sizeof(H264Context),