2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
// Deliberately remap these MpegEncContext field names so any accidental use
// inside this file fails loudly (the replacement names explain why: the dct
// flag has a misleading name here and mb_intra is not initialized -- the
// information lives in mb_type instead).
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
// Block indices of the luma/chroma DC blocks inside the per-MB block array
// (indices 0..23 are the regular 4x4 blocks).
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
// Table-lookup widths (in bits) for the CAVLC VLC readers built below.
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
// Capacity of the SPS/PPS parameter-set buffers and of the per-slice MMCO
// (memory management control operation) list.
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
// Field names mirror the H.264 bitstream syntax elements of the same name
// unless the trailing ///< comment says otherwise.
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic; ///< used for POC computation (poc_type 1)
70 int offset_for_top_to_bottom_field; ///< used for POC computation (poc_type 1)
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
// VUI (video usability information) sub-fields:
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
96 * Picture parameter set
// Field names mirror the H.264 bitstream syntax elements of the same name
// unless the trailing ///< comment says otherwise.
100 int cabac; ///< entropy_coding_mode_flag
101 int pic_order_present; ///< pic_order_present_flag
102 int slice_group_count; ///< num_slice_groups_minus1 + 1
103 int mb_slice_group_map_type;
104 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
105 int weighted_pred; ///< weighted_pred_flag
106 int weighted_bipred_idc;
107 int init_qp; ///< pic_init_qp_minus26 + 26
108 int init_qs; ///< pic_init_qs_minus26 + 26
109 int chroma_qp_index_offset;
110 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
111 int constrained_intra_pred; ///< constrained_intra_pred_flag
112 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
113 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 * Memory management control operation opcode.
// NOTE(review): the enumerator list is not visible in this copy -- verify
// the opcode values against the reference picture marking section of the spec.
119 typedef enum MMCOOpcode{
130 * Memory management control operation.
141 typedef struct H264Context{
// NAL unit type codes (subset visible here).
149 #define NAL_IDR_SLICE 5
153 #define NAL_PICTURE_DELIMITER 9
154 #define NAL_FILTER_DATA 10
// Scratch buffer for the unescaped RBSP (emulation-prevention bytes removed).
155 uint8_t *rbsp_buffer;
156 int rbsp_buffer_size;
159 * Used to parse AVC variant of h264
161 int is_avc; ///< this flag is != 0 if codec is avc1
162 int got_avcC; ///< flag used to parse avcC data only once
163 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
167 int prev_mb_skipped; //FIXME remove (IMHO not used)
// Prediction state for the current macroblock.
170 int chroma_pred_mode;
171 int intra16x16_pred_mode;
// 5x8 cache holding the intra4x4 modes of the current MB plus its neighbors;
// intra4x4_pred_mode is the per-MB backing store it is written back to.
176 int8_t intra4x4_pred_mode_cache[5*8];
177 int8_t (*intra4x4_pred_mode)[8];
// Intra prediction function tables, indexed by prediction mode.
178 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
179 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
180 void (*pred8x8 [4+3])(uint8_t *src, int stride);
181 void (*pred16x16[4+3])(uint8_t *src, int stride);
// Bitmasks of neighboring samples usable for intra prediction
// (filled in fill_caches()).
182 unsigned int topleft_samples_available;
183 unsigned int top_samples_available;
184 unsigned int topright_samples_available;
185 unsigned int left_samples_available;
186 uint8_t (*top_borders[2])[16+2*8];
187 uint8_t left_border[2*(17+2*9)];
190 * non zero coeff count cache.
191 * is 64 if not available.
193 uint8_t non_zero_count_cache[6*8] __align8;
194 uint8_t (*non_zero_count)[16];
197 * Motion vector cache.
// mv_cache/ref_cache use the same 5x8 scan8 layout as the other caches;
// one plane per reference list.
199 int16_t mv_cache[2][5*8][2] __align8;
200 int8_t ref_cache[2][5*8] __align8;
201 #define LIST_NOT_USED -1 //FIXME rename?
202 #define PART_NOT_AVAILABLE -2
205 * is 1 if the specific list MV&references are set to 0,0,-2.
207 int mv_cache_clean[2];
210 * number of neighbors (top and/or left) that used 8x8 dct
212 int neighbor_transform_size;
215 * block_offset[ 0..23] for frame macroblocks
216 * block_offset[24..47] for field macroblocks
218 int block_offset[2*(16+8)];
// Lookup tables mapping macroblock xy to motion-vector-grid xy.
220 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
222 int b_stride; //FIXME use s->b4_stride
228 int unknown_svq3_flag;
229 int next_slice_index;
// Parameter sets: all received SPS/PPS plus the currently active one.
231 SPS sps_buffer[MAX_SPS_COUNT];
232 SPS sps; ///< current sps
234 PPS pps_buffer[MAX_PPS_COUNT];
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
241 uint16_t (*dequant8_coeff)[64];
// Per-MB slice number map, used to detect slice boundaries in fill_caches().
244 uint8_t *slice_table_base;
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
247 int slice_type_fixed;
249 //interlacing specific flags
251 int mb_field_decoding_flag;
// POC (picture order count) state.
258 int delta_poc_bottom;
261 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
262 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
263 int frame_num_offset; ///< for POC type 2
264 int prev_frame_num_offset; ///< for POC type 2
265 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
268 * frame_num for frames or 2*frame_num for field pics.
273 * max_frame_num or 2*max_frame_num for field pics.
277 //Weighted pred stuff
279 int use_weight_chroma;
280 int luma_log2_weight_denom;
281 int chroma_log2_weight_denom;
// Explicit weights/offsets per list and reference index.
282 int luma_weight[2][16];
283 int luma_offset[2][16];
284 int chroma_weight[2][16][2];
285 int chroma_offset[2][16][2];
286 int implicit_weight[16][16];
// Deblocking filter parameters for the current slice.
289 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
290 int slice_alpha_c0_offset;
291 int slice_beta_offset;
293 int redundant_pic_count;
// Direct-mode (B slice) prediction state.
295 int direct_spatial_mv_pred;
296 int dist_scale_factor[16];
297 int map_col_to_list0[2][16];
300 * num_ref_idx_l0/1_active_minus1 + 1
302 int ref_count[2];// FIXME split for AFF
// Reference picture management.
303 Picture *short_ref[32];
304 Picture *long_ref[32];
305 Picture default_ref_list[2][32];
306 Picture ref_list[2][32]; //FIXME size?
307 Picture field_ref_list[2][32]; //FIXME size?
308 Picture *delayed_pic[16]; //FIXME size?
309 Picture *delayed_output_pic;
312 * memory management control operations buffer.
314 MMCO mmco[MAX_MMCO_COUNT];
317 int long_ref_count; ///< number of actual long term references
318 int short_ref_count; ///< number of actual short term references
// Bitstream readers for data-partitioned slices (intra vs inter partitions).
321 GetBitContext intra_gb;
322 GetBitContext inter_gb;
323 GetBitContext *intra_gb_ptr;
324 GetBitContext *inter_gb_ptr;
// Residual coefficients of the current macroblock.
326 DCTELEM mb[16*24] __align8;
// CABAC state.
332 uint8_t cabac_state[460];
335 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
339 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 uint8_t *chroma_pred_mode_table;
341 int last_qscale_diff;
// Motion-vector-difference table/cache (CABAC context modelling).
342 int16_t (*mvd_table[2])[2];
343 int16_t mvd_cache[2][5*8][2] __align8;
// Per-8x8-block direct-mode flags (B slices).
344 uint8_t *direct_table;
345 uint8_t direct_cache[5*8];
// Coefficient scan orders; the _q0 variants are the unquantized baselines.
347 uint8_t zigzag_scan[16];
348 uint8_t field_scan[16];
349 const uint8_t *zigzag_scan_q0;
350 const uint8_t *field_scan_q0;
// CAVLC tables shared by all decoder instances, built once at init time.
355 static VLC coeff_token_vlc[4];
356 static VLC chroma_dc_coeff_token_vlc;
358 static VLC total_zeros_vlc[15];
359 static VLC chroma_dc_total_zeros_vlc[3];
361 static VLC run_vlc[6];
// Forward declarations (definitions appear later in this file).
364 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
365 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
366 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Pack two 16-bit values into one 32-bit word such that the result matches
// the in-memory layout of an int16_t[2] pair (used for writing mv_cache
// entries with a single 32-bit store); the byte-order #ifdef keeps that
// layout correct on both endiannesses.
// NOTE(review): the #else/#endif of this conditional are not visible in
// this copy -- verify against upstream.
368 static inline uint32_t pack16to32(int a, int b){
369 #ifdef WORDS_BIGENDIAN
370 return (b&0xFFFF) + (a<<16);
372 return (a&0xFFFF) + (b<<16);
378 * @param h height of the rectangle, should be a constant
379 * @param w width of the rectangle, should be a constant
380 * @param size the size of val (1 or 4), should be a constant
// Fills a w x h rectangle of 'size'-byte elements starting at vp with val,
// using the widest aligned stores available for each (w,h) combination.
// For size==1 the 8/16/32-bit value is built by replicating the byte
// (val*0x0101...); for size==4 val is stored as-is (replicated via
// val*0x0100000001ULL for 64-bit stores).
382 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
383 uint8_t *p= (uint8_t*)vp;
384 assert(size==1 || size==4);
// Alignment/stride preconditions: the destination must be aligned to
// min(w, STRIDE_ALIGN) and stride must be a multiple of w.
389 assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
390 assert((stride&(w-1))==0);
391 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
// One explicitly-unrolled branch per supported (w,h) shape:
394 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
395 }else if(w==2 && h==4){
396 *(uint16_t*)(p + 0*stride)=
397 *(uint16_t*)(p + 1*stride)=
398 *(uint16_t*)(p + 2*stride)=
399 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
400 }else if(w==4 && h==1){
401 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
402 }else if(w==4 && h==2){
403 *(uint32_t*)(p + 0*stride)=
404 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
405 }else if(w==4 && h==4){
406 *(uint32_t*)(p + 0*stride)=
407 *(uint32_t*)(p + 1*stride)=
408 *(uint32_t*)(p + 2*stride)=
409 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
410 }else if(w==8 && h==1){
412 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
413 }else if(w==8 && h==2){
414 *(uint32_t*)(p + 0 + 0*stride)=
415 *(uint32_t*)(p + 4 + 0*stride)=
416 *(uint32_t*)(p + 0 + 1*stride)=
417 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
418 }else if(w==8 && h==4){
419 *(uint64_t*)(p + 0*stride)=
420 *(uint64_t*)(p + 1*stride)=
421 *(uint64_t*)(p + 2*stride)=
422 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
423 }else if(w==16 && h==2){
424 *(uint64_t*)(p + 0+0*stride)=
425 *(uint64_t*)(p + 8+0*stride)=
426 *(uint64_t*)(p + 0+1*stride)=
427 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
428 }else if(w==16 && h==4){
429 *(uint64_t*)(p + 0+0*stride)=
430 *(uint64_t*)(p + 8+0*stride)=
431 *(uint64_t*)(p + 0+1*stride)=
432 *(uint64_t*)(p + 8+1*stride)=
433 *(uint64_t*)(p + 0+2*stride)=
434 *(uint64_t*)(p + 8+2*stride)=
435 *(uint64_t*)(p + 0+3*stride)=
436 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
// Fills the per-macroblock neighbor caches (intra modes, non-zero-count,
// sample availability, motion vectors/references, CABAC mvd, direct flags)
// from the neighboring macroblocks, so the rest of the decoder can address
// everything through the small scan8-indexed caches.
// @param mb_type type of the current macroblock
// @param for_deblock nonzero when caches are filled for the deblocking
//        filter rather than for decoding (relaxes slice-boundary checks).
441 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
442 MpegEncContext * const s = &h->s;
443 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
444 int topleft_xy, top_xy, topright_xy, left_xy[2];
445 int topleft_type, top_type, topright_type, left_type[2];
449 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
450 // the actual condition is whether we're on the edge of a slice,
451 // and even then the intra and nnz parts are unnecessary.
452 if(for_deblock && h->slice_num == 1)
455 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Neighbor macroblock addresses for the progressive (non-MBAFF) layout.
457 top_xy = mb_xy - s->mb_stride;
458 topleft_xy = top_xy - 1;
459 topright_xy= top_xy + 1;
460 left_xy[1] = left_xy[0] = mb_xy-1;
// MBAFF: re-derive the neighbor addresses from the frame/field coding
// decision of each macroblock pair.
470 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
471 const int top_pair_xy = pair_xy - s->mb_stride;
472 const int topleft_pair_xy = top_pair_xy - 1;
473 const int topright_pair_xy = top_pair_xy + 1;
474 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
475 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
476 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
477 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
478 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
479 const int bottom = (s->mb_y & 1);
480 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
482 ? !curr_mb_frame_flag // bottom macroblock
483 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
485 top_xy -= s->mb_stride;
488 ? !curr_mb_frame_flag // bottom macroblock
489 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
491 topleft_xy -= s->mb_stride;
494 ? !curr_mb_frame_flag // bottom macroblock
495 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
497 topright_xy -= s->mb_stride;
499 if (left_mb_frame_flag != curr_mb_frame_flag) {
500 left_xy[1] = left_xy[0] = pair_xy - 1;
501 if (curr_mb_frame_flag) {
522 left_xy[1] += s->mb_stride;
// Remember the resolved neighbor addresses for later users (e.g. filter_mb).
535 h->top_mb_xy = top_xy;
536 h->left_mb_xy[0] = left_xy[0];
537 h->left_mb_xy[1] = left_xy[1];
// Neighbor mb_types: for deblocking any decoded MB counts (slice_table
// entry < 255), otherwise only MBs of the current slice are usable.
539 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
540 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
541 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
542 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
543 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
545 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
546 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
547 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
548 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
549 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra prediction: compute which neighboring samples are usable, as
// bitmasks over the 4x4 sub-blocks (constrained_intra_pred additionally
// excludes inter-coded neighbors).
552 if(IS_INTRA(mb_type)){
553 h->topleft_samples_available=
554 h->top_samples_available=
555 h->left_samples_available= 0xFFFF;
556 h->topright_samples_available= 0xEEEA;
558 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
559 h->topleft_samples_available= 0xB3FF;
560 h->top_samples_available= 0x33FF;
561 h->topright_samples_available= 0x26EA;
564 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
565 h->topleft_samples_available&= 0xDF5F;
566 h->left_samples_available&= 0x5F5F;
570 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
571 h->topleft_samples_available&= 0x7FFF;
573 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
574 h->topright_samples_available&= 0xFBFF;
// Fill the intra4x4 prediction-mode cache from the top and left neighbors
// (-1 = mode unavailable, DC_PRED as fallback depending on neighbor type).
576 if(IS_INTRA4x4(mb_type)){
577 if(IS_INTRA4x4(top_type)){
578 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
579 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
580 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
581 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
584 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
589 h->intra4x4_pred_mode_cache[4+8*0]=
590 h->intra4x4_pred_mode_cache[5+8*0]=
591 h->intra4x4_pred_mode_cache[6+8*0]=
592 h->intra4x4_pred_mode_cache[7+8*0]= pred;
595 if(IS_INTRA4x4(left_type[i])){
596 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
597 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
600 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
605 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
606 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// Fill the non-zero-coefficient-count cache from the neighbors
// (64 = unavailable, see non_zero_count_cache docs).
621 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
623 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
624 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
625 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
626 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
628 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
629 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
631 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
632 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
635 h->non_zero_count_cache[4+8*0]=
636 h->non_zero_count_cache[5+8*0]=
637 h->non_zero_count_cache[6+8*0]=
638 h->non_zero_count_cache[7+8*0]=
640 h->non_zero_count_cache[1+8*0]=
641 h->non_zero_count_cache[2+8*0]=
643 h->non_zero_count_cache[1+8*3]=
644 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
648 for (i=0; i<2; i++) {
650 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
651 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
652 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
653 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
655 h->non_zero_count_cache[3+8*1 + 2*8*i]=
656 h->non_zero_count_cache[3+8*2 + 2*8*i]=
657 h->non_zero_count_cache[0+8*1 + 8*i]=
658 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Neighbor coded-block-pattern values (CABAC context modelling).
665 h->top_cbp = h->cbp_table[top_xy];
666 } else if(IS_INTRA(mb_type)) {
673 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
674 } else if(IS_INTRA(mb_type)) {
680 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
683 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion vector / reference index caches (per list).
688 //FIXME direct mb can skip much of this
689 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
691 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
692 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
693 /*if(!h->mv_cache_clean[list]){
694 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
695 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
696 h->mv_cache_clean[list]= 1;
700 h->mv_cache_clean[list]= 0;
// Top neighbor row of the mv/ref caches.
702 if(IS_INTER(top_type)){
703 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
704 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
705 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
706 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
707 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
708 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
709 h->ref_cache[list][scan8[0] + 0 - 1*8]=
710 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
711 h->ref_cache[list][scan8[0] + 2 - 1*8]=
712 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
714 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
715 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
716 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
717 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
718 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbor column, upper half (left_xy[0]) and lower half (left_xy[1]).
721 //FIXME unify cleanup or sth
722 if(IS_INTER(left_type[0])){
723 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
724 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
725 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
726 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
727 h->ref_cache[list][scan8[0] - 1 + 0*8]=
728 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
730 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
731 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
732 h->ref_cache[list][scan8[0] - 1 + 0*8]=
733 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
736 if(IS_INTER(left_type[1])){
737 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
738 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
739 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
740 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
741 h->ref_cache[list][scan8[0] - 1 + 2*8]=
742 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
744 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
745 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
746 h->ref_cache[list][scan8[0] - 1 + 2*8]=
747 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
748 assert((!left_type[0]) == (!left_type[1]));
// Topleft/topright corners are only needed for MV prediction proper,
// not for deblocking or temporal direct mode.
751 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
754 if(IS_INTER(topleft_type)){
755 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
756 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
757 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
758 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
760 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
761 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
764 if(IS_INTER(topright_type)){
765 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
766 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
767 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
768 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
770 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
771 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Mark the cache cells that never have valid neighbors as unavailable.
775 h->ref_cache[list][scan8[5 ]+1] =
776 h->ref_cache[list][scan8[7 ]+1] =
777 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
778 h->ref_cache[list][scan8[4 ]] =
779 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
780 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
781 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
782 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
783 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
784 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC only: fill the motion-vector-difference cache the same way.
787 /* XXX beurk, Load mvd */
788 if(IS_INTER(topleft_type)){
789 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
790 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
792 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
795 if(IS_INTER(top_type)){
796 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
797 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
798 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
799 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
800 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
802 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
803 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
804 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
805 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
807 if(IS_INTER(left_type[0])){
808 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
809 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
810 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
812 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
813 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
815 if(IS_INTER(left_type[1])){
816 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
817 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
818 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
820 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
821 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
823 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
824 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
825 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
826 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
827 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: fill the per-8x8 direct-mode flag cache from the neighbors.
829 if(h->slice_type == B_TYPE){
830 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
832 if(IS_DIRECT(top_type)){
833 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
834 }else if(IS_8X8(top_type)){
835 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
836 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
837 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
839 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
843 if(IS_DIRECT(left_type[0])){
844 h->direct_cache[scan8[0] - 1 + 0*8]=
845 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
846 }else if(IS_8X8(left_type[0])){
847 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
848 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
849 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
851 h->direct_cache[scan8[0] - 1 + 0*8]=
852 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// How many of the top/left neighbors used the 8x8 transform
// (CABAC context for transform_size_8x8_flag).
860 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Writes the bottom row and right column of the intra4x4 prediction-mode
// cache back into the per-macroblock store, so the MBs below and to the
// right can read them as their top/left neighbors in fill_caches().
863 static inline void write_back_intra_pred_mode(H264Context *h){
864 MpegEncContext * const s = &h->s;
865 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// [0..3]: right column of the cache; [4..6]: bottom row.
867 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
868 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
869 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
870 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
871 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
872 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
873 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
877 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Returns a negative value (via the 'status' lookups) on an invalid
// mode/availability combination; the top[]/left[] tables map each cached
// mode either to itself (0), to a replacement DC variant, or to -1 = error.
879 static inline int check_intra4x4_pred_mode(H264Context *h){
880 MpegEncContext * const s = &h->s;
881 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
882 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Fix up the top row of 4x4 modes when the samples above are unavailable.
885 if(!(h->top_samples_available&0x8000)){
887 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
889 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
892 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Same for the left column.
897 if(!(h->left_samples_available&0x8000)){
899 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
904 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
910 } //FIXME cleanup like next
913 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
// Whole-block (16x16 luma / 8x8 chroma) variant of the check above:
// validates 'mode' and substitutes a restricted DC mode when the top or
// left neighbor samples are unavailable; returns the (possibly replaced)
// mode, or a negative value on error.
915 static inline int check_intra_pred_mode(H264Context *h, int mode){
916 MpegEncContext * const s = &h->s;
917 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
918 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
920 if(mode < 0 || mode > 6) {
921 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
925 if(!(h->top_samples_available&0x8000)){
928 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
933 if(!(h->left_samples_available&0x8000)){
936 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 * gets the predicted intra4x4 prediction mode.
// Prediction is the minimum of the left and top neighbor modes from the
// cache; if either neighbor is unavailable (negative), DC_PRED is used.
947 static inline int pred_intra_mode(H264Context *h, int n){
948 const int index8= scan8[n];
949 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
950 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
951 const int min= FFMIN(left, top);
953 tprintf("mode:%d %d min:%d\n", left ,top, min);
955 if(min<0) return DC_PRED;
// Writes the boundary entries of the non-zero-count cache back to the
// per-macroblock store, mirroring write_back_intra_pred_mode() for nnz.
959 static inline void write_back_non_zero_count(H264Context *h){
960 MpegEncContext * const s = &h->s;
961 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Luma: right column [0..3] and bottom row [4..6] of the cache.
963 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
964 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
965 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
966 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
967 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
968 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
969 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Chroma Cb entries.
971 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
972 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
973 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
// Chroma Cr entries.
975 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
976 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
977 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
981 * gets the predicted number of non zero coefficients.
982 * @param n block index
// Combines the left and top neighbor nnz values from the cache; 64 marks
// an unavailable neighbor, and when a 64 is present the sum is halved
// (rounding up) to fall back to the single available value.
984 static inline int pred_non_zero_count(H264Context *h, int n){
985 const int index8= scan8[n];
986 const int left= h->non_zero_count_cache[index8 - 1];
987 const int top = h->non_zero_count_cache[index8 - 8];
990 if(i<64) i= (i+1)>>1;
992 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the diagonal (top-right, falling back to top-left) neighbor MV
// used as candidate C in median MV prediction. Stores the MV pointer in
// *C and returns the matching reference index.
997 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
998 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1000 if(topright_ref != PART_NOT_AVAILABLE){
1001 *C= h->mv_cache[list][ i - 8 + part_width ];
1002 return topright_ref;
// Top-right unavailable: use the top-left neighbor instead.
1004 tprintf("topright MV not available\n");
1006 *C= h->mv_cache[list][ i - 8 - 1 ];
1007 return h->ref_cache[list][ i - 8 - 1 ];
1012 * gets the predicted MV.
1013 * @param n the block index
1014 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1015 * @param mx the x component of the predicted motion vector
1016 * @param my the y component of the predicted motion vector
// Median motion-vector prediction from neighbors A (left), B (top) and
// C (diagonal, via fetch_diagonal_mv): when more than one neighbor uses
// the requested reference, the component-wise median is taken; with
// exactly one match that neighbor's MV is used directly.
1018 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1019 const int index8= scan8[n];
1020 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1021 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1022 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1023 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1025 int diagonal_ref, match_count;
1027 assert(part_width==1 || part_width==2 || part_width==4);
1037 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1038 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1039 tprintf("pred_motion match_count=%d\n", match_count);
1040 if(match_count > 1){ //most common
1041 *mx= mid_pred(A[0], B[0], C[0]);
1042 *my= mid_pred(A[1], B[1], C[1]);
1043 }else if(match_count==1){
1047 }else if(top_ref==ref){
// No neighbor with matching ref: only the left neighbor available means
// take A outright, otherwise fall back to the median.
1055 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1059 *mx= mid_pred(A[0], B[0], C[0]);
1060 *my= mid_pred(A[1], B[1], C[1]);
1064 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1068 * gets the directionally predicted 16x8 MV.
1069 * @param n the block index
1070 * @param mx the x component of the predicted motion vector
1071 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 16x8 partitions: top partition prefers the
 * top neighbour (B), bottom partition prefers the left neighbour (A);
 * otherwise falls through to the generic median predictor.
 */
1073 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1075 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1076 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1078 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1086 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1087 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1089 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1091 if(left_ref == ref){
/* neither directional shortcut applied -> generic median prediction */
1099 pred_motion(h, n, 4, list, ref, mx, my);
1103 * gets the directionally predicted 8x16 MV.
1104 * @param n the block index
1105 * @param mx the x component of the predicted motion vector
1106 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 8x16 partitions: left partition prefers the
 * left neighbour (A), right partition prefers the diagonal (C); otherwise
 * falls through to the generic median predictor.
 */
1108 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1110 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1111 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1113 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1115 if(left_ref == ref){
1124 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1126 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1128 if(diagonal_ref == ref){
/* neither directional shortcut applied -> generic median prediction */
1136 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P-skip macroblocks: the MV is zero if a neighbour is
 * unavailable or a ref-0 neighbour has a zero MV; otherwise the normal
 * median prediction on list 0 / ref 0 is used.
 */
1139 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1140 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1141 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1143 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1145 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1146 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1147 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1153 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes per-reference distance scale factors for temporal direct mode
 * from POC distances (tb/td), as used to scale co-located MVs.
 */
1158 static inline void direct_dist_scale_factor(H264Context * const h){
1159 const int poc = h->s.current_picture_ptr->poc;
1160 const int poc1 = h->ref_list[1][0].poc;
1162 for(i=0; i<h->ref_count[0]; i++){
1163 int poc0 = h->ref_list[0][i].poc;
1164 int td = clip(poc1 - poc0, -128, 127);
1165 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs: neutral scale (256 == 1.0 in 8.8 fixed point) */
1166 h->dist_scale_factor[i] = 256;
1168 int tb = clip(poc - poc0, -128, 127);
1169 int tx = (16384 + (ABS(td) >> 1)) / td;
1170 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Records the current picture's reference counts/POCs and builds the
 * co-located-ref -> list0 index mapping (map_col_to_list0) needed by
 * temporal direct prediction. Skipped for spatial direct / non-B pictures.
 */
1174 static inline void direct_ref_list_init(H264Context * const h){
1175 MpegEncContext * const s = &h->s;
1176 Picture * const ref1 = &h->ref_list[1][0];
1177 Picture * const cur = s->current_picture_ptr;
1179 if(cur->pict_type == I_TYPE)
1180 cur->ref_count[0] = 0;
1181 if(cur->pict_type != B_TYPE)
1182 cur->ref_count[1] = 0;
1183 for(list=0; list<2; list++){
1184 cur->ref_count[list] = h->ref_count[list];
1185 for(j=0; j<h->ref_count[list]; j++)
1186 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* only temporal direct mode in B slices needs the mapping below */
1188 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1190 for(list=0; list<2; list++){
1191 for(i=0; i<ref1->ref_count[list]; i++){
1192 const int poc = ref1->ref_poc[list][i];
1193 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1194 for(j=0; j<h->ref_count[list]; j++)
1195 if(h->ref_list[list][j].poc == poc){
1196 h->map_col_to_list0[list][i] = j;
/**
 * B-direct prediction (spatial and temporal variants): derives mb/sub-mb
 * types, reference indices and MVs from neighbours (spatial) or from the
 * scaled co-located MVs of ref_list[1][0] (temporal), filling the
 * mv_cache/ref_cache rectangles for the macroblock.
 * NOTE(review): line-elided extract — several else branches and loop
 * headers are missing; code kept byte-identical.
 */
1203 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1204 MpegEncContext * const s = &h->s;
1205 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1206 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1207 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1208 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* co-located MVs and reference indices from the first list-1 reference */
1209 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1210 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1211 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1212 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1213 const int is_b8x8 = IS_8X8(*mb_type);
1217 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1218 /* FIXME save sub mb types from previous frames (or derive from MVs)
1219 * so we know exactly what block size to use */
1220 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1221 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1222 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1223 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1224 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1226 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1227 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1230 *mb_type |= MB_TYPE_DIRECT2;
1232 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct: refs from neighbours, MVs via median prediction ---- */
1234 if(h->direct_spatial_mv_pred){
1239 /* ref = min(neighbors) */
1240 for(list=0; list<2; list++){
1241 int refa = h->ref_cache[list][scan8[0] - 1];
1242 int refb = h->ref_cache[list][scan8[0] - 8];
1243 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1245 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1247 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1249 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1255 if(ref[0] < 0 && ref[1] < 0){
1256 ref[0] = ref[1] = 0;
1257 mv[0][0] = mv[0][1] =
1258 mv[1][0] = mv[1][1] = 0;
1260 for(list=0; list<2; list++){
1262 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1264 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list from the (sub-)mb type */
1269 *mb_type &= ~MB_TYPE_P0L1;
1270 sub_mb_type &= ~MB_TYPE_P0L1;
1271 }else if(ref[0] < 0){
1272 *mb_type &= ~MB_TYPE_P0L0;
1273 sub_mb_type &= ~MB_TYPE_P0L0;
1276 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1278 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
/* co-located block is "moving slowly" (|mv| <= 1, ref 0) -> force zero MV */
1279 if(!IS_INTRA(mb_type_col)
1280 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1281 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1282 && (h->x264_build>33 || !h->x264_build)))){
1284 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1286 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1288 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1290 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1292 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1293 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* 8x8 path: per-partition fill, skipping non-direct sub-blocks */
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1302 h->sub_mb_type[i8] = sub_mb_type;
1304 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1306 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1307 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
1310 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1311 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1312 && (h->x264_build>33 || !h->x264_build)))){
1313 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1314 for(i4=0; i4<4; i4++){
1315 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1316 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1318 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1320 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1326 }else{ /* direct temporal mv pred */
1327 if(IS_16X16(*mb_type)){
1328 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1329 if(IS_INTRA(mb_type_col)){
/* intra co-located block: zero refs and MVs */
1330 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1331 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1332 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1334 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1335 : h->map_col_to_list0[1][l1ref1[0]];
1336 const int dist_scale_factor = h->dist_scale_factor[ref0];
1337 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* scale co-located MV for list0; list1 MV is the difference (spec 8.4.1.2.3) */
1339 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1340 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1341 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1342 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1343 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1346 for(i8=0; i8<4; i8++){
1347 const int x8 = i8&1;
1348 const int y8 = i8>>1;
1349 int ref0, dist_scale_factor;
1350 const int16_t (*l1mv)[2]= l1mv0;
1352 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1354 h->sub_mb_type[i8] = sub_mb_type;
1355 if(IS_INTRA(mb_type_col)){
1356 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1357 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1358 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1359 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1363 ref0 = l1ref0[x8 + y8*h->b8_stride];
1365 ref0 = h->map_col_to_list0[0][ref0];
1367 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1370 dist_scale_factor = h->dist_scale_factor[ref0];
1372 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1373 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1374 for(i4=0; i4<4; i4++){
1375 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1376 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1377 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1378 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1379 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1380 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock MV/ref caches (and CABAC mvd caches) back into
 * the frame-wide motion_val/ref_index/mvd tables; unused lists are zeroed
 * and marked LIST_NOT_USED. For CABAC B slices also records per-8x8
 * direct flags in direct_table.
 * NOTE(review): elided extract — inner y-loops are missing; code verbatim.
 */
1387 static inline void write_back_motion(H264Context *h, int mb_type){
1388 MpegEncContext * const s = &h->s;
1389 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1390 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1393 for(list=0; list<2; list++){
1395 if(!USES_LIST(mb_type, list)){
1396 if(1){ //FIXME skip or never read if mb_type doesn't use it
1398 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1399 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1401 if( h->pps.cabac ) {
1402 /* FIXME needed ? */
1404 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1405 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1409 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1410 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list is used: copy MVs (two 64-bit stores per row) and ref indices */
1417 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1418 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1420 if( h->pps.cabac ) {
1422 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1423 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1427 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1428 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
1432 if(h->slice_type == B_TYPE && h->pps.cabac){
1433 if(IS_8X8(mb_type)){
1434 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1435 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1436 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1442 * Decodes a network abstraction layer unit.
1443 * @param consumed is the number of bytes used as input
1444 * @param length is the length of the array
1445 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1446 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Parses the NAL header byte and un-escapes the RBSP payload (removes
 * 00 00 03 emulation-prevention sequences). Returns src+1 directly when
 * no escape bytes are present; otherwise copies into h->rbsp_buffer.
 * NOTE(review): elided extract — loop headers and returns are missing;
 * code kept byte-identical.
 */
1448 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1452 // src[0]&0x80; //forbidden bit
1453 h->nal_ref_idc= src[0]>>5;
1454 h->nal_unit_type= src[0]&0x1F;
1458 for(i=0; i<length; i++)
1459 printf("%2X ", src[i]);
/* scan for the first 00 00 (01..03) pattern; step 2 is safe because a
 * pattern always contains a zero at an even or odd position */
1461 for(i=0; i+1<length; i+=2){
1462 if(src[i]) continue;
1463 if(i>0 && src[i-1]==0) i--;
1464 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1466 /* startcode, so we must be past the end */
1473 if(i>=length-1){ //no escaped 0
1474 *dst_length= length;
1475 *consumed= length+1; //+1 for the header
1479 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1480 dst= h->rbsp_buffer;
1482 //printf("decoding esc\n");
1485 //remove escapes (very rare 1:2^22)
1486 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1487 if(src[si+2]==3){ //escape
1492 }else //next start code
1496 dst[di++]= src[si++];
1500 *consumed= si + 1;//+1 for the header
1501 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1507 * @param src the data which should be escaped
1508 * @param dst the target buffer, dst+1 == src is allowed as a special case
1509 * @param length the length of the src data
1510 * @param dst_length the length of the dst array
1511 * @returns length of escaped data in bytes or -1 if an error occurred
/**
 * Writes the NAL header byte and escapes the payload (inserts 00 00 03
 * emulation-prevention bytes). Fast path memcpy when no escaping is needed;
 * otherwise builds the escaped stream in h->rbsp_buffer first.
 * NOTE(review): elided extract — returns and loop headers are missing;
 * code kept byte-identical.
 */
1513 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1514 int i, escape_count, si, di;
1518 assert(dst_length>0);
1520 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1522 if(length==0) return 1;
/* count positions needing an emulation-prevention byte */
1525 for(i=0; i<length; i+=2){
1526 if(src[i]) continue;
1527 if(i>0 && src[i-1]==0)
1529 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1535 if(escape_count==0){
1537 memcpy(dst+1, src, length);
/* not enough room for payload + escapes + header byte */
1541 if(length + escape_count + 1> dst_length)
1544 //this should be damn rare (hopefully)
1546 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1547 temp= h->rbsp_buffer;
1548 //printf("encoding esc\n");
1553 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1554 temp[di++]= 0; si++;
1555 temp[di++]= 0; si++;
1557 temp[di++]= src[si++];
1560 temp[di++]= src[si++];
1562 memcpy(dst+1, temp, length+escape_count);
1564 assert(di == length+escape_count);
1570 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of mpeg4
/* Writes rbsp_trailing_bits: a stop bit then zero-padding to the next byte
 * boundary. NOTE(review): elided extract — the stop-bit write is not
 * visible here; code kept byte-identical. */
1572 static void encode_rbsp_trailing(PutBitContext *pb){
1575 length= (-put_bits_count(pb))&7;
1576 if(length) put_bits(pb, length, 0);
1581 * identifies the exact end of the bitstream
1582 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp stop bit to determine the exact bitstream end.
 * NOTE(review): body almost entirely elided in this extract. */
1584 static int decode_rbsp_trailing(uint8_t *src){
1588 tprintf("rbsp trailing %X\n", v);
1598 * idct transforms the 16 dc values and dequantizes them.
1599 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform of the 16 luma DC values followed by
 * dequantization, operating in place on the DC positions of the block
 * array. NOTE(review): elided extract — loop headers and the temp[] store
 * rows are missing; code kept byte-identical.
 */
1601 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1602 const int qmul= dequant_coeff[qp][0];
1605 int temp[16]; //FIXME check if this is a good idea
1606 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1607 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1609 //memset(block, 64, 2*256);
/* horizontal (row) butterflies into temp[] */
1612 const int offset= y_offset[i];
1613 const int z0= block[offset+stride*0] + block[offset+stride*4];
1614 const int z1= block[offset+stride*0] - block[offset+stride*4];
1615 const int z2= block[offset+stride*1] - block[offset+stride*5];
1616 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical (column) butterflies, then dequantize with rounding */
1625 const int offset= x_offset[i];
1626 const int z0= temp[4*0+i] + temp[4*2+i];
1627 const int z1= temp[4*0+i] - temp[4*2+i];
1628 const int z2= temp[4*1+i] - temp[4*3+i];
1629 const int z3= temp[4*1+i] + temp[4*3+i];
1631 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1632 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1633 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1634 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1640 * dct transforms the 16 dc values.
1641 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
 * in place; results are halved (>>1). NOTE(review): elided extract — loop
 * headers and temp[] stores are missing; code kept byte-identical.
 */
1643 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1644 // const int qmul= dequant_coeff[qp][0];
1646 int temp[16]; //FIXME check if this is a good idea
1647 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1648 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* row butterflies into temp[] */
1651 const int offset= y_offset[i];
1652 const int z0= block[offset+stride*0] + block[offset+stride*4];
1653 const int z1= block[offset+stride*0] - block[offset+stride*4];
1654 const int z2= block[offset+stride*1] - block[offset+stride*5];
1655 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* column butterflies, halved on output */
1664 const int offset= x_offset[i];
1665 const int z0= temp[4*0+i] + temp[4*2+i];
1666 const int z1= temp[4*0+i] - temp[4*2+i];
1667 const int z2= temp[4*1+i] - temp[4*3+i];
1668 const int z3= temp[4*1+i] + temp[4*3+i];
1670 block[stride*0 +offset]= (z0 + z3)>>1;
1671 block[stride*2 +offset]= (z1 + z2)>>1;
1672 block[stride*8 +offset]= (z1 - z2)>>1;
1673 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 Hadamard inverse transform + dequantization of the chroma DC values,
 * in place. NOTE(review): elided extract — the e/f intermediate
 * computations are missing; code kept byte-identical.
 */
1681 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1682 const int qmul= dequant_coeff[qp][0];
1683 const int stride= 16*2;
1684 const int xStride= 16;
1687 a= block[stride*0 + xStride*0];
1688 b= block[stride*0 + xStride*1];
1689 c= block[stride*1 + xStride*0];
1690 d= block[stride*1 + xStride*1];
1697 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1698 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1699 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1700 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
/**
 * Forward 2x2 Hadamard transform of the chroma DC values (encoder side),
 * in place. NOTE(review): elided extract — the e/f intermediates are
 * missing; code kept byte-identical.
 */
1704 static void chroma_dc_dct_c(DCTELEM *block){
1705 const int stride= 16*2;
1706 const int xStride= 16;
1709 a= block[stride*0 + xStride*0];
1710 b= block[stride*0 + xStride*1];
1711 c= block[stride*1 + xStride*0];
1712 d= block[stride*1 + xStride*1];
1719 block[stride*0 + xStride*0]= (a+c);
1720 block[stride*0 + xStride*1]= (e+b);
1721 block[stride*1 + xStride*0]= (a-c);
1722 block[stride*1 + xStride*1]= (e-b);
1727 * gets the chroma qp.
/* Maps a (clipped to 0..51) luma QP plus the PPS chroma offset to the
 * chroma QP via the chroma_qp lookup table. */
1729 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1731 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/**
 * Forward 4x4 H.264 integer transform of the pixel difference src1-src2
 * into block[] (encoder residual path): row pass on the differences, then
 * column pass in place. NOTE(review): elided extract — loop headers are
 * missing; code kept byte-identical.
 */
1736 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1738 //FIXME try int temp instead of block
/* row pass over the 4x4 difference */
1741 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1742 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1743 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1744 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1745 const int z0= d0 + d3;
1746 const int z3= d0 - d3;
1747 const int z1= d1 + d2;
1748 const int z2= d1 - d2;
1750 block[0 + 4*i]= z0 + z1;
1751 block[1 + 4*i]= 2*z3 + z2;
1752 block[2 + 4*i]= z0 - z1;
1753 block[3 + 4*i]= z3 - 2*z2;
/* column pass in place */
1757 const int z0= block[0*4 + i] + block[3*4 + i];
1758 const int z3= block[0*4 + i] - block[3*4 + i];
1759 const int z1= block[1*4 + i] + block[2*4 + i];
1760 const int z2= block[1*4 + i] - block[2*4 + i];
1762 block[0*4 + i]= z0 + z1;
1763 block[1*4 + i]= 2*z3 + z2;
1764 block[2*4 + i]= z0 - z1;
1765 block[3*4 + i]= z3 - 2*z2;
1770 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1771 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 block in scan order with dead-zone bias (1/3 intra,
 * 1/6 inter); the DC coefficient may use separate quantization scaling.
 * @return index of the last non-zero coefficient
 * NOTE(review): elided extract — branch/loop scaffolding is missing;
 * code kept byte-identical.
 */
1772 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1774 const int * const quant_table= quant_coeff[qscale];
1775 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* threshold trick: one unsigned compare covers both +/- dead-zone tests */
1776 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1777 const unsigned int threshold2= (threshold1<<1);
/* separate-DC path, variant with reduced shift (luma DC, presumably) */
1783 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1784 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1785 const unsigned int dc_threshold2= (dc_threshold1<<1);
1787 int level= block[0]*quant_coeff[qscale+18][0];
1788 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1790 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1793 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1796 // last_non_zero = i;
/* separate-DC path, variant with increased shift */
1801 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1802 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1803 const unsigned int dc_threshold2= (dc_threshold1<<1);
1805 int level= block[0]*quant_table[0];
1806 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1808 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1811 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1814 // last_non_zero = i;
/* AC coefficients in scan order */
1827 const int j= scantable[i];
1828 int level= block[j]*quant_table[j];
1830 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1831 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1832 if(((unsigned)(level+threshold1))>threshold2){
1834 level= (bias + level)>>QUANT_SHIFT;
1837 level= (bias - level)>>QUANT_SHIFT;
1846 return last_non_zero;
/* 4x4 vertical intra prediction: replicate the row above into all 4 rows
 * (one 32-bit store per row). */
1849 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
1850 const uint32_t a= ((uint32_t*)(src-stride))[0];
1851 ((uint32_t*)(src+0*stride))[0]= a;
1852 ((uint32_t*)(src+1*stride))[0]= a;
1853 ((uint32_t*)(src+2*stride))[0]= a;
1854 ((uint32_t*)(src+3*stride))[0]= a;
/* 4x4 horizontal intra prediction: replicate each left-edge pixel across
 * its row (0x01010101 broadcasts a byte to 4 lanes). */
1857 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1858 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1859 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1860 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1861 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/* 4x4 DC intra prediction: average of the 4 top and 4 left neighbour
 * pixels (rounded), broadcast to the whole block. */
1864 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1865 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1866 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
1868 ((uint32_t*)(src+0*stride))[0]=
1869 ((uint32_t*)(src+1*stride))[0]=
1870 ((uint32_t*)(src+2*stride))[0]=
1871 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the left edge (top unavailable). */
1874 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1875 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1877 ((uint32_t*)(src+0*stride))[0]=
1878 ((uint32_t*)(src+1*stride))[0]=
1879 ((uint32_t*)(src+2*stride))[0]=
1880 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction using only the top edge (left unavailable). */
1883 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1884 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1886 ((uint32_t*)(src+0*stride))[0]=
1887 ((uint32_t*)(src+1*stride))[0]=
1888 ((uint32_t*)(src+2*stride))[0]=
1889 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/* 4x4 DC prediction with no neighbours available: fill with mid-gray 128. */
1892 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1893 ((uint32_t*)(src+0*stride))[0]=
1894 ((uint32_t*)(src+1*stride))[0]=
1895 ((uint32_t*)(src+2*stride))[0]=
1896 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/* Helper macros that load the top-right (t4..t7), left (l0..l3) and top
 * (t0..t3) neighbour pixels into local consts for the 4x4 predictors.
 * NOTE(review): elided extract — the blank lines that terminate each
 * backslash-continued macro are missing here; code kept byte-identical. */
1900 #define LOAD_TOP_RIGHT_EDGE\
1901 const int t4= topright[0];\
1902 const int t5= topright[1];\
1903 const int t6= topright[2];\
1904 const int t7= topright[3];\
1906 #define LOAD_LEFT_EDGE\
1907 const int l0= src[-1+0*stride];\
1908 const int l1= src[-1+1*stride];\
1909 const int l2= src[-1+2*stride];\
1910 const int l3= src[-1+3*stride];\
1912 #define LOAD_TOP_EDGE\
1913 const int t0= src[ 0-1*stride];\
1914 const int t1= src[ 1-1*stride];\
1915 const int t2= src[ 2-1*stride];\
1916 const int t3= src[ 3-1*stride];\
/* 4x4 diagonal down-right intra prediction (3-tap smoothed diagonals from
 * left, top-left and top edges). NOTE(review): elided extract — several
 * anti-diagonal stores are missing; code kept byte-identical. */
1918 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1919 const int lt= src[-1-1*stride];
1923 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1925 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1928 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1932 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1935 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1937 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1938 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* 4x4 diagonal down-left intra prediction from the top and top-right
 * edges. NOTE(review): elided extract — the edge-loading macros and some
 * diagonal stores are missing; code kept byte-identical. */
1941 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1946 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1948 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1951 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1955 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1958 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1960 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1961 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/* 4x4 vertical-right intra prediction: 2-tap averages for even rows,
 * 3-tap filtered values for odd rows. NOTE(review): elided extract —
 * some stores are missing; code kept byte-identical. */
1964 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1965 const int lt= src[-1-1*stride];
1968 const __attribute__((unused)) int unu= l3;
1971 src[1+2*stride]=(lt + t0 + 1)>>1;
1973 src[2+2*stride]=(t0 + t1 + 1)>>1;
1975 src[3+2*stride]=(t1 + t2 + 1)>>1;
1976 src[3+0*stride]=(t2 + t3 + 1)>>1;
1978 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1980 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1982 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1983 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1984 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1985 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* 4x4 vertical-left intra prediction from top and top-right edges.
 * NOTE(review): elided extract — the edge-loading macros and some stores
 * are missing; code kept byte-identical. */
1988 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1991 const __attribute__((unused)) int unu= t7;
1993 src[0+0*stride]=(t0 + t1 + 1)>>1;
1995 src[0+2*stride]=(t1 + t2 + 1)>>1;
1997 src[1+2*stride]=(t2 + t3 + 1)>>1;
1999 src[2+2*stride]=(t3 + t4+ 1)>>1;
2000 src[3+2*stride]=(t4 + t5+ 1)>>1;
2001 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2003 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2005 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2007 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2008 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* 4x4 horizontal-up intra prediction from the left edge.
 * NOTE(review): elided extract — the bottom-row stores (which replicate
 * l3) are missing; code kept byte-identical. */
2011 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2014 src[0+0*stride]=(l0 + l1 + 1)>>1;
2015 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2017 src[0+1*stride]=(l1 + l2 + 1)>>1;
2019 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2021 src[0+2*stride]=(l2 + l3 + 1)>>1;
2023 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* 4x4 horizontal-down intra prediction from left, top-left and top edges.
 * NOTE(review): elided extract — some stores are missing; code kept
 * byte-identical. */
2032 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2033 const int lt= src[-1-1*stride];
2036 const __attribute__((unused)) int unu= t3;
2039 src[2+1*stride]=(lt + l0 + 1)>>1;
2041 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2042 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2043 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2045 src[2+2*stride]=(l0 + l1 + 1)>>1;
2047 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2049 src[2+3*stride]=(l1 + l2+ 1)>>1;
2051 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2052 src[0+3*stride]=(l2 + l3 + 1)>>1;
2053 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/* 16x16 vertical intra prediction: copy the row above into all 16 rows
 * (four 32-bit stores per row). */
2056 static void pred16x16_vertical_c(uint8_t *src, int stride){
2058 const uint32_t a= ((uint32_t*)(src-stride))[0];
2059 const uint32_t b= ((uint32_t*)(src-stride))[1];
2060 const uint32_t c= ((uint32_t*)(src-stride))[2];
2061 const uint32_t d= ((uint32_t*)(src-stride))[3];
2063 for(i=0; i<16; i++){
2064 ((uint32_t*)(src+i*stride))[0]= a;
2065 ((uint32_t*)(src+i*stride))[1]= b;
2066 ((uint32_t*)(src+i*stride))[2]= c;
2067 ((uint32_t*)(src+i*stride))[3]= d;
/* 16x16 horizontal intra prediction: broadcast each left-edge pixel
 * across its row. */
2071 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2074 for(i=0; i<16; i++){
2075 ((uint32_t*)(src+i*stride))[0]=
2076 ((uint32_t*)(src+i*stride))[1]=
2077 ((uint32_t*)(src+i*stride))[2]=
2078 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/* 16x16 DC intra prediction: mean of the 16 top + 16 left neighbours.
 * NOTE(review): elided extract — the top-edge accumulation loop is
 * missing; code kept byte-identical. */
2082 static void pred16x16_dc_c(uint8_t *src, int stride){
2086 dc+= src[-1+i*stride];
2093 dc= 0x01010101*((dc + 16)>>5);
2095 for(i=0; i<16; i++){
2096 ((uint32_t*)(src+i*stride))[0]=
2097 ((uint32_t*)(src+i*stride))[1]=
2098 ((uint32_t*)(src+i*stride))[2]=
2099 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the left edge (top unavailable). */
2103 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2107 dc+= src[-1+i*stride];
2110 dc= 0x01010101*((dc + 8)>>4);
2112 for(i=0; i<16; i++){
2113 ((uint32_t*)(src+i*stride))[0]=
2114 ((uint32_t*)(src+i*stride))[1]=
2115 ((uint32_t*)(src+i*stride))[2]=
2116 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction using only the top edge (left unavailable).
 * NOTE(review): elided extract — the top-edge accumulation loop is
 * missing; code kept byte-identical. */
2120 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2126 dc= 0x01010101*((dc + 8)>>4);
2128 for(i=0; i<16; i++){
2129 ((uint32_t*)(src+i*stride))[0]=
2130 ((uint32_t*)(src+i*stride))[1]=
2131 ((uint32_t*)(src+i*stride))[2]=
2132 ((uint32_t*)(src+i*stride))[3]= dc;
/* 16x16 DC prediction with no neighbours: fill with mid-gray 128. */
2136 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2139 for(i=0; i<16; i++){
2140 ((uint32_t*)(src+i*stride))[0]=
2141 ((uint32_t*)(src+i*stride))[1]=
2142 ((uint32_t*)(src+i*stride))[2]=
2143 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/**
 * 16x16 plane (gradient) intra prediction; when svq3 is set the H/V slope
 * computation follows the SVQ3 variant (including the H/V swap, see the
 * "100% accuracy" note). NOTE(review): elided extract — the svq3/else
 * branch scaffolding and inner a/b updates are partially missing; code
 * kept byte-identical.
 */
2147 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2150 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2151 const uint8_t * const src0 = src+7-stride;
2152 const uint8_t *src1 = src+8*stride-1;
2153 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
/* weighted sums of border differences give the horizontal/vertical slope */
2154 int H = src0[1] - src0[-1];
2155 int V = src1[0] - src2[ 0];
2156 for(k=2; k<=8; ++k) {
2157 src1 += stride; src2 -= stride;
2158 H += k*(src0[k] - src0[-k]);
2159 V += k*(src1[0] - src2[ 0]);
2162 H = ( 5*(H/4) ) / 16;
2163 V = ( 5*(V/4) ) / 16;
2165 /* required for 100% accuracy */
2166 i = H; H = V; V = i;
2168 H = ( 5*H+32 ) >> 6;
2169 V = ( 5*V+32 ) >> 6;
2172 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2173 for(j=16; j>0; --j) {
2176 for(i=-16; i<0; i+=4) {
2177 src[16+i] = cm[ (b ) >> 5 ];
2178 src[17+i] = cm[ (b+ H) >> 5 ];
2179 src[18+i] = cm[ (b+2*H) >> 5 ];
2180 src[19+i] = cm[ (b+3*H) >> 5 ];
/* Standard H.264 plane prediction: the compat helper with svq3 disabled. */
2187 static void pred16x16_plane_c(uint8_t *src, int stride){
2188 pred16x16_plane_compat_c(src, stride, 0);
/* 8x8 (chroma) vertical intra prediction: copy the row above into all rows. */
2191 static void pred8x8_vertical_c(uint8_t *src, int stride){
2193 const uint32_t a= ((uint32_t*)(src-stride))[0];
2194 const uint32_t b= ((uint32_t*)(src-stride))[1];
2197 ((uint32_t*)(src+i*stride))[0]= a;
2198 ((uint32_t*)(src+i*stride))[1]= b;
/* 8x8 (chroma) horizontal intra prediction: broadcast each left pixel. */
2202 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2206 ((uint32_t*)(src+i*stride))[0]=
2207 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/* 8x8 DC prediction with no neighbours: fill with mid-gray 128. */
2211 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2215 ((uint32_t*)(src+i*stride))[0]=
2216 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/* 8x8 DC prediction from the left edge only: separate DC for the top and
 * bottom 4-row halves. */
2220 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2226 dc0+= src[-1+i*stride];
2227 dc2+= src[-1+(i+4)*stride];
2229 dc0= 0x01010101*((dc0 + 2)>>2);
2230 dc2= 0x01010101*((dc2 + 2)>>2);
2233 ((uint32_t*)(src+i*stride))[0]=
2234 ((uint32_t*)(src+i*stride))[1]= dc0;
2237 ((uint32_t*)(src+i*stride))[0]=
2238 ((uint32_t*)(src+i*stride))[1]= dc2;
/* 8x8 DC prediction from the top edge only: separate DC for the left and
 * right 4-column halves. */
2242 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2248 dc0+= src[i-stride];
2249 dc1+= src[4+i-stride];
2251 dc0= 0x01010101*((dc0 + 2)>>2);
2252 dc1= 0x01010101*((dc1 + 2)>>2);
2255 ((uint32_t*)(src+i*stride))[0]= dc0;
2256 ((uint32_t*)(src+i*stride))[1]= dc1;
2259 ((uint32_t*)(src+i*stride))[0]= dc0;
2260 ((uint32_t*)(src+i*stride))[1]= dc1;
/**
 * 8x8 chroma DC prediction with both top and left neighbours available.
 * Quadrant DC values per H.264:
 *   top-left:     average of upper-left neighbours (top 0-3 + left 0-3)
 *   top-right:    average of top 4-7 only
 *   bottom-left:  average of left 4-7 only
 *   bottom-right: average of top 4-7 and left 4-7
 * (Declarations, loop headers and braces were lost in this copy; restored.)
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2, dc3;

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    /* dc3 uses the raw sums, so compute it before dc1/dc2 are splatted */
    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
    dc0= 0x01010101*((dc0 + 4)>>3);
    dc1= 0x01010101*((dc1 + 2)>>2);
    dc2= 0x01010101*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2290 static void pred8x8_plane_c(uint8_t *src, int stride){
2293 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2294 const uint8_t * const src0 = src+3-stride;
2295 const uint8_t *src1 = src+4*stride-1;
2296 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2297 int H = src0[1] - src0[-1];
2298 int V = src1[0] - src2[ 0];
2299 for(k=2; k<=4; ++k) {
2300 src1 += stride; src2 -= stride;
2301 H += k*(src0[k] - src0[-k]);
2302 V += k*(src1[0] - src2[ 0]);
2304 H = ( 17*H+16 ) >> 5;
2305 V = ( 17*V+16 ) >> 5;
2307 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2308 for(j=8; j>0; --j) {
2311 src[0] = cm[ (b ) >> 5 ];
2312 src[1] = cm[ (b+ H) >> 5 ];
2313 src[2] = cm[ (b+2*H) >> 5 ];
2314 src[3] = cm[ (b+3*H) >> 5 ];
2315 src[4] = cm[ (b+4*H) >> 5 ];
2316 src[5] = cm[ (b+5*H) >> 5 ];
2317 src[6] = cm[ (b+6*H) >> 5 ];
2318 src[7] = cm[ (b+7*H) >> 5 ];
/* Pixel accessor for the 8x8 luma prediction helpers below; expects
   `src` and `stride` to be in scope at the expansion site. */
#define SRC(x,y) src[(x)+(y)*stride]
/* NOTE(review): the `#define PL(y) \` introducer for the next line appears
   to be missing from this copy; PL(y) computes the 121-filtered left
   neighbour sample l<y>. */
const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Load the 8 filtered left-neighbour samples into l0..l7; l0 substitutes
   the top-left sample when it is available, l7 uses 1-3 weighting at the
   bottom edge. */
#define PREDICT_8x8_LOAD_LEFT \
const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+ 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* NOTE(review): the `#define PT(x) \` introducer for the next line appears
   to be missing; PT(x) computes the 121-filtered top neighbour sample t<x>. */
const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load the 8 filtered top-neighbour samples into t0..t7, substituting
   edge samples when top-left/top-right are unavailable. */
#define PREDICT_8x8_LOAD_TOP \
const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+ 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+ 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* NOTE(review): the `#define PTR(x) \` introducer for the next line appears
   to be missing; PTR(x) filters one top-right neighbour into t<x>. */
t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load t8..t15 from the top-right block when present, otherwise replicate
   the last top sample as the spec requires. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
int t8, t9, t10, t11, t12, t13, t14, t15; \
if(has_topright) { \
PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
} else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
#define PREDICT_8x8_LOAD_TOPLEFT \
const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fill the whole 8x8 block with the 32-bit replicated DC value v.
   NOTE(review): this macro looks truncated in this copy — the `int y;`
   declaration, the `src += stride;` advance and the closing lines of the
   continuation appear to be missing. */
#define PREDICT_8x8_DC(v) \
for( y = 0; y < 8; y++ ) { \
((uint32_t*)src)[0] = \
((uint32_t*)src)[1] = v; \
/**
 * 8x8 luma DC prediction with no neighbours available: fill with 128
 * (0x80 replicated into a 32-bit word).
 * (Function braces were lost in this copy; restored.)
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_DC(0x80808080);
}
2365 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2367 PREDICT_8x8_LOAD_LEFT;
2368 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2371 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2373 PREDICT_8x8_LOAD_TOP;
2374 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2377 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2379 PREDICT_8x8_LOAD_LEFT;
2380 PREDICT_8x8_LOAD_TOP;
2381 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2382 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2385 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2387 PREDICT_8x8_LOAD_LEFT;
2388 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2389 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2390 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2393 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2396 PREDICT_8x8_LOAD_TOP;
2405 for( y = 1; y < 8; y++ )
2406 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2408 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2410 PREDICT_8x8_LOAD_TOP;
2411 PREDICT_8x8_LOAD_TOPRIGHT;
2412 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2413 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2414 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2415 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2416 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2417 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2418 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2419 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2420 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2421 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2422 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2423 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2424 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2425 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2426 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2428 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2430 PREDICT_8x8_LOAD_TOP;
2431 PREDICT_8x8_LOAD_LEFT;
2432 PREDICT_8x8_LOAD_TOPLEFT;
2433 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2434 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2435 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2436 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2437 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2438 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2439 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2440 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2441 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2442 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2443 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2444 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2445 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2446 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2447 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2450 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2452 PREDICT_8x8_LOAD_TOP;
2453 PREDICT_8x8_LOAD_LEFT;
2454 PREDICT_8x8_LOAD_TOPLEFT;
2455 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2456 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2457 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2458 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2459 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2460 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2461 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2462 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2463 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2464 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2465 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2466 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2467 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2468 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2469 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2470 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2471 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2473 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2474 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2475 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2476 SRC(7,0)= (t6 + t7 + 1) >> 1;
2478 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2480 PREDICT_8x8_LOAD_TOP;
2481 PREDICT_8x8_LOAD_LEFT;
2482 PREDICT_8x8_LOAD_TOPLEFT;
2483 SRC(0,7)= (l6 + l7 + 1) >> 1;
2484 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2485 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2486 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2487 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2488 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2489 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2490 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2491 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2492 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2493 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2494 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2495 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2496 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2497 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2498 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2499 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2500 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2501 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2502 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2503 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2504 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2506 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2508 PREDICT_8x8_LOAD_TOP;
2509 PREDICT_8x8_LOAD_TOPRIGHT;
2510 SRC(0,0)= (t0 + t1 + 1) >> 1;
2511 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2512 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2513 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2514 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2515 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2516 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2517 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2518 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2519 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2520 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2521 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2522 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2523 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2524 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2525 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2526 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2527 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2528 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2529 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2530 SRC(7,6)= (t10 + t11 + 1) >> 1;
2531 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2533 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2535 PREDICT_8x8_LOAD_LEFT;
2536 SRC(0,0)= (l0 + l1 + 1) >> 1;
2537 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2538 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2539 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2540 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2541 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2542 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2543 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2544 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2545 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2546 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2547 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2548 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2549 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2550 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2551 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2552 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2553 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2555 #undef PREDICT_8x8_LOAD_LEFT
2556 #undef PREDICT_8x8_LOAD_TOP
2557 #undef PREDICT_8x8_LOAD_TOPLEFT
2558 #undef PREDICT_8x8_LOAD_TOPRIGHT
2559 #undef PREDICT_8x8_DC
/**
 * Motion compensation for one prediction direction of one partition.
 *
 * Derives the quarter-pel luma / eighth-pel chroma source position from
 * the cached motion vector, falls back to ff_emulated_edge_mc() when the
 * reference block (plus interpolation margin) would read outside the
 * padded picture, then runs the given qpel and chroma MC functions.
 *
 * NOTE(review): this copy of the function looks truncated — the closing
 * braces of the edge-emulation blocks, the `emu` handling and the
 * `if(!square)` guard around the second qpix_op call appear to be
 * missing. The comments below describe only the visible statements.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    /* motion vector in quarter-pel units, including the partition offset */
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    /* fractional part selects one of the 16 qpel interpolation functions */
    const int luma_xy= (mx&3) + ((my&3)<<2);
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
    int extra_height= extra_width;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height;

    assert(pic->data[0]);

    /* sub-pel interpolation needs extra source pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    /* reference block (with filter margin) outside the padded picture:
     * build a padded copy in edge_emu_buffer and read from there */
    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
        src_y= s->edge_emu_buffer + 2 + 2*s->linesize;

    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?

        /* second half of a non-square luma partition */
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);

    if(s->flags&CODEC_FLAG_GRAY) return;

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;

    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);

        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;

    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition.
 * List 0 is predicted with the `put` functions; a list-1 prediction then
 * averages on top using the `avg` functions, giving standard bi-prediction.
 *
 * NOTE(review): the `if(list0){ ... }` / `if(list1){ ... }` wrappers
 * around the two mc_dir_part() calls (and the switch of qpix_op to
 * qpix_avg) appear to have been lost in this copy of the file.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                      uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      int x_offset, int y_offset,
                      qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                      qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                      int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op= qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance destinations to this partition; luma has 2x chroma resolution */
    dest_y += 2*x_offset + 2*y_offset*s-> linesize;
    dest_cb += x_offset + y_offset*s->uvlinesize;
    dest_cr += x_offset + y_offset*s->uvlinesize;
    /* make the offsets absolute within the picture (8-pel chroma units) */
    x_offset += 8*s->mb_x;
    y_offset += 8*s->mb_y;

        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);

        /* after a list-0 prediction, list 1 must average into the result */
        chroma_op= chroma_avg;

        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);
/**
 * Weighted (explicit or implicit) motion compensation for one partition.
 * Bi-directional: list 0 is predicted into dest, list 1 into a scratch
 * buffer, then the two are blended with the biweight functions.
 * Uni-directional: predict normally, then apply weight/offset in place.
 *
 * NOTE(review): the `if(list0 && list1){ ... }else{ ... }` structure and
 * several closing braces appear to have been lost in this copy; the
 * uni-directional branch below is the `else` arm.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                      uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      int x_offset, int y_offset,
                      qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                      h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                      h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                      int list0, int list1){
    MpegEncContext * const s = &h->s;

    /* advance destinations to this partition; luma has 2x chroma resolution */
    dest_y += 2*x_offset + 2*y_offset*s-> linesize;
    dest_cb += x_offset + y_offset*s->uvlinesize;
    dest_cr += x_offset + y_offset*s->uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*s->mb_y;

        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
        uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list 0 into dest, list 1 into the scratchpad; blended below */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weights depend only on the reference pair; the
             * log2 denominator is fixed at 5 (weights sum to 64) */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
            /* explicit bipred weights/offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
        /* uni-directional: predict, then weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2719 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2720 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2721 int x_offset, int y_offset,
2722 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2723 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2724 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2725 int list0, int list1){
2726 if((h->use_weight==2 && list0 && list1
2727 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2728 || h->use_weight==1)
2729 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2730 x_offset, y_offset, qpix_put, chroma_put,
2731 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2733 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2734 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Inter prediction (motion compensation) for a whole macroblock,
 * dispatching by partition shape: 16x16, 16x8 (top/bottom), 8x16
 * (left/right), or 8x8 with sub-partitions 8x8 / 8x4 / 4x8 / 4x4.
 * The weight_op/weight_avg indices select the function matching each
 * partition size.
 *
 * NOTE(review): the partition loops (`for(i=0; i<4; i++)` over 8x8 blocks,
 * `for(j=0; j<4; j++)` over 4x4 sub-blocks), the `int n= 4*i;` index and
 * several closing braces appear to have been lost in this copy.
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* top and bottom 16x8 halves; delta 8 covers the second luma half */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* left and right 8x16 halves; delta is a row offset in bytes */
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));

    assert(IS_8X8(mb_type));

    /* per-8x8 sub-partition dispatch (loop header elided in this copy) */
    const int sub_mb_type= h->sub_mb_type[i];

    int x_offset= (i&1)<<2;
    int y_offset= (i&2)<<1;

    if(IS_SUB_8X8(sub_mb_type)){
        mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[3], &weight_avg[3],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
    }else if(IS_SUB_8X4(sub_mb_type)){
        mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                &weight_op[4], &weight_avg[4],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
        mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                &weight_op[4], &weight_avg[4],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
    }else if(IS_SUB_4X8(sub_mb_type)){
        mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                &weight_op[5], &weight_avg[5],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
        mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                &weight_op[5], &weight_avg[5],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));

        assert(IS_SUB_4X4(sub_mb_type));

        /* four 4x4 blocks (inner loop header elided in this copy) */
        int sub_x_offset= x_offset + 2*(j&1);
        int sub_y_offset= y_offset +   (j&2);
        mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                &weight_op[6], &weight_avg[6],
                IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/**
 * Build the static CAVLC tables: coeff_token (chroma DC + 4 luma tables),
 * total_zeros (chroma DC and regular), and run_before (run_vlc/run7_vlc).
 * Guarded by a static `done` flag so initialization happens only once.
 *
 * NOTE(review): the `if(!done){ int i; done = 1; ... }` wrapper and the
 * `for` loop headers around the per-table init_vlc() calls appear to have
 * been lost in this copy.
 */
static void decode_init_vlc(H264Context *h){
    static int done = 0;

        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1, 1);

        /* one coeff_token table per nC context */
        init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                 &coeff_token_len [i][0], 1, 1,
                 &coeff_token_bits[i][0], 1, 1, 1);

        init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                 &chroma_dc_total_zeros_len [i][0], 1, 1,
                 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);

        for(i=0; i<15; i++){
            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1, 1);

            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1, 1);

        /* runs of 7 or more share one larger table */
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1, 1);
2860 * Sets the intra prediction function pointers.
2862 static void init_pred_ptrs(H264Context *h){
2863 // MpegEncContext * const s = &h->s;
2865 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2866 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2867 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2868 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2869 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2870 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2871 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2872 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2873 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2874 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2875 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2876 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2878 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2879 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2880 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2881 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2882 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2883 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2884 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2885 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2886 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2887 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2888 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2889 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2891 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2892 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2893 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2894 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2895 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2896 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2897 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2899 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2900 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2901 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2902 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2903 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2904 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2905 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2908 static void free_tables(H264Context *h){
2909 av_freep(&h->intra4x4_pred_mode);
2910 av_freep(&h->chroma_pred_mode_table);
2911 av_freep(&h->cbp_table);
2912 av_freep(&h->mvd_table[0]);
2913 av_freep(&h->mvd_table[1]);
2914 av_freep(&h->direct_table);
2915 av_freep(&h->non_zero_count);
2916 av_freep(&h->slice_table_base);
2917 av_freep(&h->top_borders[1]);
2918 av_freep(&h->top_borders[0]);
2919 h->slice_table= NULL;
2921 av_freep(&h->mb2b_xy);
2922 av_freep(&h->mb2b8_xy);
2924 av_freep(&h->dequant4_coeff);
2925 av_freep(&h->dequant8_coeff);
2927 av_freep(&h->s.obmc_scratchpad);
2932 * needs width/height
/**
 * Allocate the per-context tables; needs width/height (mb_stride etc.)
 * to be known. CHECKED_ALLOCZ presumably jumps to an error path on
 * failure — TODO confirm, its definition is not visible here.
 *
 * NOTE(review): this copy looks truncated — the `int x, y, q;`
 * declarations, the `fail:` cleanup path, the return statements, loop
 * headers for the dequant init and several closing braces appear to be
 * missing.
 */
static int alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra macroblock row for the -1 border used by slice_table */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);

    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))

    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))

    if( h->pps.cabac ) {
        /* CABAC needs per-mb chroma mode, mvd and direct-mode tables */
        CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
        CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
        CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
        CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));

    /* slice_table: -1 marks "no slice"; offset so [-1] rows are valid */
    memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
    h->slice_table= h->slice_table_base + s->mb_stride + 1;

    /* macroblock index -> motion-vector / 8x8-block index luts */
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;
            const int b8_xy= 2*x + 2*y*h->b8_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            h->mb2b8_xy[mb_xy]= b8_xy;

    /* dequant tables for all 52 qp values */
    CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
    CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
    memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
    for(q=0; q<52; q++){
        int shift = div6[q];

        if(shift >= 2) // qp<12 are shifted during dequant
            h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
                dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;

    if(h->sps.transform_bypass){
        /* lossless mode: neutral dequant factors */
            h->dequant4_coeff[0][x] = 1;
            h->dequant8_coeff[0][x] = 1<<2;

    /* allocated lazily in frame_start() once linesize is known */
    s->obmc_scratchpad = NULL;
/**
 * Initialization shared by decoder (and would-be encoder) init: copy
 * dimensions and codec id from the AVCodecContext into the MpegEncContext
 * and set basic decoding flags.
 *
 * NOTE(review): trailing statements (and the closing brace) appear to be
 * missing from this copy — presumably at least a call to init_pred_ptrs();
 * verify against the complete source.
 */
static void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    s->unrestricted_mv=1;
    s->decode=1; //FIXME
/**
 * AVCodec init callback: set up MPV defaults, H.264 output format and
 * workaround flags, and start detecting AVC-style ("mp4") extradata,
 * recognized by a leading version byte of 1.
 *
 * NOTE(review): this copy is truncated mid-function — the body of the
 * extradata branch, further init calls and the return statement are not
 * visible here.
 */
static int decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // s->decode_mb= ff_h263_decode_mb;
    avctx->pix_fmt= PIX_FMT_YUV420P;

    /* AVCDecoderConfigurationRecord extradata starts with version == 1 */
    if(avctx->extradata_size > 0 && avctx->extradata &&
       *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: start the MPV frame and error resilience, then compute
 * the per-block pixel offsets (block_offset) for the scan8 ordering.
 * Entries 0-15 are luma, 16-23 chroma; entries 24+ are the same offsets
 * with doubled row stride (used for field/interlaced access).
 *
 * NOTE(review): the `int i;` declaration, the chroma `for(i=0; i<4; i++)`
 * loop header, several closing braces and the end of the function appear
 * to be missing from this copy.
 */
static void frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;

    MPV_frame_start(s, s->avctx);
    ff_er_frame_start(s);

    assert(s->linesize && s->uvlinesize);

    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);

        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    if(!s->obmc_scratchpad)
        s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);

    // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Save this macroblock's right column (into left_border) and bottom row
 * (into top_borders[0]) so the deblocking filter of the neighbouring
 * macroblocks can still see the unfiltered samples.
 *
 * NOTE(review): the `int i;` declaration, the `src_y` adjustment, the
 * chroma loop header and several closing braces appear to be missing from
 * this copy.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
    MpegEncContext * const s = &h->s;

    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    h->left_border[0]= h->top_borders[0][s->mb_x][15];
    for(i=1; i<17; i++){
        h->left_border[i]= src_y[15+i*  linesize];

    /* bottom luma row, copied as two 64-bit words */
    *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
    *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
        h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
            h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
            h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
        /* bottom chroma rows */
        *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (or copy, depending on `xchg`) the saved unfiltered border
 * samples with the current macroblock edges, so intra prediction can use
 * unfiltered neighbours while the deblocked picture is kept.
 * Deblocking of the left/top edge only happens when a neighbour exists
 * (deblock_left / deblock_top).
 *
 * NOTE(review): this copy looks truncated — the declarations of `i`,
 * `temp8`/`temp64`, the body of the XCHG macro, and several closing
 * braces appear to be missing.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
    MpegEncContext * const s = &h->s;
    int deblock_left = (s->mb_x > 0);
    int deblock_top =  (s->mb_y > 0);

    /* step back to the row/column holding the border samples */
    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

/* NOTE(review): the macro body after the backslash is not visible in this
   copy; presumably swap-or-copy of a and b via temporary t — confirm. */
#define XCHG(a,b,t,xchg)\
for(i = !deblock_top; i<17; i++){
        XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);

        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);

    if(!(s->flags&CODEC_FLAG_GRAY)){
            for(i = !deblock_top; i<9; i++){
                XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
                XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);

            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border: saves borders for a macroblock PAIR
// (two stacked MBs), hence the doubled row/column counts (34 luma rows,
// two top_borders planes [0] and [1]).
// NOTE(review): interior lines (declarations, closing braces) are missing
// from this extracted chunk.
3143 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3144 MpegEncContext * const s = &h->s;
// step back two rows: one saved line per macroblock of the pair
3147 src_y -= 2 * linesize;
3148 src_cb -= 2 * uvlinesize;
3149 src_cr -= 2 * uvlinesize;
3151 // There are two lines saved, the line above the top macroblock of a pair,
3152 // and the line above the bottom macroblock
3153 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3154 h->left_border[1]= h->top_borders[1][s->mb_x][15];
// 2..33: right luma column of both MBs of the pair
3155 for(i=2; i<34; i++){
3156 h->left_border[i]= src_y[15+i* linesize];
// bottom two luma rows of the pair (rows 32 and 33 below src_y)
3159 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3161 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3162 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3164 if(!(s->flags&CODEC_FLAG_GRAY)){
// chroma left borders live after the 34 luma entries (offsets 34 / 34+18)
3165 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3166 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3167 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3168 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3169 for(i=2; i<18; i++){
3170 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3171 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3173 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3174 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3175 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3176 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF variant of xchg_mb_border: swaps/copies saved borders for a
// macroblock pair (two top_borders planes, 34-entry left border).
// NOTE(review): the XCHG macro body and several closing braces are missing
// from this extracted chunk.
3180 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3181 MpegEncContext * const s = &h->s;
3184 int deblock_left = (s->mb_x > 0);
3185 int deblock_top = (s->mb_y > 0);
3187 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// move to two rows above and one column left of the pair
3189 src_y -= 2 * linesize + 1;
3190 src_cb -= 2 * uvlinesize + 1;
3191 src_cr -= 2 * uvlinesize + 1;
3193 #define XCHG(a,b,t,xchg)\
// skip the first two entries when there is no top neighbour
3200 for(i = (!deblock_top)<<1; i<34; i++){
3201 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3206 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3207 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3208 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3209 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3212 if(!(s->flags&CODEC_FLAG_GRAY)){
3214 for(i = (!deblock_top) << 1; i<18; i++){
3215 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3216 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3220 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3221 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3222 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3223 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level decode of one macroblock: computes destination pointers,
// handles PCM passthrough, intra prediction (4x4/8x8/16x16), inter motion
// compensation, IDCT residual add for luma and chroma, and finally border
// backup plus the in-loop deblocking filter (with an MBAFF pair path).
// Shared between H.264 proper and SVQ3 (selected via s->codec_id).
// NOTE(review): many interior lines (declarations, else branches, closing
// braces) are missing from this extracted chunk.
3228 static void hl_decode_mb(H264Context *h){
3229 MpegEncContext * const s = &h->s;
3230 const int mb_x= s->mb_x;
3231 const int mb_y= s->mb_y;
3232 const int mb_xy= mb_x + mb_y*s->mb_stride;
3233 const int mb_type= s->current_picture.mb_type[mb_xy];
3234 uint8_t *dest_y, *dest_cb, *dest_cr;
3235 int linesize, uvlinesize /*dct_offset*/;
3237 int *block_offset = &h->block_offset[0];
3238 const unsigned int bottom = mb_y & 1;
// lossless path: qscale==0 with the SPS transform-bypass flag set
3239 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3240 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination pointers for this MB in the current picture planes
3245 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3246 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3247 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// field macroblocks address every second line -> double the strides
3249 if (h->mb_field_decoding_flag) {
3250 linesize = s->linesize * 2;
3251 uvlinesize = s->uvlinesize * 2;
3252 block_offset = &h->block_offset[24];
3253 if(mb_y&1){ //FIXME move out of this func?
3254 dest_y -= s->linesize*15;
3255 dest_cb-= s->uvlinesize*7;
3256 dest_cr-= s->uvlinesize*7;
3259 linesize = s->linesize;
3260 uvlinesize = s->uvlinesize;
3261 // dct_offset = s->linesize * 16;
// pick the residual-add routine: plain pixel add for bypass, else IDCT+add,
// each in 4x4 or 8x8 flavour depending on the transform size of this MB
3264 idct_add = transform_bypass
3265 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3266 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
3268 if (IS_INTRA_PCM(mb_type)) {
3271 // The pixels are stored in h->mb array in the same order as levels,
3272 // copy them in output in the correct order.
3273 for(i=0; i<16; i++) {
3274 for (y=0; y<4; y++) {
3275 for (x=0; x<4; x++) {
3276 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
// blocks 16..19 are Cb, 20..23 are Cr (same layout as coefficient blocks)
3280 for(i=16; i<16+4; i++) {
3281 for (y=0; y<4; y++) {
3282 for (x=0; x<4; x++) {
3283 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3287 for(i=20; i<20+4; i++) {
3288 for (y=0; y<4; y++) {
3289 for (x=0; x<4; x++) {
3290 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3295 if(IS_INTRA(mb_type)){
// swap in the unfiltered neighbour pixels so intra prediction does not
// read already-deblocked samples; swapped back after prediction below
3296 if(h->deblocking_filter) {
3297 if (h->mb_aff_frame) {
3299 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3301 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3305 if(!(s->flags&CODEC_FLAG_GRAY)){
3306 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3307 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3310 if(IS_INTRA4x4(mb_type)){
3312 if(IS_8x8DCT(mb_type)){
// 8x8 intra: one prediction + residual per 4-block group
3313 for(i=0; i<16; i+=4){
3314 uint8_t * const ptr= dest_y + block_offset[i];
3315 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3316 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3317 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3318 if(h->non_zero_count_cache[ scan8[i] ])
3319 idct_add(ptr, h->mb + i*16, linesize);
3322 for(i=0; i<16; i++){
3323 uint8_t * const ptr= dest_y + block_offset[i];
3325 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// these modes need the top-right samples; when unavailable, replicate
// the last available top pixel (spec-mandated padding)
3328 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3329 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3330 assert(mb_y || linesize <= block_offset[i]);
3331 if(!topright_avail){
3332 tr= ptr[3 - linesize]*0x01010101;
3333 topright= (uint8_t*) &tr;
3335 topright= ptr + 4 - linesize;
3339 h->pred4x4[ dir ](ptr, topright, linesize);
3340 if(h->non_zero_count_cache[ scan8[i] ]){
3341 if(s->codec_id == CODEC_ID_H264)
3342 idct_add(ptr, h->mb + i*16, linesize);
3344 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// intra 16x16: whole-MB prediction plus separate luma DC transform
3349 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3350 if(s->codec_id == CODEC_ID_H264){
3351 if(!transform_bypass)
3352 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
3354 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// restore the deblocked border pixels swapped out above
3356 if(h->deblocking_filter) {
3357 if (h->mb_aff_frame) {
3359 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3360 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3361 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3363 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3367 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// inter macroblock: motion compensation (H.264 only; SVQ3 MC is elsewhere)
3370 }else if(s->codec_id == CODEC_ID_H264){
3371 hl_motion(h, dest_y, dest_cb, dest_cr,
3372 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3373 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3374 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add the luma residual (intra16x16 and inter; intra4x4 added it above)
3378 if(!IS_INTRA4x4(mb_type)){
3379 if(s->codec_id == CODEC_ID_H264){
3380 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3381 for(i=0; i<16; i+=di){
3382 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3383 uint8_t * const ptr= dest_y + block_offset[i];
3384 idct_add(ptr, h->mb + i*16, linesize);
3388 for(i=0; i<16; i++){
3389 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3390 uint8_t * const ptr= dest_y + block_offset[i];
3391 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residual: always 4x4 transform, with separate 2x2 DC dequant
3397 if(!(s->flags&CODEC_FLAG_GRAY)){
3398 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3399 if(!transform_bypass){
3400 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
3401 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
3403 if(s->codec_id == CODEC_ID_H264){
3404 for(i=16; i<16+4; i++){
3405 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3406 uint8_t * const ptr= dest_cb + block_offset[i];
3407 idct_add(ptr, h->mb + i*16, uvlinesize);
3410 for(i=20; i<20+4; i++){
3411 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3412 uint8_t * const ptr= dest_cr + block_offset[i];
3413 idct_add(ptr, h->mb + i*16, uvlinesize);
3417 for(i=16; i<16+4; i++){
3418 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3419 uint8_t * const ptr= dest_cb + block_offset[i];
3420 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3423 for(i=20; i<20+4; i++){
3424 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3425 uint8_t * const ptr= dest_cr + block_offset[i];
3426 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// deblocking: for MBAFF, only run once per pair (when on the bottom MB),
// filtering both the top and bottom macroblock of the pair
3432 if(h->deblocking_filter) {
3433 if (h->mb_aff_frame) {
3434 const int mb_y = s->mb_y - 1;
3435 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3436 const int mb_xy= mb_x + mb_y*s->mb_stride;
3437 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3438 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// debug-only sentinel pixel to detect unexpected filter writes
3439 uint8_t tmp = s->current_picture.data[1][384];
3440 if (!bottom) return;
3441 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3442 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3443 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3445 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3446 // TODO deblock a pair
3449 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3450 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3451 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3452 if (tmp != s->current_picture.data[1][384]) {
3453 tprintf("modified pixel 8,1 (1)\n");
3457 tprintf("call mbaff filter_mb\n");
3458 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3459 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3460 if (tmp != s->current_picture.data[1][384]) {
3461 tprintf("modified pixel 8,1 (2)\n");
// non-MBAFF path: back up borders then filter this single macroblock
3464 tprintf("call filter_mb\n");
3465 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3466 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3467 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3473 * fills the default_ref_list.
// Build the default (unreordered) reference picture lists.
// For B slices: short-term refs are sorted by POC, then L0/L1 are filled
// walking outward from the current picture's POC in opposite directions,
// followed by long-term refs. For P slices: short-term refs in decode
// order, then long-term refs. NOTE(review): interior lines (declarations,
// else branches, closing braces) are missing from this extracted chunk.
3475 static int fill_default_ref_list(H264Context *h){
3476 MpegEncContext * const s = &h->s;
3478 int smallest_poc_greater_than_current = -1;
3479 Picture sorted_short_ref[32];
3481 if(h->slice_type==B_TYPE){
3485 /* sort frame according to poc in B slice */
// selection sort: repeatedly pick the smallest POC above the last one taken
3486 for(out_i=0; out_i<h->short_ref_count; out_i++){
3488 int best_poc=INT_MAX;
3490 for(i=0; i<h->short_ref_count; i++){
3491 const int poc= h->short_ref[i]->poc;
3492 if(poc > limit && poc < best_poc){
3498 assert(best_i != INT_MIN);
3501 sorted_short_ref[out_i]= *h->short_ref[best_i];
3502 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// remember where the "future" pictures start in the sorted array
3503 if (-1 == smallest_poc_greater_than_current) {
3504 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3505 smallest_poc_greater_than_current = out_i;
3511 if(s->picture_structure == PICT_FRAME){
3512 if(h->slice_type==B_TYPE){
3514 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3516 // find the largest poc
// L0 walks toward the past (step +1), L1 toward the future (step -1)
3517 for(list=0; list<2; list++){
3520 int step= list ? -1 : 1;
3522 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// wrap j to the other side of the current POC when running off one end
3523 while(j<0 || j>= h->short_ref_count){
3524 if(j != -99 && step == (list ? -1 : 1))
3527 j= smallest_poc_greater_than_current + (step>>1);
// only frame references (reference == 3 means both fields) are usable here
3529 if(sorted_short_ref[j].reference != 3) continue;
3530 h->default_ref_list[list][index ]= sorted_short_ref[j];
3531 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// append long-term references after the short-term ones
3534 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3535 if(h->long_ref[i] == NULL) continue;
3536 if(h->long_ref[i]->reference != 3) continue;
3538 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3539 h->default_ref_list[ list ][index++].pic_id= i;;
3542 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3543 // swap the two first elements of L1 when
3544 // L0 and L1 are identical
3545 Picture temp= h->default_ref_list[1][0];
3546 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3547 h->default_ref_list[1][1] = temp;
// zero out unused tail entries so stale data is never referenced
3550 if(index < h->ref_count[ list ])
3551 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P-slice path: short-term refs in order, then long-term refs
3555 for(i=0; i<h->short_ref_count; i++){
3556 if(h->short_ref[i]->reference != 3) continue; //FIXME reference-field handling
3557 h->default_ref_list[0][index ]= *h->short_ref[i];
3558 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3560 for(i = 0; i < 16; i++){
3561 if(h->long_ref[i] == NULL) continue;
3562 if(h->long_ref[i]->reference != 3) continue;
3563 h->default_ref_list[0][index ]= *h->long_ref[i];
3564 h->default_ref_list[0][index++].pic_id= i;;
3566 if(index < h->ref_count[0])
3567 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3570 if(h->slice_type==B_TYPE){
3572 //FIXME second field handling
3576 for (i=0; i<h->ref_count[0]; i++) {
3577 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3579 if(h->slice_type==B_TYPE){
3580 for (i=0; i<h->ref_count[1]; i++) {
3581 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3588 static void print_short_term(H264Context *h);
3589 static void print_long_term(H264Context *h);
// Parse ref_pic_list_reordering() from the slice header and apply it:
// starting from the default lists, each reordering op moves the named
// short- or long-term picture to the current index, shifting duplicates
// down. Returns 0 on success, negative on bitstream errors.
// NOTE(review): interior lines (break statements, closing braces) are
// missing from this extracted chunk.
3591 static int decode_ref_pic_list_reordering(H264Context *h){
3592 MpegEncContext * const s = &h->s;
3595 print_short_term(h);
3597 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3599 for(list=0; list<2; list++){
// start from the default list; reordering modifies it in place
3600 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3602 if(get_bits1(&s->gb)){
// pred holds the running "predicted" picture number for diff decoding
3603 int pred= h->curr_pic_num;
3605 for(index=0; ; index++){
3606 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3609 Picture *ref = NULL;
// idc 3 terminates the reordering loop for this list
3611 if(reordering_of_pic_nums_idc==3)
3614 if(index >= h->ref_count[list]){
3615 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3619 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term picture addressed by a signed frame_num delta
3620 if(reordering_of_pic_nums_idc<2){
3621 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3623 if(abs_diff_pic_num >= h->max_pic_num){
3624 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3628 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3629 else pred+= abs_diff_pic_num;
// modular wrap within [0, max_pic_num); max_pic_num is a power of two
3630 pred &= h->max_pic_num - 1;
3632 for(i= h->short_ref_count-1; i>=0; i--){
3633 ref = h->short_ref[i];
3634 assert(ref->reference == 3);
3635 assert(!ref->long_ref);
3636 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3640 ref->pic_id= ref->frame_num;
// idc 2: long-term picture addressed directly by index
3642 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3643 ref = h->long_ref[pic_id];
3644 ref->pic_id= pic_id;
3645 assert(ref->reference == 3);
3646 assert(ref->long_ref);
3651 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3652 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// find any duplicate of ref further down, then shift entries to make room
3654 for(i=index; i+1<h->ref_count[list]; i++){
3655 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3658 for(; i > index; i--){
3659 h->ref_list[list][i]= h->ref_list[list][i-1];
3661 h->ref_list[list][index]= *ref;
3664 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3670 if(h->slice_type!=B_TYPE) break;
// replace any still-empty entries with the current picture as a fallback
3672 for(list=0; list<2; list++){
3673 for(index= 0; index < h->ref_count[list]; index++){
3674 if(!h->ref_list[list][index].data[0])
3675 h->ref_list[list][index]= s->current_picture;
3677 if(h->slice_type!=B_TYPE) break;
3680 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3681 direct_dist_scale_factor(h);
3682 direct_ref_list_init(h);
// Parse pred_weight_table() (explicit weighted prediction) from the slice
// header: per-list, per-reference luma and chroma weights/offsets, falling
// back to the default weight (1 << log2_denom) and offset 0 when the
// per-ref flag is absent. Sets h->use_weight / h->use_weight_chroma.
// NOTE(review): interior lines (the chroma j-loop header, else branches,
// closing braces) are missing from this extracted chunk.
3686 static int pred_weight_table(H264Context *h){
3687 MpegEncContext * const s = &h->s;
3689 int luma_def, chroma_def;
3692 h->use_weight_chroma= 0;
3693 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3694 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// the implicit default weight is exactly 1.0 in fixed point
3695 luma_def = 1<<h->luma_log2_weight_denom;
3696 chroma_def = 1<<h->chroma_log2_weight_denom;
3698 for(list=0; list<2; list++){
3699 for(i=0; i<h->ref_count[list]; i++){
3700 int luma_weight_flag, chroma_weight_flag;
3702 luma_weight_flag= get_bits1(&s->gb);
3703 if(luma_weight_flag){
3704 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3705 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// only flag weighting as in-use if it differs from the no-op default
3706 if( h->luma_weight[list][i] != luma_def
3707 || h->luma_offset[list][i] != 0)
3710 h->luma_weight[list][i]= luma_def;
3711 h->luma_offset[list][i]= 0;
3714 chroma_weight_flag= get_bits1(&s->gb);
3715 if(chroma_weight_flag){
3718 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3719 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3720 if( h->chroma_weight[list][i][j] != chroma_def
3721 || h->chroma_offset[list][i][j] != 0)
3722 h->use_weight_chroma= 1;
3727 h->chroma_weight[list][i][j]= chroma_def;
3728 h->chroma_offset[list][i][j]= 0;
// list 1 weights are only present for B slices
3732 if(h->slice_type != B_TYPE) break;
3734 h->use_weight= h->use_weight || h->use_weight_chroma;
// Compute the implicit weighted-prediction table for B slices: the weight
// for each (ref0, ref1) pair is derived from the POC distances of the two
// references to the current picture (temporal interpolation), clamped to
// the default 32/32 split outside the valid range.
// NOTE(review): interior lines (else branches, closing braces) are missing
// from this extracted chunk.
3738 static void implicit_weight_table(H264Context *h){
3739 MpegEncContext * const s = &h->s;
3741 int cur_poc = s->current_picture_ptr->poc;
// shortcut: a single, temporally-symmetric ref pair needs no weighting
3743 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3744 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3746 h->use_weight_chroma= 0;
// use_weight==2 presumably marks "implicit" mode downstream — TODO confirm
3751 h->use_weight_chroma= 2;
3752 h->luma_log2_weight_denom= 5;
3753 h->chroma_log2_weight_denom= 5;
3756 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3757 int poc0 = h->ref_list[0][ref0].poc;
3758 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3759 int poc1 = h->ref_list[1][ref1].poc;
// td/tb: clipped POC distances per the spec's implicit-weight derivation
3760 int td = clip(poc1 - poc0, -128, 127);
3762 int tb = clip(cur_poc - poc0, -128, 127);
3763 int tx = (16384 + (ABS(td) >> 1)) / td;
3764 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factors fall back to the equal 32/32 weighting
3765 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3766 h->implicit_weight[ref0][ref1] = 32;
3768 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3770 h->implicit_weight[ref0][ref1] = 32;
// Drop the reference flag on a picture unless it is still queued for
// output (delayed_output_pic or one of delayed_pic[]); in that case the
// flag is released later when the picture is output.
// NOTE(review): the body of both if-branches is missing from this
// extracted chunk.
3775 static inline void unreference_pic(H264Context *h, Picture *pic){
3778 if(pic == h->delayed_output_pic)
3781 for(i = 0; h->delayed_pic[i]; i++)
3782 if(pic == h->delayed_pic[i]){
3790 * instantaneous decoder refresh.
// Handle an IDR (instantaneous decoder refresh): unreference and clear
// every long-term and short-term reference picture, emptying the DPB
// reference sets as the spec requires at an IDR access unit.
3792 static void idr(H264Context *h){
3795 for(i=0; i<16; i++){
3796 if (h->long_ref[i] != NULL) {
3797 unreference_pic(h, h->long_ref[i]);
3798 h->long_ref[i]= NULL;
3801 h->long_ref_count=0;
3803 for(i=0; i<h->short_ref_count; i++){
3804 unreference_pic(h, h->short_ref[i]);
3805 h->short_ref[i]= NULL;
3807 h->short_ref_count=0;
3810 /* forget old pics after a seek */
3811 static void flush_dpb(AVCodecContext *avctx){
3812 H264Context *h= avctx->priv_data;
3815 h->delayed_pic[i]= NULL;
3816 h->delayed_output_pic= NULL;
3818 if(h->s.current_picture_ptr)
3819 h->s.current_picture_ptr->reference= 0;
3824 * @return the removed picture or NULL if an error occurs
// Remove the short-term reference with the given frame_num from
// h->short_ref[], compacting the array.
// @return the removed picture or NULL if an error occurs
3826 static Picture * remove_short(H264Context *h, int frame_num){
3827 MpegEncContext * const s = &h->s;
3830 if(s->avctx->debug&FF_DEBUG_MMCO)
3831 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3833 for(i=0; i<h->short_ref_count; i++){
3834 Picture *pic= h->short_ref[i];
3835 if(s->avctx->debug&FF_DEBUG_MMCO)
3836 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3837 if(pic->frame_num == frame_num){
3838 h->short_ref[i]= NULL;
// close the gap left by the removed entry (array of pointers, memmove-safe)
3839 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3840 h->short_ref_count--;
3849 * @return the removed picture or NULL if an error occurs
// Remove the long-term reference stored at index i (may be NULL).
// @return the removed picture or NULL if the slot was empty
3851 static Picture * remove_long(H264Context *h, int i){
3854 pic= h->long_ref[i];
3855 h->long_ref[i]= NULL;
// only decrement the count when a picture actually occupied the slot
3856 if(pic) h->long_ref_count--;
3862 * print short term list
// Debug helper: dump the short-term reference list when FF_DEBUG_MMCO
// debugging is enabled; no effect otherwise.
3864 static void print_short_term(H264Context *h) {
3866 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3867 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3868 for(i=0; i<h->short_ref_count; i++){
3869 Picture *pic= h->short_ref[i];
3870 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3876 * print long term list
// Debug helper: dump the long-term reference table (all 16 slots) when
// FF_DEBUG_MMCO debugging is enabled; no effect otherwise.
3878 static void print_long_term(H264Context *h) {
3880 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3881 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3882 for(i = 0; i < 16; i++){
3883 Picture *pic= h->long_ref[i];
3885 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3892 * Executes the reference picture marking (memory management control operations).
// Execute the decoded reference picture marking (MMCO) operations on the
// DPB, then insert the current picture as a short-term reference unless a
// MMCO_LONG op made it long-term (sliding-window behaviour otherwise).
// See H.264 spec clause 8.2.5. NOTE(review): interior lines (case labels,
// break statements, closing braces) are missing from this extracted chunk.
3894 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3895 MpegEncContext * const s = &h->s;
3897 int current_is_long=0;
3900 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3901 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3903 for(i=0; i<mmco_count; i++){
3904 if(s->avctx->debug&FF_DEBUG_MMCO)
3905 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3907 switch(mmco[i].opcode){
// mark a short-term picture as unused for reference
3908 case MMCO_SHORT2UNUSED:
3909 pic= remove_short(h, mmco[i].short_frame_num);
3911 unreference_pic(h, pic);
3912 else if(s->avctx->debug&FF_DEBUG_MMCO)
3913 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// promote a short-term picture to a long-term slot (evicting any occupant)
3915 case MMCO_SHORT2LONG:
3916 pic= remove_long(h, mmco[i].long_index);
3917 if(pic) unreference_pic(h, pic);
3919 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3920 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3921 h->long_ref_count++;
// mark a long-term picture as unused for reference
3923 case MMCO_LONG2UNUSED:
3924 pic= remove_long(h, mmco[i].long_index);
3926 unreference_pic(h, pic);
3927 else if(s->avctx->debug&FF_DEBUG_MMCO)
3928 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// MMCO_LONG: make the CURRENT picture a long-term reference
3931 pic= remove_long(h, mmco[i].long_index);
3932 if(pic) unreference_pic(h, pic);
3934 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3935 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3936 h->long_ref_count++;
3940 case MMCO_SET_MAX_LONG:
3941 assert(mmco[i].long_index <= 16);
3942 // just remove the long term which index is greater than new max
3943 for(j = mmco[i].long_index; j<16; j++){
3944 pic = remove_long(h, j);
3945 if (pic) unreference_pic(h, pic);
// MMCO_RESET: flush every reference picture, short- and long-term
3949 while(h->short_ref_count){
3950 pic= remove_short(h, h->short_ref[0]->frame_num);
3951 unreference_pic(h, pic);
3953 for(j = 0; j < 16; j++) {
3954 pic= remove_long(h, j);
3955 if(pic) unreference_pic(h, pic);
// default path: insert the current picture at the head of the short-term
// list (most recent first), unless MMCO made it long-term above
3962 if(!current_is_long){
// a picture with this frame_num should not already be present
3963 pic= remove_short(h, s->current_picture_ptr->frame_num);
3965 unreference_pic(h, pic);
3966 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3969 if(h->short_ref_count)
3970 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3972 h->short_ref[0]= s->current_picture_ptr;
3973 h->short_ref[0]->long_ref=0;
3974 h->short_ref_count++;
3977 print_short_term(h);
// Parse dec_ref_pic_marking() from the slice header into h->mmco[]:
// IDR slices get the no_output_of_prior_pics / long_term_reference flags;
// otherwise either explicit adaptive MMCO ops are read, or a sliding-window
// MMCO_SHORT2UNUSED op is synthesized when the DPB is full.
// NOTE(review): interior lines (else branches, loop-count bookkeeping,
// closing braces) are missing from this extracted chunk.
3982 static int decode_ref_pic_marking(H264Context *h){
3983 MpegEncContext * const s = &h->s;
3986 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// get_bits1(..) - 1 maps flag {0,1} to {-1,0}
3987 s->broken_link= get_bits1(&s->gb) -1;
3988 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
3989 if(h->mmco[0].long_index == -1)
3992 h->mmco[0].opcode= MMCO_LONG;
3996 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
3997 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3998 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
4000 h->mmco[i].opcode= opcode;
4001 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, modulo wrap
4002 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4003 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4004 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4008 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4009 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4010 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4011 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4016 if(opcode > MMCO_LONG){
4017 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4020 if(opcode == MMCO_END)
// sliding-window mode: evict the oldest short-term ref when the DPB is full
4025 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4027 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4028 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4029 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Derive the picture order count (POC) of the current picture, handling
// all three poc_type modes of the H.264 spec (clause 8.2.1):
// type 0 (lsb/msb from the bitstream), type 1 (expected POC from the SPS
// cycle table plus deltas), type 2 (derived from frame_num).
// NOTE(review): interior lines (else branches, assignments such as
// field_poc[0], closing braces) are missing from this extracted chunk.
4039 static int init_poc(H264Context *h){
4040 MpegEncContext * const s = &h->s;
4041 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4044 if(h->nal_unit_type == NAL_IDR_SLICE){
4045 h->frame_num_offset= 0;
// frame_num wrapped -> advance the offset by one full frame_num period
4047 if(h->frame_num < h->prev_frame_num)
4048 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4050 h->frame_num_offset= h->prev_frame_num_offset;
4053 if(h->sps.poc_type==0){
4054 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4056 if(h->nal_unit_type == NAL_IDR_SLICE){
// reconstruct poc_msb from the lsb wrap-around direction (spec 8-3)
4061 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4062 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4063 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4064 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4066 h->poc_msb = h->prev_poc_msb;
4067 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4069 field_poc[1] = h->poc_msb + h->poc_lsb;
4070 if(s->picture_structure == PICT_FRAME)
4071 field_poc[1] += h->delta_poc_bottom;
4072 }else if(h->sps.poc_type==1){
4073 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4076 if(h->sps.poc_cycle_length != 0)
4077 abs_frame_num = h->frame_num_offset + h->frame_num;
// non-reference pictures are counted one step behind (spec 8-7)
4081 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4084 expected_delta_per_poc_cycle = 0;
4085 for(i=0; i < h->sps.poc_cycle_length; i++)
4086 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4088 if(abs_frame_num > 0){
4089 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4090 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4092 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4093 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4094 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4098 if(h->nal_ref_idc == 0)
4099 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4101 field_poc[0] = expectedpoc + h->delta_poc[0];
4102 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4104 if(s->picture_structure == PICT_FRAME)
4105 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decode order, non-refs get odd values (spec 8-10)
4108 if(h->nal_unit_type == NAL_IDR_SLICE){
4111 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4112 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// store per-field POCs; frame POC is the smaller of the two field POCs
4118 if(s->picture_structure != PICT_BOTTOM_FIELD)
4119 s->current_picture_ptr->field_poc[0]= field_poc[0];
4120 if(s->picture_structure != PICT_TOP_FIELD)
4121 s->current_picture_ptr->field_poc[1]= field_poc[1];
4122 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4123 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4129 * decodes a slice header.
4130 * this will allso call MPV_common_init() and frame_start() as needed
4132 static int decode_slice_header(H264Context *h){
4133 MpegEncContext * const s = &h->s;
4134 int first_mb_in_slice, pps_id;
4135 int num_ref_idx_active_override_flag;
4136 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4138 int default_ref_list_done = 0;
4140 s->current_picture.reference= h->nal_ref_idc != 0;
4141 s->dropable= h->nal_ref_idc == 0;
4143 first_mb_in_slice= get_ue_golomb(&s->gb);
4145 slice_type= get_ue_golomb(&s->gb);
4147 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4152 h->slice_type_fixed=1;
4154 h->slice_type_fixed=0;
4156 slice_type= slice_type_map[ slice_type ];
4157 if (slice_type == I_TYPE
4158 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4159 default_ref_list_done = 1;
4161 h->slice_type= slice_type;
4163 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4165 pps_id= get_ue_golomb(&s->gb);
4167 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4170 h->pps= h->pps_buffer[pps_id];
4171 if(h->pps.slice_group_count == 0){
4172 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4176 h->sps= h->sps_buffer[ h->pps.sps_id ];
4177 if(h->sps.log2_max_frame_num == 0){
4178 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4182 s->mb_width= h->sps.mb_width;
4183 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4185 h->b_stride= s->mb_width*4 + 1;
4186 h->b8_stride= s->mb_width*2 + 1;
4188 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4189 if(h->sps.frame_mbs_only_flag)
4190 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4192 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4194 if (s->context_initialized
4195 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4199 if (!s->context_initialized) {
4200 if (MPV_common_init(s) < 0)
4203 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4204 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4205 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4208 for(i=0; i<16; i++){
4209 #define T(x) (x>>2) | ((x<<2) & 0xF)
4210 h->zigzag_scan[i] = T(zigzag_scan[i]);
4211 h-> field_scan[i] = T( field_scan[i]);
4214 if(h->sps.transform_bypass){ //FIXME same ugly
4215 h->zigzag_scan_q0 = zigzag_scan;
4216 h->field_scan_q0 = field_scan;
4218 h->zigzag_scan_q0 = h->zigzag_scan;
4219 h->field_scan_q0 = h->field_scan;
4224 s->avctx->width = s->width;
4225 s->avctx->height = s->height;
4226 s->avctx->sample_aspect_ratio= h->sps.sar;
4227 if(!s->avctx->sample_aspect_ratio.den)
4228 s->avctx->sample_aspect_ratio.den = 1;
4230 if(h->sps.timing_info_present_flag){
4231 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4235 if(h->slice_num == 0){
4239 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4240 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4242 h->mb_aff_frame = 0;
4243 if(h->sps.frame_mbs_only_flag){
4244 s->picture_structure= PICT_FRAME;
4246 if(get_bits1(&s->gb)) { //field_pic_flag
4247 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4249 s->picture_structure= PICT_FRAME;
4250 first_mb_in_slice <<= h->sps.mb_aff;
4251 h->mb_aff_frame = h->sps.mb_aff;
4255 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4256 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4257 if(s->mb_y >= s->mb_height){
4261 if(s->picture_structure==PICT_FRAME){
4262 h->curr_pic_num= h->frame_num;
4263 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4265 h->curr_pic_num= 2*h->frame_num;
4266 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4269 if(h->nal_unit_type == NAL_IDR_SLICE){
4270 get_ue_golomb(&s->gb); /* idr_pic_id */
4273 if(h->sps.poc_type==0){
4274 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4276 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4277 h->delta_poc_bottom= get_se_golomb(&s->gb);
4281 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4282 h->delta_poc[0]= get_se_golomb(&s->gb);
4284 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4285 h->delta_poc[1]= get_se_golomb(&s->gb);
4290 if(h->pps.redundant_pic_cnt_present){
4291 h->redundant_pic_count= get_ue_golomb(&s->gb);
4294 //set defaults, might be overriden a few line later
4295 h->ref_count[0]= h->pps.ref_count[0];
4296 h->ref_count[1]= h->pps.ref_count[1];
4298 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4299 if(h->slice_type == B_TYPE){
4300 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4302 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4304 if(num_ref_idx_active_override_flag){
4305 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4306 if(h->slice_type==B_TYPE)
4307 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4309 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4310 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4316 if(!default_ref_list_done){
4317 fill_default_ref_list(h);
4320 if(decode_ref_pic_list_reordering(h) < 0)
4323 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4324 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4325 pred_weight_table(h);
4326 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4327 implicit_weight_table(h);
4331 if(s->current_picture.reference)
4332 decode_ref_pic_marking(h);
4334 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4335 h->cabac_init_idc = get_ue_golomb(&s->gb);
4337 h->last_qscale_diff = 0;
4338 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4339 if(s->qscale<0 || s->qscale>51){
4340 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4343 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4344 //FIXME qscale / qp ... stuff
4345 if(h->slice_type == SP_TYPE){
4346 get_bits1(&s->gb); /* sp_for_switch_flag */
4348 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4349 get_se_golomb(&s->gb); /* slice_qs_delta */
4352 h->deblocking_filter = 1;
4353 h->slice_alpha_c0_offset = 0;
4354 h->slice_beta_offset = 0;
4355 if( h->pps.deblocking_filter_parameters_present ) {
4356 h->deblocking_filter= get_ue_golomb(&s->gb);
4357 if(h->deblocking_filter < 2)
4358 h->deblocking_filter^= 1; // 1<->0
4360 if( h->deblocking_filter ) {
4361 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4362 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4365 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4366 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4367 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4368 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4369 h->deblocking_filter= 0;
4372 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4373 slice_group_change_cycle= get_bits(&s->gb, ?);
4378 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4379 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4381 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4383 av_get_pict_type_char(h->slice_type),
4384 pps_id, h->frame_num,
4385 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4386 h->ref_count[0], h->ref_count[1],
4388 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4390 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads the CAVLC level_prefix: the count of leading zero bits before the
 * first one-bit in the bitstream (unary code).
 * NOTE(review): this listing is elided — the declarations of buf/log and the
 * final return fall on missing lines; presumably it returns log-1 — verify
 * against the full source.
 */
4400 static inline int get_level_prefix(GetBitContext *gb){
4404     OPEN_READER(re, gb);
4405     UPDATE_CACHE(re, gb);
4406     buf=GET_CACHE(re, gb);
/* position of the leading one-bit, counted from the MSB of the 32-bit cache */
4408     log= 32 - av_log2(buf);
4410     print_bin(buf>>(32-log), log);
4411     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating one-bit */
4414     LAST_SKIP_BITS(re, gb, log);
4415     CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * a sub-macroblock partition smaller than 8x8 (or a direct sub-partition
 * without direct_8x8_inference) forbids it.
 * NOTE(review): the surrounding loop over i and the return statements are on
 * elided lines — confirm against the full source.
 */
4420 static inline int get_dct8x8_allowed(H264Context *h){
4423     if(!IS_SUB_8X8(h->sub_mb_type[i])
4424     || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4431  * decodes a residual block.
4432  * @param n block index
4433  * @param scantable scantable
4434  * @param max_coeff number of coefficients in the block
4435  * @return <0 if an error occured
/* NOTE(review): heavily elided listing — else-branches, closing braces and
 * several statements (level[] declaration, suffix_length updates, early
 * returns) are on missing lines. Code below is kept byte-identical. */
4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
4438     MpegEncContext * const s = &h->s;
/* maps total_coeff (0..16) to one of 4 coeff_token VLC tables */
4439     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4441     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4443     //FIXME put trailing_onex into the context
/* --- coeff_token: joint code for (total_coeff, trailing_ones) ------------ */
4445     if(n == CHROMA_DC_BLOCK_INDEX){
4446         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4447         total_coeff= coeff_token>>2;
4449     if(n == LUMA_DC_BLOCK_INDEX){
/* table choice is context-adaptive on the predicted nonzero count */
4450         total_coeff= pred_non_zero_count(h, 0);
4451         coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4452         total_coeff= coeff_token>>2;
4454         total_coeff= pred_non_zero_count(h, n);
4455         coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4456         total_coeff= coeff_token>>2;
4457         h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4461     //FIXME set last_non_zero?
/* --- level decoding ------------------------------------------------------ */
/* low two bits of coeff_token carry the trailing-ones count (0..3) */
4466     trailing_ones= coeff_token&3;
4467     tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4468     assert(total_coeff<=16);
/* trailing ones are coded as sign bits only: 0 -> +1, 1 -> -1 */
4470     for(i=0; i<trailing_ones; i++){
4471         level[i]= 1 - 2*get_bits1(gb);
4475     int level_code, mask;
4476     int suffix_length = total_coeff > 10 && trailing_ones < 3;
4477     int prefix= get_level_prefix(gb);
4479     //first coefficient has suffix_length equal to 0 or 1
4480     if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4482         level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4484         level_code= (prefix<<suffix_length); //part
4485     }else if(prefix==14){
4487         level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4489         level_code= prefix + get_bits(gb, 4); //part
4490     }else if(prefix==15){
4491         level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4492         if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4494         av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
/* if fewer than 3 trailing ones, levels +-1 are impossible here, so shift */
4498     if(trailing_ones < 3) level_code += 2;
/* map even codes to positive, odd codes to negative levels (branchless) */
4503     mask= -(level_code&1);
4504     level[i]= (((2+level_code)>>1) ^ mask) - mask;
4507     //remaining coefficients have suffix_length > 0
4508     for(;i<total_coeff;i++) {
/* thresholds at which suffix_length grows (elided increment nearby) */
4509         static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4510         prefix = get_level_prefix(gb);
4512             level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4513         }else if(prefix==15){
4514             level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4516             av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4519         mask= -(level_code&1);
4520         level[i]= (((2+level_code)>>1) ^ mask) - mask;
4521         if(level_code > suffix_limit[suffix_length])
/* --- total_zeros and run_before ------------------------------------------ */
/* if every coefficient position is occupied there can be no zeros */
4526     if(total_coeff == max_coeff)
4529     if(n == CHROMA_DC_BLOCK_INDEX)
4530         zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4532         zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place coefficients from highest scan position downwards */
4535     coeff_num = zeros_left + total_coeff - 1;
4536     j = scantable[coeff_num];
/* first variant: no dequantization (qmul not applied) */
4538     block[j] = level[0];
4539     for(i=1;i<total_coeff;i++) {
4542         else if(zeros_left < 7){
4543             run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4545             run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4547         zeros_left -= run_before;
4548         coeff_num -= 1 + run_before;
4549         j= scantable[ coeff_num ];
/* second variant: same placement loop but with dequantization by qmul[j] */
4554     block[j] = level[0] * qmul[j];
4555     for(i=1;i<total_coeff;i++) {
4558         else if(zeros_left < 7){
4559             run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4561             run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4563         zeros_left -= run_before;
4564         coeff_num -= 1 + run_before;
4565         j= scantable[ coeff_num ];
4567         block[j]= level[i] * qmul[j];
4568 //            printf("%d %d ", block[j], qmul[j]);
/* a negative zeros_left means the stream was inconsistent */
4573         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4581  * decodes a P_SKIP or B_SKIP macroblock
/* NOTE(review): elided listing — mb_type initialization and several closing
 * braces are on missing lines. */
4583 static void decode_mb_skip(H264Context *h){
4584     MpegEncContext * const s = &h->s;
4585     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* a skipped MB carries no residual: clear all nonzero-coefficient counts */
4588     memset(h->non_zero_count[mb_xy], 0, 16);
4589     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
/* in MBAFF, the field/frame flag is read once per MB pair (top MB, no skips pending) */
4591     if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4592         h->mb_field_decoding_flag= get_bits1(&s->gb);
4594     if(h->mb_field_decoding_flag)
4595         mb_type|= MB_TYPE_INTERLACED;
4597     if( h->slice_type == B_TYPE )
4599         // just for fill_caches. pred_direct_motion will set the real mb_type
4600         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4602         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4603         pred_direct_motion(h, &mb_type);
/* B_SKIP: motion comes from direct prediction, so mvd is zero everywhere */
4605             fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4606             fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
/* P_SKIP path: a single 16x16 list-0 prediction with the P-skip predictor */
4612         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4614         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4615         pred_pskip_motion(h, &mx, &my);
4616         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4617         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4619         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4622     write_back_motion(h, mb_type);
4623     s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4624     s->current_picture.qscale_table[mb_xy]= s->qscale;
4625     h->slice_table[ mb_xy ]= h->slice_num;
4626     h->prev_mb_skipped= 1;
4630  * decodes a macroblock
4631  * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): heavily elided listing — many braces, else-branches, loop
 * headers and return statements are on missing lines. Visible code is kept
 * byte-identical; comments mark the logical sections. */
4633 static int decode_mb_cavlc(H264Context *h){
4634     MpegEncContext * const s = &h->s;
4635     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4636     int mb_type, partition_count, cbp;
4637     int dct8x8_allowed= h->pps.transform_8x8_mode;
4639     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4641     tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4642     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --------------------------------- */
4644     if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4645         if(s->mb_skip_run==-1)
4646             s->mb_skip_run= get_ue_golomb(&s->gb);
4648         if (s->mb_skip_run--) {
/* --- MBAFF field/frame flag ---------------------------------------------- */
4653     if(h->mb_aff_frame){
4654         if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4655             h->mb_field_decoding_flag = get_bits1(&s->gb);
4657         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4659     h->prev_mb_skipped= 0;
/* --- mb_type: slice-type specific tables, intra fallthrough -------------- */
4661     mb_type= get_ue_golomb(&s->gb);
4662     if(h->slice_type == B_TYPE){
4664         partition_count= b_mb_type_info[mb_type].partition_count;
4665         mb_type= b_mb_type_info[mb_type].type;
4668         goto decode_intra_mb;
4670     }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4672         partition_count= p_mb_type_info[mb_type].partition_count;
4673         mb_type= p_mb_type_info[mb_type].type;
4676         goto decode_intra_mb;
4679         assert(h->slice_type == I_TYPE);
4682         av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4686     cbp= i_mb_type_info[mb_type].cbp;
4687     h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4688     mb_type= i_mb_type_info[mb_type].type;
4691     if(h->mb_field_decoding_flag)
4692         mb_type |= MB_TYPE_INTERLACED;
4694     h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw byte-aligned samples, no prediction or transform --------- */
4696     if(IS_INTRA_PCM(mb_type)){
4699         // we assume these blocks are very rare so we dont optimize it
4700         align_get_bits(&s->gb);
4702         // The pixels are stored in the same order as levels in h->mb array.
4703         for(y=0; y<16; y++){
4704             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4705             for(x=0; x<16; x++){
4706                 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4707                 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4711             const int index= 256 + 4*(y&3) + 32*(y>>2);
4713                 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4714                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4718             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4720                 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4721                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4725         // In deblocking, the quantizer is 0
4726         s->current_picture.qscale_table[mb_xy]= 0;
4727         h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4728         // All coeffs are present
4729         memset(h->non_zero_count[mb_xy], 16, 16);
4731         s->current_picture.mb_type[mb_xy]= mb_type;
4735     fill_caches(h, mb_type, 0);
/* --- intra prediction modes ---------------------------------------------- */
4738     if(IS_INTRA(mb_type)){
4739 //            init_top_left_availability(h);
4740         if(IS_INTRA4x4(mb_type)){
4743             if(dct8x8_allowed && get_bits1(&s->gb)){
4744                 mb_type |= MB_TYPE_8x8DCT;
4748 //                fill_intra4x4_pred_table(h);
4749             for(i=0; i<16; i+=di){
/* one flag per 4x4 block: use predicted mode, or read a 3-bit remainder */
4750                 const int mode_coded= !get_bits1(&s->gb);
4751                 const int predicted_mode=  pred_intra_mode(h, i);
4755                     const int rem_mode= get_bits(&s->gb, 3);
4756                     if(rem_mode<predicted_mode)
4761                     mode= predicted_mode;
4765                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4767                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4769             write_back_intra_pred_mode(h);
4770             if( check_intra4x4_pred_mode(h) < 0)
4773             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4774             if(h->intra16x16_pred_mode < 0)
4777         h->chroma_pred_mode= get_ue_golomb(&s->gb);
4779         h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4780         if(h->chroma_pred_mode < 0)
/* --- 8x8 sub-macroblock partitions --------------------------------------- */
4782     }else if(partition_count==4){
4783         int i, j, sub_partition_count[4], list, ref[2][4];
4785         if(h->slice_type == B_TYPE){
4787                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4788                 if(h->sub_mb_type[i] >=13){
4789                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4792                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4793                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4795             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4796                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4797                 pred_direct_motion(h, &mb_type);
4799             assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4801                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4802                 if(h->sub_mb_type[i] >=4){
4803                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4806                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4807                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 partition, per list */
4811         for(list=0; list<2; list++){
4812             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4813             if(ref_count == 0) continue;
4814             if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4818                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4819                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4820                     ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4829         dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences per sub-partition */
4831         for(list=0; list<2; list++){
4832             const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4833             if(ref_count == 0) continue;
4836                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4837                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4838                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4840                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4841                     const int sub_mb_type= h->sub_mb_type[i];
4842                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4843                     for(j=0; j<sub_partition_count[i]; j++){
4845                         const int index= 4*i + block_width*j;
4846                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4847                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4848                         mx += get_se_golomb(&s->gb);
4849                         my += get_se_golomb(&s->gb);
4850                         tprintf("final mv:%d %d\n", mx, my);
/* replicate the MV into every 4x4 cell the sub-partition covers */
4852                         if(IS_SUB_8X8(sub_mb_type)){
4853                             mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4854                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4855                             mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4856                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4857                         }else if(IS_SUB_8X4(sub_mb_type)){
4858                             mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4859                             mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4860                         }else if(IS_SUB_4X8(sub_mb_type)){
4861                             mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4862                             mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4864                             assert(IS_SUB_4X4(sub_mb_type));
4865                             mv_cache[ 0 ][0]= mx;
4866                             mv_cache[ 0 ][1]= my;
4870                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- direct 16x16 macroblock --------------------------------------------- */
4876     }else if(IS_DIRECT(mb_type)){
4877         pred_direct_motion(h, &mb_type);
4878         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter macroblocks ------------------------------- */
4880         int list, mx, my, i;
4881          //FIXME we should set ref_idx_l? to 0 if we use that later ...
4882         if(IS_16X16(mb_type)){
4883             for(list=0; list<2; list++){
4884                 if(h->ref_count[list]>0){
4885                     if(IS_DIR(mb_type, 0, list)){
4886                         const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4887                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4889                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4892             for(list=0; list<2; list++){
4893                 if(IS_DIR(mb_type, 0, list)){
4894                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4895                     mx += get_se_golomb(&s->gb);
4896                     my += get_se_golomb(&s->gb);
4897                     tprintf("final mv:%d %d\n", mx, my);
4899                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4901                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4904         else if(IS_16X8(mb_type)){
4905             for(list=0; list<2; list++){
4906                 if(h->ref_count[list]>0){
4908                     if(IS_DIR(mb_type, i, list)){
4909                         const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4910                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4912                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4916             for(list=0; list<2; list++){
4918                 if(IS_DIR(mb_type, i, list)){
4919                     pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4920                     mx += get_se_golomb(&s->gb);
4921                     my += get_se_golomb(&s->gb);
4922                     tprintf("final mv:%d %d\n", mx, my);
4924                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4926                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
4930             assert(IS_8X16(mb_type));
4931             for(list=0; list<2; list++){
4932                 if(h->ref_count[list]>0){
4934                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4935                         const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4936                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4938                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
4942             for(list=0; list<2; list++){
4944                 if(IS_DIR(mb_type, i, list)){
4945                     pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4946                     mx += get_se_golomb(&s->gb);
4947                     my += get_se_golomb(&s->gb);
4948                     tprintf("final mv:%d %d\n", mx, my);
4950                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
4952                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
4958     if(IS_INTER(mb_type))
4959         write_back_motion(h, mb_type);
/* --- coded block pattern -------------------------------------------------- */
4961     if(!IS_INTRA16x16(mb_type)){
4962         cbp= get_ue_golomb(&s->gb);
4964             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
4968         if(IS_INTRA4x4(mb_type))
4969             cbp= golomb_to_intra4x4_cbp[cbp];
4971             cbp= golomb_to_inter_cbp[cbp];
4974     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4975         if(get_bits1(&s->gb))
4976             mb_type |= MB_TYPE_8x8DCT;
4978     s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding ---------------------------------------------------- */
4980     if(cbp || IS_INTRA16x16(mb_type)){
4981         int i8x8, i4x4, chroma_idx;
4982         int chroma_qp, dquant;
4983         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4984         const uint8_t *scan, *dc_scan;
4986 //        fill_non_zero_count_cache(h);
4988         if(IS_INTERLACED(mb_type)){
4989             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4990             dc_scan= luma_dc_field_scan;
4992             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4993             dc_scan= luma_dc_zigzag_scan;
4996         dquant= get_se_golomb(&s->gb);
4998         if( dquant > 25 || dquant < -26 ){
4999             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
5003         s->qscale += dquant;
5004         if(((unsigned)s->qscale) > 51){
5005             if(s->qscale<0) s->qscale+= 52;
5006             else            s->qscale-= 52;
5009         h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5010         if(IS_INTRA16x16(mb_type)){
5011             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){
5012                 return -1; //FIXME continue if partitioned and other return -1 too
5015             assert((cbp&15) == 0 || (cbp&15) == 15);
/* intra16x16 AC blocks: scan+1 skips the DC coefficient (15 coeffs) */
5018                 for(i8x8=0; i8x8<4; i8x8++){
5019                     for(i4x4=0; i4x4<4; i4x4++){
5020                         const int index= i4x4 + 4*i8x8;
5021                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){
5027                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5030             for(i8x8=0; i8x8<4; i8x8++){
5031                 if(cbp & (1<<i8x8)){
5032                     if(IS_8x8DCT(mb_type)){
5033                         DCTELEM *buf = &h->mb[64*i8x8];
5035                         for(i4x4=0; i4x4<4; i4x4++){
5036                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5037                                                 h->dequant8_coeff[s->qscale], 16) <0 )
5043                             buf[i] = (buf[i] + 2) >> 2;
5045                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5046                         nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5048                         for(i4x4=0; i4x4<4; i4x4++){
5049                             const int index= i4x4 + 4*i8x8;
5051                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){
5057                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5058                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (2x2 per plane), then chroma AC if signalled by cbp */
5064             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5065                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){
5071             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5072                 for(i4x4=0; i4x4<4; i4x4++){
5073                     const int index= 16 + 4*chroma_idx + i4x4;
5074                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){
5080             uint8_t * const nnz= &h->non_zero_count_cache[0];
5081             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5082             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5085         uint8_t * const nnz= &h->non_zero_count_cache[0];
5086         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5087         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5088         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5090     s->current_picture.qscale_table[mb_xy]= s->qscale;
5091     write_back_non_zero_count(h);
/**
 * Decodes the MBAFF mb_field_decoding_flag with CABAC.
 * Context increments for each neighbouring MB pair (left, above) that is
 * already decoded as interlaced within the same slice.
 */
5096 static int decode_cabac_field_decoding_flag(H264Context *h) {
5097     MpegEncContext * const s = &h->s;
5098     const int mb_x = s->mb_x;
/* address of the top MB of the current MB pair */
5099     const int mb_y = s->mb_y & ~1;
5100     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
5101     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
5103     unsigned int ctx = 0;
5105     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5108     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
/* states 70..72 per the spec's ctxIdx assignment */
5112     return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC: 0 = I4x4, 25 = I_PCM,
 * 1..24 = I16x16 variants encoding cbp_luma, cbp_chroma and pred mode.
 * @param ctx_base index of the first CABAC state for this syntax element
 * @param intra_slice nonzero in I slices (uses the neighbour-derived context)
 * NOTE(review): elided listing — the final mb_type accumulation/return lines
 * are missing.
 */
5115 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5116     uint8_t *state= &h->cabac_state[ctx_base];
5120         MpegEncContext * const s = &h->s;
5121         const int mba_xy = h->left_mb_xy[0];
5122         const int mbb_xy = h->top_mb_xy;
/* context counts neighbours in the same slice that are not I4x4 */
5124         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5126         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5128         if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5129             return 0;   /* I4x4 */
5132         if( get_cabac( &h->cabac, &state[0] ) == 0 )
5133             return 0;   /* I4x4 */
/* terminate bin distinguishes PCM from I16x16 */
5136     if( get_cabac_terminate( &h->cabac ) )
5137         return 25; /* PCM */
5139     mb_type = 1; /* I16x16 */
5140     if( get_cabac( &h->cabac, &state[1] ) )
5141         mb_type += 12;  /* cbp_luma != 0 */
5143     if( get_cabac( &h->cabac, &state[2] ) ) {
5144         if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5145             mb_type += 4 * 2;   /* cbp_chroma == 2 */
5147             mb_type += 4 * 1;   /* cbp_chroma == 1 */
5149     if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5151     if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
/**
 * Decodes mb_type with CABAC for I, P and B slices. P and B intra types are
 * mapped onto the intra table with an offset (+5 / +23).
 * NOTE(review): elided listing — some braces and the final return for
 * unsupported slice types are missing.
 */
5156 static int decode_cabac_mb_type( H264Context *h ) {
5157     MpegEncContext * const s = &h->s;
5159     if( h->slice_type == I_TYPE ) {
5160         return decode_cabac_intra_mb_type(h, 3, 1);
5161     } else if( h->slice_type == P_TYPE ) {
5162         if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
/* unary-ish tree over states 15..17 selects the P partitioning */
5164             if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5165                 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5166                     return 0; /* P_L0_D16x16; */
5168                     return 3; /* P_8x8; */
5170                 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5171                     return 2; /* P_L0_D8x16; */
5173                     return 1; /* P_L0_D16x8; */
5176             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5178     } else if( h->slice_type == B_TYPE ) {
5179         const int mba_xy = h->left_mb_xy[0];
5180         const int mbb_xy = h->top_mb_xy;
/* context counts non-skip, non-direct neighbours in the same slice */
5184         if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5185             && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5187         if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5188             && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5191         if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5192             return 0; /* B_Direct_16x16 */
5194         if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5195             return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix distinguishes the remaining B types */
5198         bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5199         bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5200         bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5201         bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5203             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5204         else if( bits == 13 ) {
5205             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5206         } else if( bits == 14 )
5207             return 11; /* B_L1_L0_8x16 */
5208         else if( bits == 15 )
5209             return 22; /* B_8x8 */
5211         bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5212         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5214     /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC. The context increments for each in-slice
 * neighbour (left, above) that is itself not skipped; P/SP and B slices use
 * separate state banks (11.. and 24..).
 */
5219 static int decode_cabac_mb_skip( H264Context *h) {
5220     MpegEncContext * const s = &h->s;
5221     const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5222     const int mba_xy = mb_xy - 1;
5223     const int mbb_xy = mb_xy - s->mb_stride;
5226     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5228     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5231     if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5232         return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5234         return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC: one "use predicted mode"
 * bin (state 68), else a 3-bit fixed code (state 69) giving rem_mode, which
 * skips over the predicted mode.
 * NOTE(review): elided listing — the mode accumulation and returns are on
 * missing lines.
 */
5237 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5240     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5243     if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5245     if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5247     if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
/* rem_mode >= pred_mode means the actual mode is one higher */
5249     if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC: a context-coded first bin
 * (0 = DC) followed by a truncated-unary suffix on state 64+3.
 * NOTE(review): elided listing — the returns for modes 1..3 fall on missing
 * lines.
 */
5255 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5256     const int mba_xy = h->left_mb_xy[0];
5257     const int mbb_xy = h->top_mb_xy;
5261     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5262     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5265     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5268     if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5271     if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5273     if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Lookup tables mapping a 4x4 luma block index (raster within 8x8 quadrants)
 * to its x/y coordinate, and back from (x,y) to the block index. */
5279 static const uint8_t block_idx_x[16] = {
5280     0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5282 static const uint8_t block_idx_y[16] = {
5283     0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5285 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the luma coded_block_pattern (4 bits, one per 8x8 block) with
 * CABAC. The context for each bit depends on whether the left/top
 * neighbouring 8x8 blocks (possibly in neighbouring MBs) were coded.
 * NOTE(review): elided listing — the cbp accumulation, the in-MB neighbour
 * cases and the return are on missing lines.
 */
5292 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5293     MpegEncContext * const s = &h->s;
5298     for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5304         x = block_idx_x[4*i8x8];
5305         y = block_idx_y[4*i8x8];
/* pick up the left neighbour's cbp when the 8x8 block touches the MB edge */
5309         else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5310             cbp_a = h->left_cbp;
5311             tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5316         else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5318             tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5321         /* No need to test for skip as we put 0 for skip block */
5322         /* No need to test for IPCM as we put 1 for IPCM block */
5324             int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5325             if( ((cbp_a >> i8x8a)&0x01) == 0 )
5330             int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5331             if( ((cbp_b >> i8x8b)&0x01) == 0 )
5335         if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC)
 * with CABAC; the two bins use contexts derived from the neighbours'
 * chroma cbp (upper bits of left_cbp/top_cbp).
 */
5341 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5345     cbp_a = (h->left_cbp>>4)&0x03;
5346     cbp_b = (h-> top_cbp>>4)&0x03;
5349     if( cbp_a > 0 ) ctx++;
5350     if( cbp_b > 0 ) ctx += 2;
5351     if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: context 4/5/6/7 depending on neighbours having AC coded */
5355     if( cbp_a == 2 ) ctx++;
5356     if( cbp_b == 2 ) ctx += 2;
5357     return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: a unary code on states 60.., then maps the
 * unsigned value to a signed delta (even -> positive, odd -> negative).
 * NOTE(review): elided listing — the ctx=0 initialization, the positive-value
 * return and some braces are missing.
 */
5359 static int decode_cabac_mb_dqp( H264Context *h) {
5360     MpegEncContext * const s = &h->s;
/* previous MB in decoding order (wraps to end of previous row) */
5366         mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5368         mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
/* first-bin context depends on the previous MB having a nonzero dqp/coeffs */
5370     if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5373     while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5379         if(val > 52) //prevent infinite loop
5386         return -(val + 1)/2;
/**
 * Decodes sub_mb_type for one 8x8 partition of a P macroblock
 * (context states 21..23); the returned index selects an entry of
 * p_sub_mb_type_info[].
 */
5388 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5389 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5391 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5393 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes sub_mb_type for one 8x8 partition of a B macroblock
 * (context states 36..39); the returned index selects an entry of
 * b_sub_mb_type_info[]. State 39 is reused for all trailing suffix bins.
 */
5397 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5399 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5400 return 0; /* B_Direct_8x8 */
5401 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5402 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5404 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5405 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5406 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
     /* remaining types: two more suffix bins build the final index */
5409 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5410 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag: one bin whose context (states 399..401)
 * is the number of neighbouring MBs already using the 8x8 transform.
 */
5414 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5415 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of reference list `list` with CABAC
 * (context states 54+). The context increment is derived from the left and
 * top neighbours' reference indices; in B slices, neighbours coded as
 * direct do not contribute (their cached ref is not a decoded ref_idx).
 */
5418 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5419 int refa = h->ref_cache[list][scan8[n] - 1];
5420 int refb = h->ref_cache[list][scan8[n] - 8];
5424 if( h->slice_type == B_TYPE) {
5425 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5427 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
     /* unary decode of the ref index */
5436 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one motion vector difference component (l: 0=x, 1=y) for block n.
 * Context base is 40 for the x component, 47 for y; the first-bin context
 * depends on the summed |mvd| of the left and top neighbours. Magnitudes
 * above the unary prefix are coded in exp-golomb bypass bins, followed by
 * a bypass sign bit.
 */
5446 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5447 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5448 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5449 int ctxbase = (l == 0) ? 40 : 47;
5454 else if( amvd > 32 )
     /* first bin == 0 means mvd == 0 */
5459 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
     /* truncated unary prefix, at most 9 context-coded bins */
5464 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
     /* exp-golomb suffix in bypass mode for large magnitudes */
5472 while( get_cabac_bypass( &h->cabac ) ) {
5477 if( get_cabac_bypass( &h->cabac ) )
     /* bypass-coded sign */
5481 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Derives the coded_block_flag context for a block of category `cat`
 * (see the cat legend in decode_cabac_residual). nza/nzb are the
 * "neighbour has nonzero coefficients" flags taken from either the cached
 * per-4x4 non-zero counts or from dedicated bits in left_cbp/top_cbp
 * (bit 8 = luma DC present, bits 6..7 = chroma DC present per component).
 * Each category owns its own group of 4 contexts, hence the 4*cat offset.
 */
5485 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5490 nza = h->left_cbp&0x100;
5491 nzb = h-> top_cbp&0x100;
5492 } else if( cat == 1 || cat == 2 ) {
5493 nza = h->non_zero_count_cache[scan8[idx] - 1];
5494 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5495 } else if( cat == 3 ) {
5496 nza = (h->left_cbp>>(6+idx))&0x01;
5497 nzb = (h-> top_cbp>>(6+idx))&0x01;
5500 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5501 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5510 return ctx + 4 * cat;
/**
 * Decodes one residual block with CABAC:
 * coded_block_flag, then the significance map (significant_coeff_flag /
 * last_significant_coeff_flag), then, from the last coefficient backwards,
 * coeff_abs_level_minus1 (unary prefix + exp-golomb bypass escape above 14)
 * and a bypass sign bit. Decoded levels are written to `block` in
 * `scantable` order, scaled by `qmul` for non-DC categories.
 * Returns 0 on success (per the visible call sites, <0 means error).
 */
5513 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
5514 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
     /* context-table base offsets; the [h->mb_field_decoding_flag] index
      * selects the frame vs field coding context sets */
5515 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5516 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5517 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5518 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5519 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
     /* 4x4 blocks: significance context == coefficient position */
5520 static const int identity[15] = {
5521 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
     /* 8x8 blocks: position -> context mapping for the significance map */
5523 static const int significant_coeff_flag_offset_8x8[63] = {
5524 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5525 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5526 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5527 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
     /* 8x8 blocks: position -> context mapping for last_significant_coeff */
5529 static const int last_coeff_flag_offset_8x8[63] = {
5530 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5531 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5532 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5533 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5539 int coeff_count = 0;
5542 int abslevelgt1 = 0;
5544 const int* significant_coeff_ctx_offset;
5545 const int* last_coeff_ctx_offset;
5546 const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat]
5547 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5548 const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat]
5549 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5551 /* cat: 0-> DC 16x16 n = 0
5552 * 1-> AC 16x16 n = luma4x4idx
5553 * 2-> Luma4x4 n = luma4x4idx
5554 * 3-> DC Chroma n = iCbCr
5555 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5556 * 5-> Luma8x8 n = 4 * luma8x8idx
5559 /* read coded block flag */
5561 significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8;
5562 last_coeff_ctx_offset = last_coeff_flag_offset_8x8;
     /* coded_block_flag == 0: record "no coefficients" in the nnz cache and return */
5564 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5565 if( cat == 1 || cat == 2 )
5566 h->non_zero_count_cache[scan8[n]] = 0;
5568 h->non_zero_count_cache[scan8[16+n]] = 0;
     /* non-8x8 categories use the position itself as the significance context */
5573 significant_coeff_ctx_offset =
5574 last_coeff_ctx_offset = identity;
     /* significance map: record positions of nonzero coefficients */
5577 for(last= 0; last < max_coeff - 1; last++) {
5578 int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last];
5579 if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) {
5580 int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last];
5581 index[coeff_count++] = last;
5582 if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) {
     /* the final position has no explicit significance bin: it must be set
      * if the loop ran to the end without a "last" flag */
5588 if( last == max_coeff -1 ) {
5589 index[coeff_count++] = last;
5591 assert(coeff_count > 0);
     /* publish the nonzero-count for this block (also feeds the cbf contexts
      * of neighbouring blocks and the deblocking filter) */
5594 h->cbp_table[mb_xy] |= 0x100;
5595 else if( cat == 1 || cat == 2 )
5596 h->non_zero_count_cache[scan8[n]] = coeff_count;
5598 h->cbp_table[mb_xy] |= 0x40 << n;
5600 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5603 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
     /* decode levels from the last significant coefficient backwards */
5606 for( i = coeff_count - 1; i >= 0; i-- ) {
     /* ctx for the first bin of coeff_abs_level_minus1: depends on how many
      * level-1 / level>1 coefficients were already decoded in this block */
5607 int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat];
5608 int j= scantable[index[i]];
5610 if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) {
     /* |level| == 1: DC categories are stored unscaled, others dequantized */
5611 if( cat == 0 || cat == 3 ) {
5612 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5615 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
5616 else block[j] = qmul[j];
     /* |level| > 1: unary continuation bins, capped at 15 */
5622 ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
5623 while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) {
     /* escape: exp-golomb coded remainder in bypass bins */
5627 if( coeff_abs >= 15 ) {
5629 while( get_cabac_bypass( &h->cabac ) ) {
5630 coeff_abs += 1 << j;
5635 if( get_cabac_bypass( &h->cabac ) )
5636 coeff_abs += 1 << j ;
     /* bypass sign; store dequantized value for non-DC categories */
5640 if( cat == 0 || cat == 3 ) {
5641 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5642 else block[j] = coeff_abs;
5644 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
5645 else block[j] = coeff_abs * qmul[j];
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * In the plain frame case these are simply mb_xy - stride and mb_xy - 1;
 * in MBAFF frames the neighbour addresses depend on whether the current
 * MB pair and its neighbours are frame- or field-coded, and on whether
 * the current MB is the top or bottom of its pair.
 */
5654 void inline compute_mb_neighboors(H264Context *h)
5656 MpegEncContext * const s = &h->s;
5657 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5658 h->top_mb_xy = mb_xy - s->mb_stride;
5659 h->left_mb_xy[0] = mb_xy - 1;
5660 if(h->mb_aff_frame){
     /* address of the top MB of the current pair, and of the pair above */
5661 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5662 const int top_pair_xy = pair_xy - s->mb_stride;
5663 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5664 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5665 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5666 const int bottom = (s->mb_y & 1);
     /* step one pair further up when frame/field coding of the pairs differs */
5668 ? !curr_mb_frame_flag // bottom macroblock
5669 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5671 h->top_mb_xy -= s->mb_stride;
5673 if (left_mb_frame_flag != curr_mb_frame_flag) {
5674 h->left_mb_xy[0] = pair_xy - 1;
5681 * decodes a macroblock
5682 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5684 static int decode_mb_cabac(H264Context *h) {
5685 MpegEncContext * const s = &h->s;
5686 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5687 int mb_type, partition_count, cbp = 0;
5688 int dct8x8_allowed= h->pps.transform_8x8_mode;
5690 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5692 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     /* ---- skip flag (P/B slices only) ---- */
5693 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5694 /* read skip flags */
5695 if( decode_cabac_mb_skip( h ) ) {
     /* skipped MB: clear per-MB side info used by neighbour contexts */
5698 h->cbp_table[mb_xy] = 0;
5699 h->chroma_pred_mode_table[mb_xy] = 0;
5700 h->last_qscale_diff = 0;
     /* ---- MBAFF: field/frame decision is made once per MB pair ---- */
5706 if(h->mb_aff_frame){
5707 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5708 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5710 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5712 h->prev_mb_skipped = 0;
     /* ---- mb_type ---- */
5714 compute_mb_neighboors(h);
5715 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5716 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
     /* translate the slice-relative mb_type index into the generic type via
      * the per-slice-type info tables; intra types fall through to the
      * common decode_intra_mb path */
5720 if( h->slice_type == B_TYPE ) {
5722 partition_count= b_mb_type_info[mb_type].partition_count;
5723 mb_type= b_mb_type_info[mb_type].type;
5726 goto decode_intra_mb;
5728 } else if( h->slice_type == P_TYPE ) {
5730 partition_count= p_mb_type_info[mb_type].partition_count;
5731 mb_type= p_mb_type_info[mb_type].type;
5734 goto decode_intra_mb;
5737 assert(h->slice_type == I_TYPE);
5739 partition_count = 0;
5740 cbp= i_mb_type_info[mb_type].cbp;
5741 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5742 mb_type= i_mb_type_info[mb_type].type;
5744 if(h->mb_field_decoding_flag)
5745 mb_type |= MB_TYPE_INTERLACED;
5747 h->slice_table[ mb_xy ]= h->slice_num;
     /* ---- I_PCM: raw samples follow; bypass the arithmetic decoder ---- */
5749 if(IS_INTRA_PCM(mb_type)) {
5753 // We assume these blocks are very rare so we dont optimize it.
5754 // FIXME The two following lines get the bitstream position in the cabac
5755 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5756 ptr= h->cabac.bytestream;
5757 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5759 // The pixels are stored in the same order as levels in h->mb array.
5760 for(y=0; y<16; y++){
5761 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5762 for(x=0; x<16; x++){
5763 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5764 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5768 const int index= 256 + 4*(y&3) + 32*(y>>2);
5770 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5771 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5775 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5777 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5778 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
     /* restart the CABAC engine after the raw bytes */
5782 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5784 // All blocks are present
5785 h->cbp_table[mb_xy] = 0x1ef;
5786 h->chroma_pred_mode_table[mb_xy] = 0;
5787 // In deblocking, the quantizer is 0
5788 s->current_picture.qscale_table[mb_xy]= 0;
5789 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5790 // All coeffs are present
5791 memset(h->non_zero_count[mb_xy], 16, 16);
5792 s->current_picture.mb_type[mb_xy]= mb_type;
5796 fill_caches(h, mb_type, 0);
     /* ---- intra prediction modes ---- */
5798 if( IS_INTRA( mb_type ) ) {
5800 if( IS_INTRA4x4( mb_type ) ) {
     /* with 8x8 transform, one mode covers each 2x2 group of 4x4 blocks */
5801 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5802 mb_type |= MB_TYPE_8x8DCT;
5803 for( i = 0; i < 16; i+=4 ) {
5804 int pred = pred_intra_mode( h, i );
5805 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5806 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5809 for( i = 0; i < 16; i++ ) {
5810 int pred = pred_intra_mode( h, i );
5811 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5813 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5816 write_back_intra_pred_mode(h);
5817 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5819 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5820 if( h->intra16x16_pred_mode < 0 ) return -1;
5822 h->chroma_pred_mode_table[mb_xy] =
5823 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5825 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5826 if( h->chroma_pred_mode < 0 ) return -1;
     /* ---- 8x8 sub-partitioned inter MB: sub types, refs, mvds per 8x8 ---- */
5827 } else if( partition_count == 4 ) {
5828 int i, j, sub_partition_count[4], list, ref[2][4];
5830 if( h->slice_type == B_TYPE ) {
5831 for( i = 0; i < 4; i++ ) {
5832 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5833 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5834 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5836 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5837 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5838 pred_direct_motion(h, &mb_type);
     /* mark direct 8x8 blocks so ref decoding skips them */
5839 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5840 for( i = 0; i < 4; i++ )
5841 if( IS_DIRECT(h->sub_mb_type[i]) )
5842 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5846 for( i = 0; i < 4; i++ ) {
5847 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5848 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5849 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
     /* reference indices, one per non-direct 8x8 block and list */
5853 for( list = 0; list < 2; list++ ) {
5854 if( h->ref_count[list] > 0 ) {
5855 for( i = 0; i < 4; i++ ) {
5856 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5857 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5858 if( h->ref_count[list] > 1 )
5859 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5865 h->ref_cache[list][ scan8[4*i]+1 ]=
5866 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5872 dct8x8_allowed = get_dct8x8_allowed(h);
     /* motion vector differences per sub-partition */
5874 for(list=0; list<2; list++){
5876 if(IS_DIRECT(h->sub_mb_type[i])){
5877 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5880 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5882 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5883 const int sub_mb_type= h->sub_mb_type[i];
5884 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5885 for(j=0; j<sub_partition_count[i]; j++){
5888 const int index= 4*i + block_width*j;
5889 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5890 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5891 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5893 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5894 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5895 tprintf("final mv:%d %d\n", mx, my);
     /* replicate mv/mvd over all 4x4 cells covered by the sub-partition */
5897 if(IS_SUB_8X8(sub_mb_type)){
5898 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5899 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5900 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5901 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5903 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5904 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5905 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5906 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5907 }else if(IS_SUB_8X4(sub_mb_type)){
5908 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5909 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5911 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5912 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5913 }else if(IS_SUB_4X8(sub_mb_type)){
5914 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5915 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5917 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5918 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5920 assert(IS_SUB_4X4(sub_mb_type));
5921 mv_cache[ 0 ][0]= mx;
5922 mv_cache[ 0 ][1]= my;
5924 mvd_cache[ 0 ][0]= mx - mpx;
5925 mvd_cache[ 0 ][1]= my - mpy;
     /* list not used for this 8x8 block: zero mv and mvd (word writes) */
5929 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5930 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5931 p[0] = p[1] = p[8] = p[9] = 0;
5932 pd[0]= pd[1]= pd[8]= pd[9]= 0;
     /* ---- B_Direct_16x16 ---- */
5936 } else if( IS_DIRECT(mb_type) ) {
5937 pred_direct_motion(h, &mb_type);
5938 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5939 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5940 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* ---- 16x16 / 16x8 / 8x16 inter MB: refs then mvds per partition ---- */
5942 int list, mx, my, i, mpx, mpy;
5943 if(IS_16X16(mb_type)){
5944 for(list=0; list<2; list++){
5945 if(IS_DIR(mb_type, 0, list)){
5946 if(h->ref_count[list] > 0 ){
5947 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5948 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5951 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
5953 for(list=0; list<2; list++){
5954 if(IS_DIR(mb_type, 0, list)){
5955 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5957 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5958 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5959 tprintf("final mv:%d %d\n", mx, my);
5961 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5962 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5964 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5967 else if(IS_16X8(mb_type)){
5968 for(list=0; list<2; list++){
5969 if(h->ref_count[list]>0){
5971 if(IS_DIR(mb_type, i, list)){
5972 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5973 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5975 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5979 for(list=0; list<2; list++){
5981 if(IS_DIR(mb_type, i, list)){
5982 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5983 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5984 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5985 tprintf("final mv:%d %d\n", mx, my);
5987 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5988 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5990 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5991 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5996 assert(IS_8X16(mb_type));
5997 for(list=0; list<2; list++){
5998 if(h->ref_count[list]>0){
6000 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6001 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6002 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6004 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6008 for(list=0; list<2; list++){
6010 if(IS_DIR(mb_type, i, list)){
6011 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6012 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6013 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6015 tprintf("final mv:%d %d\n", mx, my);
6016 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6017 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6019 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6020 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6027 if( IS_INTER( mb_type ) ) {
6028 h->chroma_pred_mode_table[mb_xy] = 0;
6029 write_back_motion( h, mb_type );
     /* ---- coded_block_pattern (non-intra16x16 MBs) ---- */
6032 if( !IS_INTRA16x16( mb_type ) ) {
6033 cbp = decode_cabac_mb_cbp_luma( h );
6034 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6037 h->cbp_table[mb_xy] = cbp;
     /* 8x8 transform flag for inter MBs, only when luma coefficients exist */
6039 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6040 if( decode_cabac_mb_transform_size( h ) )
6041 mb_type |= MB_TYPE_8x8DCT;
6043 s->current_picture.mb_type[mb_xy]= mb_type;
     /* ---- residuals: qp delta, then coefficient blocks ---- */
6045 if( cbp || IS_INTRA16x16( mb_type ) ) {
6046 const uint8_t *scan, *dc_scan;
     /* choose frame vs field scan order */
6049 if(IS_INTERLACED(mb_type)){
6050 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6051 dc_scan= luma_dc_field_scan;
6053 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6054 dc_scan= luma_dc_zigzag_scan;
6057 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6058 if( dqp == INT_MIN ){
6059 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
     /* wrap qscale back into the valid 0..51 range */
6063 if(((unsigned)s->qscale) > 51){
6064 if(s->qscale<0) s->qscale+= 52;
6065 else s->qscale-= 52;
6067 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
     /* intra16x16: separate luma DC block (cat 0), then 15-coeff AC blocks (cat 1) */
6069 if( IS_INTRA16x16( mb_type ) ) {
6071 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6072 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0)
6075 for( i = 0; i < 16; i++ ) {
6076 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6077 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 )
6081 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
     /* other MBs: per-8x8 luma residuals, 8x8 (cat 5) or 4x4 (cat 2) transform */
6085 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6086 if( cbp & (1<<i8x8) ) {
6087 if( IS_8x8DCT(mb_type) ) {
6088 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6089 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
6094 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6097 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6098 const int index = 4*i8x8 + i4x4;
6099 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6100 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 )
6104 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6105 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
     /* chroma DC (cat 3) for both components when any chroma cbp bit is set */
6112 for( c = 0; c < 2; c++ ) {
6113 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6114 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->dequant4_coeff[h->chroma_qp], 4) < 0)
     /* chroma AC (cat 4) only when cbp says DC+AC */
6121 for( c = 0; c < 2; c++ ) {
6122 for( i = 0; i < 4; i++ ) {
6123 const int index = 16 + 4 * c + i;
6124 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6125 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0)
6130 uint8_t * const nnz= &h->non_zero_count_cache[0];
6131 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6132 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
     /* no residuals at all: clear the non-zero-count cache */
6135 uint8_t * const nnz= &h->non_zero_count_cache[0];
6136 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6137 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6138 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6141 s->current_picture.qscale_table[mb_xy]= s->qscale;
6142 write_back_non_zero_count(h);
/**
 * Deblocks one vertical luma edge (16 pixels tall).
 * bS < 4: the normal filter is delegated to the dsp h_loop_filter with a
 * tc0 value per 4-pixel group. bS == 4 (intra MB edge, so all four bS are
 * equal): the strong filter is applied here in C.
 */
6148 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6150 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6151 const int alpha = alpha_table[index_a];
6152 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
     /* tc0 per 4-pixel group; -1 disables filtering for that group */
6157 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6158 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6160 /* 16px edge length, because bS=4 is triggered by being at
6161 * the edge of an intra MB, so all 4 bS are the same */
6162 for( d = 0; d < 16; d++ ) {
6163 const int p0 = pix[-1];
6164 const int p1 = pix[-2];
6165 const int p2 = pix[-3];
6167 const int q0 = pix[0];
6168 const int q1 = pix[1];
6169 const int q2 = pix[2];
     /* edge activity thresholds (H.264 8.7 filterSamplesFlag) */
6171 if( ABS( p0 - q0 ) < alpha &&
6172 ABS( p1 - p0 ) < beta &&
6173 ABS( q1 - q0 ) < beta ) {
6175 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
     /* strong filtering of the p side (3 pixels) */
6176 if( ABS( p2 - p0 ) < beta)
6178 const int p3 = pix[-4];
6180 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6181 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6182 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6185 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
     /* strong filtering of the q side (3 pixels) */
6187 if( ABS( q2 - q0 ) < beta)
6189 const int q3 = pix[3];
6191 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6192 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6193 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6196 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* fallback: only p0/q0 are modified */
6200 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6201 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6203 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge; both the normal (bS < 4, with
 * per-group tc = tc0+1) and the intra/strong (bS == 4) cases are
 * delegated to the dsp chroma loop-filter routines.
 */
6209 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6211 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6212 const int alpha = alpha_table[index_a];
6213 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6218 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6219 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6221 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the first vertical luma edge of an MBAFF macroblock pair,
 * row by row (16 rows), since each row may use a different bS and a
 * different qp (qp[0]/qp[1] are the per-neighbour averaged quantizers).
 */
6225 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6227 for( i = 0; i < 16; i++, pix += stride) {
6233 int bS_index = (i >> 1);
6234 if (h->mb_field_decoding_flag) {
6236 bS_index |= (i & 1);
6239 if( bS[bS_index] == 0 ) {
     /* field MBs alternate qp per row; frame MBs switch at row 8 */
6243 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6244 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6245 alpha = alpha_table[index_a];
6246 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
     /* normal filter: clipped delta applied to p0/q0 (and p1/q1 if smooth) */
6249 if( bS[bS_index] < 4 ) {
6250 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6251 /* 4px edge length */
6252 const int p0 = pix[-1];
6253 const int p1 = pix[-2];
6254 const int p2 = pix[-3];
6255 const int q0 = pix[0];
6256 const int q1 = pix[1];
6257 const int q2 = pix[2];
6259 if( ABS( p0 - q0 ) < alpha &&
6260 ABS( p1 - p0 ) < beta &&
6261 ABS( q1 - q0 ) < beta ) {
6265 if( ABS( p2 - p0 ) < beta ) {
6266 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6269 if( ABS( q2 - q0 ) < beta ) {
6270 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6274 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6275 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6276 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6277 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* strong filter (bS == 4), same formulas as filter_mb_edgev */
6280 /* 4px edge length */
6281 const int p0 = pix[-1];
6282 const int p1 = pix[-2];
6283 const int p2 = pix[-3];
6285 const int q0 = pix[0];
6286 const int q1 = pix[1];
6287 const int q2 = pix[2];
6289 if( ABS( p0 - q0 ) < alpha &&
6290 ABS( p1 - p0 ) < beta &&
6291 ABS( q1 - q0 ) < beta ) {
6293 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6294 if( ABS( p2 - p0 ) < beta)
6296 const int p3 = pix[-4];
6298 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6299 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6300 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6303 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6305 if( ABS( q2 - q0 ) < beta)
6307 const int q3 = pix[3];
6309 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6310 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6311 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6314 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6318 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6319 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6321 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the first vertical chroma edge of an MBAFF macroblock pair,
 * row by row (8 rows), reusing the luma bS values.
 */
6326 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6328 for( i = 0; i < 8; i++, pix += stride) {
6336 if( bS[bS_index] == 0 ) {
     /* NOTE(review): in the frame case (i >> 3) is 0 for all i < 8, so qp[1]
      * is never used here; chroma has 8 rows, so this looks like it should
      * be (i >> 2) — compare the 16-row luma variant above. TODO confirm. */
6340 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6341 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6342 alpha = alpha_table[index_a];
6343 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
     /* normal chroma filter: clipped delta on p0/q0 only */
6344 if( bS[bS_index] < 4 ) {
6345 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6346 /* 2px edge length (because we use same bS than the one for luma) */
6347 const int p0 = pix[-1];
6348 const int p1 = pix[-2];
6349 const int q0 = pix[0];
6350 const int q1 = pix[1];
6352 if( ABS( p0 - q0 ) < alpha &&
6353 ABS( p1 - p0 ) < beta &&
6354 ABS( q1 - q0 ) < beta ) {
6355 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6357 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6358 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6359 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* strong chroma filter (bS == 4) */
6362 const int p0 = pix[-1];
6363 const int p1 = pix[-2];
6364 const int q0 = pix[0];
6365 const int q1 = pix[1];
6367 if( ABS( p0 - q0 ) < alpha &&
6368 ABS( p1 - p0 ) < beta &&
6369 ABS( q1 - q0 ) < beta ) {
6371 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6372 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6373 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge (16 pixels wide).
 * bS < 4: delegated to the dsp v_loop_filter with per-group tc0.
 * bS == 4: strong filter in C — same formulas as filter_mb_edgev, with
 * `pix_next` (= stride) stepping across rows instead of columns.
 */
6379 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6381 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6382 const int alpha = alpha_table[index_a];
6383 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6384 const int pix_next = stride;
6389 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6390 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6392 /* 16px edge length, see filter_mb_edgev */
6393 for( d = 0; d < 16; d++ ) {
6394 const int p0 = pix[-1*pix_next];
6395 const int p1 = pix[-2*pix_next];
6396 const int p2 = pix[-3*pix_next];
6397 const int q0 = pix[0];
6398 const int q1 = pix[1*pix_next];
6399 const int q2 = pix[2*pix_next];
6401 if( ABS( p0 - q0 ) < alpha &&
6402 ABS( p1 - p0 ) < beta &&
6403 ABS( q1 - q0 ) < beta ) {
6405 const int p3 = pix[-4*pix_next];
6406 const int q3 = pix[ 3*pix_next];
6408 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6409 if( ABS( p2 - p0 ) < beta) {
6411 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6412 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6413 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6416 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6418 if( ABS( q2 - q0 ) < beta) {
6420 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6421 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6422 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6425 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* fallback: only p0/q0 are modified */
6429 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6430 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6432 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge; like filter_mb_edgecv but using
 * the vertical-direction dsp chroma loop-filter routines.
 */
6439 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6441 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6442 const int alpha = alpha_table[index_a];
6443 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6448 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6449 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6451 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Applies the in-loop deblocking filter to one decoded macroblock
 * (luma + both chroma planes), covering the left/top edges shared with
 * neighbor MBs and the internal 4x4 (or 8x8) edges.
 * Boundary strength (bS) per edge is derived from intra flags, coded
 * coefficients (nnz) and motion vector / reference differences, as in the
 * H.264 spec (8.7). Handles the MBAFF special cases where a frame MB
 * borders a field MB.
 * NOTE(review): many interior lines are elided in this view; comments below
 * describe only the visible statements.
 */
6455 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6456 MpegEncContext * const s = &h->s;
6457 const int mb_xy= mb_x + mb_y*s->mb_stride;
6458 int first_vertical_edge_done = 0;
6460 /* FIXME: A given frame may occupy more than one position in
6461 * the reference list. So ref2frm should be populated with
6462 * frame numbers, not indices. */
/* maps ref_cache values (-2 = unavailable, -1 = intra/none) to frame ids; +2 bias at use sites */
6463 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/* MBAFF-only special handling of the first vertical (left) edge: */
6466 // left mb is in picture
6467 && h->slice_table[mb_xy-1] != 255
6468 // and current and left pair do not have the same interlaced type
6469 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6470 // and left mb is in the same slice if deblocking_filter == 2
6471 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6472 /* First vertical edge is different in MBAFF frames
6473 * There are 8 different bS to compute and 2 different Qp
6480 first_vertical_edge_done = 1;
6481 for( i = 0; i < 8; i++ ) {
/* b_idx/bn_idx index the scratch caches (8+4 offset skips the top/left border rows) */
6483 int b_idx= 8 + 4 + 8*y;
6484 int bn_idx= b_idx - 1;
6486 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
/* bS selection: intra neighbor > coded coefficients > mv/ref mismatch */
6488 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6489 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6491 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6492 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6493 h->non_zero_count_cache[bn_idx] != 0 ) {
6498 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
/* different reference frame, or any MV component differing by >= 1 full pel (4 in qpel units) */
6499 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6500 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6501 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6508 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6509 // Do not use s->qscale as luma quantizer because it has not the same
6510 // value in IPCM macroblocks.
/* edge QP = average of the two adjacent MBs' QPs, separately for each field pair member */
6511 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6512 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6513 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6514 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6515 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6516 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6519 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6520 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6521 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6522 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6523 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6526 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6527 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbor across the MB boundary edge (left for dir 0, top for dir 1) */
6530 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
/* slice_table == 255 means the neighbor is outside the picture/slice: skip edge 0 */
6531 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6533 if (first_vertical_edge_done) {
6535 first_vertical_edge_done = 0;
/* deblocking_filter == 2: do not filter across slice boundaries */
6538 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6542 for( edge = start; edge < 4; edge++ ) {
6543 /* mbn_xy: neighbor macroblock */
6544 int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
/* with the 8x8 transform only every second edge exists */
6548 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
6551 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6552 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
6553 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
6555 // This is a special case in the norm where the filtering must
6556 // be done twice (one each of the field) even if we are in a
6557 // frame macroblock.
/* filter against each field of the interlaced MB pair above, using doubled strides */
6559 unsigned int tmp_linesize = 2 * linesize;
6560 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6561 int mbn_xy = mb_xy - 2 * s->mb_stride;
6565 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6566 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6567 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6570 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6573 // Do not use s->qscale as luma quantizer because it has not the same
6574 // value in IPCM macroblocks.
6575 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6576 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6577 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6578 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6579 chroma_qp = ( h->chroma_qp +
6580 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6581 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6582 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* second field of the MB pair above */
6585 mbn_xy += s->mb_stride;
6586 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6587 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6588 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6591 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6594 // Do not use s->qscale as luma quantizer because it has not the same
6595 // value in IPCM macroblocks.
6596 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6597 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6598 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6599 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6600 chroma_qp = ( h->chroma_qp +
6601 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6602 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6603 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* normal (non-special-case) bS computation for this edge */
6606 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6607 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6610 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
6611 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6620 bS[0] = bS[1] = bS[2] = bS[3] = value;
6623 for( i = 0; i < 4; i++ ) {
6624 int x = dir == 0 ? edge : i;
6625 int y = dir == 0 ? i : edge;
6626 int b_idx= 8 + 4 + x + 8*y;
/* neighbor block is one step left (dir 0) or one row up (dir 1) in the cache */
6627 int bn_idx= b_idx - (dir ? 8:1);
6629 if( h->non_zero_count_cache[b_idx] != 0 ||
6630 h->non_zero_count_cache[bn_idx] != 0 ) {
6637 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6638 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6639 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6640 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
/* all four bS zero: nothing to filter on this edge */
6648 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6653 // Do not use s->qscale as luma quantizer because it has not the same
6654 // value in IPCM macroblocks.
6655 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6656 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6657 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6658 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* vertical edges: luma every 4 pixels, chroma every 8 (hence the edge&1 test) */
6660 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6661 if( (edge&1) == 0 ) {
6662 int chroma_qp = ( h->chroma_qp +
6663 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6664 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6665 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6668 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6669 if( (edge&1) == 0 ) {
6670 int chroma_qp = ( h->chroma_qp +
6671 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6672 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6673 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes all macroblocks of the current slice.
 * Three entropy paths are visible: CABAC (pps.cabac set), CAVLC, and a
 * legacy decode_mb() fallback loop. Each path decodes MBs until the slice
 * or picture ends, reporting decoded/errored regions to the error
 * resilience layer via ff_er_add_slice().
 * @return 0 on success (in the elided success paths), -1 on error.
 */
6680 static int decode_slice(H264Context *h){
6681 MpegEncContext * const s = &h->s;
/* with data partitioning only AC end/error flags are meaningful */
6682 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6686 if( h->pps.cabac ) {
/* CABAC payload starts on a byte boundary after the slice header */
6690 align_get_bits( &s->gb );
6693 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6694 ff_init_cabac_decoder( &h->cabac,
6695 s->gb.buffer + get_bits_count(&s->gb)/8,
6696 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6697 /* calculate pre-state */
/* initialize all 460 context models from the QP-dependent init tables */
6698 for( i= 0; i < 460; i++ ) {
6700 if( h->slice_type == I_TYPE )
6701 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6703 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack state as 2*index + MPS bit */
6706 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6708 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6712 int ret = decode_mb_cabac(h);
6715 if(ret>=0) hl_decode_mb(h);
6717 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6718 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6721 if(ret>=0) ret = decode_mb_cabac(h);
6723 if(ret>=0) hl_decode_mb(h);
6726 eos = get_cabac_terminate( &h->cabac );
/* bytestream may legally overread by 1; more than that is an error */
6728 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6729 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6734 if( ++s->mb_x >= s->mb_width ) {
6736 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6738 if(h->mb_aff_frame) {
/* end_of_slice_flag or picture exhausted: mark decoded region and stop */
6743 if( eos || s->mb_y >= s->mb_height ) {
6744 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6745 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6752 int ret = decode_mb_cavlc(h);
6754 if(ret>=0) hl_decode_mb(h);
6756 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6758 ret = decode_mb_cavlc(h);
6760 if(ret>=0) hl_decode_mb(h);
6765 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6766 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6771 if(++s->mb_x >= s->mb_width){
6773 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6775 if(h->mb_aff_frame) {
6778 if(s->mb_y >= s->mb_height){
6779 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* landing exactly on the end of the bitstream is a clean slice end */
6781 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6782 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6786 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* mid-row slice end check; pending skip run means more MBs follow */
6793 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6794 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6795 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6796 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6800 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* legacy fallback path */
6809 for(;s->mb_y < s->mb_height; s->mb_y++){
6810 for(;s->mb_x < s->mb_width; s->mb_x++){
6811 int ret= decode_mb(h);
6816 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6817 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6822 if(++s->mb_x >= s->mb_width){
6824 if(++s->mb_y >= s->mb_height){
6825 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6826 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6830 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* fixed: this line was corrupted ("s->?gb" / "s->gb?."); restored to match the
 * identical end-of-bitstream checks two lines above and below */
6837 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6838 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6839 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6843 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6850 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6853 return -1; //not reached
/**
 * Parses an SEI "user data unregistered" payload.
 * Reads up to sizeof(user_data)-1 bytes, looks for an x264 version banner
 * (after the 16-byte UUID) to record h->x264_build for bug workarounds,
 * and skips any remaining payload bytes.
 * @param size payload size in bytes as signaled in the SEI header
 * @return presumably 0 on success (return statement elided in this view)
 */
6856 static int decode_unregistered_user_data(H264Context *h, int size){
6857 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of NUL-terminatable text */
6858 uint8_t user_data[16+256];
6864 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6865 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6869 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6870 if(e==1 && build>=0)
6871 h->x264_build= build;
6873 if(s->avctx->debug & FF_DEBUG_BUGS)
6874 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
6877 skip_bits(&s->gb, 8);
/**
 * Parses the SEI NAL unit: iterates over SEI messages, each encoded as a
 * type and size using the 255-continuation byte scheme (ff_byte values of
 * 255 extend the running sum), dispatches known payload types and skips
 * the rest.
 */
6882 static int decode_sei(H264Context *h){
6883 MpegEncContext * const s = &h->s;
6885 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* sei type: sum of bytes, terminated by the first byte != 255 */
6890 type+= show_bits(&s->gb, 8);
6891 }while(get_bits(&s->gb, 8) == 255);
/* sei payload size, same continuation encoding */
6895 size+= show_bits(&s->gb, 8);
6896 }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): trailing ';' makes this 'if' a no-op as visible — the
 * error-return statement may be on an elided line; confirm against the
 * full file */
6900 if(decode_unregistered_user_data(h, size) < 0);
/* unknown payload type: skip it */
6904 skip_bits(&s->gb, 8*size);
6907 //FIXME check bits here
6908 align_get_bits(&s->gb);
/**
 * Parses VUI hrd_parameters() (hypothetical reference decoder settings).
 * All fields are read to keep the bitstream position correct; none of the
 * values are stored — the decoder only needs to skip past them.
 */
6914 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6915 MpegEncContext * const s = &h->s;
6917 cpb_count = get_ue_golomb(&s->gb) + 1;
6918 get_bits(&s->gb, 4); /* bit_rate_scale */
6919 get_bits(&s->gb, 4); /* cpb_size_scale */
6920 for(i=0; i<cpb_count; i++){
6921 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6922 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6923 get_bits1(&s->gb); /* cbr_flag */
6925 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6926 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6927 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6928 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses VUI (Video Usability Information) appended to the SPS.
 * Stores sample aspect ratio, timing info and bitstream-restriction data
 * (num_reorder_frames) into *sps; skips purely informative fields.
 */
6931 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6932 MpegEncContext * const s = &h->s;
6933 int aspect_ratio_info_present_flag, aspect_ratio_idc;
6934 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6936 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6938 if( aspect_ratio_info_present_flag ) {
6939 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator; otherwise table lookup */
6940 if( aspect_ratio_idc == EXTENDED_SAR ) {
6941 sps->sar.num= get_bits(&s->gb, 16);
6942 sps->sar.den= get_bits(&s->gb, 16);
6943 }else if(aspect_ratio_idc < 16){
6944 sps->sar= pixel_aspect[aspect_ratio_idc];
6946 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6953 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6955 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6956 get_bits1(&s->gb); /* overscan_appropriate_flag */
6959 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6960 get_bits(&s->gb, 3); /* video_format */
6961 get_bits1(&s->gb); /* video_full_range_flag */
6962 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6963 get_bits(&s->gb, 8); /* colour_primaries */
6964 get_bits(&s->gb, 8); /* transfer_characteristics */
6965 get_bits(&s->gb, 8); /* matrix_coefficients */
6969 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6970 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6971 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6974 sps->timing_info_present_flag = get_bits1(&s->gb);
6975 if(sps->timing_info_present_flag){
6976 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6977 sps->time_scale = get_bits_long(&s->gb, 32);
6978 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed only to advance the bit position */
6981 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6982 if(nal_hrd_parameters_present_flag)
6983 decode_hrd_parameters(h, sps);
6984 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6985 if(vcl_hrd_parameters_present_flag)
6986 decode_hrd_parameters(h, sps);
6987 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6988 get_bits1(&s->gb); /* low_delay_hrd_flag */
6989 get_bits1(&s->gb); /* pic_struct_present_flag */
/* num_reorder_frames is the only restriction value kept (frame reordering depth) */
6991 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6992 if(sps->bitstream_restriction_flag){
6993 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6994 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6995 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6996 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6997 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6998 sps->num_reorder_frames = get_ue_golomb(&s->gb);
6999 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Parses a sequence parameter set (SPS) NAL unit into h->sps_buffer[sps_id].
 * Reads profile/level, POC configuration, reference frame count, picture
 * dimensions in MBs, AFF/cropping flags and optional VUI data.
 * @return 0 on success (elided path), -1 on invalid data
 */
7005 static inline int decode_seq_parameter_set(H264Context *h){
7006 MpegEncContext * const s = &h->s;
7007 int profile_idc, level_idc;
7011 profile_idc= get_bits(&s->gb, 8);
7012 get_bits1(&s->gb); //constraint_set0_flag
7013 get_bits1(&s->gb); //constraint_set1_flag
7014 get_bits1(&s->gb); //constraint_set2_flag
7015 get_bits1(&s->gb); //constraint_set3_flag
7016 get_bits(&s->gb, 4); // reserved
7017 level_idc= get_bits(&s->gb, 8);
7018 sps_id= get_ue_golomb(&s->gb);
    /* added: sps_id comes from untrusted bitstream data and indexes
     * sps_buffer — reject out-of-range values (spec allows 0..31) to
     * avoid an out-of-bounds write */
    if((unsigned)sps_id >= MAX_SPS_COUNT){
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
        return -1;
    }
7020 sps= &h->sps_buffer[ sps_id ];
7021 sps->profile_idc= profile_idc;
7022 sps->level_idc= level_idc;
/* high-profile-only syntax elements */
7024 if(sps->profile_idc >= 100){ //high profile
7025 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7026 get_bits1(&s->gb); //residual_color_transform_flag
7027 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7028 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7029 sps->transform_bypass = get_bits1(&s->gb);
7030 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag
7031 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7036 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7037 sps->poc_type= get_ue_golomb(&s->gb);
/* picture order count: type 0 uses lsb/msb, type 1 uses an offset cycle */
7039 if(sps->poc_type == 0){ //FIXME #define
7040 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7041 } else if(sps->poc_type == 1){//FIXME #define
7042 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7043 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7044 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7045 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7047 for(i=0; i<sps->poc_cycle_length; i++)
7048 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7050 if(sps->poc_type > 2){
7051 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7055 sps->ref_frame_count= get_ue_golomb(&s->gb);
7056 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7057 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7059 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7060 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7061 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb dimensions against integer overflow before validation */
7062 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7063 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7066 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7067 if(!sps->frame_mbs_only_flag)
7068 sps->mb_aff= get_bits1(&s->gb);
7072 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7074 sps->crop= get_bits1(&s->gb);
7076 sps->crop_left = get_ue_golomb(&s->gb);
7077 sps->crop_right = get_ue_golomb(&s->gb);
7078 sps->crop_top = get_ue_golomb(&s->gb);
7079 sps->crop_bottom= get_ue_golomb(&s->gb);
7080 if(sps->crop_left || sps->crop_top){
7081 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7087 sps->crop_bottom= 0;
7090 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7091 if( sps->vui_parameters_present_flag )
7092 decode_vui_parameters(h, sps);
7094 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7095 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7096 sps_id, sps->profile_idc, sps->level_idc,
7098 sps->ref_frame_count,
7099 sps->mb_width, sps->mb_height,
7100 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7101 sps->direct_8x8_inference_flag ? "8B8" : "",
7102 sps->crop_left, sps->crop_right,
7103 sps->crop_top, sps->crop_bottom,
7104 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a picture parameter set (PPS) NAL unit into h->pps_buffer[pps_id].
 * Reads entropy-coding mode, slice-group (FMO) info, default reference
 * counts, weighting flags, QP offsets and the 8x8-transform extension.
 * @param bit_length payload length in bits, used to detect the optional
 *        trailing (high-profile) fields
 * @return 0 on success (elided path), -1 on invalid data
 */
7110 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7111 MpegEncContext * const s = &h->s;
7112 int pps_id= get_ue_golomb(&s->gb);
    /* added: pps_id comes from untrusted bitstream data and indexes
     * pps_buffer — reject out-of-range values (spec allows 0..255) to
     * avoid an out-of-bounds write */
    if((unsigned)pps_id >= MAX_PPS_COUNT){
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
        return -1;
    }
7113 PPS *pps= &h->pps_buffer[pps_id];
7115 pps->sps_id= get_ue_golomb(&s->gb);
    /* added: stored sps_id later indexes sps_buffer — validate here too */
    if((unsigned)pps->sps_id >= MAX_SPS_COUNT){
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", pps->sps_id);
        return -1;
    }
7116 pps->cabac= get_bits1(&s->gb);
7117 pps->pic_order_present= get_bits1(&s->gb);
7118 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* slice groups (FMO) are parsed for position only; not supported */
7119 if(pps->slice_group_count > 1 ){
7120 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7121 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7122 switch(pps->mb_slice_group_map_type){
7125 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7126 | run_length[ i ] |1 |ue(v) |
7131 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7133 | top_left_mb[ i ] |1 |ue(v) |
7134 | bottom_right_mb[ i ] |1 |ue(v) |
7142 | slice_group_change_direction_flag |1 |u(1) |
7143 | slice_group_change_rate_minus1 |1 |ue(v) |
7148 | slice_group_id_cnt_minus1 |1 |ue(v) |
7149 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7151 | slice_group_id[ i ] |1 |u(v) |
7156 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7157 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7158 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7159 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7163 pps->weighted_pred= get_bits1(&s->gb);
7164 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7165 pps->init_qp= get_se_golomb(&s->gb) + 26;
7166 pps->init_qs= get_se_golomb(&s->gb) + 26;
7167 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7168 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7169 pps->constrained_intra_pred= get_bits1(&s->gb);
7170 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* optional high-profile extension fields, present iff bits remain */
7172 if(get_bits_count(&s->gb) < bit_length){
7173 pps->transform_8x8_mode= get_bits1(&s->gb);
7174 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag
7175 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7178 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7181 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7182 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7183 pps_id, pps->sps_id,
7184 pps->cabac ? "CABAC" : "CAVLC",
7185 pps->slice_group_count,
7186 pps->ref_count[0], pps->ref_count[1],
7187 pps->weighted_pred ? "weighted" : "",
7188 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7189 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7190 pps->constrained_intra_pred ? "CONSTR" : "",
7191 pps->redundant_pic_cnt_present ? "REDU" : "",
7192 pps->transform_8x8_mode ? "8x8DCT" : ""
7200 * finds the end of the current frame in the bitstream.
7201 * @return the position of the first byte of the next frame, or -1
/* Scans for NAL start codes (0x000001 + nal type, masked with 0x1F) and
 * uses the first_mb_in_slice flag bit / parameter-set NAL types to decide
 * where one access unit ends and the next begins. State is kept across
 * calls in the ParseContext so frames split over buffers are handled. */
7203 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7206 ParseContext *pc = &(h->s.parse_context);
7207 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7208 // mb_addr= pc->mb_addr - 1;
7210 for(i=0; i<=buf_size; i++){
/* slice NAL types (non-IDR slice / partition A / IDR slice) */
7211 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7212 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7213 if(pc->frame_start_found){
7214 // If there isn't one more byte in the buffer
7215 // the test on first_mb_in_slice cannot be done yet
7216 // do it at next call.
7217 if (i >= buf_size) break;
/* top bit set => ue(v) first_mb_in_slice starts with a 1 bit => value 0,
 * i.e. the first slice of a new picture */
7218 if (buf[i] & 0x80) {
7219 // first_mb_in_slice is 0, probably the first nal of a new
7221 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7223 pc->frame_start_found= 0;
7227 pc->frame_start_found = 1;
/* SPS / PPS / access-unit delimiter also terminate the current frame */
7229 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7230 if(pc->frame_start_found){
7232 pc->frame_start_found= 0;
/* shift the next byte into the rolling 32-bit start-code window */
7237 state= (state<<8) | buf[i];
7241 return END_NOT_FOUND;
/**
 * AVCodecParser callback: accumulates input until find_frame_end() locates
 * a complete access unit, then returns it via poutbuf/poutbuf_size.
 * On an incomplete frame, ff_combine_frame() buffers the data and the
 * (elided) early-exit path reports zero output.
 */
7244 static int h264_parse(AVCodecParserContext *s,
7245 AVCodecContext *avctx,
7246 uint8_t **poutbuf, int *poutbuf_size,
7247 const uint8_t *buf, int buf_size)
7249 H264Context *h = s->priv_data;
7250 ParseContext *pc = &h->s.parse_context;
7253 next= find_frame_end(h, buf, buf_size);
/* buffers partial data; may rewrite buf/buf_size to the combined frame */
7255 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7261 *poutbuf = (uint8_t *)buf;
7262 *poutbuf_size = buf_size;
/**
 * AVCodecParser split callback: returns the byte offset separating the
 * leading extradata (SPS/PPS headers) from the first coded frame, or 0
 * if no such boundary is found. Scans for NAL start codes in a rolling
 * 32-bit window, like find_frame_end().
 */
7266 static int h264_split(AVCodecContext *avctx,
7267 const uint8_t *buf, int buf_size)
7270 uint32_t state = -1;
7273 for(i=0; i<=buf_size; i++){
/* 0x107 = start code + SPS NAL */
7274 if((state&0xFFFFFF1F) == 0x107)
7276 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
/* first non-SPS/PPS/AUD NAL after the headers: split point found */
7278 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* back up over any zero bytes preceding the start code */
7280 while(i>4 && buf[i-5]==0) i--;
7285 state= (state<<8) | buf[i];
/**
 * Splits the input buffer into NAL units (AVC length-prefixed or Annex-B
 * start-code framed), unescapes each via decode_nal(), and dispatches on
 * nal_unit_type: slices, partitions, SEI, SPS, PPS, delimiters.
 * After all NALs, finalizes the current picture (pict_type/key_frame, POC
 * bookkeeping, reference picture marking).
 * @return number of bytes consumed (buf_index) in the visible paths
 */
7291 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7292 MpegEncContext * const s = &h->s;
7293 AVCodecContext * const avctx= s->avctx;
7297 for(i=0; i<50; i++){
7298 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7302 s->current_picture_ptr= NULL;
7311 if(buf_index >= buf_size) break;
/* AVC framing: big-endian length prefix of h->nal_length_size bytes */
7313 for(i = 0; i < h->nal_length_size; i++)
7314 nalsize = (nalsize << 8) | buf[buf_index++];
7316 // start code prefix search
7317 for(; buf_index + 3 < buf_size; buf_index++){
7318 // this should allways succeed in the first iteration
7319 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7323 if(buf_index+3 >= buf_size) break;
7328 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
/* strip one trailing zero byte
 * NOTE(review): no dst_length > 0 guard here — a zero-length NAL would
 * read ptr[-1]; confirm decode_nal can't return dst_length == 0 */
7329 if(ptr[dst_length - 1] == 0) dst_length--;
7330 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7332 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7333 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7336 if (h->is_avc && (nalsize != consumed))
7337 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7339 buf_index += consumed;
/* hurry_up / skip_frame: drop non-reference NALs entirely */
7341 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7342 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7345 switch(h->nal_unit_type){
7347 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* non-partitioned slice: one bitstream reader for intra and inter data */
7349 init_get_bits(&s->gb, ptr, bit_length);
7351 h->inter_gb_ptr= &s->gb;
7352 s->data_partitioning = 0;
7354 if(decode_slice_header(h) < 0){
7355 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* decode the slice only if it passes the skip/hurry filters */
7358 if(h->redundant_pic_count==0 && s->hurry_up < 5
7359 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7360 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7361 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7362 && avctx->skip_frame < AVDISCARD_ALL)
/* data partition A: slice header + MB addresses */
7366 init_get_bits(&s->gb, ptr, bit_length)
7368 h->inter_gb_ptr= NULL;
7369 s->data_partitioning = 1;
7371 if(decode_slice_header(h) < 0){
7372 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
/* data partition B: intra residual */
7376 init_get_bits(&h->intra_gb, ptr, bit_length);
7377 h->intra_gb_ptr= &h->intra_gb;
/* data partition C: inter residual; slice is decodable once A+B+C present */
7380 init_get_bits(&h->inter_gb, ptr, bit_length);
7381 h->inter_gb_ptr= &h->inter_gb;
7383 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7385 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7386 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7387 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7388 && avctx->skip_frame < AVDISCARD_ALL)
7392 init_get_bits(&s->gb, ptr, bit_length);
7396 init_get_bits(&s->gb, ptr, bit_length);
7397 decode_seq_parameter_set(h);
7399 if(s->flags& CODEC_FLAG_LOW_DELAY)
7402 if(avctx->has_b_frames < 2)
7403 avctx->has_b_frames= !s->low_delay;
7406 init_get_bits(&s->gb, ptr, bit_length);
7408 decode_picture_parameter_set(h, bit_length);
7411 case NAL_PICTURE_DELIMITER:
7413 case NAL_FILTER_DATA:
7416 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7420 if(!s->current_picture_ptr) return buf_index; //no frame
/* finalize the decoded picture and update POC / reference state */
7422 s->current_picture_ptr->pict_type= s->pict_type;
7423 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7425 h->prev_frame_num_offset= h->frame_num_offset;
7426 h->prev_frame_num= h->frame_num;
7427 if(s->current_picture_ptr->reference){
7428 h->prev_poc_msb= h->poc_msb;
7429 h->prev_poc_lsb= h->poc_lsb;
7431 if(s->current_picture_ptr->reference)
7432 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7442 * returns the number of bytes consumed for building the current frame
/* In truncated mode the ParseContext may have buffered part of the data,
 * so the raw position is adjusted by last_index; clamped to sane bounds. */
7444 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7445 if(s->flags&CODEC_FLAG_TRUNCATED){
7446 pos -= s->parse_context.last_index;
7447 if(pos<0) pos=0; // FIXME remove (unneeded?)
7451 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7452 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * AVCodec decode callback: decodes one frame's worth of NAL units.
 * Handles truncated-input reassembly, one-time avcC (AVC extradata)
 * parsing, Annex-B extradata on the first picture, then sorts decoded
 * pictures into display (POC) order before returning one in *pict.
 * @return bytes consumed, or -1 on error (error paths partly elided)
 */
7458 static int decode_frame(AVCodecContext *avctx,
7459 void *data, int *data_size,
7460 uint8_t *buf, int buf_size)
7462 H264Context *h = avctx->priv_data;
7463 MpegEncContext *s = &h->s;
7464 AVFrame *pict = data;
7467 s->flags= avctx->flags;
7468 s->flags2= avctx->flags2;
7470 /* no supplementary picture */
7471 if (buf_size == 0) {
/* truncated mode: wait until a full frame has been accumulated */
7475 if(s->flags&CODEC_FLAG_TRUNCATED){
7476 int next= find_frame_end(h, buf, buf_size);
7478 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7480 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* one-time parse of avcC extradata (MP4-style): SPS + PPS records */
7483 if(h->is_avc && !h->got_avcC) {
7484 int i, cnt, nalsize;
7485 unsigned char *p = avctx->extradata;
7486 if(avctx->extradata_size < 7) {
7487 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7491 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7494 /* sps and pps in the avcC always have length coded with 2 bytes,
7495 so put a fake nal_length_size = 2 while parsing them */
7496 h->nal_length_size = 2;
7497 // Decode sps from avcC
7498 cnt = *(p+5) & 0x1f; // Number of sps
7500 for (i = 0; i < cnt; i++) {
7501 nalsize = BE_16(p) + 2;
7502 if(decode_nal_units(h, p, nalsize) < 0) {
7503 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7508 // Decode pps from avcC
7509 cnt = *(p++); // Number of pps
7510 for (i = 0; i < cnt; i++) {
7511 nalsize = BE_16(p) + 2;
7512 if(decode_nal_units(h, p, nalsize) != nalsize) {
7513 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7518 // Now store right nal length size, that will be use to parse all other nals
7519 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7520 // Do not reparse avcC
/* Annex-B extradata: feed it through the NAL parser once, before frame 0 */
7524 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7525 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7529 buf_index=decode_nal_units(h, buf, buf_size);
7533 //FIXME do something with unavailable reference frames
7535 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7536 if(!s->current_picture_ptr){
7537 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7542 Picture *out = s->current_picture_ptr;
7543 #if 0 //decode order
7544 *data_size = sizeof(AVFrame);
7546 /* Sort B-frames into display order */
7547 Picture *cur = s->current_picture_ptr;
7548 Picture *prev = h->delayed_output_pic;
7553 int dropped_frame = 0;
/* grow the reorder delay if the stream declares a larger one */
7556 if(h->sps.bitstream_restriction_flag
7557 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7558 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* append the new picture to the delayed (reorder) queue */
7562 while(h->delayed_pic[pics]) pics++;
7563 h->delayed_pic[pics++] = cur;
7564 if(cur->reference == 0)
7567 for(i=0; h->delayed_pic[i]; i++)
7568 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* pick the queued picture with the smallest POC (up to the next keyframe) */
7571 out = h->delayed_pic[0];
7572 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7573 if(h->delayed_pic[i]->poc < out->poc){
7574 out = h->delayed_pic[i];
7578 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7579 if(prev && pics <= s->avctx->has_b_frames)
/* heuristic: bump has_b_frames when output order proves deeper reordering */
7581 else if((out_of_order && pics-1 == s->avctx->has_b_frames)
7583 ((!cross_idr && prev && out->poc > prev->poc + 2)
7584 || cur->pict_type == B_TYPE)))
7587 s->avctx->has_b_frames++;
7590 else if(out_of_order)
7593 if(out_of_order || pics > s->avctx->has_b_frames){
7594 dropped_frame = (out != h->delayed_pic[out_idx]);
7595 for(i=out_idx; h->delayed_pic[i]; i++)
7596 h->delayed_pic[i] = h->delayed_pic[i+1];
7599 if(prev == out && !dropped_frame)
7602 *data_size = sizeof(AVFrame);
/* release the previously-output picture's reference */
7603 if(prev && prev != out && prev->reference == 1)
7604 prev->reference = 0;
7605 h->delayed_output_pic = out;
7608 *pict= *(AVFrame*)out;
7611 assert(pict->data[0]);
7612 ff_print_debug_info(s, pict);
7613 //printf("out %d\n", (int)pict->data[0]);
7616 /* Return the Picture timestamp as the frame number */
7617 /* we substract 1 because it is added on utils.c */
7618 avctx->frame_number = s->picture_number - 1;
7620 return get_consumed_bytes(s, buf_index, buf_size);
// Fill h->mb_avail[] with neighbour-availability flags for the current
// macroblock, judged by whether the neighbour MB belongs to the same slice
// (its slice_table entry equals h->slice_num). Index meaning follows the
// offsets used below: [0]=top-left, [1]=top, [2]=top-right, [3]=left.
// NOTE(review): no explicit s->mb_y>0 guard is visible for the top-row reads;
// presumably slice_table has a guard row above the picture — confirm against
// the elided table-allocation code.
7623 static inline void fill_mb_avail(H264Context *h){
7624 MpegEncContext * const s = &h->s;
// Linear index of the current macroblock in the slice table.
7625 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7628 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7629 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7630 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7636 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7637 h->mb_avail[4]= 1; //FIXME move out
7638 h->mb_avail[5]= 0; //FIXME move out
// NOTE(review): interior of the built-in self-test harness (its enclosing
// function header is outside this chunk, presumably guarded by #ifdef TEST).
// Many lines are elided, so only comments were added. The visible sections
// exercise: exp-Golomb write/read round-trips, the 4x4 (I)DCT, the
// quantizer, and NAL escaping/unescaping.
7644 #define SIZE (COUNT*40)
7650 // int int_temp[10000];
7652 AVCodecContext avctx;
7654 dsputil_init(&dsp, &avctx);
// --- unsigned exp-Golomb: write COUNT codes, then read them back ---
7656 init_put_bits(&pb, temp, SIZE);
7657 printf("testing unsigned exp golomb\n");
7658 for(i=0; i<COUNT; i++){
7660 set_ue_golomb(&pb, i);
7661 STOP_TIMER("set_ue_golomb");
7663 flush_put_bits(&pb);
7665 init_get_bits(&gb, temp, 8*SIZE);
7666 for(i=0; i<COUNT; i++){
// Peek the raw bits first so a mismatch report can show them.
7669 s= show_bits(&gb, 24);
7672 j= get_ue_golomb(&gb);
7674 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7677 STOP_TIMER("get_ue_golomb");
// --- signed exp-Golomb: same round-trip with values centred on zero ---
7681 init_put_bits(&pb, temp, SIZE);
7682 printf("testing signed exp golomb\n");
7683 for(i=0; i<COUNT; i++){
7685 set_se_golomb(&pb, i - COUNT/2);
7686 STOP_TIMER("set_se_golomb");
7688 flush_put_bits(&pb);
7690 init_get_bits(&gb, temp, 8*SIZE);
7691 for(i=0; i<COUNT; i++){
7694 s= show_bits(&gb, 24);
7697 j= get_se_golomb(&gb);
7698 if(j != i - COUNT/2){
7699 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7702 STOP_TIMER("get_se_golomb");
// --- 4x4 transform: forward diff-DCT, crude requantize, inverse, and
// measure the reconstruction error against the source block ---
7705 printf("testing 4x4 (I)DCT\n");
7708 uint8_t src[16], ref[16];
7709 uint64_t error= 0, max_error=0;
7711 for(i=0; i<COUNT; i++){
7713 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
// Random 0..254 pixel data for source and reference blocks.
7714 for(j=0; j<16; j++){
7715 ref[j]= random()%255;
7716 src[j]= random()%255;
7719 h264_diff_dct_c(block, src, ref, 4);
// Scale coefficients; odd rows/columns get the extra *4/5 factor
// (rounded) — NOTE(review): presumably compensating the transform's
// non-uniform basis norms; confirm against the elided scaling code.
7722 for(j=0; j<16; j++){
7723 // printf("%d ", block[j]);
7724 block[j]= block[j]*4;
7725 if(j&1) block[j]= (block[j]*4 + 2)/5;
7726 if(j&4) block[j]= (block[j]*4 + 2)/5;
7730 s->dsp.h264_idct_add(ref, block, 4);
7731 /* for(j=0; j<16; j++){
7732 printf("%d ", ref[j]);
// Accumulate absolute reconstruction error per sample.
7736 for(j=0; j<16; j++){
7737 int diff= ABS(src[j] - ref[j]);
7740 max_error= FFMAX(max_error, diff);
7743 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer: run every QP (0..51) over random block data ---
7745 printf("testing quantizer\n");
7746 for(qp=0; qp<52; qp++){
7748 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape a random bitstream into a NAL, decode it back,
// and check length, consumed bytes, and payload all round-trip ---
7752 printf("Testing NAL layer\n");
7754 uint8_t bitstream[COUNT];
7755 uint8_t nal[COUNT*2];
7757 memset(&h, 0, sizeof(H264Context));
7759 for(i=0; i<COUNT; i++){
// Non-zero payload bytes, then sprinkle in zeros at random positions
// to exercise the emulation-prevention escaping.
7767 for(j=0; j<COUNT; j++){
7768 bitstream[j]= (random() % 255) + 1;
7771 for(j=0; j<zeros; j++){
7772 int pos= random() % COUNT;
7773 while(bitstream[pos] == 0){
7782 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7784 printf("encoding failed\n");
7788 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7792 if(out_length != COUNT){
7793 printf("incorrect length %d %d\n", out_length, COUNT);
7797 if(consumed != nal_length){
7798 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7802 if(memcmp(bitstream, out, COUNT)){
7803 printf("missmatch\n");
7808 printf("Testing RBSP\n");
// Codec close callback: release per-context allocations made during init
// and decoding. NOTE(review): the return statement and closing brace are in
// elided lines; the FIXME below suggests teardown is known to be incomplete.
7816 static int decode_end(AVCodecContext *avctx)
7818 H264Context *h = avctx->priv_data;
7819 MpegEncContext *s = &h->s;
// Free the unescaped-NAL scratch buffer (av_freep also NULLs the pointer).
7821 av_freep(&h->rbsp_buffer);
7822 free_tables(h); //FIXME cleanup init stuff perhaps
7825 // memset(h, 0, sizeof(H264Context));
// Registration record for the H.264 decoder (positional AVCodec initializer;
// most fields — name, type, id, callbacks — are in elided lines).
7831 AVCodec h264_decoder = {
// Size of the private context lavc allocates per codec instance.
7835 sizeof(H264Context),
// Capabilities: direct rendering, truncated input, and delayed output
// (needed for B-frame reordering); slice drawing is disabled for now.
7840 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
// Registration record for the H.264 bitstream parser (positional
// AVCodecParser initializer; remaining fields continue past this chunk).
7844 AVCodecParser h264_parser = {
// The parser reuses the full H264Context as its private data.
7846 sizeof(H264Context),