2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
96 * Picture parameter set
100 int cabac; ///< entropy_coding_mode_flag
101 int pic_order_present; ///< pic_order_present_flag
102 int slice_group_count; ///< num_slice_groups_minus1 + 1
103 int mb_slice_group_map_type;
104 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
105 int weighted_pred; ///< weighted_pred_flag
106 int weighted_bipred_idc;
107 int init_qp; ///< pic_init_qp_minus26 + 26
108 int init_qs; ///< pic_init_qs_minus26 + 26
109 int chroma_qp_index_offset;
110 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
111 int constrained_intra_pred; ///< constrained_intra_pred_flag
112 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
113 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 * Memory management control operation opcode.
119 typedef enum MMCOOpcode{
130 * Memory management control operation.
141 typedef struct H264Context{
149 #define NAL_IDR_SLICE 5
153 #define NAL_PICTURE_DELIMITER 9
154 #define NAL_FILTER_DATA 10
155 uint8_t *rbsp_buffer;
156 int rbsp_buffer_size;
159 * Used to parse AVC variant of h264
161 int is_avc; ///< this flag is != 0 if codec is avc1
162 int got_avcC; ///< flag used to parse avcC data only once
163 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
167 int prev_mb_skipped; //FIXME remove (IMHO not used)
170 int chroma_pred_mode;
171 int intra16x16_pred_mode;
176 int8_t intra4x4_pred_mode_cache[5*8];
177 int8_t (*intra4x4_pred_mode)[8];
178 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
179 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
180 void (*pred8x8 [4+3])(uint8_t *src, int stride);
181 void (*pred16x16[4+3])(uint8_t *src, int stride);
182 unsigned int topleft_samples_available;
183 unsigned int top_samples_available;
184 unsigned int topright_samples_available;
185 unsigned int left_samples_available;
186 uint8_t (*top_borders[2])[16+2*8];
187 uint8_t left_border[2*(17+2*9)];
190 * non zero coeff count cache.
191 * is 64 if not available.
193 uint8_t non_zero_count_cache[6*8] __align8;
194 uint8_t (*non_zero_count)[16];
197 * Motion vector cache.
199 int16_t mv_cache[2][5*8][2] __align8;
200 int8_t ref_cache[2][5*8] __align8;
201 #define LIST_NOT_USED -1 //FIXME rename?
202 #define PART_NOT_AVAILABLE -2
205 * is 1 if the specific list MV&references are set to 0,0,-2.
207 int mv_cache_clean[2];
210 * number of neighbors (top and/or left) that used 8x8 dct
212 int neighbor_transform_size;
215 * block_offset[ 0..23] for frame macroblocks
216 * block_offset[24..47] for field macroblocks
218 int block_offset[2*(16+8)];
220 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
222 int b_stride; //FIXME use s->b4_stride
228 int unknown_svq3_flag;
229 int next_slice_index;
231 SPS sps_buffer[MAX_SPS_COUNT];
232 SPS sps; ///< current sps
234 PPS pps_buffer[MAX_PPS_COUNT];
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
241 uint16_t (*dequant8_coeff)[64];
244 uint8_t *slice_table_base;
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
247 int slice_type_fixed;
249 //interlacing specific flags
251 int mb_field_decoding_flag;
258 int delta_poc_bottom;
261 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
262 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
263 int frame_num_offset; ///< for POC type 2
264 int prev_frame_num_offset; ///< for POC type 2
265 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
268 * frame_num for frames or 2*frame_num for field pics.
273 * max_frame_num or 2*max_frame_num for field pics.
277 //Weighted pred stuff
279 int use_weight_chroma;
280 int luma_log2_weight_denom;
281 int chroma_log2_weight_denom;
282 int luma_weight[2][16];
283 int luma_offset[2][16];
284 int chroma_weight[2][16][2];
285 int chroma_offset[2][16][2];
286 int implicit_weight[16][16];
289 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
290 int slice_alpha_c0_offset;
291 int slice_beta_offset;
293 int redundant_pic_count;
295 int direct_spatial_mv_pred;
296 int dist_scale_factor[16];
297 int map_col_to_list0[2][16];
300 * num_ref_idx_l0/1_active_minus1 + 1
302 int ref_count[2];// FIXME split for AFF
303 Picture *short_ref[32];
304 Picture *long_ref[32];
305 Picture default_ref_list[2][32];
306 Picture ref_list[2][32]; //FIXME size?
307 Picture field_ref_list[2][32]; //FIXME size?
308 Picture *delayed_pic[16]; //FIXME size?
309 Picture *delayed_output_pic;
312 * memory management control operations buffer.
314 MMCO mmco[MAX_MMCO_COUNT];
317 int long_ref_count; ///< number of actual long term references
318 int short_ref_count; ///< number of actual short term references
321 GetBitContext intra_gb;
322 GetBitContext inter_gb;
323 GetBitContext *intra_gb_ptr;
324 GetBitContext *inter_gb_ptr;
326 DCTELEM mb[16*24] __align8;
332 uint8_t cabac_state[460];
335 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
339 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 uint8_t *chroma_pred_mode_table;
341 int last_qscale_diff;
342 int16_t (*mvd_table[2])[2];
343 int16_t mvd_cache[2][5*8][2] __align8;
344 uint8_t *direct_table;
345 uint8_t direct_cache[5*8];
347 uint8_t zigzag_scan[16];
348 uint8_t field_scan[16];
349 const uint8_t *zigzag_scan_q0;
350 const uint8_t *field_scan_q0;
355 static VLC coeff_token_vlc[4];
356 static VLC chroma_dc_coeff_token_vlc;
358 static VLC total_zeros_vlc[15];
359 static VLC chroma_dc_total_zeros_vlc[3];
361 static VLC run_vlc[6];
364 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
365 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
366 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Pack two 16-bit values (typically a motion-vector x,y pair) into a single
 * uint32_t so the pair can be stored or copied with one 32-bit access.
 * The byte order is chosen per endianness so that in-memory layout matches
 * two consecutive int16_t values.
 */
368 static inline uint32_t pack16to32(int a, int b){
369 #ifdef WORDS_BIGENDIAN
370 return (b&0xFFFF) + (a<<16);
/* NOTE(review): the #else / #endif and closing brace are missing from this excerpt */
372 return (a&0xFFFF) + (b<<16);
378 * @param h height of the rectangle, should be a constant
379 * @param w width of the rectangle, should be a constant
380 * @param size the size of val (1 or 4), should be a constant
/**
 * Fill a w x h rectangle of bytes (size==1) or 32-bit words (size==4) with
 * the value val, using the widest aligned stores available for each shape.
 * w, h and size should be compile-time constants so the if-chain collapses
 * to a single branch after inlining.
 * NOTE(review): several original lines (e.g. the w==2 && h==1 and w==8 && h==1
 * branches' openings, and the closing brace) are missing from this excerpt.
 */
382 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
383 uint8_t *p= (uint8_t*)vp;
384 assert(size==1 || size==4);
/* alignment/stride sanity checks: the destination must be aligned to the
 * rectangle width (capped at STRIDE_ALIGN) and stride a multiple of w */
389 assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
390 assert((stride&(w-1))==0);
391 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
/* val*0x0101 replicates a byte value across 16 bits for size==1 fills */
394 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
395 }else if(w==2 && h==4){
396 *(uint16_t*)(p + 0*stride)=
397 *(uint16_t*)(p + 1*stride)=
398 *(uint16_t*)(p + 2*stride)=
399 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
400 }else if(w==4 && h==1){
/* val*0x01010101 replicates a byte across 32 bits */
401 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
402 }else if(w==4 && h==2){
403 *(uint32_t*)(p + 0*stride)=
404 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
405 }else if(w==4 && h==4){
406 *(uint32_t*)(p + 0*stride)=
407 *(uint32_t*)(p + 1*stride)=
408 *(uint32_t*)(p + 2*stride)=
409 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
410 }else if(w==8 && h==1){
412 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
413 }else if(w==8 && h==2){
414 *(uint32_t*)(p + 0 + 0*stride)=
415 *(uint32_t*)(p + 4 + 0*stride)=
416 *(uint32_t*)(p + 0 + 1*stride)=
417 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
418 }else if(w==8 && h==4){
/* 64-bit stores: 0x0100000001ULL replicates a 32-bit value twice,
 * 0x0101010101010101ULL replicates a byte across all 8 lanes */
419 *(uint64_t*)(p + 0*stride)=
420 *(uint64_t*)(p + 1*stride)=
421 *(uint64_t*)(p + 2*stride)=
422 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
423 }else if(w==16 && h==2){
424 *(uint64_t*)(p + 0+0*stride)=
425 *(uint64_t*)(p + 8+0*stride)=
426 *(uint64_t*)(p + 0+1*stride)=
427 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
428 }else if(w==16 && h==4){
429 *(uint64_t*)(p + 0+0*stride)=
430 *(uint64_t*)(p + 8+0*stride)=
431 *(uint64_t*)(p + 0+1*stride)=
432 *(uint64_t*)(p + 8+1*stride)=
433 *(uint64_t*)(p + 0+2*stride)=
434 *(uint64_t*)(p + 8+2*stride)=
435 *(uint64_t*)(p + 0+3*stride)=
436 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
/**
 * Fill the per-macroblock prediction caches (intra4x4 modes, non-zero-count,
 * motion vectors, reference indices, mvd, direct flags, sample-availability
 * bitmasks) from the neighbouring macroblocks' stored state.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when called from the deblocking filter, which
 *                    needs neighbour data across slice boundaries
 * NOTE(review): this excerpt omits many original lines (several branches and
 * the closing brace); the comments below describe only the visible code.
 */
441 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
442 MpegEncContext * const s = &h->s;
443 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
444 int topleft_xy, top_xy, topright_xy, left_xy[2];
445 int topleft_type, top_type, topright_type, left_type[2];
449 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
450 // the actual condition is whether we're on the edge of a slice,
451 // and even then the intra and nnz parts are unnecessary.
452 if(for_deblock && h->slice_num == 1)
455 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
/* default (progressive) neighbour indices in macroblock raster order */
457 top_xy = mb_xy - s->mb_stride;
458 topleft_xy = top_xy - 1;
459 topright_xy= top_xy + 1;
460 left_xy[1] = left_xy[0] = mb_xy-1;
/* MBAFF: neighbours are derived from the macroblock *pair* above/left,
 * adjusted by whether each pair is coded frame or field */
470 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
471 const int top_pair_xy = pair_xy - s->mb_stride;
472 const int topleft_pair_xy = top_pair_xy - 1;
473 const int topright_pair_xy = top_pair_xy + 1;
474 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
475 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
476 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
477 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
478 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
479 const int bottom = (s->mb_y & 1);
480 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
482 ? !curr_mb_frame_flag // bottom macroblock
483 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
485 top_xy -= s->mb_stride;
488 ? !curr_mb_frame_flag // bottom macroblock
489 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
491 topleft_xy -= s->mb_stride;
494 ? !curr_mb_frame_flag // bottom macroblock
495 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
497 topright_xy -= s->mb_stride;
498 if (left_mb_frame_flag != curr_mb_frame_flag) {
500 left_xy[1] = left_xy[0] = pair_xy - 1;
501 if (curr_mb_frame_flag) {
522 left_xy[1] += s->mb_stride;
/* remember the resolved neighbour indices for later use */
535 h->top_mb_xy = top_xy;
536 h->left_mb_xy[0] = left_xy[0];
537 h->left_mb_xy[1] = left_xy[1];
/* for deblocking, any decoded neighbour counts (slice_table < 255);
 * otherwise only neighbours in the same slice are available */
539 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
540 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
541 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
542 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
543 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
545 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
546 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
547 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
548 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
549 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra prediction: start from "all samples available" bitmasks, then clear
 * bits for neighbours that are absent or excluded by constrained intra pred */
552 if(IS_INTRA(mb_type)){
553 h->topleft_samples_available=
554 h->top_samples_available=
555 h->left_samples_available= 0xFFFF;
556 h->topright_samples_available= 0xEEEA;
558 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
559 h->topleft_samples_available= 0xB3FF;
560 h->top_samples_available= 0x33FF;
561 h->topright_samples_available= 0x26EA;
564 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
565 h->topleft_samples_available&= 0xDF5F;
566 h->left_samples_available&= 0x5F5F;
570 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
571 h->topleft_samples_available&= 0x7FFF;
573 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
574 h->topright_samples_available&= 0xFBFF;
/* intra4x4 prediction-mode cache: copy the bottom row of the top neighbour
 * and the right column of the left neighbour(s), or a fallback pred value */
576 if(IS_INTRA4x4(mb_type)){
577 if(IS_INTRA4x4(top_type)){
578 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
579 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
580 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
581 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
584 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
589 h->intra4x4_pred_mode_cache[4+8*0]=
590 h->intra4x4_pred_mode_cache[5+8*0]=
591 h->intra4x4_pred_mode_cache[6+8*0]=
592 h->intra4x4_pred_mode_cache[7+8*0]= pred;
595 if(IS_INTRA4x4(left_type[i])){
596 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
597 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
600 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
605 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
606 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* non-zero-count cache: copy from top/left neighbours, 64 means unavailable */
621 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
623 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
624 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
625 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
626 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
628 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
629 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
631 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
632 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
635 h->non_zero_count_cache[4+8*0]=
636 h->non_zero_count_cache[5+8*0]=
637 h->non_zero_count_cache[6+8*0]=
638 h->non_zero_count_cache[7+8*0]=
640 h->non_zero_count_cache[1+8*0]=
641 h->non_zero_count_cache[2+8*0]=
643 h->non_zero_count_cache[1+8*3]=
644 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
648 for (i=0; i<2; i++) {
650 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
651 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
652 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
653 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
655 h->non_zero_count_cache[3+8*1 + 2*8*i]=
656 h->non_zero_count_cache[3+8*2 + 2*8*i]=
657 h->non_zero_count_cache[0+8*1 + 8*i]=
658 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* coded-block-pattern of the top/left neighbours (used by CABAC contexts) */
665 h->top_cbp = h->cbp_table[top_xy];
666 } else if(IS_INTRA(mb_type)) {
673 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
674 } else if(IS_INTRA(mb_type)) {
680 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
683 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* motion-vector / reference-index caches for inter or direct macroblocks */
688 //FIXME direct mb can skip much of this
689 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
691 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
692 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
693 /*if(!h->mv_cache_clean[list]){
694 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
695 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
696 h->mv_cache_clean[list]= 1;
700 h->mv_cache_clean[list]= 0;
/* top row: bottom row of MVs/refs of the macroblock above */
702 if(IS_INTER(top_type)){
703 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
704 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
705 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
706 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
707 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
708 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
709 h->ref_cache[list][scan8[0] + 0 - 1*8]=
710 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
711 h->ref_cache[list][scan8[0] + 2 - 1*8]=
712 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
714 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
715 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
716 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
717 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
718 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
721 //FIXME unify cleanup or sth
/* left column (upper half): right column of the left neighbour */
722 if(IS_INTER(left_type[0])){
723 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
724 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
725 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
726 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
727 h->ref_cache[list][scan8[0] - 1 + 0*8]=
728 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
730 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
731 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
732 h->ref_cache[list][scan8[0] - 1 + 0*8]=
733 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* left column (lower half) */
736 if(IS_INTER(left_type[1])){
737 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
738 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
739 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
740 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
741 h->ref_cache[list][scan8[0] - 1 + 2*8]=
742 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
744 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
745 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
746 h->ref_cache[list][scan8[0] - 1 + 2*8]=
747 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
748 assert((!left_type[0]) == (!left_type[1]));
/* topleft/topright corners are only needed for MV prediction, not deblocking
 * or temporal direct */
751 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
754 if(IS_INTER(topleft_type)){
755 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
756 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
757 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
758 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
760 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
761 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
764 if(IS_INTER(topright_type)){
765 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
766 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
767 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
768 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
770 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
771 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* mark cache positions that never have valid neighbours as unavailable */
775 h->ref_cache[list][scan8[5 ]+1] =
776 h->ref_cache[list][scan8[7 ]+1] =
777 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
778 h->ref_cache[list][scan8[4 ]] =
779 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
780 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
781 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
782 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
783 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
784 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* mvd (motion-vector difference) cache, used by CABAC decoding */
787 /* XXX beurk, Load mvd */
788 if(IS_INTER(topleft_type)){
789 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
790 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
792 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
795 if(IS_INTER(top_type)){
796 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
797 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
798 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
799 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
800 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
802 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
803 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
804 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
805 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
807 if(IS_INTER(left_type[0])){
808 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
809 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
810 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
812 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
813 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
815 if(IS_INTER(left_type[1])){
816 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
817 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
818 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
820 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
821 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
823 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
824 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
825 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
826 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
827 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: cache the per-8x8-block direct-mode flags of the neighbours */
829 if(h->slice_type == B_TYPE){
830 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
832 if(IS_DIRECT(top_type)){
833 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
834 }else if(IS_8X8(top_type)){
835 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
836 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
837 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
839 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
843 if(IS_DIRECT(left_type[0])){
844 h->direct_cache[scan8[0] - 1 + 0*8]=
845 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
846 }else if(IS_8X8(left_type[0])){
847 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
848 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
849 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
851 h->direct_cache[scan8[0] - 1 + 0*8]=
852 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* number of neighbours (top/left) that used the 8x8 transform */
860 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Write the current macroblock's intra4x4 prediction modes from the cache
 * back to the per-macroblock storage (bottom row and right column, which
 * are what the macroblocks below/right will need as neighbours).
 * NOTE(review): this excerpt appears to omit at least one trailing line
 * (a 7th element store and the closing brace).
 */
863 static inline void write_back_intra_pred_mode(H264Context *h){
864 MpegEncContext * const s = &h->s;
865 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
867 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
868 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
869 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
870 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
871 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
872 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
873 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
877 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Check that the intra4x4 prediction modes in the cache only reference
 * available neighbour samples; remap DC-style modes via the top[]/left[]
 * tables where possible, and report an error for modes that cannot work.
 * @return presumably 0 on success, negative on invalid mode (return lines
 *         are missing from this excerpt — TODO confirm)
 */
879 static inline int check_intra4x4_pred_mode(H264Context *h){
880 MpegEncContext * const s = &h->s;
/* remap tables: -1 = mode usable as-is, 0/LEFT_DC_PRED/TOP_DC_PRED/DC_128_PRED
 * = substitute mode when the corresponding neighbour is unavailable */
881 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
882 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* top neighbour unavailable: validate/remap the top row of 4x4 modes */
885 if(!(h->top_samples_available&0x8000)){
887 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
889 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
892 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* left neighbour unavailable: validate/remap the left column of 4x4 modes */
897 if(!(h->left_samples_available&0x8000)){
899 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
901 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
904 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
910 } //FIXME cleanup like next
913 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Check a 16x16-luma / 8x8-chroma intra prediction mode against the available
 * neighbour samples and remap DC modes via the top[]/left[] tables.
 * @param mode the requested prediction mode (valid range 0..6)
 * @return presumably the (possibly remapped) mode, or negative on error —
 *         the remap/return lines are missing from this excerpt, TODO confirm
 */
915 static inline int check_intra_pred_mode(H264Context *h, int mode){
916 MpegEncContext * const s = &h->s;
/* -1 = usable as-is; otherwise the substitute mode when top/left is missing */
917 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
918 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
920 if(mode < 0 || mode > 6) {
921 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
925 if(!(h->top_samples_available&0x8000)){
928 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
933 if(!(h->left_samples_available&0x8000)){
936 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
945 * gets the predicted intra4x4 prediction mode.
/**
 * Get the predicted intra4x4 prediction mode for block n: the minimum of the
 * left and top neighbours' modes, falling back to DC_PRED when either
 * neighbour is unavailable (negative cache value).
 * @param n 4x4 block index
 * NOTE(review): the final return of min (for min>=0) and the closing brace
 * are missing from this excerpt.
 */
947 static inline int pred_intra_mode(H264Context *h, int n){
948 const int index8= scan8[n];
949 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
950 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
951 const int min= FFMIN(left, top);
953 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* a negative cache entry means that neighbour is unavailable */
955 if(min<0) return DC_PRED;
/**
 * Write the current macroblock's non-zero coefficient counts from the cache
 * back to per-macroblock storage: the luma bottom row / right column and the
 * chroma entries that neighbouring macroblocks will read.
 * NOTE(review): the closing brace is missing from this excerpt.
 */
959 static inline void write_back_non_zero_count(H264Context *h){
960 MpegEncContext * const s = &h->s;
961 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* luma: indices 0..3 = bottom row, 4..6 = right column */
963 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
964 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
965 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
966 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
967 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
968 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
969 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* chroma cb (7..9) and cr (10..12) edge blocks */
971 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
972 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
973 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
975 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
976 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
977 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
981 * gets the predicted number of non zero coefficients.
982 * @param n block index
/**
 * Get the predicted number of non-zero coefficients for block n from the
 * left/top neighbours' counts in the cache.
 * @param n block index
 * NOTE(review): the line combining left and top into i, the final return and
 * the closing brace are missing from this excerpt.
 */
984 static inline int pred_non_zero_count(H264Context *h, int n){
985 const int index8= scan8[n];
986 const int left= h->non_zero_count_cache[index8 - 1];
987 const int top = h->non_zero_count_cache[index8 - 8];
/* i<64 means both neighbours were available; average them (rounded up) */
990 if(i<64) i= (i+1)>>1;
992 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetch the diagonal (top-right, or top-left as fallback) neighbour MV used
 * as candidate C in H.264 median motion-vector prediction.
 * @param C          receives a pointer to the chosen neighbour MV
 * @param i          scan8 cache index of the current partition
 * @param part_width partition width in 4x4 units (1, 2 or 4)
 * @return the reference index of the chosen neighbour
 */
997 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
998 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
/* prefer the top-right neighbour; fall back to top-left when unavailable */
1000 if(topright_ref != PART_NOT_AVAILABLE){
1001 *C= h->mv_cache[list][ i - 8 + part_width ];
1002 return topright_ref;
1004 tprintf("topright MV not available\n");
1006 *C= h->mv_cache[list][ i - 8 - 1 ];
1007 return h->ref_cache[list][ i - 8 - 1 ];
1012 * gets the predicted MV.
1013 * @param n the block index
1014 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1015 * @param mx the x component of the predicted motion vector
1016 * @param my the y component of the predicted motion vector
/**
 * Get the predicted MV for a partition: the component-wise median of the
 * left (A), top (B) and diagonal (C) neighbour MVs, with the standard H.264
 * special cases when exactly one neighbour shares the target reference.
 * @param n          the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param list       reference picture list (0 or 1)
 * @param ref        reference index being predicted for
 * @param mx         receives the x component of the predicted motion vector
 * @param my         receives the y component of the predicted motion vector
 * NOTE(review): several interior lines (single-match branches, left-only
 * fallback, closing brace) are missing from this excerpt.
 */
1018 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1019 const int index8= scan8[n];
1020 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1021 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1022 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1023 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1025 int diagonal_ref, match_count;
1027 assert(part_width==1 || part_width==2 || part_width==4);
1037 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference as the target */
1038 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1039 tprintf("pred_motion match_count=%d\n", match_count);
1040 if(match_count > 1){ //most common
1041 *mx= mid_pred(A[0], B[0], C[0]);
1042 *my= mid_pred(A[1], B[1], C[1]);
1043 }else if(match_count==1){
1047 }else if(top_ref==ref){
/* only the left neighbour exists: use A directly; otherwise take the median */
1055 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1059 *mx= mid_pred(A[0], B[0], C[0]);
1060 *my= mid_pred(A[1], B[1], C[1]);
1064 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1068 * gets the directionally predicted 16x8 MV.
1069 * @param n the block index
1070 * @param mx the x component of the predicted motion vector
1071 * @param my the y component of the predicted motion vector
// NOTE(review): fragment — the "if(n==0)" / "if(top_ref == ref)" structure
// and the MV copies are missing; only the neighbour loads and the generic
// fallback remain visible.
1073 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// top partition (n==0 path): check the top neighbour first
1075 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1076 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1078 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// bottom partition: check the left neighbour of the lower 8 rows
1086 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1087 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1089 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1091 if(left_ref == ref){
// RARE: no directional match -> fall back to median prediction
1099 pred_motion(h, n, 4, list, ref, mx, my);
1103 * gets the directionally predicted 8x16 MV.
1104 * @param n the block index
1105 * @param mx the x component of the predicted motion vector
1106 * @param my the y component of the predicted motion vector
// NOTE(review): fragment — partition-selection branches and MV copies are
// missing from this listing.
1108 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// left partition: prefer the left neighbour when it uses the same ref
1110 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1111 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1113 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1115 if(left_ref == ref){
// right partition: prefer the diagonal (top-right) neighbour
1124 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1126 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1128 if(diagonal_ref == ref){
// RARE: fall back to the generic median predictor
1136 pred_motion(h, n, 2, list, ref, mx, my);
// P-Skip MV prediction: the MV is forced to (0,0) when either neighbour is
// unavailable or is a zero-MV ref-0 block; otherwise the normal median
// prediction is used.  NOTE(review): the zero-assignment branch body is
// missing from this listing.
1139 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1140 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1141 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1143 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1145 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1146 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1147 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1153 pred_motion(h, 0, 4, 0, 0, mx, my);
// Precomputes the temporal-direct distance scale factors (one per list-0
// reference), per the standard's tb/td/tx derivation.  td==0 (co-located
// distance zero) yields the neutral factor 256.  NOTE(review): the `int i;`
// declaration, else keyword and closing braces are missing in this listing.
1158 static inline void direct_dist_scale_factor(H264Context * const h){
1159 const int poc = h->s.current_picture_ptr->poc;
1160 const int poc1 = h->ref_list[1][0].poc;
1162 for(i=0; i<h->ref_count[0]; i++){
1163 int poc0 = h->ref_list[0][i].poc;
1164 int td = clip(poc1 - poc0, -128, 127);
1165 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1166 h->dist_scale_factor[i] = 256;
1168 int tb = clip(poc - poc0, -128, 127);
// tx = round(16384/td); factor = clip(round(tb*tx/64))
1169 int tx = (16384 + (ABS(td) >> 1)) / td;
1170 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
// Records the current picture's per-list reference counts/POCs and builds
// map_col_to_list0[], mapping each reference of the co-located (list-1[0])
// picture to the matching entry of our list 0 (by equal POC), for temporal
// direct mode.  NOTE(review): declarations of i/j/list, a `return`, and
// loop-closing braces are missing from this listing.
1174 static inline void direct_ref_list_init(H264Context * const h){
1175 MpegEncContext * const s = &h->s;
1176 Picture * const ref1 = &h->ref_list[1][0];
1177 Picture * const cur = s->current_picture_ptr;
1179 if(cur->pict_type == I_TYPE)
1180 cur->ref_count[0] = 0;
1181 if(cur->pict_type != B_TYPE)
1182 cur->ref_count[1] = 0;
1183 for(list=0; list<2; list++){
1184 cur->ref_count[list] = h->ref_count[list];
1185 for(j=0; j<h->ref_count[list]; j++)
1186 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// the column map is only needed for temporal direct prediction
1188 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1190 for(list=0; list<2; list++){
1191 for(i=0; i<ref1->ref_count[list]; i++){
1192 const int poc = ref1->ref_poc[list][i];
// default: no list-0 picture shares this POC
1193 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1194 for(j=0; j<h->ref_count[list]; j++)
1195 if(h->ref_list[list][j].poc == poc){
1196 h->map_col_to_list0[list][i] = j;
// B-frame direct-mode prediction: fills mv_cache/ref_cache (and the
// sub-macroblock types) for a direct-predicted MB, either spatially (from
// neighbours) or temporally (scaled from the co-located list-1 picture).
// NOTE(review): this listing omits many lines — declarations (sub_mb_type,
// ref[2], mv[2][2], i8/i4/list, mv_l0), several else branches, `continue`
// statements and closing braces.  Comments below are limited to what the
// visible code shows.
1203 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1204 MpegEncContext * const s = &h->s;
1205 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1206 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1207 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// co-located macroblock data from the first list-1 reference
1208 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1209 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1210 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1211 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1212 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1213 const int is_b8x8 = IS_8X8(*mb_type);
// choose partition sizes from the co-located MB type
1217 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1218 /* FIXME save sub mb types from previous frames (or derive from MVs)
1219 * so we know exactly what block size to use */
1220 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1221 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1222 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1223 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1224 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1226 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1227 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1230 *mb_type |= MB_TYPE_DIRECT2;
1232 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---------- spatial direct prediction ----------
1234 if(h->direct_spatial_mv_pred){
1239 /* ref = min(neighbors) */
1240 for(list=0; list<2; list++){
1241 int refa = h->ref_cache[list][scan8[0] - 1];
1242 int refb = h->ref_cache[list][scan8[0] - 8];
1243 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
// top-right unavailable: substitute top-left
1245 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1247 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1249 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// no valid neighbour in either list: zero refs and zero MVs
1255 if(ref[0] < 0 && ref[1] < 0){
1256 ref[0] = ref[1] = 0;
1257 mv[0][0] = mv[0][1] =
1258 mv[1][0] = mv[1][1] = 0;
1260 for(list=0; list<2; list++){
1262 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1264 mv[list][0] = mv[list][1] = 0;
// drop the unused prediction direction from the mb/sub-mb types
1269 *mb_type &= ~MB_TYPE_P0L1;
1270 sub_mb_type &= ~MB_TYPE_P0L1;
1271 }else if(ref[0] < 0){
1272 *mb_type &= ~MB_TYPE_P0L0;
1273 sub_mb_type &= ~MB_TYPE_P0L0;
1276 if(IS_16X16(*mb_type)){
1277 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1278 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
// co-located block (nearly) static: force zero MVs where applicable;
// the x264_build check works around a known old-x264 bug
1279 if(!IS_INTRA(mb_type_col)
1280 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1281 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1282 && (h->x264_build>33 || !h->x264_build)))){
1284 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1286 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1288 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1290 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1292 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1293 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
// 8x8 partition path of the spatial mode
1296 for(i8=0; i8<4; i8++){
1297 const int x8 = i8&1;
1298 const int y8 = i8>>1;
1300 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1302 h->sub_mb_type[i8] = sub_mb_type;
1304 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1305 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1306 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1307 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
// per-4x4 zeroing when the co-located 4x4 MVs are (nearly) zero
1310 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1311 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1312 && (h->x264_build>33 || !h->x264_build)))){
1313 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1314 for(i4=0; i4<4; i4++){
1315 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1316 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1318 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1320 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ---------- temporal direct prediction ----------
1326 }else{ /* direct temporal mv pred */
1327 if(IS_16X16(*mb_type)){
1328 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1329 if(IS_INTRA(mb_type_col)){
// intra co-located MB: both lists get ref 0 with zero MVs
1330 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1331 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1332 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1334 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1335 : h->map_col_to_list0[1][l1ref1[0]];
1336 const int dist_scale_factor = h->dist_scale_factor[ref0];
1337 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
// scale the co-located MV; list-1 MV is the residual mv_l0 - mv_col
1339 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1340 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1341 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1342 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1343 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
// 8x8 partition path of the temporal mode
1346 for(i8=0; i8<4; i8++){
1347 const int x8 = i8&1;
1348 const int y8 = i8>>1;
1349 int ref0, dist_scale_factor;
1350 const int16_t (*l1mv)[2]= l1mv0;
1352 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1354 h->sub_mb_type[i8] = sub_mb_type;
1355 if(IS_INTRA(mb_type_col)){
1356 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1357 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1358 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1359 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// map the co-located reference into our list 0
1363 ref0 = l1ref0[x8 + y8*h->b8_stride];
1365 ref0 = h->map_col_to_list0[0][ref0];
1367 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1370 dist_scale_factor = h->dist_scale_factor[ref0];
1372 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1373 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1374 for(i4=0; i4<4; i4++){
1375 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1376 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1377 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1378 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1379 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1380 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
// Copies the per-macroblock MV/ref caches (and CABAC mvd caches) back into
// the picture-wide motion_val/ref_index/mvd tables, and records direct-mode
// flags for B slices.  NOTE(review): loop headers over `y`, the else branch
// opener and several closing braces are missing from this listing.
1387 static inline void write_back_motion(H264Context *h, int mb_type){
1388 MpegEncContext * const s = &h->s;
1389 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1390 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1393 for(list=0; list<2; list++){
1395 if(!USES_LIST(mb_type, list)){
1396 if(1){ //FIXME skip or never read if mb_type doesn't use it
// unused list: zero the 4 MV rows (two 64-bit stores per row)
1398 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1399 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1401 if( h->pps.cabac ) {
1402 /* FIXME needed ? */
1404 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1405 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1409 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1410 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
// used list: copy MV cache rows out to the picture tables
1417 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1418 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1420 if( h->pps.cabac ) {
1422 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1423 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1427 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1428 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
// direct-mode flags per 8x8 block, needed by CABAC context modelling
1432 if(h->slice_type == B_TYPE && h->pps.cabac){
1433 if(IS_8X8(mb_type)){
1434 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1435 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1436 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1442 * Decodes a network abstraction layer unit.
1443 * @param consumed is the number of bytes used as input
1444 * @param length is the length of the array
1445 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1446 * @returns decoded bytes, might be src+1 if no escapes
// NOTE(review): fragment — declarations (i, si, di, dst), loop bodies of the
// escape-removal pass and several returns/braces are missing here.  This is
// the standard 00 00 03 emulation-prevention removal of H.264 Annex B.
1448 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
// parse the 1-byte NAL header
1452 // src[0]&0x80; //forbidden bit
1453 h->nal_ref_idc= src[0]>>5;
1454 h->nal_unit_type= src[0]&0x1F;
1458 for(i=0; i<length; i++)
1459 printf("%2X ", src[i]);
// fast scan (stride 2) for the first 00 00 0x pattern; zero bytes can
// only start an escape at even or odd positions, hence the i-- backtrack
1461 for(i=0; i+1<length; i+=2){
1462 if(src[i]) continue;
1463 if(i>0 && src[i-1]==0) i--;
1464 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1466 /* startcode, so we must be past the end */
1473 if(i>=length-1){ //no escaped 0
1474 *dst_length= length;
1475 *consumed= length+1; //+1 for the header
// escapes present: unescape into the reusable rbsp buffer
1479 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1480 dst= h->rbsp_buffer;
1482 //printf("decoding esc\n");
1485 //remove escapes (very rare 1:2^22)
1486 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1487 if(src[si+2]==3){ //escape
1492 }else //next start code
1496 dst[di++]= src[si++];
1500 *consumed= si + 1;//+1 for the header
1501 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1507 * @param src the data which should be escaped
1508 * @param dst the target buffer, dst+1 == src is allowed as a special case
1509 * @param length the length of the src data
1510 * @param dst_length the length of the dst array
1511 * @returns length of escaped data in bytes or -1 if an error occured
// NOTE(review): fragment — the escape-count increments, loop headers of the
// copy pass, several returns and closing braces are missing here.  Inverse
// of decode_nal: inserts 00 00 03 emulation-prevention bytes.
1513 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1514 int i, escape_count, si, di;
1518 assert(dst_length>0);
// write the 1-byte NAL header first
1520 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1522 if(length==0) return 1;
// first pass: count how many escapes will be needed (stride-2 scan)
1525 for(i=0; i<length; i+=2){
1526 if(src[i]) continue;
1527 if(i>0 && src[i-1]==0)
1529 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
// common case: nothing to escape, plain copy suffices
1535 if(escape_count==0){
1537 memcpy(dst+1, src, length);
1541 if(length + escape_count + 1> dst_length)
1544 //this should be damn rare (hopefully)
1546 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1547 temp= h->rbsp_buffer;
1548 //printf("encoding esc\n");
// second pass: copy with 0x03 inserted after each 00 00 pair
1553 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1554 temp[di++]= 0; si++;
1555 temp[di++]= 0; si++;
1557 temp[di++]= src[si++];
1560 temp[di++]= src[si++];
1562 memcpy(dst+1, temp, length+escape_count);
1564 assert(di == length+escape_count);
1570 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
// NOTE(review): fragment — the declaration of `length` and the initial
// stop-bit write preceding the padding are missing from this listing.
1572 static void encode_rbsp_trailing(PutBitContext *pb){
// pad with zero bits up to the next byte boundary
1575 length= (-put_bits_count(pb))&7;
1576 if(length) put_bits(pb, length, 0);
1581 * identifies the exact end of the bitstream
1582 * @return the length of the trailing, or 0 if damaged
// NOTE(review): fragment — the read of the final byte into `v` and the
// bit-scanning return logic are missing from this listing.
1584 static int decode_rbsp_trailing(uint8_t *src){
1588 tprintf("rbsp trailing %X\n", v);
1598 * idct tranforms the 16 dc values and dequantize them.
1599 * @param qp quantization parameter
// NOTE(review): fragment — `const int stride`, the loop headers over i,
// the four temp[] stores of the row pass and closing braces are missing.
1601 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1602 const int qmul= dequant_coeff[qp][0];
1605 int temp[16]; //FIXME check if this is a good idea
// offsets of the 4 DC columns/rows inside the 16x16 block layout
1606 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1607 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1609 //memset(block, 64, 2*256);
// horizontal butterfly pass into temp[]
1612 const int offset= y_offset[i];
1613 const int z0= block[offset+stride*0] + block[offset+stride*4];
1614 const int z1= block[offset+stride*0] - block[offset+stride*4];
1615 const int z2= block[offset+stride*1] - block[offset+stride*5];
1616 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical butterfly pass with dequantization back into block[]
1625 const int offset= x_offset[i];
1626 const int z0= temp[4*0+i] + temp[4*2+i];
1627 const int z1= temp[4*0+i] - temp[4*2+i];
1628 const int z2= temp[4*1+i] - temp[4*3+i];
1629 const int z3= temp[4*1+i] + temp[4*3+i];
1631 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1632 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1633 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1634 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1640 * dct tranforms the 16 dc values.
1641 * @param qp quantization parameter ??? FIXME
// NOTE(review): forward counterpart of the dequant-idct above; the same
// listing gaps apply (stride constant, loop headers, temp[] stores, braces).
1643 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1644 // const int qmul= dequant_coeff[qp][0];
1646 int temp[16]; //FIXME check if this is a good idea
1647 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1648 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// first butterfly pass into temp[]
1651 const int offset= y_offset[i];
1652 const int z0= block[offset+stride*0] + block[offset+stride*4];
1653 const int z1= block[offset+stride*0] - block[offset+stride*4];
1654 const int z2= block[offset+stride*1] - block[offset+stride*5];
1655 const int z3= block[offset+stride*1] + block[offset+stride*5];
// second butterfly pass, results halved, written back to block[]
1664 const int offset= x_offset[i];
1665 const int z0= temp[4*0+i] + temp[4*2+i];
1666 const int z1= temp[4*0+i] - temp[4*2+i];
1667 const int z2= temp[4*1+i] - temp[4*3+i];
1668 const int z3= temp[4*1+i] + temp[4*3+i];
1670 block[stride*0 +offset]= (z0 + z3)>>1;
1671 block[stride*2 +offset]= (z1 + z2)>>1;
1672 block[stride*8 +offset]= (z1 - z2)>>1;
1673 block[stride*10+offset]= (z0 - z3)>>1;
// 2x2 chroma DC inverse Hadamard transform + dequantization, in place.
// NOTE(review): fragment — the declarations of a..e and the intermediate
// sums (e= a-b; a= a+b; b= c-d; c= c+d;) are missing from this listing.
1681 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1682 const int qmul= dequant_coeff[qp][0];
1683 const int stride= 16*2;
1684 const int xStride= 16;
// load the four DC coefficients
1687 a= block[stride*0 + xStride*0];
1688 b= block[stride*0 + xStride*1];
1689 c= block[stride*1 + xStride*0];
1690 d= block[stride*1 + xStride*1];
// butterfly outputs, dequantized and halved
1697 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1698 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1699 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1700 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
// 2x2 chroma DC forward Hadamard transform, in place (encoder side).
// NOTE(review): fragment — declarations of a..e and the intermediate sums
// are missing, as in the dequant variant above.
1704 static void chroma_dc_dct_c(DCTELEM *block){
1705 const int stride= 16*2;
1706 const int xStride= 16;
1709 a= block[stride*0 + xStride*0];
1710 b= block[stride*0 + xStride*1];
1711 c= block[stride*1 + xStride*0];
1712 d= block[stride*1 + xStride*1];
1719 block[stride*0 + xStride*0]= (a+c);
1720 block[stride*0 + xStride*1]= (e+b);
1721 block[stride*1 + xStride*0]= (a-c);
1722 block[stride*1 + xStride*1]= (e-b);
1727 * gets the chroma qp.
1729 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1731 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
// Computes the 4x4 H.264 forward transform of the difference src1 - src2
// directly into block[] (rows first, then columns).
// NOTE(review): fragment — `int i;` and the two `for(i=0; i<4; i++){` loop
// headers plus closing braces are missing from this listing.
1736 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1738 //FIXME try int temp instead of block
// row pass over the pixel differences
1741 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1742 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1743 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1744 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1745 const int z0= d0 + d3;
1746 const int z3= d0 - d3;
1747 const int z1= d1 + d2;
1748 const int z2= d1 - d2;
1750 block[0 + 4*i]= z0 + z1;
1751 block[1 + 4*i]= 2*z3 + z2;
1752 block[2 + 4*i]= z0 - z1;
1753 block[3 + 4*i]= z3 - 2*z2;
// column pass over the row-transformed coefficients
1757 const int z0= block[0*4 + i] + block[3*4 + i];
1758 const int z3= block[0*4 + i] - block[3*4 + i];
1759 const int z1= block[1*4 + i] + block[2*4 + i];
1760 const int z2= block[1*4 + i] - block[2*4 + i];
1762 block[0*4 + i]= z0 + z1;
1763 block[1*4 + i]= 2*z3 + z2;
1764 block[2*4 + i]= z0 - z1;
1765 block[3*4 + i]= z3 - 2*z2;
1770 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1771 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
// Quantizes a 4x4 coefficient block in scan order and returns the index of
// the last non-zero coefficient.  `seperate_dc` selects special DC scaling.
// NOTE(review): fragment — `last_non_zero` init, several else/block[0]
// assignments, the i-loop header and closing braces are missing here.
1772 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1774 const int * const quant_table= quant_coeff[qscale];
// dead-zone bias: 1/3 of a step intra, 1/6 inter
1775 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1776 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1777 const unsigned int threshold2= (threshold1<<1);
// DC path for luma DC (qscale > LUMA coverage): wider shift
1783 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1784 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1785 const unsigned int dc_threshold2= (dc_threshold1<<1);
1787 int level= block[0]*quant_coeff[qscale+18][0];
// single range check covers both signs (unsigned wrap trick)
1788 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1790 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1793 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1796 // last_non_zero = i;
// DC path for chroma DC: narrower shift
1801 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1802 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1803 const unsigned int dc_threshold2= (dc_threshold1<<1);
1805 int level= block[0]*quant_table[0];
1806 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1808 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1811 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1814 // last_non_zero = i;
// AC coefficients in scan order
1827 const int j= scantable[i];
1828 int level= block[j]*quant_table[j];
1830 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1831 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1832 if(((unsigned)(level+threshold1))>threshold2){
1834 level= (bias + level)>>QUANT_SHIFT;
1837 level= (bias - level)>>QUANT_SHIFT;
1846 return last_non_zero;
/**
 * 4x4 vertical intra prediction: replicates the 4 pixels directly above
 * the block into each of its four rows (one 32-bit copy per row).
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top= ((uint32_t*)(src-stride))[0];
    int row;

    for(row=0; row<4; row++)
        ((uint32_t*)(src+row*stride))[0]= top;
}
/**
 * 4x4 horizontal intra prediction: fills each row with its left
 * neighbour pixel, replicated across the row via a 0x01010101 splat.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int row;

    for(row=0; row<4; row++)
        ((uint32_t*)(src+row*stride))[0]= src[-1+row*stride]*0x01010101;
}
/**
 * 4x4 DC intra prediction: fills the block with the rounded mean of the
 * 4 top and 4 left neighbour pixels ((sum + 4) >> 3).
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int sum= 4;   /* rounding term */
    int i;

    for(i=0; i<4; i++)
        sum += src[i-stride] + src[-1+i*stride];
    {
        const uint32_t fill= (uint32_t)((sum>>3) * 0x01010101);
        for(i=0; i<4; i++)
            ((uint32_t*)(src+i*stride))[0]= fill;
    }
}
/**
 * 4x4 left-DC intra prediction (top neighbours unavailable): fills the
 * block with the rounded mean of the 4 left pixels ((sum + 2) >> 2).
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i, sum= 2;   /* rounding term */

    for(i=0; i<4; i++)
        sum += src[-1+i*stride];
    {
        const uint32_t fill= (uint32_t)((sum>>2) * 0x01010101);
        for(i=0; i<4; i++)
            ((uint32_t*)(src+i*stride))[0]= fill;
    }
}
/**
 * 4x4 top-DC intra prediction (left neighbours unavailable): fills the
 * block with the rounded mean of the 4 top pixels ((sum + 2) >> 2).
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int i, sum= 2;   /* rounding term */

    for(i=0; i<4; i++)
        sum += src[i-stride];
    {
        const uint32_t fill= (uint32_t)((sum>>2) * 0x01010101);
        for(i=0; i<4; i++)
            ((uint32_t*)(src+i*stride))[0]= fill;
    }
}
/**
 * 4x4 flat-DC intra prediction (no neighbours available): fills the
 * block with the mid-grey value 128.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    int row;

    for(row=0; row<4; row++)
        ((uint32_t*)(src+row*stride))[0]= 128U*0x01010101U;
}
// Neighbour-pixel loader macros used by the directional 4x4 predictors
// below: t0..t7 are the (top / top-right) row pixels, l0..l3 the left
// column pixels.  NOTE(review): the blank separator lines between these
// '\'-continued definitions are missing from this listing, so comments are
// only added ahead of the whole group to avoid altering any macro body.
1900 #define LOAD_TOP_RIGHT_EDGE\
1901 const int t4= topright[0];\
1902 const int t5= topright[1];\
1903 const int t6= topright[2];\
1904 const int t7= topright[3];\
1906 #define LOAD_LEFT_EDGE\
1907 const int l0= src[-1+0*stride];\
1908 const int l1= src[-1+1*stride];\
1909 const int l2= src[-1+2*stride];\
1910 const int l3= src[-1+3*stride];\
1912 #define LOAD_TOP_EDGE\
1913 const int t0= src[ 0-1*stride];\
1914 const int t1= src[ 1-1*stride];\
1915 const int t2= src[ 2-1*stride];\
1916 const int t3= src[ 3-1*stride];\
// 4x4 diagonal-down-right prediction: each anti-diagonal is filled with a
// 3-tap (1,2,1) filtered value running from l3 up through lt to t3.
// NOTE(review): fragment — the LOAD_LEFT_EDGE/LOAD_TOP_EDGE invocations and
// the interleaved duplicate-diagonal stores are missing from this listing.
1918 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1919 const int lt= src[-1-1*stride];
1923 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1925 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1928 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1932 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1935 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1937 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1938 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
// 4x4 diagonal-down-left prediction using (1,2,1) filtering over the top
// and top-right edges (t0..t7).
// NOTE(review): fragment — the LOAD_TOP_EDGE/LOAD_TOP_RIGHT_EDGE
// invocations and the duplicated-diagonal stores are missing here.
1941 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1946 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1948 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1951 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1955 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1958 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1960 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
// last corner uses the edge-replication form (t6 + 3*t7)
1961 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
// 4x4 vertical-right prediction: 2-tap averages on even diagonals and
// (1,2,1) filtered values on odd diagonals, mixing top and left edges.
// NOTE(review): fragment — the LOAD_TOP_EDGE/LOAD_LEFT_EDGE invocations and
// the duplicated-position stores are missing from this listing.
1964 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1965 const int lt= src[-1-1*stride];
// l3 is loaded by the (missing) macro but unused in this mode
1968 const __attribute__((unused)) int unu= l3;
1971 src[1+2*stride]=(lt + t0 + 1)>>1;
1973 src[2+2*stride]=(t0 + t1 + 1)>>1;
1975 src[3+2*stride]=(t1 + t2 + 1)>>1;
1976 src[3+0*stride]=(t2 + t3 + 1)>>1;
1978 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1980 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1982 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1983 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1984 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1985 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
// 4x4 vertical-left prediction: 2-tap averages and (1,2,1) filtered values
// over the top and top-right edges (t0..t6).
// NOTE(review): fragment — macro invocations and duplicate stores missing.
1988 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
// t7 is loaded by the (missing) macro but unused in this mode
1991 const __attribute__((unused)) int unu= t7;
1993 src[0+0*stride]=(t0 + t1 + 1)>>1;
1995 src[0+2*stride]=(t1 + t2 + 1)>>1;
1997 src[1+2*stride]=(t2 + t3 + 1)>>1;
1999 src[2+2*stride]=(t3 + t4+ 1)>>1;
2000 src[3+2*stride]=(t4 + t5+ 1)>>1;
2001 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2003 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2005 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2007 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2008 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
// 4x4 horizontal-up prediction: interpolates down the left edge (l0..l3);
// the remaining bottom-right samples (missing here) are replicated from l3.
// NOTE(review): fragment — the LOAD_LEFT_EDGE invocation and the trailing
// l3-replication stores are missing from this listing.
2011 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2014 src[0+0*stride]=(l0 + l1 + 1)>>1;
2015 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2017 src[0+1*stride]=(l1 + l2 + 1)>>1;
2019 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2021 src[0+2*stride]=(l2 + l3 + 1)>>1;
// (l2 + 3*l3) written as l2 + 2*l3 + l3 — edge replication below l3
2023 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
// 4x4 horizontal-down prediction: 2-tap averages and (1,2,1) filtered
// values mixing the left edge, top-left corner and top edge.
// NOTE(review): fragment — macro invocations and duplicate stores missing.
2032 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2033 const int lt= src[-1-1*stride];
// t3 is loaded by the (missing) macro but unused in this mode
2036 const __attribute__((unused)) int unu= t3;
2039 src[2+1*stride]=(lt + l0 + 1)>>1;
2041 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2042 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2043 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2045 src[2+2*stride]=(l0 + l1 + 1)>>1;
2047 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2049 src[2+3*stride]=(l1 + l2+ 1)>>1;
2051 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2052 src[0+3*stride]=(l2 + l3 + 1)>>1;
2053 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/**
 * 16x16 vertical intra prediction: replicates the 16 pixels directly
 * above the macroblock into all 16 rows (four 32-bit copies per row).
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t *top= (uint32_t*)(src-stride);
    const uint32_t t0= top[0];
    const uint32_t t1= top[1];
    const uint32_t t2= top[2];
    const uint32_t t3= top[3];
    int row;

    for(row=0; row<16; row++){
        uint32_t *dst= (uint32_t*)(src+row*stride);
        dst[0]= t0;
        dst[1]= t1;
        dst[2]= t2;
        dst[3]= t3;
    }
}
/**
 * 16x16 horizontal intra prediction: fills each row with its left
 * neighbour pixel, splatted across the 16 columns.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int row;

    for(row=0; row<16; row++){
        const uint32_t fill= src[-1+row*stride]*0x01010101;
        uint32_t *dst= (uint32_t*)(src+row*stride);
        dst[0]= fill;
        dst[1]= fill;
        dst[2]= fill;
        dst[3]= fill;
    }
}
// 16x16 DC intra prediction: fills the macroblock with the rounded mean of
// the 16 left and 16 top neighbour pixels ((dc + 16) >> 5).
// NOTE(review): fragment — `int i, dc=0;`, the accumulation loop headers and
// the top-edge accumulation line are missing from this listing.
2082 static void pred16x16_dc_c(uint8_t *src, int stride){
// left-edge accumulation (top-edge loop is in the missing lines)
2086 dc+= src[-1+i*stride];
// splat the 8-bit DC into all four bytes of a 32-bit word
2093 dc= 0x01010101*((dc + 16)>>5);
2095 for(i=0; i<16; i++){
2096 ((uint32_t*)(src+i*stride))[0]=
2097 ((uint32_t*)(src+i*stride))[1]=
2098 ((uint32_t*)(src+i*stride))[2]=
2099 ((uint32_t*)(src+i*stride))[3]= dc;
// 16x16 left-DC intra prediction (top edge unavailable): mean of the 16
// left pixels, ((dc + 8) >> 4).  NOTE(review): fragment — declarations and
// the accumulation loop header are missing from this listing.
2103 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2107 dc+= src[-1+i*stride];
2110 dc= 0x01010101*((dc + 8)>>4);
2112 for(i=0; i<16; i++){
2113 ((uint32_t*)(src+i*stride))[0]=
2114 ((uint32_t*)(src+i*stride))[1]=
2115 ((uint32_t*)(src+i*stride))[2]=
2116 ((uint32_t*)(src+i*stride))[3]= dc;
// 16x16 top-DC intra prediction (left edge unavailable): mean of the 16
// top pixels, ((dc + 8) >> 4).  NOTE(review): fragment — the declarations
// and the entire top-edge accumulation loop are missing from this listing.
2120 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2126 dc= 0x01010101*((dc + 8)>>4);
2128 for(i=0; i<16; i++){
2129 ((uint32_t*)(src+i*stride))[0]=
2130 ((uint32_t*)(src+i*stride))[1]=
2131 ((uint32_t*)(src+i*stride))[2]=
2132 ((uint32_t*)(src+i*stride))[3]= dc;
/**
 * 16x16 flat-DC intra prediction (no neighbours available): fills the
 * macroblock with the mid-grey value 128.
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int row;

    for(row=0; row<16; row++){
        uint32_t *dst= (uint32_t*)(src+row*stride);
        dst[0]= dst[1]= dst[2]= dst[3]= 0x01010101U*128U;
    }
}
// 16x16 plane intra prediction, shared between H.264 and SVQ3: fits a
// linear gradient (H, V) to the border pixels, then fills the block,
// clamping through the crop table.  `svq3` selects the SVQ3-compatible
// gradient scaling.  NOTE(review): fragment — declarations (i,j,k,a), the
// if(svq3)/else structure, `b += V;` and `src += stride;` per-row updates
// and closing braces are missing from this listing.
2147 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2150 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2151 const uint8_t * const src0 = src+7-stride;
2152 const uint8_t *src1 = src+8*stride-1;
2153 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
// weighted sums of border differences give the horizontal/vertical slope
2154 int H = src0[1] - src0[-1];
2155 int V = src1[0] - src2[ 0];
2156 for(k=2; k<=8; ++k) {
2157 src1 += stride; src2 -= stride;
2158 H += k*(src0[k] - src0[-k]);
2159 V += k*(src1[0] - src2[ 0]);
// SVQ3 branch: different rounding, and H/V are swapped for bit-exactness
2162 H = ( 5*(H/4) ) / 16;
2163 V = ( 5*(V/4) ) / 16;
2165 /* required for 100% accuracy */
2166 i = H; H = V; V = i;
// H.264 branch: standard (5*H + 32) >> 6 scaling
2168 H = ( 5*H+32 ) >> 6;
2169 V = ( 5*V+32 ) >> 6;
2172 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2173 for(j=16; j>0; --j) {
// unrolled 4-pixels-at-a-time fill of one row, clamped via cm[]
2176 for(i=-16; i<0; i+=4) {
2177 src[16+i] = cm[ (b ) >> 5 ];
2178 src[17+i] = cm[ (b+ H) >> 5 ];
2179 src[18+i] = cm[ (b+2*H) >> 5 ];
2180 src[19+i] = cm[ (b+3*H) >> 5 ];
// Plain H.264 16x16 plane prediction: delegates to the shared helper with
// SVQ3 compatibility disabled.  NOTE(review): closing brace missing here.
2187 static void pred16x16_plane_c(uint8_t *src, int stride){
2188 pred16x16_plane_compat_c(src, stride, 0);
/**
 * 8x8 (chroma) vertical intra prediction: replicates the 8 pixels above
 * the block into all 8 rows (two 32-bit copies per row).
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t left4 = ((uint32_t*)(src-stride))[0];
    const uint32_t right4= ((uint32_t*)(src-stride))[1];
    int row;

    for(row=0; row<8; row++){
        ((uint32_t*)(src+row*stride))[0]= left4;
        ((uint32_t*)(src+row*stride))[1]= right4;
    }
}
/**
 * 8x8 (chroma) horizontal intra prediction: fills each row with its left
 * neighbour pixel, splatted across the 8 columns.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int row;

    for(row=0; row<8; row++){
        const uint32_t fill= src[-1+row*stride]*0x01010101;
        ((uint32_t*)(src+row*stride))[0]= fill;
        ((uint32_t*)(src+row*stride))[1]= fill;
    }
}
/**
 * 8x8 (chroma) flat-DC intra prediction (no neighbours available):
 * fills the block with the mid-grey value 128.
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int row;

    for(row=0; row<8; row++){
        ((uint32_t*)(src+row*stride))[0]=
        ((uint32_t*)(src+row*stride))[1]= 0x01010101U*128U;
    }
}
// 8x8 left-DC prediction: separate DC values for the upper (dc0) and lower
// (dc2) 4-row halves, each the rounded mean of its 4 left pixels.
// NOTE(review): fragment — declarations/initialization of i/dc0/dc2 and the
// three loop headers (accumulate, rows 0-3, rows 4-7) are missing here.
2220 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2226 dc0+= src[-1+i*stride];
2227 dc2+= src[-1+(i+4)*stride];
2229 dc0= 0x01010101*((dc0 + 2)>>2);
2230 dc2= 0x01010101*((dc2 + 2)>>2);
// upper half (rows 0..3)
2233 ((uint32_t*)(src+i*stride))[0]=
2234 ((uint32_t*)(src+i*stride))[1]= dc0;
// lower half (rows 4..7)
2237 ((uint32_t*)(src+i*stride))[0]=
2238 ((uint32_t*)(src+i*stride))[1]= dc2;
// 8x8 top-DC prediction: separate DC values for the left (dc0) and right
// (dc1) 4-column halves, each the rounded mean of its 4 top pixels; both
// row halves receive the same pair.  NOTE(review): fragment — declarations
// and the three loop headers are missing from this listing.
2242 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2248 dc0+= src[i-stride];
2249 dc1+= src[4+i-stride];
2251 dc0= 0x01010101*((dc0 + 2)>>2);
2252 dc1= 0x01010101*((dc1 + 2)>>2);
// rows 0..3
2255 ((uint32_t*)(src+i*stride))[0]= dc0;
2256 ((uint32_t*)(src+i*stride))[1]= dc1;
// rows 4..7
2259 ((uint32_t*)(src+i*stride))[0]= dc0;
2260 ((uint32_t*)(src+i*stride))[1]= dc1;
static void pred8x8_dc_c(uint8_t *src, int stride){
    /* Full 8x8 DC prediction (both neighbours available).  The block is
     * split into four 4x4 quadrants:
     *   top-left:     mean of top-left 4 top pixels + top 4 left pixels
     *   top-right:    mean of the right 4 top pixels
     *   bottom-left:  mean of the bottom 4 left pixels
     *   bottom-right: mean of top-right + bottom-left sums combined */
    int sum_tl = 0;   /* left 4 of top row + top 4 of left column */
    int sum_tr = 0;   /* right 4 of top row */
    int sum_bl = 0;   /* bottom 4 of left column */
    int row;
    uint32_t fill[4];

    for(row = 0; row < 4; row++){
        sum_tl += src[row*stride - 1] + src[row - stride];
        sum_tr += src[4 + row - stride];
        sum_bl += src[(row+4)*stride - 1];
    }

    fill[0] = 0x01010101U * (uint32_t)((sum_tl + 4) >> 3);
    fill[1] = 0x01010101U * (uint32_t)((sum_tr + 2) >> 2);
    fill[2] = 0x01010101U * (uint32_t)((sum_bl + 2) >> 2);
    fill[3] = 0x01010101U * (uint32_t)((sum_tr + sum_bl + 4) >> 3);

    for(row = 0; row < 8; row++){
        uint32_t *dst = (uint32_t*)(src + row*stride);
        const int half = (row >> 2) << 1;   /* 0 for rows 0-3, 2 for rows 4-7 */
        dst[0] = fill[half];
        dst[1] = fill[half + 1];
    }
}
/* 8x8 chroma plane prediction: fits a linear ramp a + H*x + V*y to the
 * top and left neighbour pixels, then clips each output sample through
 * the cm[] clip table. */
2290 static void pred8x8_plane_c(uint8_t *src, int stride){
2293 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2294 const uint8_t * const src0 = src+3-stride;
2295 const uint8_t *src1 = src+4*stride-1;
2296 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2297 int H = src0[1] - src0[-1];
2298 int V = src1[0] - src2[ 0];
/* gradients from weighted sums of symmetric neighbour differences */
2299 for(k=2; k<=4; ++k) {
2300 src1 += stride; src2 -= stride;
2301 H += k*(src0[k] - src0[-k]);
2302 V += k*(src1[0] - src2[ 0]);
2304 H = ( 17*H+16 ) >> 5;
2305 V = ( 17*V+16 ) >> 5;
/* plane base value from the corner neighbours */
2307 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
/* one output row per iteration; each sample is (base + x*H) >> 5, clipped */
2308 for(j=8; j>0; --j) {
2311 src[0] = cm[ (b ) >> 5 ];
2312 src[1] = cm[ (b+ H) >> 5 ];
2313 src[2] = cm[ (b+2*H) >> 5 ];
2314 src[3] = cm[ (b+3*H) >> 5 ];
2315 src[4] = cm[ (b+4*H) >> 5 ];
2316 src[5] = cm[ (b+5*H) >> 5 ];
2317 src[6] = cm[ (b+6*H) >> 5 ];
2318 src[7] = cm[ (b+7*H) >> 5 ];
/* Helper macros for the 8x8 luma (pred8x8l_*) prediction functions below.
 * SRC(x,y) addresses the pixel at (x,y) relative to the block origin;
 * negative coordinates reach into the already-decoded neighbour pixels. */
2323 #define SRC(x,y) src[(x)+(y)*stride]
/* NOTE(review): body of the PL(y) helper (its #define line is not visible
 * in this chunk): 3-tap filtered left-column sample l<y>. */
2325 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Loads the filtered left column l0..l7; l0 substitutes SRC(-1,0) for the
 * top-left pixel when it is unavailable, l7 uses a 1:3 tap at the edge. */
2326 #define PREDICT_8x8_LOAD_LEFT \
2327 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2328 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2329 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2330 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
/* NOTE(review): body of the PT(x) helper (define line not visible):
 * 3-tap filtered top-row sample t<x>. */
2333 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Loads the filtered top row t0..t7 with the analogous edge fallbacks. */
2334 #define PREDICT_8x8_LOAD_TOP \
2335 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2336 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2337 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2338 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2339 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
/* NOTE(review): body of the PTR(x) helper (define line not visible). */
2342 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Loads t8..t15 from the top-right neighbour, or replicates SRC(7,-1)
 * when the top-right block is unavailable. */
2343 #define PREDICT_8x8_LOAD_TOPRIGHT \
2344 int t8, t9, t10, t11, t12, t13, t14, t15; \
2345 if(has_topright) { \
2346 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2347 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2348 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
/* Filtered top-left corner sample. */
2350 #define PREDICT_8x8_LOAD_TOPLEFT \
2351 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
/* Fills the whole 8x8 block with the 32-bit pattern v, row by row. */
2353 #define PREDICT_8x8_DC(v) \
2355 for( y = 0; y < 8; y++ ) { \
2356 ((uint32_t*)src)[0] = \
2357 ((uint32_t*)src)[1] = v; \
/* 8x8 luma DC prediction with no neighbours: fill with mid-level 128. */
2361 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2363 PREDICT_8x8_DC(0x80808080);
/* 8x8 luma DC prediction from the filtered left column only. */
2365 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2367 PREDICT_8x8_LOAD_LEFT;
/* rounded mean of l0..l7, splatted into all four bytes of each word */
2368 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from the filtered top row only. */
2371 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2373 PREDICT_8x8_LOAD_TOP;
/* rounded mean of t0..t7, splatted into all four bytes of each word */
2374 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
/* 8x8 luma DC prediction from both the filtered left column and top row. */
2377 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2379 PREDICT_8x8_LOAD_LEFT;
2380 PREDICT_8x8_LOAD_TOP;
/* rounded mean of all 16 neighbour samples */
2381 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2382 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
/* 8x8 luma horizontal prediction: each row is filled with its filtered
 * left-neighbour sample l<y>. */
2385 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2387 PREDICT_8x8_LOAD_LEFT;
2388 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2389 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2390 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
/* 8x8 luma vertical prediction: the filtered top row is written once and
 * then copied down into the remaining 7 rows as a 64-bit word. */
2393 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2396 PREDICT_8x8_LOAD_TOP;
2405 for( y = 1; y < 8; y++ )
2406 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/* 8x8 luma diagonal-down-left prediction: each anti-diagonal of the block
 * gets a 1-2-1 filtered value taken from consecutive top / top-right
 * neighbour samples t0..t15. */
2408 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2410 PREDICT_8x8_LOAD_TOP;
2411 PREDICT_8x8_LOAD_TOPRIGHT;
2412 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2413 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2414 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2415 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2416 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2417 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2418 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2419 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2420 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2421 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2422 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2423 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2424 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2425 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
/* last corner uses a 1-3 tap since there is no t16 */
2426 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/* 8x8 luma diagonal-down-right prediction: diagonals parallel to the main
 * diagonal are filled with 1-2-1 filtered values, walking from the bottom
 * of the left column (l7) through the corner (lt) to the top row (t7). */
2428 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2430 PREDICT_8x8_LOAD_TOP;
2431 PREDICT_8x8_LOAD_LEFT;
2432 PREDICT_8x8_LOAD_TOPLEFT;
2433 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2434 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2435 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2436 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2437 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2438 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2439 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
/* main diagonal pivots on the filtered top-left corner */
2440 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2441 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2442 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2443 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2444 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2445 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2446 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2447 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/* 8x8 luma vertical-right prediction: alternating half-pel averages
 * ((a+b+1)>>1) and 1-2-1 filtered values from the top row, left column and
 * corner, per the H.264 8x8 intra mode equations. */
2450 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2452 PREDICT_8x8_LOAD_TOP;
2453 PREDICT_8x8_LOAD_LEFT;
2454 PREDICT_8x8_LOAD_TOPLEFT;
2455 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2456 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2457 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2458 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2459 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2460 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2461 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2462 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2463 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2464 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2465 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2466 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2467 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2468 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2469 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2470 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2471 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2472 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2473 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2474 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2475 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2476 SRC(7,0)= (t6 + t7 + 1) >> 1;
/* 8x8 luma horizontal-down prediction: interleaved half-pel averages and
 * 1-2-1 filtered values walking up the left column, through the corner,
 * into the top row. */
2478 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2480 PREDICT_8x8_LOAD_TOP;
2481 PREDICT_8x8_LOAD_LEFT;
2482 PREDICT_8x8_LOAD_TOPLEFT;
2483 SRC(0,7)= (l6 + l7 + 1) >> 1;
2484 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2485 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2486 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2487 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2488 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2489 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2490 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2491 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2492 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2493 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2494 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2495 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2496 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2497 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2498 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2499 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2500 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2501 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2502 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2503 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2504 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/* 8x8 luma vertical-left prediction: even rows use half-pel averages of
 * adjacent top/top-right samples, odd rows use 1-2-1 filtered values. */
2506 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2508 PREDICT_8x8_LOAD_TOP;
2509 PREDICT_8x8_LOAD_TOPRIGHT;
2510 SRC(0,0)= (t0 + t1 + 1) >> 1;
2511 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2512 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2513 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2514 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2515 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2516 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2517 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2518 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2519 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2520 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2521 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2522 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2523 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2524 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2525 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2526 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2527 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2528 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2529 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2530 SRC(7,6)= (t10 + t11 + 1) >> 1;
2531 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/* 8x8 luma horizontal-up prediction: interpolates down the left column;
 * cells past the last left sample are all set to l7 (bottom run). */
2533 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2535 PREDICT_8x8_LOAD_LEFT;
2536 SRC(0,0)= (l0 + l1 + 1) >> 1;
2537 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2538 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2539 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2540 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2541 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2542 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2543 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2544 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2545 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2546 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2547 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2548 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2549 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
/* everything below/right of the last interpolated diagonal is plain l7 */
2550 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2551 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2552 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2553 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
/* the prediction helper macros are local to this group of functions */
2555 #undef PREDICT_8x8_LOAD_LEFT
2556 #undef PREDICT_8x8_LOAD_TOP
2557 #undef PREDICT_8x8_LOAD_TOPLEFT
2558 #undef PREDICT_8x8_LOAD_TOPRIGHT
2559 #undef PREDICT_8x8_DC
/* Motion compensation for one partition in one direction (list 0 or 1):
 * computes the quarter-pel luma / eighth-pel chroma source position,
 * falls back to ff_emulated_edge_mc() when the reference area crosses the
 * padded picture edge, then runs the qpel and chroma MC primitives. */
2565 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2566 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2567 int src_x_offset, int src_y_offset,
2568 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2569 MpegEncContext * const s = &h->s;
/* motion vector in quarter-pel units, plus the partition's position */
2570 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2571 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2572 const int luma_xy= (mx&3) + ((my&3)<<2);
2573 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2574 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2575 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2576 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2577 int extra_height= extra_width;
2579 const int full_mx= mx>>2;
2580 const int full_my= my>>2;
2581 const int pic_width = 16*s->mb_width;
2582 const int pic_height = 16*s->mb_height;
2584 assert(pic->data[0]);
/* sub-pel filtering reads 3 extra pixels beyond the block */
2586 if(mx&7) extra_width -= 3;
2587 if(my&7) extra_height -= 3;
/* reference block (partially) outside the padded picture: build an
 * edge-extended copy in edge_emu_buffer and read from that instead */
2589 if( full_mx < 0-extra_width
2590 || full_my < 0-extra_height
2591 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2592 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2593 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2594 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2598 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
/* non-square partitions do a second luma op shifted by delta */
2600 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2603 if(s->flags&CODEC_FLAG_GRAY) return;
2606 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2607 src_cb= s->edge_emu_buffer;
2609 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2612 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2613 src_cr= s->edge_emu_buffer;
2615 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation for one partition: predict from list 0
 * and/or list 1.  When both lists are used, the first prediction is
 * written with the "put" ops and the second is blended in with the "avg"
 * ops (the op pointers are switched after the list-0 call). */
2618 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2619 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2620 int x_offset, int y_offset,
2621 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2622 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2623 int list0, int list1){
2624 MpegEncContext * const s = &h->s;
2625 qpel_mc_func *qpix_op= qpix_put;
2626 h264_chroma_mc_func chroma_op= chroma_put;
/* advance the destinations to this partition; offsets are in chroma
 * (8-pel) units, luma moves twice as far */
2628 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2629 dest_cb += x_offset + y_offset*s->uvlinesize;
2630 dest_cr += x_offset + y_offset*s->uvlinesize;
2631 x_offset += 8*s->mb_x;
2632 y_offset += 8*s->mb_y;
2635 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2636 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2637 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2638 qpix_op, chroma_op);
/* second direction averages on top of the first */
2641 chroma_op= chroma_avg;
2645 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2646 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2647 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2648 qpix_op, chroma_op);
/* Weighted motion compensation for one partition.  Bi-directional case:
 * both predictions are produced separately (list 1 into the scratchpad)
 * and merged with the biweight ops — either implicit weights
 * (use_weight==2) or explicit per-ref weights/offsets.  Uni-directional
 * case: a single prediction, then the explicit weight op in place. */
2652 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2653 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2654 int x_offset, int y_offset,
2655 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2656 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2657 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2658 int list0, int list1){
2659 MpegEncContext * const s = &h->s;
2661 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2662 dest_cb += x_offset + y_offset*s->uvlinesize;
2663 dest_cr += x_offset + y_offset*s->uvlinesize;
2664 x_offset += 8*s->mb_x;
2665 y_offset += 8*s->mb_y;
2668 /* don't optimize for luma-only case, since B-frames usually
2669 * use implicit weights => chroma too. */
2670 uint8_t *tmp_cb = s->obmc_scratchpad;
2671 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2672 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2673 int refn0 = h->ref_cache[0][ scan8[n] ];
2674 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 straight into the destination, list 1 into the scratch area */
2676 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2677 dest_y, dest_cb, dest_cr,
2678 x_offset, y_offset, qpix_put, chroma_put);
2679 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2680 tmp_y, tmp_cb, tmp_cr,
2681 x_offset, y_offset, qpix_put, chroma_put);
2683 if(h->use_weight == 2){
/* implicit weighting: weights sum to 64, denom 5, no offsets */
2684 int weight0 = h->implicit_weight[refn0][refn1];
2685 int weight1 = 64 - weight0;
2686 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2687 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2688 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
/* explicit weighting: per-reference weights and offsets from the slice header */
2690 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2691 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2692 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2693 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2694 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2695 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2696 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2697 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2698 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
/* uni-directional: predict, then weight in place */
2701 int list = list1 ? 1 : 0;
2702 int refn = h->ref_cache[list][ scan8[n] ];
2703 Picture *ref= &h->ref_list[list][refn];
2704 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2705 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2706 qpix_put, chroma_put);
2708 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2709 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2710 if(h->use_weight_chroma){
2711 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2712 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2713 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2714 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatch for one partition: use the weighted path for explicit
 * weighted prediction (use_weight==1) or for implicit bi-pred whose
 * weight differs from the trivial 32/32 average; otherwise use the
 * plain put/avg path. */
2719 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2720 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2721 int x_offset, int y_offset,
2722 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2723 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2724 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2725 int list0, int list1){
2726 if((h->use_weight==2 && list0 && list1
2727 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2728 || h->use_weight==1)
2729 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2730 x_offset, y_offset, qpix_put, chroma_put,
2731 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2733 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2734 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Inter prediction for a whole macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x4, 4x8, 4x4 sub-partitions) and
 * issues one mc_part() per partition with the matching block-size MC
 * function and weight-table index. */
2737 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2738 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2739 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2740 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2741 MpegEncContext * const s = &h->s;
2742 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2743 const int mb_type= s->current_picture.mb_type[mb_xy];
2745 assert(IS_INTER(mb_type));
/* one partition covering the whole macroblock */
2747 if(IS_16X16(mb_type)){
2748 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2749 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2750 &weight_op[0], &weight_avg[0],
2751 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 halves, stacked vertically */
2752 }else if(IS_16X8(mb_type)){
2753 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2754 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2755 &weight_op[1], &weight_avg[1],
2756 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2757 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2758 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2759 &weight_op[1], &weight_avg[1],
2760 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 halves, side by side */
2761 }else if(IS_8X16(mb_type)){
2762 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2763 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2764 &weight_op[2], &weight_avg[2],
2765 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2766 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2767 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2768 &weight_op[2], &weight_avg[2],
2769 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* four 8x8 sub-macroblocks, each with its own sub-partitioning */
2773 assert(IS_8X8(mb_type));
2776 const int sub_mb_type= h->sub_mb_type[i];
2778 int x_offset= (i&1)<<2;
2779 int y_offset= (i&2)<<1;
2781 if(IS_SUB_8X8(sub_mb_type)){
2782 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2783 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2784 &weight_op[3], &weight_avg[3],
2785 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2786 }else if(IS_SUB_8X4(sub_mb_type)){
2787 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2788 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2789 &weight_op[4], &weight_avg[4],
2790 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2791 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2792 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2793 &weight_op[4], &weight_avg[4],
2794 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2795 }else if(IS_SUB_4X8(sub_mb_type)){
2796 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2797 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2798 &weight_op[5], &weight_avg[5],
2799 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2800 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2801 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2802 &weight_op[5], &weight_avg[5],
2803 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2806 assert(IS_SUB_4X4(sub_mb_type));
2808 int sub_x_offset= x_offset + 2*(j&1);
2809 int sub_y_offset= y_offset + (j&2);
2810 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2811 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2812 &weight_op[6], &weight_avg[6],
2813 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* One-time construction of all CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants); guarded by the static
 * `done` flag so repeated decoder instances share the tables. */
2820 static void decode_init_vlc(H264Context *h){
2821 static int done = 0;
2827 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2828 &chroma_dc_coeff_token_len [0], 1, 1,
2829 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* one coeff_token table per nC context */
2832 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2833 &coeff_token_len [i][0], 1, 1,
2834 &coeff_token_bits[i][0], 1, 1, 1);
2838 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2839 &chroma_dc_total_zeros_len [i][0], 1, 1,
2840 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2842 for(i=0; i<15; i++){
2843 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2844 &total_zeros_len [i][0], 1, 1,
2845 &total_zeros_bits[i][0], 1, 1, 1);
2849 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2850 &run_len [i][0], 1, 1,
2851 &run_bits[i][0], 1, 1, 1);
/* the shared table for runs of 7 or more */
2853 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2854 &run_len [6][0], 1, 1,
2855 &run_bits[6][0], 1, 1, 1);
* Sets the intra prediction function pointers.
/* Fills the four dispatch tables (4x4, 8x8 luma, 8x8 chroma, 16x16) with
 * the C reference implementations; platform-specific code may override
 * these elsewhere. */
2862 static void init_pred_ptrs(H264Context *h){
2863 // MpegEncContext * const s = &h->s;
2865 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2866 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2867 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2868 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2869 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2870 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2871 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2872 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2873 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2874 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2875 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2876 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
2878 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2879 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2880 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2881 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2882 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2883 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2884 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2885 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2886 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2887 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2888 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2889 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2891 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2892 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2893 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2894 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2895 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2896 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2897 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2899 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2900 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2901 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2902 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2903 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2904 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2905 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2908 static void free_tables(H264Context *h){
2909 av_freep(&h->intra4x4_pred_mode);
2910 av_freep(&h->chroma_pred_mode_table);
2911 av_freep(&h->cbp_table);
2912 av_freep(&h->mvd_table[0]);
2913 av_freep(&h->mvd_table[1]);
2914 av_freep(&h->direct_table);
2915 av_freep(&h->non_zero_count);
2916 av_freep(&h->slice_table_base);
2917 av_freep(&h->top_borders[1]);
2918 av_freep(&h->top_borders[0]);
2919 h->slice_table= NULL;
2921 av_freep(&h->mb2b_xy);
2922 av_freep(&h->mb2b8_xy);
2924 av_freep(&h->dequant4_coeff);
2925 av_freep(&h->dequant8_coeff);
2927 av_freep(&h->s.obmc_scratchpad);
* needs width/height
/* Allocates all per-sequence tables sized from mb_width/mb_height and
 * precomputes the mb->b / mb->b8 index maps and the dequant tables.
 * Returns 0 on success (error path via CHECKED_ALLOCZ not visible here). */
2934 static int alloc_tables(H264Context *h){
2935 MpegEncContext * const s = &h->s;
/* one extra row of macroblocks for the out-of-picture neighbours */
2936 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2939 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2941 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2942 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
2943 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2944 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2945 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC needs extra per-MB context tables */
2947 if( h->pps.cabac ) {
2948 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2949 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2950 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2951 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice" so neighbour availability checks fail cleanly */
2954 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
2955 h->slice_table= h->slice_table_base + s->mb_stride + 1;
2957 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2958 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2959 for(y=0; y<s->mb_height; y++){
2960 for(x=0; x<s->mb_width; x++){
2961 const int mb_xy= x + y*s->mb_stride;
2962 const int b_xy = 4*x + 4*y*h->b_stride;
2963 const int b8_xy= 2*x + 2*y*h->b8_stride;
2965 h->mb2b_xy [mb_xy]= b_xy;
2966 h->mb2b8_xy[mb_xy]= b8_xy;
/* dequant tables: one row per QP (0..51) */
2970 CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
2971 CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
2972 memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
2973 for(q=0; q<52; q++){
2974 int shift = div6[q];
2976 if(shift >= 2) // qp<12 are shifted during dequant
2979 h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
2980 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;
/* lossless (transform bypass): identity dequant for QP 0 */
2982 if(h->sps.transform_bypass){
2984 h->dequant4_coeff[0][x] = 1;
2986 h->dequant8_coeff[0][x] = 1<<2;
/* allocated lazily in frame_start() once linesize is known */
2989 s->obmc_scratchpad = NULL;
/* Shared decoder/encoder context setup: copies the picture dimensions and
 * codec id from the AVCodecContext and enables unrestricted MVs. */
2997 static void common_init(H264Context *h){
2998 MpegEncContext * const s = &h->s;
3000 s->width = s->avctx->width;
3001 s->height = s->avctx->height;
3002 s->codec_id= s->avctx->codec->id;
/* H.264 motion vectors may point outside the picture */
3006 s->unrestricted_mv=1;
3007 s->decode=1; //FIXME
/* AVCodec init callback: sets up the MpegEncContext defaults, output
 * format and pixel format, and detects avcC-style extradata (first byte
 * == 1 is the configurationVersion of an AVCDecoderConfigurationRecord —
 * NOTE(review): handling of that case is outside this chunk). */
3010 static int decode_init(AVCodecContext *avctx){
3011 H264Context *h= avctx->priv_data;
3012 MpegEncContext * const s = &h->s;
3014 MPV_decode_defaults(s);
3019 s->out_format = FMT_H264;
3020 s->workaround_bugs= avctx->workaround_bugs;
3023 // s->decode_mb= ff_h263_decode_mb;
3025 avctx->pix_fmt= PIX_FMT_YUV420P;
3029 if(avctx->extradata_size > 0 && avctx->extradata &&
3030 *(char *)avctx->extradata == 1){
/* Per-frame setup: starts the MPV frame and error resilience, then
 * precomputes the per-block pixel offsets used when reconstructing
 * (entries 0-23 for frame coding, 24-47 with doubled row stride). */
3040 static void frame_start(H264Context *h){
3041 MpegEncContext * const s = &h->s;
3044 MPV_frame_start(s, s->avctx);
3045 ff_er_frame_start(s);
3047 assert(s->linesize && s->uvlinesize);
3049 for(i=0; i<16; i++){
3050 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3051 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets (both planes share the same geometry) */
3054 h->block_offset[16+i]=
3055 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3056 h->block_offset[24+16+i]=
3057 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3060 /* can't be in alloc_tables because linesize isn't known there.
3061 * FIXME: redo bipred weight to not require extra buffer? */
3062 if(!s->obmc_scratchpad)
3063 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3065 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Saves this macroblock's right column into left_border and its bottom
 * rows into top_borders[0], so the deblocking/prediction of the next
 * macroblocks can see the unfiltered neighbour pixels. */
3068 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3069 MpegEncContext * const s = &h->s;
3073 src_cb -= uvlinesize;
3074 src_cr -= uvlinesize;
3076 // There are two lines saved, the line above the top macroblock of a pair,
3077 // and the line above the bottom macroblock
3078 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3079 for(i=1; i<17; i++){
3080 h->left_border[i]= src_y[15+i* linesize];
/* bottom luma row of this MB becomes the "top" row for the MB below */
3083 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3084 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3086 if(!(s->flags&CODEC_FLAG_GRAY)){
3087 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3088 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3090 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3091 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3093 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3094 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swaps (xchg!=0) or copies the saved neighbour borders with the current
 * macroblock's edge pixels, giving intra prediction access to the
 * unfiltered neighbours and restoring them afterwards. */
3098 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3099 MpegEncContext * const s = &h->s;
3102 int deblock_left = (s->mb_x > 0);
3103 int deblock_top = (s->mb_y > 0);
/* step back to the top-left neighbour pixel */
3105 src_y -= linesize + 1;
3106 src_cb -= uvlinesize + 1;
3107 src_cr -= uvlinesize + 1;
3109 #define XCHG(a,b,t,xchg)\
3116 for(i = !deblock_top; i<17; i++){
3117 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3122 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3123 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbour pixels, needed by the diagonal prediction modes */
3124 if(s->mb_x+1 < s->mb_width){
3125 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3129 if(!(s->flags&CODEC_FLAG_GRAY)){
3131 for(i = !deblock_top; i<9; i++){
3132 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3133 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3137 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3138 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock PAIR (two vertically stacked MBs).  Two bottom rows go into
 * h->top_borders[0] and [1], and 34 left-column samples (2 corners + 32
 * rows) into h->left_border.
 * NOTE(review): elided listing — loop closers etc. are not shown. */
3143 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3144 MpegEncContext * const s = &h->s;
/* step back two lines: one per field/MB of the pair */
3147 src_y -= 2 * linesize;
3148 src_cb -= 2 * uvlinesize;
3149 src_cr -= 2 * uvlinesize;
3151 // There are two lines saved, the line above the top macroblock of a pair,
3152 // and the line above the bottom macroblock
/* left border: two old corner pixels, then the pair's 32-row right column */
3153 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3154 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3155 for(i=2; i<34; i++){
3156 h->left_border[i]= src_y[15+i* linesize];
/* bottom two luma rows of the pair (rows 32 and 33 relative to src_y) */
3159 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3161 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3162 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
/* chroma: same layout, 18-sample left-border sections for cb and cr */
3164 if(!(s->flags&CODEC_FLAG_GRAY)){
3165 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3166 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3167 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3168 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3169 for(i=2; i<18; i++){
3170 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3171 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3173 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3174 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3175 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3176 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): exchanges/copies the saved borders of
 * a macroblock pair with the picture (34 left-column samples, two top rows
 * per plane).  xchg=1 before intra prediction, xchg=0 to restore after.
 * NOTE(review): elided listing — XCHG macro body is not shown here. */
3180 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3181 MpegEncContext * const s = &h->s;
/* picture-edge guards: no neighbour means nothing to exchange */
3184 int deblock_left = (s->mb_x > 0);
3185 int deblock_top = (s->mb_y > 0);
3187 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* point at the top-left neighbour sample of the pair (two lines up, one left) */
3189 src_y -= 2 * linesize + 1;
3190 src_cb -= 2 * uvlinesize + 1;
3191 src_cr -= 2 * uvlinesize + 1;
3193 #define XCHG(a,b,t,xchg)\
/* skip the first two rows (both fields) when there is no top neighbour */
3200 for(i = (!deblock_top)<<1; i<34; i++){
3201 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* two saved top rows, one per top_borders bank */
3206 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3207 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3208 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3209 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* chroma borders, skipped in gray (luma-only) mode */
3212 if(!(s->flags&CODEC_FLAG_GRAY)){
3214 for(i = (!deblock_top) << 1; i<18; i++){
3215 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3216 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3220 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3221 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3222 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3223 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* High-level decode of one macroblock: computes destination pointers,
 * then dispatches on mb_type — PCM copy, intra prediction (+IDCT), or
 * inter motion compensation — adds the residual, and finally runs the
 * deblocking filter (with MBAFF pair handling).
 * NOTE(review): heavily elided listing; several branches/closing braces
 * are missing, comments describe only the visible lines. */
3228 static void hl_decode_mb(H264Context *h){
3229 MpegEncContext * const s = &h->s;
3230 const int mb_x= s->mb_x;
3231 const int mb_y= s->mb_y;
3232 const int mb_xy= mb_x + mb_y*s->mb_stride;
3233 const int mb_type= s->current_picture.mb_type[mb_xy];
3234 uint8_t *dest_y, *dest_cb, *dest_cr;
3235 int linesize, uvlinesize /*dct_offset*/;
3237 int *block_offset = &h->block_offset[0];
3238 const unsigned int bottom = mb_y & 1;
/* lossless path: qscale 0 with SPS qpprime_y_zero_transform_bypass_flag set */
3239 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3240 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination of this MB in the current picture (4:2:0 chroma at half size) */
3245 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3246 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3247 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* field decoding: double the stride and use the field block-offset table */
3249 if (h->mb_field_decoding_flag) {
3250 linesize = s->linesize * 2;
3251 uvlinesize = s->uvlinesize * 2;
3252 block_offset = &h->block_offset[24];
3253 if(mb_y&1){ //FIXME move out of this func?
3254 dest_y -= s->linesize*15;
3255 dest_cb-= s->uvlinesize*7;
3256 dest_cr-= s->uvlinesize*7;
3259 linesize = s->linesize;
3260 uvlinesize = s->uvlinesize;
3261 // dct_offset = s->linesize * 16;
/* pick the residual-add routine: plain pixel add when bypassing the
 * transform, otherwise 8x8 or 4x4 IDCT+add depending on mb_type */
3264 idct_add = transform_bypass
3265 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3266 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
/* I_PCM: raw samples were parsed into h->mb; copy them straight out */
3268 if (IS_INTRA_PCM(mb_type)) {
3271 // The pixels are stored in h->mb array in the same order as levels,
3272 // copy them in output in the correct order.
3273 for(i=0; i<16; i++) {
3274 for (y=0; y<4; y++) {
3275 for (x=0; x<4; x++) {
3276 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3280 for(i=16; i<16+4; i++) {
3281 for (y=0; y<4; y++) {
3282 for (x=0; x<4; x++) {
3283 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3287 for(i=20; i<20+4; i++) {
3288 for (y=0; y<4; y++) {
3289 for (x=0; x<4; x++) {
3290 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra MB: exchange borders so prediction reads unfiltered neighbours */
3295 if(IS_INTRA(mb_type)){
3296 if(h->deblocking_filter) {
3297 if (h->mb_aff_frame) {
3299 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3301 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3305 if(!(s->flags&CODEC_FLAG_GRAY)){
3306 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3307 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
/* intra 4x4 / 8x8 luma prediction, block by block, plus residual */
3310 if(IS_INTRA4x4(mb_type)){
3312 if(IS_8x8DCT(mb_type)){
3313 for(i=0; i<16; i+=4){
3314 uint8_t * const ptr= dest_y + block_offset[i];
3315 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3316 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3317 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3318 if(h->non_zero_count_cache[ scan8[i] ])
3319 idct_add(ptr, h->mb + i*16, linesize);
3322 for(i=0; i<16; i++){
3323 uint8_t * const ptr= dest_y + block_offset[i];
3325 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these two modes need the top-right samples; if unavailable, replicate
 * the last available top pixel across all four positions */
3328 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3329 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3330 assert(mb_y || linesize <= block_offset[i]);
3331 if(!topright_avail){
3332 tr= ptr[3 - linesize]*0x01010101;
3333 topright= (uint8_t*) &tr;
3335 topright= ptr + 4 - linesize;
3339 h->pred4x4[ dir ](ptr, topright, linesize);
3340 if(h->non_zero_count_cache[ scan8[i] ]){
3341 if(s->codec_id == CODEC_ID_H264)
3342 idct_add(ptr, h->mb + i*16, linesize);
3344 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: one full-MB prediction, then dequant/IDCT of the DC plane */
3349 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3350 if(s->codec_id == CODEC_ID_H264){
3351 if(!transform_bypass)
3352 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
3354 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* restore the exchanged borders (xchg=0) now that prediction is done */
3356 if(h->deblocking_filter) {
3357 if (h->mb_aff_frame) {
3359 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3360 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3361 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3363 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3367 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* inter MB (H.264 only): motion compensation with optional weighting */
3370 }else if(s->codec_id == CODEC_ID_H264){
3371 hl_motion(h, dest_y, dest_cb, dest_cr,
3372 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3373 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3374 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residual for non-intra4x4 MBs (intra4x4 added it per block above) */
3378 if(!IS_INTRA4x4(mb_type)){
3379 if(s->codec_id == CODEC_ID_H264){
3380 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3381 for(i=0; i<16; i+=di){
3382 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3383 uint8_t * const ptr= dest_y + block_offset[i];
3384 idct_add(ptr, h->mb + i*16, linesize);
3388 for(i=0; i<16; i++){
3389 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3390 uint8_t * const ptr= dest_y + block_offset[i];
3391 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: dequant the 2x2 DC blocks, then add per 4x4 block */
3397 if(!(s->flags&CODEC_FLAG_GRAY)){
3398 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3399 if(!transform_bypass){
3400 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
3401 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
3403 if(s->codec_id == CODEC_ID_H264){
3404 for(i=16; i<16+4; i++){
3405 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3406 uint8_t * const ptr= dest_cb + block_offset[i];
3407 idct_add(ptr, h->mb + i*16, uvlinesize);
3410 for(i=20; i<20+4; i++){
3411 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3412 uint8_t * const ptr= dest_cr + block_offset[i];
3413 idct_add(ptr, h->mb + i*16, uvlinesize);
3417 for(i=16; i<16+4; i++){
3418 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3419 uint8_t * const ptr= dest_cb + block_offset[i];
3420 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3423 for(i=20; i<20+4; i++){
3424 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3425 uint8_t * const ptr= dest_cr + block_offset[i];
3426 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: for MBAFF wait until the bottom MB of a pair is done, then
 * filter both MBs of the pair; otherwise filter this MB directly */
3432 if(h->deblocking_filter) {
3433 if (h->mb_aff_frame) {
3434 const int mb_y = s->mb_y - 1;
3435 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3436 const int mb_xy= mb_x + mb_y*s->mb_stride;
3437 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3438 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* debug canary: detect unexpected modification of chroma pixel (8,1) */
3439 uint8_t tmp = s->current_picture.data[1][384];
3440 if (!bottom) return;
3441 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3442 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3443 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3445 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3446 // TODO deblock a pair
3449 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3450 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3451 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3452 if (tmp != s->current_picture.data[1][384]) {
3453 tprintf("modified pixel 8,1 (1)\n");
3457 tprintf("call mbaff filter_mb\n");
3458 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3459 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3460 if (tmp != s->current_picture.data[1][384]) {
3461 tprintf("modified pixel 8,1 (2)\n");
3464 tprintf("call filter_mb\n");
3465 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3466 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3467 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3473 * fills the default_ref_list.
/* Builds the default reference picture lists (h->default_ref_list).
 * For B slices the short-term refs are sorted by POC relative to the
 * current picture (L0 walks backwards in POC, L1 forwards); for P slices
 * short-term refs are taken in short_ref order.  Long-term refs are
 * appended afterwards in index order.
 * NOTE(review): elided listing; several lines (variable decls, brace
 * closers) are missing from this view. */
3475 static int fill_default_ref_list(H264Context *h){
3476 MpegEncContext * const s = &h->s;
3478 int smallest_poc_greater_than_current = -1;
3479 Picture sorted_short_ref[32];
3481 if(h->slice_type==B_TYPE){
3485 /* sort frame according to poc in B slice */
/* selection sort: repeatedly pick the smallest POC above the last limit */
3486 for(out_i=0; out_i<h->short_ref_count; out_i++){
3488 int best_poc=INT_MAX;
3490 for(i=0; i<h->short_ref_count; i++){
3491 const int poc= h->short_ref[i]->poc;
3492 if(poc > limit && poc < best_poc){
3498 assert(best_i != INT_MIN);
3501 sorted_short_ref[out_i]= *h->short_ref[best_i];
3502 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember the first sorted entry at-or-after the current POC: it is the
 * split point between the past (L0 start) and future (L1 start) refs */
3503 if (-1 == smallest_poc_greater_than_current) {
3504 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3505 smallest_poc_greater_than_current = out_i;
3511 if(s->picture_structure == PICT_FRAME){
3512 if(h->slice_type==B_TYPE){
3514 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3516 // find the largest poc
/* build L0 (step +1 through the sorted array) and L1 (step -1) */
3517 for(list=0; list<2; list++){
3520 int step= list ? -1 : 1;
3522 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3523 while(j<0 || j>= h->short_ref_count){
3524 if(j != -99 && step == (list ? -1 : 1))
3527 j= smallest_poc_greater_than_current + (step>>1);
/* reference==3 means the picture is a reference in both fields */
3529 if(sorted_short_ref[j].reference != 3) continue;
3530 h->default_ref_list[list][index ]= sorted_short_ref[j];
3531 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
/* append long-term references after the short-term ones */
3534 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3535 if(h->long_ref[i] == NULL) continue;
3536 if(h->long_ref[i]->reference != 3) continue;
3538 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3539 h->default_ref_list[ list ][index++].pic_id= i;; /* NOTE(review): stray double semicolon (harmless) */
3542 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3543 // swap the two first elements of L1 when
3544 // L0 and L1 are identical
3545 Picture temp= h->default_ref_list[1][0];
3546 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3547 h->default_ref_list[1][1] = temp;
/* zero the unused tail of the list so later code sees cleared entries */
3550 if(index < h->ref_count[ list ])
3551 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
/* P-slice path: short-term refs in stored order, then long-term refs */
3555 for(i=0; i<h->short_ref_count; i++){
3556 if(h->short_ref[i]->reference != 3) continue; //FIXME proper reference-field handling
3557 h->default_ref_list[0][index ]= *h->short_ref[i];
3558 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3560 for(i = 0; i < 16; i++){
3561 if(h->long_ref[i] == NULL) continue;
3562 if(h->long_ref[i]->reference != 3) continue;
3563 h->default_ref_list[0][index ]= *h->long_ref[i];
3564 h->default_ref_list[0][index++].pic_id= i;; /* NOTE(review): stray double semicolon (harmless) */
3566 if(index < h->ref_count[0])
3567 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3570 if(h->slice_type==B_TYPE){
3572 //FIXME handle the second field
/* debug dump of the resulting lists */
3576 for (i=0; i<h->ref_count[0]; i++) {
3577 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3579 if(h->slice_type==B_TYPE){
3580 for (i=0; i<h->ref_count[1]; i++) {
3581 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3588 static void print_short_term(H264Context *h);
3589 static void print_long_term(H264Context *h);
/* Parses ref_pic_list_reordering() from the slice header and applies the
 * reorder commands to h->ref_list (spec 7.3.3.1 / 8.2.4.3).  Starts from
 * the default lists, then for each reordering_of_pic_nums_idc moves the
 * selected short- or long-term picture to the current index.
 * Returns 0 on success, negative on bitstream errors.
 * NOTE(review): elided listing; some error-return lines are not shown. */
3591 static int decode_ref_pic_list_reordering(H264Context *h){
3592 MpegEncContext * const s = &h->s;
3595 print_short_term(h);
3597 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3599 for(list=0; list<2; list++){
3600 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0 / _l1 */
3602 if(get_bits1(&s->gb)){
3603 int pred= h->curr_pic_num;
3605 for(index=0; ; index++){
3606 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3609 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
3611 if(reordering_of_pic_nums_idc==3)
3614 if(index >= h->ref_count[list]){
3615 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3619 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term ref addressed by pic_num difference */
3620 if(reordering_of_pic_nums_idc<2){
3621 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3623 if(abs_diff_pic_num >= h->max_pic_num){
3624 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num */
3628 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3629 else pred+= abs_diff_pic_num;
3630 pred &= h->max_pic_num - 1;
3632 for(i= h->short_ref_count-1; i>=0; i--){
3633 ref = h->short_ref[i];
3634 assert(ref->reference == 3);
3635 assert(!ref->long_ref);
3636 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3640 ref->pic_id= ref->frame_num;
/* idc 2: long-term ref addressed directly by long_term_pic_idx */
3642 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3643 ref = h->long_ref[pic_id];
3644 ref->pic_id= pic_id;
3645 assert(ref->reference == 3);
3646 assert(ref->long_ref);
3651 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3652 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift the list down to insert ref at 'index' (removing any duplicate) */
3654 for(i=index; i+1<h->ref_count[list]; i++){
3655 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3658 for(; i > index; i--){
3659 h->ref_list[list][i]= h->ref_list[list][i-1];
3661 h->ref_list[list][index]= *ref;
3664 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3670 if(h->slice_type!=B_TYPE) break;
/* replace any empty (missing) entries with the current picture */
3672 for(list=0; list<2; list++){
3673 for(index= 0; index < h->ref_count[list]; index++){
3674 if(!h->ref_list[list][index].data[0])
3675 h->ref_list[list][index]= s->current_picture;
3677 if(h->slice_type!=B_TYPE) break;
/* precompute temporal-direct scale factors for B slices */
3680 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3681 direct_dist_scale_factor(h);
3682 direct_ref_list_init(h);
/* Parses pred_weight_table() from the slice header (spec 7.3.3.2):
 * explicit luma/chroma weights and offsets per reference, per list.
 * Sets h->use_weight / h->use_weight_chroma when any weight differs
 * from the default (1<<log2_denom weight, 0 offset).
 * NOTE(review): elided listing; some closing braces are missing here. */
3686 static int pred_weight_table(H264Context *h){
3687 MpegEncContext * const s = &h->s;
3689 int luma_def, chroma_def;
3692 h->use_weight_chroma= 0;
3693 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3694 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* defaults equal to a no-op weighting */
3695 luma_def = 1<<h->luma_log2_weight_denom;
3696 chroma_def = 1<<h->chroma_log2_weight_denom;
3698 for(list=0; list<2; list++){
3699 for(i=0; i<h->ref_count[list]; i++){
3700 int luma_weight_flag, chroma_weight_flag;
3702 luma_weight_flag= get_bits1(&s->gb);
3703 if(luma_weight_flag){
3704 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3705 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* mark weighting as in use only when a weight is non-default */
3706 if( h->luma_weight[list][i] != luma_def
3707 || h->luma_offset[list][i] != 0)
3710 h->luma_weight[list][i]= luma_def;
3711 h->luma_offset[list][i]= 0;
3714 chroma_weight_flag= get_bits1(&s->gb);
3715 if(chroma_weight_flag){
/* chroma has two components (cb/cr), indexed by j */
3718 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3719 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3720 if( h->chroma_weight[list][i][j] != chroma_def
3721 || h->chroma_offset[list][i][j] != 0)
3722 h->use_weight_chroma= 1;
3727 h->chroma_weight[list][i][j]= chroma_def;
3728 h->chroma_offset[list][i][j]= 0;
/* list 1 weights exist only for B slices */
3732 if(h->slice_type != B_TYPE) break;
3734 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Initializes h->implicit_weight for implicit weighted bi-prediction
 * (spec 8.4.2.3.2): the weight of each (ref0, ref1) pair is derived from
 * the POC distances of the two references to the current picture.
 * Falls back to equal 32/32 weights when the scale factor is out of range.
 * NOTE(review): elided listing; the early-return no-weighting branch body
 * is partially missing from this view. */
3738 static void implicit_weight_table(H264Context *h){
3739 MpegEncContext * const s = &h->s;
3741 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric ref pair around the current POC: weighting is a no-op */
3743 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3744 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3746 h->use_weight_chroma= 0;
/* NOTE(review): value 2 here marks "implicit mode"; upstream sets
 * h->use_weight=2 at this point — confirm against the full source */
3751 h->use_weight_chroma= 2;
3752 h->luma_log2_weight_denom= 5;
3753 h->chroma_log2_weight_denom= 5;
3756 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3757 int poc0 = h->ref_list[0][ref0].poc;
3758 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3759 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances, per the spec's DistScaleFactor derivation */
3760 int td = clip(poc1 - poc0, -128, 127);
3762 int tb = clip(cur_poc - poc0, -128, 127);
3763 int tx = (16384 + (ABS(td) >> 1)) / td;
3764 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3765 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3766 h->implicit_weight[ref0][ref1] = 32;
3768 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3770 h->implicit_weight[ref0][ref1] = 32;
/* Drops the reference flag of a picture unless it is still queued for
 * output (delayed_output_pic or one of delayed_pic[]).
 * NOTE(review): elided listing — the actual reference-clearing lines are
 * not visible in this chunk. */
3775 static inline void unreference_pic(H264Context *h, Picture *pic){
3778 if(pic == h->delayed_output_pic)
3781 for(i = 0; h->delayed_pic[i]; i++)
3782 if(pic == h->delayed_pic[i]){
3790 * instantaneous decoder refresh.
/* IDR handling: unreference and clear every long-term and short-term
 * reference picture, resetting both counters to zero. */
3792 static void idr(H264Context *h){
3795 for(i=0; i<16; i++){
3796 if (h->long_ref[i] != NULL) {
3797 unreference_pic(h, h->long_ref[i]);
3798 h->long_ref[i]= NULL;
3801 h->long_ref_count=0;
3803 for(i=0; i<h->short_ref_count; i++){
3804 unreference_pic(h, h->short_ref[i]);
3805 h->short_ref[i]= NULL;
3807 h->short_ref_count=0;
3810 /* forget old pics after a seek */
/* AVCodec.flush callback: clears the delayed-output queue and drops the
 * reference flag on the picture currently being decoded. */
3811 static void flush_dpb(AVCodecContext *avctx){
3812 H264Context *h= avctx->priv_data;
3815 h->delayed_pic[i]= NULL;
3816 h->delayed_output_pic= NULL;
/* current_picture_ptr may be NULL if no frame was decoded yet */
3818 if(h->s.current_picture_ptr)
3819 h->s.current_picture_ptr->reference= 0;
3824 * @return the removed picture or NULL if an error occurs
/* Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array.  Returns the removed Picture,
 * or (per the listing) falls through to a not-found path when no entry
 * matches. */
3826 static Picture * remove_short(H264Context *h, int frame_num){
3827 MpegEncContext * const s = &h->s;
3830 if(s->avctx->debug&FF_DEBUG_MMCO)
3831 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3833 for(i=0; i<h->short_ref_count; i++){
3834 Picture *pic= h->short_ref[i];
3835 if(s->avctx->debug&FF_DEBUG_MMCO)
3836 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3837 if(pic->frame_num == frame_num){
/* compact the array over the removed slot */
3838 h->short_ref[i]= NULL;
3839 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3840 h->short_ref_count--;
3849 * @return the removed picture or NULL if an error occurs
/* Removes the long-term reference at index i (NULL-safe): clears the slot
 * and decrements long_ref_count only when a picture was actually there. */
3851 static Picture * remove_long(H264Context *h, int i){
3854 pic= h->long_ref[i];
3855 h->long_ref[i]= NULL;
3856 if(pic) h->long_ref_count--;
3862 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3864 static void print_short_term(H264Context *h) {
3866 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3867 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3868 for(i=0; i<h->short_ref_count; i++){
3869 Picture *pic= h->short_ref[i];
3870 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3876 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3878 static void print_long_term(H264Context *h) {
3880 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3881 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3882 for(i = 0; i < 16; i++){
3883 Picture *pic= h->long_ref[i];
3885 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3892 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO commands (spec 8.2.5.4) to the short/long
 * reference lists, then inserts the current picture as a short-term ref
 * unless an MMCO already made it long-term.
 * NOTE(review): elided listing; some case/break lines are not visible. */
3894 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3895 MpegEncContext * const s = &h->s;
3897 int current_is_long=0;
3900 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3901 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3903 for(i=0; i<mmco_count; i++){
3904 if(s->avctx->debug&FF_DEBUG_MMCO)
3905 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3907 switch(mmco[i].opcode){
/* MMCO 1: mark a short-term picture as unused for reference */
3908 case MMCO_SHORT2UNUSED:
3909 pic= remove_short(h, mmco[i].short_frame_num);
3911 unreference_pic(h, pic);
3912 else if(s->avctx->debug&FF_DEBUG_MMCO)
3913 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
/* MMCO 3: move a short-term picture to a long-term index
 * (evicting whatever occupied that index) */
3915 case MMCO_SHORT2LONG:
3916 pic= remove_long(h, mmco[i].long_index);
3917 if(pic) unreference_pic(h, pic);
3919 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3920 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3921 h->long_ref_count++;
/* MMCO 2: mark a long-term picture as unused for reference */
3923 case MMCO_LONG2UNUSED:
3924 pic= remove_long(h, mmco[i].long_index);
3926 unreference_pic(h, pic);
3927 else if(s->avctx->debug&FF_DEBUG_MMCO)
3928 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
/* MMCO 6 (visible part): store the CURRENT picture as long-term */
3931 pic= remove_long(h, mmco[i].long_index);
3932 if(pic) unreference_pic(h, pic);
3934 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3935 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3936 h->long_ref_count++;
/* MMCO 4: drop long-term refs with index >= the new maximum */
3940 case MMCO_SET_MAX_LONG:
3941 assert(mmco[i].long_index <= 16);
3942 // just remove the long term which index is greater than new max
3943 for(j = mmco[i].long_index; j<16; j++){
3944 pic = remove_long(h, j);
3945 if (pic) unreference_pic(h, pic);
/* MMCO 5 (visible part): reset — clear all short- and long-term refs */
3949 while(h->short_ref_count){
3950 pic= remove_short(h, h->short_ref[0]->frame_num);
3951 unreference_pic(h, pic);
3953 for(j = 0; j < 16; j++) {
3954 pic= remove_long(h, j);
3955 if(pic) unreference_pic(h, pic);
/* default (sliding window / non-long current): insert the current picture
 * at the head of the short-term list */
3962 if(!current_is_long){
3963 pic= remove_short(h, s->current_picture_ptr->frame_num);
3965 unreference_pic(h, pic);
3966 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3969 if(h->short_ref_count)
3970 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3972 h->short_ref[0]= s->current_picture_ptr;
3973 h->short_ref[0]->long_ref=0;
3974 h->short_ref_count++;
3977 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header (spec 7.3.3.3) into
 * h->mmco[]: IDR handling, or an adaptive MMCO command list, or the
 * implicit sliding-window removal when the ref buffer is full.
 * NOTE(review): elided listing; break/return lines are partially missing. */
3982 static int decode_ref_pic_marking(H264Context *h){
3983 MpegEncContext * const s = &h->s;
3986 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; stored as 0/-1 */
3987 s->broken_link= get_bits1(&s->gb) -1;
/* long_term_reference_flag: -1 means keep current pic short-term */
3988 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
3989 if(h->mmco[0].long_index == -1)
3992 h->mmco[0].opcode= MMCO_LONG;
3996 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
3997 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3998 MMCOOpcode opcode= get_ue_golomb(&s->gb);; /* NOTE(review): stray double semicolon (harmless) */
4000 h->mmco[i].opcode= opcode;
4001 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute frame_num, wrapped */
4002 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4003 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4004 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4008 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4009 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4010 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4011 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4016 if(opcode > MMCO_LONG){
4017 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4020 if(opcode == MMCO_END)
4025 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: buffer full → synthesize an MMCO removing the oldest
 * short-term reference */
4027 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4028 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4029 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* Derives the picture order count (POC) of the current picture per spec
 * 8.2.1, handling all three poc_type modes (0: explicit lsb/msb,
 * 1: frame_num-based with per-cycle offsets, 2: implicit from frame_num),
 * and stores the field/frame POCs in the current picture.
 * NOTE(review): elided listing; some else-branches are not visible. */
4039 static int init_poc(H264Context *h){
4040 MpegEncContext * const s = &h->s;
4041 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4044 if(h->nal_unit_type == NAL_IDR_SLICE){
4045 h->frame_num_offset= 0;
/* frame_num wrapped → advance the offset by one wrap period */
4047 if(h->frame_num < h->prev_frame_num)
4048 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4050 h->frame_num_offset= h->prev_frame_num_offset;
/* poc_type 0: reconstruct poc_msb from the transmitted poc_lsb */
4053 if(h->sps.poc_type==0){
4054 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4056 if(h->nal_unit_type == NAL_IDR_SLICE){
/* detect lsb wrap-around in either direction (spec 8.2.1.1) */
4061 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4062 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4063 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4064 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4066 h->poc_msb = h->prev_poc_msb;
4067 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4069 field_poc[1] = h->poc_msb + h->poc_lsb;
4070 if(s->picture_structure == PICT_FRAME)
4071 field_poc[1] += h->delta_poc_bottom;
/* poc_type 1: POC derived from frame_num plus SPS offset table */
4072 }else if(h->sps.poc_type==1){
4073 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4076 if(h->sps.poc_cycle_length != 0)
4077 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures are shifted back by one (spec 8.2.1.2) */
4081 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4084 expected_delta_per_poc_cycle = 0;
4085 for(i=0; i < h->sps.poc_cycle_length; i++)
4086 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4088 if(abs_frame_num > 0){
4089 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4090 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4092 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4093 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4094 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4098 if(h->nal_ref_idc == 0)
4099 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4101 field_poc[0] = expectedpoc + h->delta_poc[0];
4102 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4104 if(s->picture_structure == PICT_FRAME)
4105 field_poc[1] += h->delta_poc[1];
/* poc_type 2 (visible part): POC is 2*frame_num, minus 1 for non-refs */
4108 if(h->nal_unit_type == NAL_IDR_SLICE){
4111 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4112 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* store the derived POCs on the current picture */
4118 if(s->picture_structure != PICT_BOTTOM_FIELD)
4119 s->current_picture_ptr->field_poc[0]= field_poc[0];
4120 if(s->picture_structure != PICT_TOP_FIELD)
4121 s->current_picture_ptr->field_poc[1]= field_poc[1];
4122 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4123 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4129 * decodes a slice header.
4130 * this will allso call MPV_common_init() and frame_start() as needed
4132 static int decode_slice_header(H264Context *h){
4133 MpegEncContext * const s = &h->s;
4134 int first_mb_in_slice, pps_id;
4135 int num_ref_idx_active_override_flag;
4136 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4138 int default_ref_list_done = 0;
4140 s->current_picture.reference= h->nal_ref_idc != 0;
4141 s->dropable= h->nal_ref_idc == 0;
4143 first_mb_in_slice= get_ue_golomb(&s->gb);
4145 slice_type= get_ue_golomb(&s->gb);
4147 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4152 h->slice_type_fixed=1;
4154 h->slice_type_fixed=0;
4156 slice_type= slice_type_map[ slice_type ];
4157 if (slice_type == I_TYPE
4158 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4159 default_ref_list_done = 1;
4161 h->slice_type= slice_type;
4163 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4165 pps_id= get_ue_golomb(&s->gb);
4167 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4170 h->pps= h->pps_buffer[pps_id];
4171 if(h->pps.slice_group_count == 0){
4172 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4176 h->sps= h->sps_buffer[ h->pps.sps_id ];
4177 if(h->sps.log2_max_frame_num == 0){
4178 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4182 s->mb_width= h->sps.mb_width;
4183 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4185 h->b_stride= s->mb_width*4 + 1;
4186 h->b8_stride= s->mb_width*2 + 1;
4188 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4189 if(h->sps.frame_mbs_only_flag)
4190 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4192 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4194 if (s->context_initialized
4195 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4199 if (!s->context_initialized) {
4200 if (MPV_common_init(s) < 0)
4203 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4204 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4205 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4208 for(i=0; i<16; i++){
4209 #define T(x) (x>>2) | ((x<<2) & 0xF)
4210 h->zigzag_scan[i] = T(zigzag_scan[i]);
4211 h-> field_scan[i] = T( field_scan[i]);
4214 if(h->sps.transform_bypass){ //FIXME same ugly
4215 h->zigzag_scan_q0 = zigzag_scan;
4216 h->field_scan_q0 = field_scan;
4218 h->zigzag_scan_q0 = h->zigzag_scan;
4219 h->field_scan_q0 = h->field_scan;
4224 s->avctx->width = s->width;
4225 s->avctx->height = s->height;
4226 s->avctx->sample_aspect_ratio= h->sps.sar;
4227 if(!s->avctx->sample_aspect_ratio.den)
4228 s->avctx->sample_aspect_ratio.den = 1;
4230 if(h->sps.timing_info_present_flag){
4231 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4235 if(h->slice_num == 0){
4239 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4240 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4242 h->mb_aff_frame = 0;
4243 if(h->sps.frame_mbs_only_flag){
4244 s->picture_structure= PICT_FRAME;
4246 if(get_bits1(&s->gb)) { //field_pic_flag
4247 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4249 s->picture_structure= PICT_FRAME;
4250 first_mb_in_slice <<= h->sps.mb_aff;
4251 h->mb_aff_frame = h->sps.mb_aff;
4255 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4256 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4257 if(s->mb_y >= s->mb_height){
4261 if(s->picture_structure==PICT_FRAME){
4262 h->curr_pic_num= h->frame_num;
4263 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4265 h->curr_pic_num= 2*h->frame_num;
4266 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4269 if(h->nal_unit_type == NAL_IDR_SLICE){
4270 get_ue_golomb(&s->gb); /* idr_pic_id */
4273 if(h->sps.poc_type==0){
4274 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4276 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4277 h->delta_poc_bottom= get_se_golomb(&s->gb);
4281 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4282 h->delta_poc[0]= get_se_golomb(&s->gb);
4284 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4285 h->delta_poc[1]= get_se_golomb(&s->gb);
4290 if(h->pps.redundant_pic_cnt_present){
4291 h->redundant_pic_count= get_ue_golomb(&s->gb);
4294 //set defaults, might be overriden a few line later
4295 h->ref_count[0]= h->pps.ref_count[0];
4296 h->ref_count[1]= h->pps.ref_count[1];
4298 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4299 if(h->slice_type == B_TYPE){
4300 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4302 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4304 if(num_ref_idx_active_override_flag){
4305 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4306 if(h->slice_type==B_TYPE)
4307 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4309 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4310 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4316 if(!default_ref_list_done){
4317 fill_default_ref_list(h);
4320 if(decode_ref_pic_list_reordering(h) < 0)
4323 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4324 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4325 pred_weight_table(h);
4326 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4327 implicit_weight_table(h);
4331 if(s->current_picture.reference)
4332 decode_ref_pic_marking(h);
4334 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4335 h->cabac_init_idc = get_ue_golomb(&s->gb);
4337 h->last_qscale_diff = 0;
4338 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4339 if(s->qscale<0 || s->qscale>51){
4340 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4343 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4344 //FIXME qscale / qp ... stuff
4345 if(h->slice_type == SP_TYPE){
4346 get_bits1(&s->gb); /* sp_for_switch_flag */
4348 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4349 get_se_golomb(&s->gb); /* slice_qs_delta */
4352 h->deblocking_filter = 1;
4353 h->slice_alpha_c0_offset = 0;
4354 h->slice_beta_offset = 0;
4355 if( h->pps.deblocking_filter_parameters_present ) {
4356 h->deblocking_filter= get_ue_golomb(&s->gb);
4357 if(h->deblocking_filter < 2)
4358 h->deblocking_filter^= 1; // 1<->0
4360 if( h->deblocking_filter ) {
4361 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4362 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4365 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4366 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4367 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4368 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4369 h->deblocking_filter= 0;
4372 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4373 slice_group_change_cycle= get_bits(&s->gb, ?);
4378 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4379 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4381 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4383 av_get_pict_type_char(h->slice_type),
4384 pps_id, h->frame_num,
4385 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4386 h->ref_count[0], h->ref_count[1],
4388 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4390 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
// Reads a unary-coded level_prefix from the bitstream: the run of leading
// zero bits before the terminating 1 (CAVLC coefficient-level decoding).
// NOTE(review): this extract is missing lines — the declarations of `buf`
// and `log` and the trailing `return` (presumably `return log-1;`) were
// dropped between the visible lines 4415 and 4420. Confirm against the
// complete source before relying on this body.
4400 static inline int get_level_prefix(GetBitContext *gb){
4404 OPEN_READER(re, gb);
4405 UPDATE_CACHE(re, gb);
4406 buf=GET_CACHE(re, gb);
// Position of the leading 1 bit in the cached 32-bit word.
4408 log= 32 - av_log2(buf);
4410 print_bin(buf>>(32-log), log);
4411 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix bits (zeros plus the terminating 1).
4414 LAST_SKIP_BITS(re, gb, log);
4415 CLOSE_READER(re, gb);
// Decides whether the 8x8 transform may be used for the current macroblock:
// it is disallowed if any sub-partition is smaller than 8x8, or is a direct
// sub-partition while direct_8x8_inference_flag is unset.
// NOTE(review): the surrounding loop over `i` and the return statements are
// missing from this extract (gap before line 4423) — verify against the
// complete source.
4420 static inline int get_dct8x8_allowed(H264Context *h){
4423 if(!IS_SUB_8X8(h->sub_mb_type[i])
4424 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4431 * decodes a residual block.
4432 * @param n block index
4433 * @param scantable scantable
4434 * @param max_coeff number of coefficients in the block
4435 * @return <0 if an error occurred
// CAVLC residual-block decoder: reads coeff_token (trailing ones + total
// coefficients), the coefficient levels, total_zeros and the per-coefficient
// zero runs, then scatters the levels into `block` via `scantable`,
// dequantizing with `qmul`.
// NOTE(review): this extract has dropped interior lines (else-branches,
// closing braces, and what appear to be stripped #if/#else pairs — e.g. the
// two suffix_length-update variants at 4506/4508, the second of which has
// unbalanced parentheses as extracted). Do not treat this body as compilable.
4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
4438 MpegEncContext * const s = &h->s;
// Maps total_coeff of the neighborhood prediction to one of 4 coeff_token VLC tables.
4439 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4440 int level[16], run[16];
4441 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
4443 //FIXME put trailing_onex into the context
// Chroma DC uses its own small VLC; luma DC and regular blocks pick a table
// from the predicted non-zero count of the neighbors.
4445 if(n == CHROMA_DC_BLOCK_INDEX){
4446 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4447 total_coeff= coeff_token>>2;
4449 if(n == LUMA_DC_BLOCK_INDEX){
4450 total_coeff= pred_non_zero_count(h, 0);
4451 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4452 total_coeff= coeff_token>>2;
4454 total_coeff= pred_non_zero_count(h, n);
4455 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4456 total_coeff= coeff_token>>2;
4457 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4461 //FIXME set last_non_zero?
// Low two bits of coeff_token carry the trailing-ones count (sign-only levels).
4466 trailing_ones= coeff_token&3;
4467 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4468 assert(total_coeff<=16);
// Trailing ones: one sign bit each, magnitude fixed at 1.
4470 for(i=0; i<trailing_ones; i++){
4471 level[i]= 1 - 2*get_bits1(gb);
// Initial suffix length per the spec: 1 iff more than 10 coeffs and fewer
// than 3 trailing ones, else 0.
4474 suffix_length= total_coeff > 10 && trailing_ones < 3;
4476 for(; i<total_coeff; i++){
4477 const int prefix= get_level_prefix(gb);
4478 int level_code, mask;
4480 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4482 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4484 level_code= (prefix<<suffix_length); //part
4485 }else if(prefix==14){
4487 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4489 level_code= prefix + get_bits(gb, 4); //part
4490 }else if(prefix==15){
4491 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4492 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4494 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4498 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
// Zig-zag map level_code -> signed level: even codes positive, odd negative.
4500 mask= -(level_code&1);
4501 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4503 if(suffix_length==0) suffix_length=1; //FIXME split first iteration
// NOTE(review): 4506 and 4508 look like two alternatives of a stripped
// #if/#else; 4508 is syntactically broken as extracted.
4506 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4508 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4509 /* ? == prefix > 2 or sth */
4511 tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
// If the block is full there can be no zeros between coefficients.
4514 if(total_coeff == max_coeff)
4517 if(n == CHROMA_DC_BLOCK_INDEX)
4518 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4520 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Decode run_before for each coefficient; a smaller VLC is used once fewer
// than 7 zeros remain.
4523 for(i=0; i<total_coeff-1; i++){
4526 else if(zeros_left < 7){
4527 run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4529 run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4531 zeros_left -= run[i];
4535 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4539 for(; i<total_coeff-1; i++){
// Scatter pass, from the highest-frequency coefficient backwards.
// NOTE(review): 4547-4551 and 4556-4563 look like two variants (with/without
// dequant) of another stripped #if/#else.
4547 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4550 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4551 j= scantable[ coeff_num ];
4556 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4559 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4560 j= scantable[ coeff_num ];
4562 block[j]= level[i] * qmul[j];
4563 // printf("%d %d ", block[j], qmul[j]);
4570 * decodes a P_SKIP or B_SKIP macroblock
// Fills caches/motion for a skipped MB (no residual, no coded bits besides
// the optional MBAFF field flag), then writes the motion and MB type back.
// NOTE(review): interior lines are missing from this extract (e.g. the
// declarations of mb_type/mx/my and several closing braces).
4572 static void decode_mb_skip(H264Context *h){
4573 MpegEncContext * const s = &h->s;
4574 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped MB has no non-zero coefficients.
4577 memset(h->non_zero_count[mb_xy], 0, 16);
4578 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// MBAFF: the field/frame decision is signalled on the top MB of a pair.
4580 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4581 h->mb_field_decoding_flag= get_bits1(&s->gb);
4583 if(h->mb_field_decoding_flag)
4584 mb_type|= MB_TYPE_INTERLACED;
4586 if( h->slice_type == B_TYPE )
4588 // just for fill_caches. pred_direct_motion will set the real mb_type
4589 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4591 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4592 pred_direct_motion(h, &mb_type);
4594 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4595 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// P_SKIP path: predicted motion with reference index 0.
4601 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4603 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4604 pred_pskip_motion(h, &mx, &my);
4605 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4606 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4608 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4611 write_back_motion(h, mb_type);
4612 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4613 s->current_picture.qscale_table[mb_xy]= s->qscale;
4614 h->slice_table[ mb_xy ]= h->slice_num;
4615 h->prev_mb_skipped= 1;
4619 * decodes a macroblock
4620 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Main CAVLC macroblock decoder: handles skip runs, mb_type, intra PCM,
// intra prediction modes, inter sub-partitions and motion vectors, CBP,
// dquant, and finally the residual blocks for luma and chroma.
// NOTE(review): many interior lines (else-branches, closing braces, error
// returns) are missing from this extract; treat it as a lossy view, not as
// compilable code.
4622 static int decode_mb_cavlc(H264Context *h){
4623 MpegEncContext * const s = &h->s;
4624 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4625 int mb_type, partition_count, cbp;
4626 int dct8x8_allowed= h->pps.transform_8x8_mode;
4628 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4630 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4631 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// Inter slices: consume the current mb_skip_run before reading a new mb_type.
4633 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4634 if(s->mb_skip_run==-1)
4635 s->mb_skip_run= get_ue_golomb(&s->gb);
4637 if (s->mb_skip_run--) {
4642 if(h->mb_aff_frame){
4643 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4644 h->mb_field_decoding_flag = get_bits1(&s->gb);
4646 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4648 h->prev_mb_skipped= 0;
// Map the coded mb_type through the per-slice-type info tables; values past
// the inter range fall through to the intra decoder via decode_intra_mb.
4650 mb_type= get_ue_golomb(&s->gb);
4651 if(h->slice_type == B_TYPE){
4653 partition_count= b_mb_type_info[mb_type].partition_count;
4654 mb_type= b_mb_type_info[mb_type].type;
4657 goto decode_intra_mb;
4659 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4661 partition_count= p_mb_type_info[mb_type].partition_count;
4662 mb_type= p_mb_type_info[mb_type].type;
4665 goto decode_intra_mb;
4668 assert(h->slice_type == I_TYPE);
4671 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4675 cbp= i_mb_type_info[mb_type].cbp;
4676 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4677 mb_type= i_mb_type_info[mb_type].type;
4680 if(h->mb_field_decoding_flag)
4681 mb_type |= MB_TYPE_INTERLACED;
4683 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: raw 8-bit samples follow, byte-aligned, in h->mb layout order.
4685 if(IS_INTRA_PCM(mb_type)){
4688 // we assume these blocks are very rare so we don't optimize it
4689 align_get_bits(&s->gb);
4691 // The pixels are stored in the same order as levels in h->mb array.
4692 for(y=0; y<16; y++){
4693 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4694 for(x=0; x<16; x++){
4695 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4696 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4700 const int index= 256 + 4*(y&3) + 32*(y>>2);
4702 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4703 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4707 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4709 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4710 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4714 // In deblocking, the quantizer is 0
4715 s->current_picture.qscale_table[mb_xy]= 0;
4716 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4717 // All coeffs are present
4718 memset(h->non_zero_count[mb_xy], 16, 16);
4720 s->current_picture.mb_type[mb_xy]= mb_type;
4724 fill_caches(h, mb_type, 0);
// Intra path: read per-4x4 prediction modes (or validate the 16x16 mode),
// then the chroma prediction mode.
4727 if(IS_INTRA(mb_type)){
4728 // init_top_left_availability(h);
4729 if(IS_INTRA4x4(mb_type)){
4732 if(dct8x8_allowed && get_bits1(&s->gb)){
4733 mb_type |= MB_TYPE_8x8DCT;
4737 // fill_intra4x4_pred_table(h);
4738 for(i=0; i<16; i+=di){
4739 const int mode_coded= !get_bits1(&s->gb);
4740 const int predicted_mode= pred_intra_mode(h, i);
4744 const int rem_mode= get_bits(&s->gb, 3);
4745 if(rem_mode<predicted_mode)
4750 mode= predicted_mode;
4754 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4756 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4758 write_back_intra_pred_mode(h);
4759 if( check_intra4x4_pred_mode(h) < 0)
4762 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4763 if(h->intra16x16_pred_mode < 0)
4766 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4768 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4769 if(h->chroma_pred_mode < 0)
// 8x8 partitions: read four sub_mb_types, then per-list reference indices
// and motion vector differences for each sub-partition.
4771 }else if(partition_count==4){
4772 int i, j, sub_partition_count[4], list, ref[2][4];
4774 if(h->slice_type == B_TYPE){
4776 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4777 if(h->sub_mb_type[i] >=13){
4778 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4781 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4782 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4784 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4785 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4786 pred_direct_motion(h, &mb_type);
4788 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4790 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4791 if(h->sub_mb_type[i] >=4){
4792 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4795 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4796 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4800 for(list=0; list<2; list++){
4801 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4802 if(ref_count == 0) continue;
4803 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4807 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4808 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4809 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4818 dct8x8_allowed = get_dct8x8_allowed(h);
4820 for(list=0; list<2; list++){
4821 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4822 if(ref_count == 0) continue;
4825 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4826 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4827 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4829 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4830 const int sub_mb_type= h->sub_mb_type[i];
4831 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4832 for(j=0; j<sub_partition_count[i]; j++){
4834 const int index= 4*i + block_width*j;
4835 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4836 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4837 mx += get_se_golomb(&s->gb);
4838 my += get_se_golomb(&s->gb);
4839 tprintf("final mv:%d %d\n", mx, my);
// Replicate the decoded MV across the cache cells covered by the sub-block.
4841 if(IS_SUB_8X8(sub_mb_type)){
4842 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4843 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4844 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4845 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4846 }else if(IS_SUB_8X4(sub_mb_type)){
4847 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4848 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4849 }else if(IS_SUB_4X8(sub_mb_type)){
4850 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4851 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4853 assert(IS_SUB_4X4(sub_mb_type));
4854 mv_cache[ 0 ][0]= mx;
4855 mv_cache[ 0 ][1]= my;
4859 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4865 }else if(IS_DIRECT(mb_type)){
4866 pred_direct_motion(h, &mb_type);
4867 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// 16x16 / 16x8 / 8x16 partitions: per-list reference index and MVD.
4869 int list, mx, my, i;
4870 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4871 if(IS_16X16(mb_type)){
4872 for(list=0; list<2; list++){
4873 if(h->ref_count[list]>0){
4874 if(IS_DIR(mb_type, 0, list)){
4875 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4876 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4878 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4881 for(list=0; list<2; list++){
4882 if(IS_DIR(mb_type, 0, list)){
4883 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4884 mx += get_se_golomb(&s->gb);
4885 my += get_se_golomb(&s->gb);
4886 tprintf("final mv:%d %d\n", mx, my);
4888 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4890 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4893 else if(IS_16X8(mb_type)){
4894 for(list=0; list<2; list++){
4895 if(h->ref_count[list]>0){
4897 if(IS_DIR(mb_type, i, list)){
4898 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4899 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4901 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4905 for(list=0; list<2; list++){
4907 if(IS_DIR(mb_type, i, list)){
4908 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4909 mx += get_se_golomb(&s->gb);
4910 my += get_se_golomb(&s->gb);
4911 tprintf("final mv:%d %d\n", mx, my);
4913 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4915 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
4919 assert(IS_8X16(mb_type));
4920 for(list=0; list<2; list++){
4921 if(h->ref_count[list]>0){
4923 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4924 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4925 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4927 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
4931 for(list=0; list<2; list++){
4933 if(IS_DIR(mb_type, i, list)){
4934 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4935 mx += get_se_golomb(&s->gb);
4936 my += get_se_golomb(&s->gb);
4937 tprintf("final mv:%d %d\n", mx, my);
4939 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
4941 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
4947 if(IS_INTER(mb_type))
4948 write_back_motion(h, mb_type);
// Coded block pattern (explicit for non-I16x16; table-derived otherwise).
4950 if(!IS_INTRA16x16(mb_type)){
4951 cbp= get_ue_golomb(&s->gb);
4953 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
4957 if(IS_INTRA4x4(mb_type))
4958 cbp= golomb_to_intra4x4_cbp[cbp];
4960 cbp= golomb_to_inter_cbp[cbp];
4963 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4964 if(get_bits1(&s->gb))
4965 mb_type |= MB_TYPE_8x8DCT;
4967 s->current_picture.mb_type[mb_xy]= mb_type;
// Residual decoding: dquant, then DC/AC luma blocks and chroma blocks.
4969 if(cbp || IS_INTRA16x16(mb_type)){
4970 int i8x8, i4x4, chroma_idx;
4971 int chroma_qp, dquant;
4972 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4973 const uint8_t *scan, *dc_scan;
4975 // fill_non_zero_count_cache(h);
4977 if(IS_INTERLACED(mb_type)){
4978 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4979 dc_scan= luma_dc_field_scan;
4981 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4982 dc_scan= luma_dc_zigzag_scan;
4985 dquant= get_se_golomb(&s->gb);
4987 if( dquant > 25 || dquant < -26 ){
4988 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4992 s->qscale += dquant;
4993 if(((unsigned)s->qscale) > 51){
4994 if(s->qscale<0) s->qscale+= 52;
4995 else s->qscale-= 52;
4998 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4999 if(IS_INTRA16x16(mb_type)){
5000 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){
5001 return -1; //FIXME continue if partitioned and other return -1 too
5004 assert((cbp&15) == 0 || (cbp&15) == 15);
5007 for(i8x8=0; i8x8<4; i8x8++){
5008 for(i4x4=0; i4x4<4; i4x4++){
5009 const int index= i4x4 + 4*i8x8;
5010 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){
5016 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5019 for(i8x8=0; i8x8<4; i8x8++){
5020 if(cbp & (1<<i8x8)){
5021 if(IS_8x8DCT(mb_type)){
5022 DCTELEM *buf = &h->mb[64*i8x8];
5024 for(i4x4=0; i4x4<4; i4x4++){
5025 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5026 h->dequant8_coeff[s->qscale], 16) <0 )
5032 buf[i] = (buf[i] + 2) >> 2;
5034 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5035 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5037 for(i4x4=0; i4x4<4; i4x4++){
5038 const int index= i4x4 + 4*i8x8;
5040 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){
5046 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5047 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5053 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5054 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){
5060 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5061 for(i4x4=0; i4x4<4; i4x4++){
5062 const int index= 16 + 4*chroma_idx + i4x4;
5063 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){
5069 uint8_t * const nnz= &h->non_zero_count_cache[0];
5070 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5071 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5074 uint8_t * const nnz= &h->non_zero_count_cache[0];
5075 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5076 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5077 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5079 s->current_picture.qscale_table[mb_xy]= s->qscale;
5080 write_back_non_zero_count(h);
// CABAC mb_field_decoding_flag (MBAFF): context is the count of interlaced
// neighbors (left MB pair and the MB pair above).
// NOTE(review): the ctx increments inside the two if-bodies and the closing
// braces are missing from this extract.
5085 static int decode_cabac_field_decoding_flag(H264Context *h) {
5086 MpegEncContext * const s = &h->s;
5087 const int mb_x = s->mb_x;
// Top MB of the current pair (MBAFF pairs are vertically adjacent rows).
5088 const int mb_y = s->mb_y & ~1;
5089 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5090 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5092 unsigned int ctx = 0;
5094 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5097 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5101 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC intra mb_type decoder shared by I, P and B slices (ctx_base selects
// the state offset; intra_slice enables the neighbor-derived context).
// Returns 0 for I_4x4, 25 for I_PCM, otherwise an I_16x16 variant encoding
// cbp_luma/cbp_chroma/pred_mode into the table index.
5104 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5105 uint8_t *state= &h->cabac_state[ctx_base];
5109 MpegEncContext * const s = &h->s;
5110 const int mba_xy = h->left_mb_xy[0];
5111 const int mbb_xy = h->top_mb_xy;
// ctx = number of non-I4x4 neighbors in the same slice.
5113 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5115 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5117 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5118 return 0; /* I4x4 */
5121 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5122 return 0; /* I4x4 */
5125 if( get_cabac_terminate( &h->cabac ) )
5126 return 25; /* PCM */
5128 mb_type = 1; /* I16x16 */
5129 if( get_cabac( &h->cabac, &state[1] ) )
5130 mb_type += 12; /* cbp_luma != 0 */
5132 if( get_cabac( &h->cabac, &state[2] ) ) {
5133 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5134 mb_type += 4 * 2; /* cbp_chroma == 2 */
5136 mb_type += 4 * 1; /* cbp_chroma == 1 */
5138 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5140 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
// CABAC mb_type decoder dispatching on slice type: I slices go straight to
// the intra decoder; P slices decode a 3-bit tree or fall through to intra;
// B slices decode a variable-length tree over cabac_state[27..32].
5145 static int decode_cabac_mb_type( H264Context *h ) {
5146 MpegEncContext * const s = &h->s;
5148 if( h->slice_type == I_TYPE ) {
5149 return decode_cabac_intra_mb_type(h, 3, 1);
5150 } else if( h->slice_type == P_TYPE ) {
5151 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5153 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5154 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5155 return 0; /* P_L0_D16x16; */
5157 return 3; /* P_8x8; */
5159 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5160 return 2; /* P_L0_D8x16; */
5162 return 1; /* P_L0_D16x8; */
// Intra MB inside a P slice; +5 shifts past the 5 P-inter types.
5165 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5167 } else if( h->slice_type == B_TYPE ) {
5168 const int mba_xy = h->left_mb_xy[0];
5169 const int mbb_xy = h->top_mb_xy;
// ctx = number of neighbors that are neither skip nor direct.
5173 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5174 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5176 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5177 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5180 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5181 return 0; /* B_Direct_16x16 */
5183 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5184 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5187 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5188 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5189 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5190 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5192 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5193 else if( bits == 13 ) {
5194 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5195 } else if( bits == 14 )
5196 return 11; /* B_L1_L0_8x16 */
5197 else if( bits == 15 )
5198 return 22; /* B_8x8 */
// Extend by one more bit for the remaining bi-predictive types.
5200 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5201 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5203 /* TODO SI/SP frames? */
// CABAC mb_skip_flag: context is the count of non-skipped neighbors;
// P/SP slices use states 11..13, B slices 24..26.
5208 static int decode_cabac_mb_skip( H264Context *h) {
5209 MpegEncContext * const s = &h->s;
5210 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5211 const int mba_xy = mb_xy - 1;
5212 const int mbb_xy = mb_xy - s->mb_stride;
5215 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5217 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5220 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5221 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5223 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
// CABAC intra 4x4 prediction mode: one "use predicted mode" bit (state 68),
// else a 3-bit rem_intra4x4_pred_mode from state 69, adjusted so the
// predicted mode is skipped in the numbering.
// NOTE(review): the early return for the predicted-mode case and the mode
// accumulation lines are missing from this extract.
5226 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5229 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5232 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5234 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5236 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
// Modes >= the predicted one are shifted up by one.
5238 if( mode >= pred_mode )
// CABAC intra chroma prediction mode: context from neighbors' stored chroma
// modes, then a truncated-unary code (max 3) over states 64..67.
5244 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5245 const int mba_xy = h->left_mb_xy[0];
5246 const int mbb_xy = h->top_mb_xy;
5250 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5251 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5254 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5257 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5260 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5262 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// 4x4-block index helpers: x/y coordinate of each of the 16 luma 4x4 blocks
// in decoding order, and the inverse (x,y) -> block-index map.
// NOTE(review): the row initializers of block_idx_xy are missing from this
// extract (gap after line 5274).
5268 static const uint8_t block_idx_x[16] = {
5269 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5271 static const uint8_t block_idx_y[16] = {
5272 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5274 static const uint8_t block_idx_xy[4][4] = {
// CABAC luma coded_block_pattern: one bin per 8x8 block, context built from
// whether the left/top neighboring 8x8 blocks (possibly in neighboring MBs
// via left_cbp/top_cbp) were coded.
// NOTE(review): several lines are missing from this extract (the ctx reset,
// the cbp accumulation inside the final if, and closing braces).
5281 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5282 MpegEncContext * const s = &h->s;
5287 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5293 x = block_idx_x[4*i8x8];
5294 y = block_idx_y[4*i8x8];
// x==0: the left neighbor is the previous MB's cbp (if in the same slice).
5298 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5299 cbp_a = h->left_cbp;
5300 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5305 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5307 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5310 /* No need to test for skip as we put 0 for skip block */
5311 /* No need to test for IPCM as we put 1 for IPCM block */
5313 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5314 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5319 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5320 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5324 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
// CABAC chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC).
// Two bins over states 77..84, each contextualized by the neighbors'
// chroma cbp taken from left_cbp/top_cbp bits 4-5.
5330 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5334 cbp_a = (h->left_cbp>>4)&0x03;
5335 cbp_b = (h-> top_cbp>>4)&0x03;
5338 if( cbp_a > 0 ) ctx++;
5339 if( cbp_b > 0 ) ctx += 2;
5340 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: distinguish DC-only (1) from DC+AC (2), ctx from neighbors == 2.
5344 if( cbp_a == 2 ) ctx++;
5345 if( cbp_b == 2 ) ctx += 2;
5346 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC mb_qp_delta: unary code over states 60..63, then mapped to a signed
// delta (even counts positive, odd negative).
// NOTE(review): the ctx/val initialization, the ctx progression inside the
// loop, and the positive-return branch are missing from this extract.
5348 static int decode_cabac_mb_dqp( H264Context *h) {
5349 MpegEncContext * const s = &h->s;
// Previous MB in decoding order (wraps to the end of the previous row).
5355 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5357 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
// First-bin context depends on whether the previous MB had a non-zero dqp
// and coded coefficients.
5359 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5362 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5368 if(val > 52) //prevent infinite loop
5375 return -(val + 1)/2;
// CABAC sub_mb_type for P slices: small binary tree over states 21..23.
// NOTE(review): the leaf return values are missing from this extract.
5377 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5378 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5380 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5382 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes sub_mb_type for a B-slice 8x8 partition (cabac_state[36..39]).
 * Returns an index into b_sub_mb_type_info; the binarization tree below
 * follows the spec's sub_mb_type table for B slices.
 */
5386 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5388 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5389 return 0; /* B_Direct_8x8 */
5390 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5391 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5393 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5394 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5395 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types are built from two suffix bins on context 39 */
5398 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5399 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag (1 = this MB uses the 8x8 transform).
 * Context is 399 plus the number of neighbouring MBs already using 8x8
 * (h->neighbor_transform_size, maintained elsewhere).
 */
5403 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5404 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of reference list `list` (cabac_state[54+ctx]).
 * Context derivation looks at the left (scan8[n]-1) and top (scan8[n]-8)
 * neighbours' ref indices; in B slices a neighbour coded as direct does not
 * contribute to the context.
 */
5407 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5408 int refa = h->ref_cache[list][scan8[n] - 1];
5409 int refb = h->ref_cache[list][scan8[n] - 8];
5413 if( h->slice_type == B_TYPE) {
5414 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5416 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
/* unary binarization: count continuation bins */
5425 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one motion-vector-difference component (l: 0=x, 1=y) for block n.
 * UEG3 binarization: prefix bins are context-coded (base 40 for x, 47 for y;
 * first-bin context from the sum of neighbouring |mvd|), suffix is a 3rd-order
 * Exp-Golomb coded in bypass mode, followed by a bypass sign bit.
 */
5435 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* amvd = |mvd_left| + |mvd_top| selects the first-bin context (thresholds 2 and 32) */
5436 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5437 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5438 int ctxbase = (l == 0) ? 40 : 47;
5443 else if( amvd > 32 )
5448 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, capped at 9 before switching to the Exp-Golomb suffix */
5453 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded Exp-Golomb(3) suffix */
5461 while( get_cabac_bypass( &h->cabac ) ) {
5466 if( get_cabac_bypass( &h->cabac ) )
/* bypass sign bit: 1 -> negative */
5470 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Computes the coded_block_flag context for block category `cat` and block
 * index `idx`: ctx = (left nz ? 1:0) + (top nz ? 2:0) + 4*cat.
 * The "nz" source depends on the category: luma DC uses bit 8 of the stored
 * cbp, 4x4 luma and chroma AC use the non_zero_count cache, chroma DC uses
 * per-component bits 6+idx of the cbp.
 */
5474 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* cat 0: luma DC — nz flag lives in bit 0x100 of the neighbour's cbp */
5479 nza = h->left_cbp&0x100;
5480 nzb = h-> top_cbp&0x100;
5481 } else if( cat == 1 || cat == 2 ) {
/* cat 1/2: luma 4x4 (AC or full) — use the nnz cache of left/top 4x4 blocks */
5482 nza = h->non_zero_count_cache[scan8[idx] - 1];
5483 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5484 } else if( cat == 3 ) {
/* cat 3: chroma DC — per-component flag in bits 6 (Cb) / 7 (Cr) of cbp */
5485 nza = (h->left_cbp>>(6+idx))&0x01;
5486 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* cat 4: chroma AC — nnz cache, chroma blocks start at scan8[16] */
5489 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5490 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* 4 contexts per category */
5499 return ctx + 4 * cat;
/**
 * Decodes one CABAC residual block into `block`.
 * Steps: (1) coded_block_flag (skipped for 8x8 luma, cat 5, which has no cbf),
 * (2) significance map (significant_coeff_flag / last_significant_coeff_flag),
 * (3) coefficient levels (coeff_abs_level_minus1, context-coded up to 14 then
 * bypass Exp-Golomb(0) escape) and bypass sign bits, dequantizing via `qmul`.
 * Also updates the non_zero_count cache / cbp_table nz bookkeeping.
 * Returns <0 on error (error paths are in elided lines — confirm).
 */
5502 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
5503 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* separate context banks for frame (105/166) and field (277/338) coding */
5504 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5505 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5506 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5507 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5508 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* 4x4 blocks: scan position maps 1:1 to context index */
5509 static const int identity[15] = {
5510 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
/* 8x8 blocks: spec-defined many-to-one mapping from scan position to context */
5512 static const int significant_coeff_flag_offset_8x8[63] = {
5513 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5514 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5515 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5516 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5518 static const int last_coeff_flag_offset_8x8[63] = {
5519 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5520 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5521 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5522 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5528 int coeff_count = 0;
5531 int abslevelgt1 = 0;
5533 const int* significant_coeff_ctx_offset;
5534 const int* last_coeff_ctx_offset;
5535 const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat]
5536 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5537 const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat]
5538 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5540 /* cat: 0-> DC 16x16 n = 0
5541 * 1-> AC 16x16 n = luma4x4idx
5542 * 2-> Luma4x4 n = luma4x4idx
5543 * 3-> DC Chroma n = iCbCr
5544 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5545 * 5-> Luma8x8 n = 4 * luma8x8idx
5548 /* read coded block flag */
5550 significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8;
5551 last_coeff_ctx_offset = last_coeff_flag_offset_8x8;
/* cbf==0: no coefficients in this block — record zero nnz and return early */
5553 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5554 if( cat == 1 || cat == 2 )
5555 h->non_zero_count_cache[scan8[n]] = 0;
5557 h->non_zero_count_cache[scan8[16+n]] = 0;
5562 significant_coeff_ctx_offset =
5563 last_coeff_ctx_offset = identity;
/* significance map: one sig flag per scan position, last flag ends the map */
5566 for(last= 0; last < max_coeff - 1; last++) {
5567 int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last];
5568 if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) {
5569 int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last];
5570 index[coeff_count++] = last;
5571 if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) {
/* final scan position has no sig/last flags; cbf guaranteed >=1 coeff */
5577 if( last == max_coeff -1 ) {
5578 index[coeff_count++] = last;
5580 assert(coeff_count > 0);
/* record nnz per category: luma DC flag, nnz cache, or chroma DC flag */
5583 h->cbp_table[mb_xy] |= 0x100;
5584 else if( cat == 1 || cat == 2 )
5585 h->non_zero_count_cache[scan8[n]] = coeff_count;
5587 h->cbp_table[mb_xy] |= 0x40 << n;
5589 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
/* cat 5 (8x8): mark the whole 2x2 group of 4x4 nnz entries */
5592 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* levels are decoded in reverse scan order per the spec */
5595 for( i = coeff_count - 1; i >= 0; i-- ) {
/* once any level >1 was seen, ctx 0; else ctx grows with the count of 1s */
5596 int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat];
5597 int j= scantable[index[i]];
5599 if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) {
/* |level|==1: DC categories are not dequantized here, others scale by qmul */
5600 if( cat == 0 || cat == 3 ) {
5601 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5604 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
5605 else block[j] = qmul[j];
/* |level|>1: context-coded increments up to 14, then bypass EG0 escape */
5611 ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
5612 while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) {
5616 if( coeff_abs >= 15 ) {
5618 while( get_cabac_bypass( &h->cabac ) ) {
5619 coeff_abs += 1 << j;
5624 if( get_cabac_bypass( &h->cabac ) )
5625 coeff_abs += 1 << j ;
/* bypass sign bit, then store (dequantized unless DC category) */
5629 if( cat == 0 || cat == 3 ) {
5630 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5631 else block[j] = coeff_abs;
5633 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
5634 else block[j] = coeff_abs * qmul[j];
/**
 * Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * In a plain frame these are simply mb_xy - mb_stride and mb_xy - 1; in an
 * MBAFF frame the neighbour addresses must be adjusted when the current MB
 * pair and the neighbouring pair differ in frame/field coding.
 */
5643 void inline compute_mb_neighboors(H264Context *h)
5645 MpegEncContext * const s = &h->s;
5646 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5647 h->top_mb_xy = mb_xy - s->mb_stride;
5648 h->left_mb_xy[0] = mb_xy - 1;
5649 if(h->mb_aff_frame){
/* pair_xy: address of the top MB of the current MB pair */
5650 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5651 const int top_pair_xy = pair_xy - s->mb_stride;
5652 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5653 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5654 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5655 const int bottom = (s->mb_y & 1);
/* move the top neighbour up one extra row depending on frame/field mix */
5657 ? !curr_mb_frame_flag // bottom macroblock
5658 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5660 h->top_mb_xy -= s->mb_stride;
/* mixed-coding left pair: left neighbour is the pair's top MB */
5662 if (left_mb_frame_flag != curr_mb_frame_flag) {
5663 h->left_mb_xy[0] = pair_xy - 1;
5670 * decodes a macroblock
5671 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * Top-level CABAC macroblock decode: skip flag, MBAFF field flag, mb_type,
 * intra prediction modes or inter refs/MVs, cbp, transform size, qp delta,
 * and finally all residual blocks via decode_cabac_residual().
 */
5673 static int decode_mb_cabac(H264Context *h) {
5674 MpegEncContext * const s = &h->s;
5675 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5676 int mb_type, partition_count, cbp = 0;
5677 int dct8x8_allowed= h->pps.transform_8x8_mode;
5679 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5681 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* skip flag exists only in P/B (and SP) slices, never in I/SI */
5682 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5683 /* read skip flags */
5684 if( decode_cabac_mb_skip( h ) ) {
/* skipped MB: clear per-MB state used by neighbours' context derivation */
5687 h->cbp_table[mb_xy] = 0;
5688 h->chroma_pred_mode_table[mb_xy] = 0;
5689 h->last_qscale_diff = 0;
/* MBAFF: the field flag is coded once per MB pair (top MB or after a skip) */
5695 if(h->mb_aff_frame){
5696 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5697 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5699 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5701 h->prev_mb_skipped = 0;
5703 compute_mb_neighboors(h);
5704 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5705 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* map the raw mb_type index through the per-slice-type info tables */
5709 if( h->slice_type == B_TYPE ) {
5711 partition_count= b_mb_type_info[mb_type].partition_count;
5712 mb_type= b_mb_type_info[mb_type].type;
5715 goto decode_intra_mb;
5717 } else if( h->slice_type == P_TYPE ) {
5719 partition_count= p_mb_type_info[mb_type].partition_count;
5720 mb_type= p_mb_type_info[mb_type].type;
5723 goto decode_intra_mb;
5726 assert(h->slice_type == I_TYPE);
/* intra MB: cbp and intra16x16 pred mode are implied by the mb_type index */
5728 partition_count = 0;
5729 cbp= i_mb_type_info[mb_type].cbp;
5730 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5731 mb_type= i_mb_type_info[mb_type].type;
5733 if(h->mb_field_decoding_flag)
5734 mb_type |= MB_TYPE_INTERLACED;
5736 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: raw samples follow in the bytestream, bypassing CABAC entirely */
5738 if(IS_INTRA_PCM(mb_type)) {
5742 // We assume these blocks are very rare so we dont optimize it.
5743 // FIXME The two following lines get the bitstream position in the cabac
5744 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5745 ptr= h->cabac.bytestream;
5746 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5748 // The pixels are stored in the same order as levels in h->mb array.
5749 for(y=0; y<16; y++){
5750 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5751 for(x=0; x<16; x++){
5752 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5753 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5757 const int index= 256 + 4*(y&3) + 32*(y>>2);
5759 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5760 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5764 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5766 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5767 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC engine after the raw PCM bytes */
5771 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5773 // All blocks are present
5774 h->cbp_table[mb_xy] = 0x1ef;
5775 h->chroma_pred_mode_table[mb_xy] = 0;
5776 // In deblocking, the quantizer is 0
5777 s->current_picture.qscale_table[mb_xy]= 0;
5778 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5779 // All coeffs are present
5780 memset(h->non_zero_count[mb_xy], 16, 16);
5781 s->current_picture.mb_type[mb_xy]= mb_type;
5785 fill_caches(h, mb_type, 0);
5787 if( IS_INTRA( mb_type ) ) {
/* intra 4x4: optional 8x8 transform, then per-block prediction modes */
5789 if( IS_INTRA4x4( mb_type ) ) {
5790 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5791 mb_type |= MB_TYPE_8x8DCT;
/* with 8x8 DCT only 4 modes are coded; replicate each over a 2x2 group */
5792 for( i = 0; i < 16; i+=4 ) {
5793 int pred = pred_intra_mode( h, i );
5794 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5795 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5798 for( i = 0; i < 16; i++ ) {
5799 int pred = pred_intra_mode( h, i );
5800 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5802 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5805 write_back_intra_pred_mode(h);
5806 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5808 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5809 if( h->intra16x16_pred_mode < 0 ) return -1;
5811 h->chroma_pred_mode_table[mb_xy] =
5812 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5814 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5815 if( h->chroma_pred_mode < 0 ) return -1;
/* 8x8 partitions: sub_mb_types, then refs, then MVDs per sub-partition */
5816 } else if( partition_count == 4 ) {
5817 int i, j, sub_partition_count[4], list, ref[2][4];
5819 if( h->slice_type == B_TYPE ) {
5820 for( i = 0; i < 4; i++ ) {
5821 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5822 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5823 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5825 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5826 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5827 pred_direct_motion(h, &mb_type);
/* mark direct 8x8 blocks so ref-idx context derivation can skip them */
5828 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5829 for( i = 0; i < 4; i++ )
5830 if( IS_DIRECT(h->sub_mb_type[i]) )
5831 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5835 for( i = 0; i < 4; i++ ) {
5836 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5837 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5838 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices, one per used 8x8 block per list */
5842 for( list = 0; list < 2; list++ ) {
5843 if( h->ref_count[list] > 0 ) {
5844 for( i = 0; i < 4; i++ ) {
5845 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5846 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5847 if( h->ref_count[list] > 1 )
5848 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5854 h->ref_cache[list][ scan8[4*i]+1 ]=
5855 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5861 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences, replicated per sub-partition shape */
5863 for(list=0; list<2; list++){
5865 if(IS_DIRECT(h->sub_mb_type[i])){
5866 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5869 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5871 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5872 const int sub_mb_type= h->sub_mb_type[i];
5873 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5874 for(j=0; j<sub_partition_count[i]; j++){
5877 const int index= 4*i + block_width*j;
5878 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5879 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5880 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5882 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5883 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5884 tprintf("final mv:%d %d\n", mx, my);
/* spread mv/mvd into every 4x4 cell covered by this sub-partition */
5886 if(IS_SUB_8X8(sub_mb_type)){
5887 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5888 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5889 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5890 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5892 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5893 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5894 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5895 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5896 }else if(IS_SUB_8X4(sub_mb_type)){
5897 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5898 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5900 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5901 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5902 }else if(IS_SUB_4X8(sub_mb_type)){
5903 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5904 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5906 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5907 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5909 assert(IS_SUB_4X4(sub_mb_type));
5910 mv_cache[ 0 ][0]= mx;
5911 mv_cache[ 0 ][1]= my;
5913 mvd_cache[ 0 ][0]= mx - mpx;
5914 mvd_cache[ 0 ][1]= my - mpy;
/* unused list for this 8x8 block: zero the whole 2x2 mv/mvd group */
5918 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5919 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5920 p[0] = p[1] = p[8] = p[9] = 0;
5921 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5925 } else if( IS_DIRECT(mb_type) ) {
5926 pred_direct_motion(h, &mb_type);
5927 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5928 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5929 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 partitions: refs then MVDs per partition */
5931 int list, mx, my, i, mpx, mpy;
5932 if(IS_16X16(mb_type)){
5933 for(list=0; list<2; list++){
5934 if(IS_DIR(mb_type, 0, list)){
5935 if(h->ref_count[list] > 0 ){
5936 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5937 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5940 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
5942 for(list=0; list<2; list++){
5943 if(IS_DIR(mb_type, 0, list)){
5944 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5946 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5947 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5948 tprintf("final mv:%d %d\n", mx, my);
5950 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5951 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5953 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5956 else if(IS_16X8(mb_type)){
5957 for(list=0; list<2; list++){
5958 if(h->ref_count[list]>0){
5960 if(IS_DIR(mb_type, i, list)){
5961 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5962 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5964 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5968 for(list=0; list<2; list++){
5970 if(IS_DIR(mb_type, i, list)){
5971 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5972 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5973 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5974 tprintf("final mv:%d %d\n", mx, my);
5976 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5977 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5979 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5980 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5985 assert(IS_8X16(mb_type));
5986 for(list=0; list<2; list++){
5987 if(h->ref_count[list]>0){
5989 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5990 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5991 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5993 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5997 for(list=0; list<2; list++){
5999 if(IS_DIR(mb_type, i, list)){
6000 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6001 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6002 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6004 tprintf("final mv:%d %d\n", mx, my);
6005 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6006 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6008 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6009 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6016 if( IS_INTER( mb_type ) ) {
6017 h->chroma_pred_mode_table[mb_xy] = 0;
6018 write_back_motion( h, mb_type );
/* cbp is coded explicitly except for intra16x16 (implied by mb_type) */
6021 if( !IS_INTRA16x16( mb_type ) ) {
6022 cbp = decode_cabac_mb_cbp_luma( h );
6023 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6026 h->cbp_table[mb_xy] = cbp;
/* inter MBs code the 8x8-transform flag only when luma coefficients exist */
6028 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6029 if( decode_cabac_mb_transform_size( h ) )
6030 mb_type |= MB_TYPE_8x8DCT;
6032 s->current_picture.mb_type[mb_xy]= mb_type;
6034 if( cbp || IS_INTRA16x16( mb_type ) ) {
6035 const uint8_t *scan, *dc_scan;
/* pick frame or field scan order; *_q0 tables are for qscale==0 */
6038 if(IS_INTERLACED(mb_type)){
6039 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6040 dc_scan= luma_dc_field_scan;
6042 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6043 dc_scan= luma_dc_zigzag_scan;
6046 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6047 if( dqp == INT_MIN ){
6048 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into [0,51] per the spec's modular qp arithmetic */
6052 if(((unsigned)s->qscale) > 51){
6053 if(s->qscale<0) s->qscale+= 52;
6054 else s->qscale-= 52;
6056 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
/* intra16x16: separate luma DC block (cat 0) plus 16 AC blocks (cat 1) */
6058 if( IS_INTRA16x16( mb_type ) ) {
6060 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6061 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0)
6064 for( i = 0; i < 16; i++ ) {
6065 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6066 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 )
6070 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other MB types: per-8x8 luma blocks, either one 8x8 or four 4x4 residuals */
6074 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6075 if( cbp & (1<<i8x8) ) {
6076 if( IS_8x8DCT(mb_type) ) {
6077 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6078 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
6083 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6086 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6087 const int index = 4*i8x8 + i4x4;
6088 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6089 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 )
/* this 8x8 block has no coefficients: zero its nnz cache entries */
6093 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6094 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cat 3) then chroma AC (cat 4), using chroma_qp dequant tables */
6101 for( c = 0; c < 2; c++ ) {
6102 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6103 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->dequant4_coeff[h->chroma_qp], 4) < 0)
6110 for( c = 0; c < 2; c++ ) {
6111 for( i = 0; i < 4; i++ ) {
6112 const int index = 16 + 4 * c + i;
6113 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6114 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0)
6119 uint8_t * const nnz= &h->non_zero_count_cache[0];
6120 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6121 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not intra16x16: clear all nnz cache entries */
6124 uint8_t * const nnz= &h->non_zero_count_cache[0];
6125 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6126 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6127 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+9 ] = 0;
6130 s->current_picture.qscale_table[mb_xy]= s->qscale;
6131 write_back_non_zero_count(h);
/**
 * Deblocks one vertical luma edge (16 pixels tall) at `pix`.
 * bS<4: delegate to the DSP normal-strength filter with per-4px tc0 values
 * (tc[i] = -1 disables filtering for that segment). bS==4: apply the strong
 * intra filter inline, per the spec's strong-filtering equations.
 */
6137 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* alpha/beta thresholds are looked up from qp plus the slice offsets */
6139 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6140 const int alpha = alpha_table[index_a];
6141 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6146 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6147 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6149 /* 16px edge length, because bS=4 is triggered by being at
6150 * the edge of an intra MB, so all 4 bS are the same */
6151 for( d = 0; d < 16; d++ ) {
6152 const int p0 = pix[-1];
6153 const int p1 = pix[-2];
6154 const int p2 = pix[-3];
6156 const int q0 = pix[0];
6157 const int q1 = pix[1];
6158 const int q2 = pix[2];
/* edge-activity test: only filter where the step looks like blocking */
6160 if( ABS( p0 - q0 ) < alpha &&
6161 ABS( p1 - p0 ) < beta &&
6162 ABS( q1 - q0 ) < beta ) {
6164 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* strong filtering of the p side (3 samples) when p2 is smooth */
6165 if( ABS( p2 - p0 ) < beta)
6167 const int p3 = pix[-4];
6169 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6170 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6171 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6174 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* strong filtering of the q side (3 samples) when q2 is smooth */
6176 if( ABS( q2 - q0 ) < beta)
6178 const int q3 = pix[3];
6180 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6181 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6182 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6185 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weaker fallback: only p0/q0 are modified */
6189 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6190 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6192 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge. Normal strength (bS<4) uses the DSP
 * chroma filter with tc = tc0+1 (tc=0 disables a segment); bS==4 uses the
 * DSP strong intra chroma filter.
 */
6198 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6200 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6201 const int alpha = alpha_table[index_a];
6202 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6207 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6208 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6210 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the left vertical luma edge of an MBAFF MB pair, one row at a time
 * (16 rows), because each row may use a different bS and qp: bS has 8 entries
 * and qp[2] carries one value per left-neighbour MB of the pair.
 */
6214 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6216 for( i = 0; i < 16; i++, pix += stride) {
/* bS index: pairs of rows share a strength; field MBs interleave parity */
6222 int bS_index = (i >> 1);
6223 if (h->mb_field_decoding_flag) {
6225 bS_index |= (i & 1);
6228 if( bS[bS_index] == 0 ) {
/* qp selection: field -> by row parity, frame -> top/bottom half */
6232 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6233 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6234 alpha = alpha_table[index_a];
6235 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* normal-strength path: clipped delta on p0/q0, optional p1/q1 updates */
6238 if( bS[bS_index] < 4 ) {
6239 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6240 /* 4px edge length */
6241 const int p0 = pix[-1];
6242 const int p1 = pix[-2];
6243 const int p2 = pix[-3];
6244 const int q0 = pix[0];
6245 const int q1 = pix[1];
6246 const int q2 = pix[2];
6248 if( ABS( p0 - q0 ) < alpha &&
6249 ABS( p1 - p0 ) < beta &&
6250 ABS( q1 - q0 ) < beta ) {
6254 if( ABS( p2 - p0 ) < beta ) {
6255 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6258 if( ABS( q2 - q0 ) < beta ) {
6259 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6263 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6264 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6265 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6266 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: strong intra filtering, same equations as filter_mb_edgev */
6269 /* 4px edge length */
6270 const int p0 = pix[-1];
6271 const int p1 = pix[-2];
6272 const int p2 = pix[-3];
6274 const int q0 = pix[0];
6275 const int q1 = pix[1];
6276 const int q2 = pix[2];
6278 if( ABS( p0 - q0 ) < alpha &&
6279 ABS( p1 - p0 ) < beta &&
6280 ABS( q1 - q0 ) < beta ) {
6282 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6283 if( ABS( p2 - p0 ) < beta)
6285 const int p3 = pix[-4];
6287 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6288 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6289 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6292 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6294 if( ABS( q2 - q0 ) < beta)
6296 const int q3 = pix[3];
6298 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6299 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6300 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6303 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6307 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6308 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6310 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the left vertical chroma edge of an MBAFF MB pair, one row at a
 * time (8 chroma rows), selecting bS/qp per row like the luma variant.
 */
6315 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6317 for( i = 0; i < 8; i++, pix += stride) {
6325 if( bS[bS_index] == 0 ) {
6329 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6330 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6331 alpha = alpha_table[index_a];
6332 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* chroma normal-strength: tc = tc0+1, only p0/q0 are modified */
6333 if( bS[bS_index] < 4 ) {
6334 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6335 /* 2px edge length (because we use same bS than the one for luma) */
6336 const int p0 = pix[-1];
6337 const int p1 = pix[-2];
6338 const int q0 = pix[0];
6339 const int q1 = pix[1];
6341 if( ABS( p0 - q0 ) < alpha &&
6342 ABS( p1 - p0 ) < beta &&
6343 ABS( q1 - q0 ) < beta ) {
6344 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6346 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6347 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6348 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: strong chroma filter (2-sample update per side) */
6351 const int p0 = pix[-1];
6352 const int p1 = pix[-2];
6353 const int q0 = pix[0];
6354 const int q1 = pix[1];
6356 if( ABS( p0 - q0 ) < alpha &&
6357 ABS( p1 - p0 ) < beta &&
6358 ABS( q1 - q0 ) < beta ) {
6360 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6361 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6362 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge (16 pixels wide): the transposed
 * counterpart of filter_mb_edgev, addressing samples across rows via
 * pix_next (= stride) instead of adjacent columns.
 */
6368 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6370 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6371 const int alpha = alpha_table[index_a];
6372 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6373 const int pix_next = stride;
/* normal strength: delegate to the DSP vertical luma filter */
6378 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6379 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6381 /* 16px edge length, see filter_mb_edgev */
6382 for( d = 0; d < 16; d++ ) {
6383 const int p0 = pix[-1*pix_next];
6384 const int p1 = pix[-2*pix_next];
6385 const int p2 = pix[-3*pix_next];
6386 const int q0 = pix[0];
6387 const int q1 = pix[1*pix_next];
6388 const int q2 = pix[2*pix_next];
6390 if( ABS( p0 - q0 ) < alpha &&
6391 ABS( p1 - p0 ) < beta &&
6392 ABS( q1 - q0 ) < beta ) {
6394 const int p3 = pix[-4*pix_next];
6395 const int q3 = pix[ 3*pix_next];
/* strong intra filtering, same equations as the vertical-edge variant */
6397 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6398 if( ABS( p2 - p0 ) < beta) {
6400 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6401 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6402 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6405 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6407 if( ABS( q2 - q0 ) < beta) {
6409 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6410 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6411 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6414 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6418 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6419 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6421 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge: the transposed counterpart of
 * filter_mb_edgecv, delegating to the DSP vertical chroma filters.
 */
6428 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6430 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6431 const int alpha = alpha_table[index_a];
6432 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6437 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6438 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6440 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* In-loop deblocking driver for one macroblock: computes boundary strengths
 * (bS) and averaged QPs for every internal and cross-MB edge, then dispatches
 * to the edge filter helpers for luma and both chroma planes.
 * NOTE: this view is elided (gaps in the embedded line numbers); several
 * declarations, braces and else-branches live on lines not shown here. */
6444 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6445 MpegEncContext * const s = &h->s;
6446 const int mb_xy= mb_x + mb_y*s->mb_stride;
6447 int first_vertical_edge_done = 0;
6449 /* FIXME: A given frame may occupy more than one position in
6450 * the reference list. So ref2frm should be populated with
6451 * frame numbers, not indices. */
/* Maps ref_cache indices (offset by +2 so -2/-1 sentinels map to -1). */
6452 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6455 // left mb is in picture
6456 && h->slice_table[mb_xy-1] != 255
6457 // and current and left pair do not have the same interlaced type
6458 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6459 // and left mb is in the same slice if deblocking_filter == 2
6460 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6461 /* First vertical edge is different in MBAFF frames
6462 * There are 8 different bS to compute and 2 different Qp
6469 first_vertical_edge_done = 1;
/* MBAFF left edge: 8 bS values, one per 4x4 row of the MB pair. */
6470 for( i = 0; i < 8; i++ ) {
6472 int b_idx= 8 + 4 + 8*y;
6473 int bn_idx= b_idx - 1;
6475 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6477 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6478 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6480 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6481 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6482 h->non_zero_count_cache[bn_idx] != 0 ) {
/* Otherwise bS depends on ref-frame / motion-vector differences (>= 1 luma px). */
6487 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6488 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6489 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6490 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6497 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6498 // Do not use s->qscale as luma quantizer because it has not the same
6499 // value in IPCM macroblocks.
/* Two QP pairs: one per field/frame partner of the left MB pair. */
6500 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6501 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6502 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6503 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6504 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6505 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6508 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6509 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6510 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6511 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6512 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6515 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6516 for( dir = 0; dir < 2; dir++ )
6519 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
/* slice_table==255 marks "no MB there": skip the cross-MB edge (start at 1). */
6520 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6522 if (first_vertical_edge_done) {
6524 first_vertical_edge_done = 0;
6527 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* Four 4-pixel edges per direction; edge 0 borders the neighbor MB. */
6531 for( edge = start; edge < 4; edge++ ) {
6532 /* mbn_xy: neighbor macroblock */
6533 int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
/* With 8x8 transform, internal 4-pixel edges (odd) are not filtered. */
6537 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
6540 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6541 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
6542 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
6544 // This is a special case in the norm where the filtering must
6545 // be done twice (one each of the field) even if we are in a
6546 // frame macroblock.
6548 unsigned int tmp_linesize = 2 * linesize;
6549 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6550 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* First field of the interlaced neighbor pair. */
6554 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6555 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6556 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6559 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6562 // Do not use s->qscale as luma quantizer because it has not the same
6563 // value in IPCM macroblocks.
6564 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6565 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6566 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6567 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6568 chroma_qp = ( h->chroma_qp +
6569 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6570 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6571 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
/* Second field: repeat one row down with the other field MB as neighbor. */
6574 mbn_xy += s->mb_stride;
6575 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6576 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6577 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6580 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6583 // Do not use s->qscale as luma quantizer because it has not the same
6584 // value in IPCM macroblocks.
6585 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6586 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6587 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6588 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6589 chroma_qp = ( h->chroma_qp +
6590 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6591 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6592 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
/* Regular (non-special-case) bS computation for this edge. */
6595 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6596 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6599 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
6600 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6609 bS[0] = bS[1] = bS[2] = bS[3] = value;
6612 for( i = 0; i < 4; i++ ) {
6613 int x = dir == 0 ? edge : i;
6614 int y = dir == 0 ? i : edge;
6615 int b_idx= 8 + 4 + x + 8*y;
6616 int bn_idx= b_idx - (dir ? 8:1);
6618 if( h->non_zero_count_cache[b_idx] != 0 ||
6619 h->non_zero_count_cache[bn_idx] != 0 ) {
6626 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6627 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6628 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6629 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
/* All-zero bS: nothing to filter on this edge. */
6637 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6642 // Do not use s->qscale as luma quantizer because it has not the same
6643 // value in IPCM macroblocks.
6644 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6645 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6646 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6647 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
/* Vertical edges: luma every edge, chroma only even edges (half resolution). */
6649 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6650 if( (edge&1) == 0 ) {
6651 int chroma_qp = ( h->chroma_qp +
6652 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6653 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6654 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
/* Horizontal edges: same pattern with row offsets instead of column offsets. */
6657 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6658 if( (edge&1) == 0 ) {
6659 int chroma_qp = ( h->chroma_qp +
6660 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6661 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6662 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/* Decode all macroblocks of the current slice: CABAC path (with context
 * initialization from qscale), CAVLC path, and a legacy partitioned-data path.
 * Each path advances s->mb_x/s->mb_y, reports decoded regions to the error
 * concealment via ff_er_add_slice(), and returns on slice end or error.
 * NOTE: this view is elided (gaps in the embedded line numbers).
 * Fix: line 6826 contained corrupted tokens "s->?gb" / "s->gb?." — restored to
 * "s->gb" to match the identical idiom on lines 6814/6827. */
6669 static int decode_slice(H264Context *h){
6670 MpegEncContext * const s = &h->s;
6671 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6675 if( h->pps.cabac ) {
/* CABAC slice data starts byte-aligned. */
6679 align_get_bits( &s->gb );
6682 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6683 ff_init_cabac_decoder( &h->cabac,
6684 s->gb.buffer + get_bits_count(&s->gb)/8,
6685 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6686 /* calculate pre-state */
6687 for( i= 0; i < 460; i++ ) {
6689 if( h->slice_type == I_TYPE )
6690 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6692 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6695 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6697 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6701 int ret = decode_mb_cabac(h);
6704 if(ret>=0) hl_decode_mb(h);
6706 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6707 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6710 if(ret>=0) ret = decode_mb_cabac(h);
6712 if(ret>=0) hl_decode_mb(h);
6715 eos = get_cabac_terminate( &h->cabac );
/* bytestream may legally overshoot by one byte; more means corruption. */
6717 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6718 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6723 if( ++s->mb_x >= s->mb_width ) {
6725 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6727 if(h->mb_aff_frame) {
6732 if( eos || s->mb_y >= s->mb_height ) {
6733 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6734 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6741 int ret = decode_mb_cavlc(h);
6743 if(ret>=0) hl_decode_mb(h);
6745 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6747 ret = decode_mb_cavlc(h);
6749 if(ret>=0) hl_decode_mb(h);
6754 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6755 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6760 if(++s->mb_x >= s->mb_width){
6762 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6764 if(h->mb_aff_frame) {
6767 if(s->mb_y >= s->mb_height){
6768 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exact bit consumption == clean slice end; anything else is an error. */
6770 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6771 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6775 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6783 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6784 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6785 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Legacy path (disabled/partitioned). */
6798 for(;s->mb_y < s->mb_height; s->mb_y++){
6799 for(;s->mb_x < s->mb_width; s->mb_x++){
6800 int ret= decode_mb(h);
6805 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6811 if(++s->mb_x >= s->mb_width){
6813 if(++s->mb_y >= s->mb_height){
6814 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6815 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6819 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6826 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6827 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6832 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6839 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6842 return -1; //not reached
/* Parse an "unregistered user data" SEI payload: reads up to sizeof(user_data)-1
 * bytes, then scans for an x264 version banner ("x264 - core N") and records
 * the build number in h->x264_build so decoder bug workarounds can key on it.
 * NOTE: elided view; the NUL-termination of user_data presumably happens on an
 * elided line — TODO confirm before relying on the sscanf below. */
6845 static int decode_unregistered_user_data(H264Context *h, int size){
6846 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 payload bytes. */
6847 uint8_t user_data[16+256];
6853 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6854 user_data[i]= get_bits(&s->gb, 8);
/* Payload text starts after the 16-byte UUID. */
6858 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6859 if(e==1 && build>=0)
6860 h->x264_build= build;
6862 if(s->avctx->debug & FF_DEBUG_BUGS)
6863 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond what fit in the local buffer. */
6866 skip_bits(&s->gb, 8);
/* Walk all SEI messages in the current NAL: each message is a variable-length
 * type code and size (both encoded as runs of 0xFF bytes plus a final byte),
 * followed by the payload. Only unregistered user data is interpreted; all
 * other payload types are skipped. NOTE: elided view. */
6871 static int decode_sei(H264Context *h){
6872 MpegEncContext * const s = &h->s;
6874 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* ff_.. coded value: accumulate 255 per 0xFF byte, then add the final byte. */
6879 type+= show_bits(&s->gb, 8);
6880 }while(get_bits(&s->gb, 8) == 255);
6884 size+= show_bits(&s->gb, 8);
6885 }while(get_bits(&s->gb, 8) == 255);
/* NOTE(review): trailing ';' makes this 'if' a no-op as shown — the error
 * return appears to be on an elided line; verify against upstream. */
6889 if(decode_unregistered_user_data(h, size) < 0);
6893 skip_bits(&s->gb, 8*size);
6896 //FIXME check bits here
/* SEI messages are byte-aligned relative to each other. */
6897 align_get_bits(&s->gb);
/* Parse (and discard) the hypothetical reference decoder (HRD) parameters:
 * per-CPB bitrate/size entries plus the delay-length fields. Nothing is stored
 * in sps; the bits are consumed only to keep the bitstream position correct. */
6903 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6904 MpegEncContext * const s = &h->s;
6906 cpb_count = get_ue_golomb(&s->gb) + 1;
6907 get_bits(&s->gb, 4); /* bit_rate_scale */
6908 get_bits(&s->gb, 4); /* cpb_size_scale */
6909 for(i=0; i<cpb_count; i++){
6910 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6911 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6912 get_bits1(&s->gb); /* cbr_flag */
6914 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6915 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6916 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6917 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse the VUI (video usability information) block of an SPS: sample aspect
 * ratio, overscan/video-signal/chroma-location hints (skipped), timing info,
 * HRD parameters, and the bitstream restriction section (num_reorder_frames).
 * Only sar, timing and restriction fields are stored; the rest is consumed. */
6920 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6921 MpegEncContext * const s = &h->s;
6922 int aspect_ratio_info_present_flag, aspect_ratio_idc;
6923 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6925 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6927 if( aspect_ratio_info_present_flag ) {
6928 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow. */
6929 if( aspect_ratio_idc == EXTENDED_SAR ) {
6930 sps->sar.num= get_bits(&s->gb, 16);
6931 sps->sar.den= get_bits(&s->gb, 16);
6932 }else if(aspect_ratio_idc < 16){
/* Table lookup for the predefined aspect ratio codes. */
6933 sps->sar= pixel_aspect[aspect_ratio_idc];
6935 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6942 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6944 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6945 get_bits1(&s->gb); /* overscan_appropriate_flag */
6948 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6949 get_bits(&s->gb, 3); /* video_format */
6950 get_bits1(&s->gb); /* video_full_range_flag */
6951 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6952 get_bits(&s->gb, 8); /* colour_primaries */
6953 get_bits(&s->gb, 8); /* transfer_characteristics */
6954 get_bits(&s->gb, 8); /* matrix_coefficients */
6958 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6959 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6960 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6963 sps->timing_info_present_flag = get_bits1(&s->gb);
6964 if(sps->timing_info_present_flag){
6965 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6966 sps->time_scale = get_bits_long(&s->gb, 32);
6967 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6970 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6971 if(nal_hrd_parameters_present_flag)
6972 decode_hrd_parameters(h, sps);
6973 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6974 if(vcl_hrd_parameters_present_flag)
6975 decode_hrd_parameters(h, sps);
6976 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6977 get_bits1(&s->gb); /* low_delay_hrd_flag */
6978 get_bits1(&s->gb); /* pic_struct_present_flag */
6980 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6981 if(sps->bitstream_restriction_flag){
6982 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6983 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6984 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6985 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6986 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* Used by decode_frame() to size the B-frame reordering delay. */
6987 sps->num_reorder_frames = get_ue_golomb(&s->gb);
6988 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/* Parse a sequence parameter set NAL into h->sps_buffer[sps_id]: profile/level,
 * frame numbering, POC configuration, reference count, dimensions, AFF flags,
 * cropping and (optionally) VUI. NOTE: elided view (gaps in embedded numbers).
 * Fix: sps_id is attacker-controlled Golomb data; reject values outside
 * [0, MAX_SPS_COUNT) before indexing sps_buffer (out-of-bounds write otherwise). */
6994 static inline int decode_seq_parameter_set(H264Context *h){
6995 MpegEncContext * const s = &h->s;
6996 int profile_idc, level_idc;
7000 profile_idc= get_bits(&s->gb, 8);
7001 get_bits1(&s->gb); //constraint_set0_flag
7002 get_bits1(&s->gb); //constraint_set1_flag
7003 get_bits1(&s->gb); //constraint_set2_flag
7004 get_bits1(&s->gb); //constraint_set3_flag
7005 get_bits(&s->gb, 4); // reserved
7006 level_idc= get_bits(&s->gb, 8);
7007 sps_id= get_ue_golomb(&s->gb);
if((unsigned)sps_id >= MAX_SPS_COUNT){
av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
return -1;
}
7009 sps= &h->sps_buffer[ sps_id ];
7010 sps->profile_idc= profile_idc;
7011 sps->level_idc= level_idc;
7013 if(sps->profile_idc >= 100){ //high profile
7014 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7015 get_bits1(&s->gb); //residual_color_transform_flag
7016 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7017 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7018 sps->transform_bypass = get_bits1(&s->gb);
7019 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag
7020 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7025 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7026 sps->poc_type= get_ue_golomb(&s->gb);
7028 if(sps->poc_type == 0){ //FIXME #define
7029 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7030 } else if(sps->poc_type == 1){//FIXME #define
7031 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7032 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7033 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7034 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7036 for(i=0; i<sps->poc_cycle_length; i++)
7037 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7039 if(sps->poc_type > 2){
7040 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7044 sps->ref_frame_count= get_ue_golomb(&s->gb);
7045 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7046 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7048 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7049 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7050 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard against overflow before multiplying by 16 for pixel dimensions. */
7051 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7052 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7055 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7056 if(!sps->frame_mbs_only_flag)
7057 sps->mb_aff= get_bits1(&s->gb);
7061 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7063 sps->crop= get_bits1(&s->gb);
7065 sps->crop_left = get_ue_golomb(&s->gb);
7066 sps->crop_right = get_ue_golomb(&s->gb);
7067 sps->crop_top = get_ue_golomb(&s->gb);
7068 sps->crop_bottom= get_ue_golomb(&s->gb);
7069 if(sps->crop_left || sps->crop_top){
7070 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7076 sps->crop_bottom= 0;
7079 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7080 if( sps->vui_parameters_present_flag )
7081 decode_vui_parameters(h, sps);
7083 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7084 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7085 sps_id, sps->profile_idc, sps->level_idc,
7087 sps->ref_frame_count,
7088 sps->mb_width, sps->mb_height,
7089 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7090 sps->direct_8x8_inference_flag ? "8B8" : "",
7091 sps->crop_left, sps->crop_right,
7092 sps->crop_top, sps->crop_bottom,
7093 sps->vui_parameters_present_flag ? "VUI" : ""
/* Parse a picture parameter set NAL into h->pps_buffer[pps_id]: entropy mode,
 * slice groups (FMO unsupported), reference counts, weighting, QP offsets and
 * the 8x8-transform extension. NOTE: elided view (gaps in embedded numbers).
 * Fix: pps_id is attacker-controlled Golomb data; reject values outside
 * [0, MAX_PPS_COUNT) before indexing pps_buffer (out-of-bounds write otherwise).
 * The pps pointer is split into declaration + assignment so no declaration
 * follows a statement (keeps C90 compatibility). */
7099 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7100 MpegEncContext * const s = &h->s;
7101 int pps_id= get_ue_golomb(&s->gb);
7102 PPS *pps;
if((unsigned)pps_id >= MAX_PPS_COUNT){
av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
return -1;
}
pps= &h->pps_buffer[pps_id];
7104 pps->sps_id= get_ue_golomb(&s->gb);
7105 pps->cabac= get_bits1(&s->gb);
7106 pps->pic_order_present= get_bits1(&s->gb);
7107 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7108 if(pps->slice_group_count > 1 ){
7109 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7110 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* The spec-table excerpts below document the unimplemented FMO syntax. */
7111 switch(pps->mb_slice_group_map_type){
7114 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7115 | run_length[ i ] |1 |ue(v) |
7120 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7122 | top_left_mb[ i ] |1 |ue(v) |
7123 | bottom_right_mb[ i ] |1 |ue(v) |
7131 | slice_group_change_direction_flag |1 |u(1) |
7132 | slice_group_change_rate_minus1 |1 |ue(v) |
7137 | slice_group_id_cnt_minus1 |1 |ue(v) |
7138 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7140 | slice_group_id[ i ] |1 |u(v) |
7145 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7146 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7147 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7148 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7152 pps->weighted_pred= get_bits1(&s->gb);
7153 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7154 pps->init_qp= get_se_golomb(&s->gb) + 26;
7155 pps->init_qs= get_se_golomb(&s->gb) + 26;
7156 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7157 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7158 pps->constrained_intra_pred= get_bits1(&s->gb);
7159 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* Extra bits remaining => High profile PPS extension fields are present. */
7161 if(get_bits_count(&s->gb) < bit_length){
7162 pps->transform_8x8_mode= get_bits1(&s->gb);
7163 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag
7164 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7167 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7170 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7171 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7172 pps_id, pps->sps_id,
7173 pps->cabac ? "CABAC" : "CAVLC",
7174 pps->slice_group_count,
7175 pps->ref_count[0], pps->ref_count[1],
7176 pps->weighted_pred ? "weighted" : "",
7177 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7178 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7179 pps->constrained_intra_pred ? "CONSTR" : "",
7180 pps->redundant_pic_cnt_present ? "REDU" : "",
7181 pps->transform_8x8_mode ? "8x8DCT" : ""
7189 * finds the end of the current frame in the bitstream.
7190 * @return the position of the first byte of the next frame, or -1
/* Scan for the end of the current access unit by tracking a rolling 32-bit
 * start-code state: slice NALs (types 1/2/5) start a frame; a second slice
 * with first_mb_in_slice==0 (top bit of the first payload byte) or an
 * SPS/PPS/AUD NAL (7/8/9) after a started frame marks the frame boundary. */
7192 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7195 ParseContext *pc = &(h->s.parse_context);
7196 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7197 // mb_addr= pc->mb_addr - 1;
7199 for(i=0; i<=buf_size; i++){
/* Mask keeps the 00 00 01 prefix plus the 5-bit nal_unit_type. */
7200 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7201 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7202 if(pc->frame_start_found){
7203 // If there isn't one more byte in the buffer
7204 // the test on first_mb_in_slice cannot be done yet
7205 // do it at next call.
7206 if (i >= buf_size) break;
/* ue(v) first_mb_in_slice==0 encodes as a leading 1 bit. */
7207 if (buf[i] & 0x80) {
7208 // first_mb_in_slice is 0, probably the first nal of a new
7210 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7212 pc->frame_start_found= 0;
7216 pc->frame_start_found = 1;
/* SPS/PPS/AUD after a started frame also terminates the frame. */
7218 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7219 if(pc->frame_start_found){
7221 pc->frame_start_found= 0;
7226 state= (state<<8) | buf[i];
7230 return END_NOT_FOUND;
/* AVCodecParser callback: locate the frame boundary with find_frame_end() and
 * let ff_combine_frame() buffer partial input until a full frame is available;
 * then hand the (possibly reassembled) frame back via poutbuf. */
7233 static int h264_parse(AVCodecParserContext *s,
7234 AVCodecContext *avctx,
7235 uint8_t **poutbuf, int *poutbuf_size,
7236 const uint8_t *buf, int buf_size)
7238 H264Context *h = s->priv_data;
7239 ParseContext *pc = &h->s.parse_context;
7242 next= find_frame_end(h, buf, buf_size);
/* ff_combine_frame may replace buf/buf_size with its internal buffer. */
7244 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7250 *poutbuf = (uint8_t *)buf;
7251 *poutbuf_size = buf_size;
/* Split callback: find the extradata/frame boundary by scanning start codes.
 * Requires an SPS (NAL type 7) to have been seen; returns the offset of the
 * first non-parameter-set NAL's start code (backing up over zero padding). */
7255 static int h264_split(AVCodecContext *avctx,
7256 const uint8_t *buf, int buf_size)
/* -1 so no false start-code match before 4 real bytes are shifted in. */
7259 uint32_t state = -1;
7262 for(i=0; i<=buf_size; i++){
7263 if((state&0xFFFFFF1F) == 0x107)
7265 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7267 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
/* Include any zero bytes preceding the 00 00 01 prefix in the split point. */
7269 while(i>4 && buf[i-5]==0) i--;
7274 state= (state<<8) | buf[i];
/* Iterate over all NAL units in buf (either AVC length-prefixed or Annex-B
 * start-code separated), unescape each via decode_nal(), and dispatch by
 * nal_unit_type: slices, DPA/DPB/DPC partitions, SEI, SPS, PPS, etc. Finally
 * copies per-frame state (POC/frame_num history) and runs reference marking.
 * NOTE: elided view (gaps in the embedded line numbers).
 * Fix: guard against dst_length==0 before reading ptr[dst_length-1] and before
 * calling decode_rbsp_trailing() — both previously read one byte before the
 * buffer for an empty NAL payload. */
7280 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7281 MpegEncContext * const s = &h->s;
7282 AVCodecContext * const avctx= s->avctx;
7286 for(i=0; i<50; i++){
7287 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7291 s->current_picture_ptr= NULL;
7300 if(buf_index >= buf_size) break;
/* AVC mode: big-endian NAL length prefix of h->nal_length_size bytes. */
7302 for(i = 0; i < h->nal_length_size; i++)
7303 nalsize = (nalsize << 8) | buf[buf_index++];
7305 // start code prefix search
7306 for(; buf_index + 3 < buf_size; buf_index++){
7307 // this should allways succeed in the first iteration
7308 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7312 if(buf_index+3 >= buf_size) break;
7317 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
/* Drop a trailing zero byte, but never index before the buffer. */
7318 if(dst_length > 0 && ptr[dst_length - 1] == 0) dst_length--;
7319 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1));
7321 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7322 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7325 if (h->is_avc && (nalsize != consumed))
7326 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7328 buf_index += consumed;
/* Skip non-reference NALs when hurry_up / skip_frame requests it. */
7330 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
7331 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7334 switch(h->nal_unit_type){
7336 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7338 init_get_bits(&s->gb, ptr, bit_length);
7340 h->inter_gb_ptr= &s->gb;
7341 s->data_partitioning = 0;
7343 if(decode_slice_header(h) < 0){
7344 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7347 if(h->redundant_pic_count==0 && s->hurry_up < 5
7348 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7349 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7350 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7351 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partition A: slice header, separate intra/inter partitions follow. */
7355 init_get_bits(&s->gb, ptr, bit_length);
7357 h->inter_gb_ptr= NULL;
7358 s->data_partitioning = 1;
7360 if(decode_slice_header(h) < 0){
7361 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7365 init_get_bits(&h->intra_gb, ptr, bit_length);
7366 h->intra_gb_ptr= &h->intra_gb;
7369 init_get_bits(&h->inter_gb, ptr, bit_length);
7370 h->inter_gb_ptr= &h->inter_gb;
/* Decode only once all partitions (A+B, and C if needed) have arrived. */
7372 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7374 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7375 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7376 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7377 && avctx->skip_frame < AVDISCARD_ALL)
7381 init_get_bits(&s->gb, ptr, bit_length);
7385 init_get_bits(&s->gb, ptr, bit_length);
7386 decode_seq_parameter_set(h);
7388 if(s->flags& CODEC_FLAG_LOW_DELAY)
7391 if(avctx->has_b_frames < 2)
7392 avctx->has_b_frames= !s->low_delay;
7395 init_get_bits(&s->gb, ptr, bit_length);
7397 decode_picture_parameter_set(h, bit_length);
7400 case NAL_PICTURE_DELIMITER:
7402 case NAL_FILTER_DATA:
7405 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7409 if(!s->current_picture_ptr) return buf_index; //no frame
7411 s->current_picture_ptr->pict_type= s->pict_type;
7412 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
/* Carry POC / frame_num state into the next picture. */
7414 h->prev_frame_num_offset= h->frame_num_offset;
7415 h->prev_frame_num= h->frame_num;
7416 if(s->current_picture_ptr->reference){
7417 h->prev_poc_msb= h->poc_msb;
7418 h->prev_poc_lsb= h->poc_lsb;
7420 if(s->current_picture_ptr->reference)
7421 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7431 * returns the number of bytes consumed for building the current frame
/* Translate an internal buffer position into the byte count reported back to
 * the caller of decode_frame(), compensating for the truncated-input parser's
 * buffered bytes and clamping to sensible bounds. */
7433 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7434 if(s->flags&CODEC_FLAG_TRUNCATED){
/* Bytes already consumed from the parser's internal buffer don't count. */
7435 pos -= s->parse_context.last_index;
7436 if(pos<0) pos=0; // FIXME remove (unneeded?)
7440 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7441 if(pos+10>buf_size) pos=buf_size; // oops ;)
7447 static int decode_frame(AVCodecContext *avctx,
7448 void *data, int *data_size,
7449 uint8_t *buf, int buf_size)
7451 H264Context *h = avctx->priv_data;
7452 MpegEncContext *s = &h->s;
7453 AVFrame *pict = data;
7456 s->flags= avctx->flags;
7457 s->flags2= avctx->flags2;
7459 /* no supplementary picture */
7460 if (buf_size == 0) {
7464 if(s->flags&CODEC_FLAG_TRUNCATED){
7465 int next= find_frame_end(h, buf, buf_size);
7467 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7469 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7472 if(h->is_avc && !h->got_avcC) {
7473 int i, cnt, nalsize;
7474 unsigned char *p = avctx->extradata;
7475 if(avctx->extradata_size < 7) {
7476 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7480 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7483 /* sps and pps in the avcC always have length coded with 2 bytes,
7484 so put a fake nal_length_size = 2 while parsing them */
7485 h->nal_length_size = 2;
7486 // Decode sps from avcC
7487 cnt = *(p+5) & 0x1f; // Number of sps
7489 for (i = 0; i < cnt; i++) {
7490 nalsize = BE_16(p) + 2;
7491 if(decode_nal_units(h, p, nalsize) < 0) {
7492 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7497 // Decode pps from avcC
7498 cnt = *(p++); // Number of pps
7499 for (i = 0; i < cnt; i++) {
7500 nalsize = BE_16(p) + 2;
7501 if(decode_nal_units(h, p, nalsize) != nalsize) {
7502 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7507 // Now store right nal length size, that will be use to parse all other nals
7508 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7509 // Do not reparse avcC
7513 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7514 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7518 buf_index=decode_nal_units(h, buf, buf_size);
7522 //FIXME do something with unavailable reference frames
7524 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7525 if(!s->current_picture_ptr){
7526 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7531 Picture *out = s->current_picture_ptr;
7532 #if 0 //decode order
7533 *data_size = sizeof(AVFrame);
7535 /* Sort B-frames into display order */
7536 Picture *cur = s->current_picture_ptr;
7537 Picture *prev = h->delayed_output_pic;
7542 int dropped_frame = 0;
7545 if(h->sps.bitstream_restriction_flag
7546 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7547 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7551 while(h->delayed_pic[pics]) pics++;
7552 h->delayed_pic[pics++] = cur;
7553 if(cur->reference == 0)
7556 for(i=0; h->delayed_pic[i]; i++)
7557 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7560 out = h->delayed_pic[0];
7561 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7562 if(h->delayed_pic[i]->poc < out->poc){
7563 out = h->delayed_pic[i];
7567 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7568 if(prev && pics <= s->avctx->has_b_frames)
7570 else if((out_of_order && pics-1 == s->avctx->has_b_frames)
7572 ((!cross_idr && prev && out->poc > prev->poc + 2)
7573 || cur->pict_type == B_TYPE)))
7576 s->avctx->has_b_frames++;
7579 else if(out_of_order)
7582 if(out_of_order || pics > s->avctx->has_b_frames){
7583 dropped_frame = (out != h->delayed_pic[out_idx]);
7584 for(i=out_idx; h->delayed_pic[i]; i++)
7585 h->delayed_pic[i] = h->delayed_pic[i+1];
7588 if(prev == out && !dropped_frame)
7591 *data_size = sizeof(AVFrame);
7592 if(prev && prev != out && prev->reference == 1)
7593 prev->reference = 0;
7594 h->delayed_output_pic = out;
7597 *pict= *(AVFrame*)out;
7600 assert(pict->data[0]);
7601 ff_print_debug_info(s, pict);
7602 //printf("out %d\n", (int)pict->data[0]);
7605 /* Return the Picture timestamp as the frame number */
7606 /* we subtract 1 because it is added in utils.c */
7607 avctx->frame_number = s->picture_number - 1;
7609 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the neighbouring
 * macroblocks of the current one. A neighbour counts as available
 * only if it lies inside the picture and its slice_table entry
 * matches the current slice number (prediction must not cross
 * slice boundaries).
 * NOTE(review): this excerpt is elided — the embedded original line
 * numbers jump (7619 -> 7625), so the s->mb_y == 0 top-row handling
 * that should guard mb_avail[0..2] is not visible here; confirm
 * against the complete source before relying on these comments.
 */
7612 static inline void fill_mb_avail(H264Context *h){
7613 MpegEncContext * const s = &h->s;
// Raster-order index of the current macroblock in slice_table.
7614 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Row above: top-left (0) needs a column to the left, top (1) is always
// in range horizontally, top-right (2) needs a column to the right.
7617 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7618 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7619 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// Left neighbour (3): unavailable at the left picture edge.
7625 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
// Entries 4 and 5 are constants per the visible code; the FIXMEs
// suggest they should be initialized once outside this per-MB helper.
7626 h->mb_avail[4]= 1; //FIXME move out
7627 h->mb_avail[5]= 0; //FIXME move out
7633 #define SIZE (COUNT*40)
7639 // int int_temp[10000];
7641 AVCodecContext avctx;
7643 dsputil_init(&dsp, &avctx);
7645 init_put_bits(&pb, temp, SIZE);
7646 printf("testing unsigned exp golomb\n");
7647 for(i=0; i<COUNT; i++){
7649 set_ue_golomb(&pb, i);
7650 STOP_TIMER("set_ue_golomb");
7652 flush_put_bits(&pb);
7654 init_get_bits(&gb, temp, 8*SIZE);
7655 for(i=0; i<COUNT; i++){
7658 s= show_bits(&gb, 24);
7661 j= get_ue_golomb(&gb);
7663 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7666 STOP_TIMER("get_ue_golomb");
7670 init_put_bits(&pb, temp, SIZE);
7671 printf("testing signed exp golomb\n");
7672 for(i=0; i<COUNT; i++){
7674 set_se_golomb(&pb, i - COUNT/2);
7675 STOP_TIMER("set_se_golomb");
7677 flush_put_bits(&pb);
7679 init_get_bits(&gb, temp, 8*SIZE);
7680 for(i=0; i<COUNT; i++){
7683 s= show_bits(&gb, 24);
7686 j= get_se_golomb(&gb);
7687 if(j != i - COUNT/2){
7688 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7691 STOP_TIMER("get_se_golomb");
7694 printf("testing 4x4 (I)DCT\n");
7697 uint8_t src[16], ref[16];
7698 uint64_t error= 0, max_error=0;
7700 for(i=0; i<COUNT; i++){
7702 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7703 for(j=0; j<16; j++){
7704 ref[j]= random()%255;
7705 src[j]= random()%255;
7708 h264_diff_dct_c(block, src, ref, 4);
7711 for(j=0; j<16; j++){
7712 // printf("%d ", block[j]);
7713 block[j]= block[j]*4;
7714 if(j&1) block[j]= (block[j]*4 + 2)/5;
7715 if(j&4) block[j]= (block[j]*4 + 2)/5;
7719 s->dsp.h264_idct_add(ref, block, 4);
7720 /* for(j=0; j<16; j++){
7721 printf("%d ", ref[j]);
7725 for(j=0; j<16; j++){
7726 int diff= ABS(src[j] - ref[j]);
7729 max_error= FFMAX(max_error, diff);
7732 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7734 printf("testing quantizer\n");
7735 for(qp=0; qp<52; qp++){
7737 src1_block[i]= src2_block[i]= random()%255;
7741 printf("Testing NAL layer\n");
7743 uint8_t bitstream[COUNT];
7744 uint8_t nal[COUNT*2];
7746 memset(&h, 0, sizeof(H264Context));
7748 for(i=0; i<COUNT; i++){
7756 for(j=0; j<COUNT; j++){
7757 bitstream[j]= (random() % 255) + 1;
7760 for(j=0; j<zeros; j++){
7761 int pos= random() % COUNT;
7762 while(bitstream[pos] == 0){
7771 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7773 printf("encoding failed\n");
7777 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7781 if(out_length != COUNT){
7782 printf("incorrect length %d %d\n", out_length, COUNT);
7786 if(consumed != nal_length){
7787 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7791 if(memcmp(bitstream, out, COUNT)){
7792 printf("missmatch\n");
7797 printf("Testing RBSP\n");
/**
 * Codec close callback: release per-context buffers allocated while
 * decoding.
 * NOTE(review): excerpt is elided (embedded original line numbers skip
 * 7806, 7809, 7812-7813) — the opening brace, the presumable use of
 * `s` (likely an MPV_common_end(s) call) and the return statement are
 * not visible; confirm the full cleanup sequence in the complete file.
 */
7805 static int decode_end(AVCodecContext *avctx)
7807 H264Context *h = avctx->priv_data;
// `s` is unused in the visible lines — presumably consumed by an
// elided MpegEncContext teardown call; verify against the full source.
7808 MpegEncContext *s = &h->s;
// Free the temporary RBSP buffer used for unescaped NAL payloads.
7810 av_freep(&h->rbsp_buffer);
7811 free_tables(h); //FIXME cleanup init stuff perhaps
7814 // memset(h, 0, sizeof(H264Context));
7820 AVCodec h264_decoder = {
7824 sizeof(H264Context),
7829 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7833 AVCodecParser h264_parser = {
7835 sizeof(H264Context),