2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
66 int poc_type; ///< pic_order_cnt_type
67 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag;
69 int offset_for_non_ref_pic;
70 int offset_for_top_to_bottom_field;
71 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag;
74 int mb_width; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag;
77 int mb_aff; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag;
79 int crop; ///< frame_cropping_flag
80 int crop_left; ///< frame_cropping_rect_left_offset
81 int crop_right; ///< frame_cropping_rect_right_offset
82 int crop_top; ///< frame_cropping_rect_top_offset
83 int crop_bottom; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag;
86 int timing_info_present_flag;
87 uint32_t num_units_in_tick;
89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag;
92 int num_reorder_frames;
96 * Picture parameter set
100 int cabac; ///< entropy_coding_mode_flag
101 int pic_order_present; ///< pic_order_present_flag
102 int slice_group_count; ///< num_slice_groups_minus1 + 1
103 int mb_slice_group_map_type;
104 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
105 int weighted_pred; ///< weighted_pred_flag
106 int weighted_bipred_idc;
107 int init_qp; ///< pic_init_qp_minus26 + 26
108 int init_qs; ///< pic_init_qs_minus26 + 26
109 int chroma_qp_index_offset;
110 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
111 int constrained_intra_pred; ///< constrained_intra_pred_flag
112 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
113 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 * Memory management control operation opcode.
119 typedef enum MMCOOpcode{
130 * Memory management control operation.
141 typedef struct H264Context{
149 #define NAL_IDR_SLICE 5
153 #define NAL_PICTURE_DELIMITER 9
154 #define NAL_FILTER_DATA 10
155 uint8_t *rbsp_buffer;
156 int rbsp_buffer_size;
159 * Used to parse AVC variant of h264
161 int is_avc; ///< this flag is != 0 if codec is avc1
162 int got_avcC; ///< flag used to parse avcC data only once
163 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
167 int prev_mb_skipped; //FIXME remove (IMHO not used)
170 int chroma_pred_mode;
171 int intra16x16_pred_mode;
176 int8_t intra4x4_pred_mode_cache[5*8];
177 int8_t (*intra4x4_pred_mode)[8];
178 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
179 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
180 void (*pred8x8 [4+3])(uint8_t *src, int stride);
181 void (*pred16x16[4+3])(uint8_t *src, int stride);
182 unsigned int topleft_samples_available;
183 unsigned int top_samples_available;
184 unsigned int topright_samples_available;
185 unsigned int left_samples_available;
186 uint8_t (*top_borders[2])[16+2*8];
187 uint8_t left_border[2*(17+2*9)];
190 * non zero coeff count cache.
191 * is 64 if not available.
193 uint8_t non_zero_count_cache[6*8] __align8;
194 uint8_t (*non_zero_count)[16];
197 * Motion vector cache.
199 int16_t mv_cache[2][5*8][2] __align8;
200 int8_t ref_cache[2][5*8] __align8;
201 #define LIST_NOT_USED -1 //FIXME rename?
202 #define PART_NOT_AVAILABLE -2
205 * is 1 if the specific list MV&references are set to 0,0,-2.
207 int mv_cache_clean[2];
210 * number of neighbors (top and/or left) that used 8x8 dct
212 int neighbor_transform_size;
215 * block_offset[ 0..23] for frame macroblocks
216 * block_offset[24..47] for field macroblocks
218 int block_offset[2*(16+8)];
220 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
222 int b_stride; //FIXME use s->b4_stride
228 int unknown_svq3_flag;
229 int next_slice_index;
231 SPS sps_buffer[MAX_SPS_COUNT];
232 SPS sps; ///< current sps
234 PPS pps_buffer[MAX_PPS_COUNT];
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
241 uint16_t (*dequant8_coeff)[64];
244 uint8_t *slice_table_base;
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
247 int slice_type_fixed;
249 //interlacing specific flags
251 int mb_field_decoding_flag;
258 int delta_poc_bottom;
261 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
262 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
263 int frame_num_offset; ///< for POC type 2
264 int prev_frame_num_offset; ///< for POC type 2
265 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
268 * frame_num for frames or 2*frame_num for field pics.
273 * max_frame_num or 2*max_frame_num for field pics.
277 //Weighted pred stuff
279 int use_weight_chroma;
280 int luma_log2_weight_denom;
281 int chroma_log2_weight_denom;
282 int luma_weight[2][16];
283 int luma_offset[2][16];
284 int chroma_weight[2][16][2];
285 int chroma_offset[2][16][2];
286 int implicit_weight[16][16];
289 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
290 int slice_alpha_c0_offset;
291 int slice_beta_offset;
293 int redundant_pic_count;
295 int direct_spatial_mv_pred;
296 int dist_scale_factor[16];
297 int map_col_to_list0[2][16];
300 * num_ref_idx_l0/1_active_minus1 + 1
302 int ref_count[2];// FIXME split for AFF
303 Picture *short_ref[32];
304 Picture *long_ref[32];
305 Picture default_ref_list[2][32];
306 Picture ref_list[2][32]; //FIXME size?
307 Picture field_ref_list[2][32]; //FIXME size?
308 Picture *delayed_pic[16]; //FIXME size?
309 Picture *delayed_output_pic;
312 * memory management control operations buffer.
314 MMCO mmco[MAX_MMCO_COUNT];
317 int long_ref_count; ///< number of actual long term references
318 int short_ref_count; ///< number of actual short term references
321 GetBitContext intra_gb;
322 GetBitContext inter_gb;
323 GetBitContext *intra_gb_ptr;
324 GetBitContext *inter_gb_ptr;
326 DCTELEM mb[16*24] __align8;
332 uint8_t cabac_state[460];
335 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
339 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 uint8_t *chroma_pred_mode_table;
341 int last_qscale_diff;
342 int16_t (*mvd_table[2])[2];
343 int16_t mvd_cache[2][5*8][2] __align8;
344 uint8_t *direct_table;
345 uint8_t direct_cache[5*8];
347 uint8_t zigzag_scan[16];
348 uint8_t field_scan[16];
349 const uint8_t *zigzag_scan_q0;
350 const uint8_t *field_scan_q0;
353 static VLC coeff_token_vlc[4];
354 static VLC chroma_dc_coeff_token_vlc;
356 static VLC total_zeros_vlc[15];
357 static VLC chroma_dc_total_zeros_vlc[3];
359 static VLC run_vlc[6];
362 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
363 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
364 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/*
 * Combine two 16-bit quantities into a single 32-bit word.
 *
 * Two return statements are visible. The one under WORDS_BIGENDIAN places a
 * in bits 16..31 and the low 16 bits of b in bits 0..15; the other one swaps
 * the roles of a and b.
 *
 * NOTE(review): the preprocessor line separating the two returns and the
 * closing lines of the function are not visible in this listing; presumably
 * the second return is the non-big-endian branch -- confirm.
 * NOTE(review): the shifted operand is a plain int, so a negative value is
 * left-shifted as a signed quantity -- confirm callers only rely on the
 * resulting bit pattern.
 */
366 static inline uint32_t pack16to32(int a, int b){
367 #ifdef WORDS_BIGENDIAN
368 return (b&0xFFFF) + (a<<16);
370 return (a&0xFFFF) + (b<<16);
376 * @param h height of the rectangle, should be a constant
377 * @param w width of the rectangle, should be a constant
378 * @param size the size of val (1 or 4), should be a constant
380 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
381 uint8_t *p= (uint8_t*)vp;
382 assert(size==1 || size==4);
387 assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
388 assert((stride&(w-1))==0);
389 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit version of it
392 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
393 }else if(w==2 && h==4){
394 *(uint16_t*)(p + 0*stride)=
395 *(uint16_t*)(p + 1*stride)=
396 *(uint16_t*)(p + 2*stride)=
397 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
398 }else if(w==4 && h==1){
399 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
400 }else if(w==4 && h==2){
401 *(uint32_t*)(p + 0*stride)=
402 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
403 }else if(w==4 && h==4){
404 *(uint32_t*)(p + 0*stride)=
405 *(uint32_t*)(p + 1*stride)=
406 *(uint32_t*)(p + 2*stride)=
407 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
408 }else if(w==8 && h==1){
410 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
411 }else if(w==8 && h==2){
412 *(uint32_t*)(p + 0 + 0*stride)=
413 *(uint32_t*)(p + 4 + 0*stride)=
414 *(uint32_t*)(p + 0 + 1*stride)=
415 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
416 }else if(w==8 && h==4){
417 *(uint64_t*)(p + 0*stride)=
418 *(uint64_t*)(p + 1*stride)=
419 *(uint64_t*)(p + 2*stride)=
420 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
421 }else if(w==16 && h==2){
422 *(uint64_t*)(p + 0+0*stride)=
423 *(uint64_t*)(p + 8+0*stride)=
424 *(uint64_t*)(p + 0+1*stride)=
425 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
426 }else if(w==16 && h==4){
427 *(uint64_t*)(p + 0+0*stride)=
428 *(uint64_t*)(p + 8+0*stride)=
429 *(uint64_t*)(p + 0+1*stride)=
430 *(uint64_t*)(p + 8+1*stride)=
431 *(uint64_t*)(p + 0+2*stride)=
432 *(uint64_t*)(p + 8+2*stride)=
433 *(uint64_t*)(p + 0+3*stride)=
434 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
439 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
440 MpegEncContext * const s = &h->s;
441 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
442 int topleft_xy, top_xy, topright_xy, left_xy[2];
443 int topleft_type, top_type, topright_type, left_type[2];
447 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
448 // the actual condition is whether we're on the edge of a slice,
449 // and even then the intra and nnz parts are unnecessary.
450 if(for_deblock && h->slice_num == 1)
453 //wow, what a mess; why didn't they simplify the interlacing & intra stuff? I can't imagine that these complex rules are worth it
455 top_xy = mb_xy - s->mb_stride;
456 topleft_xy = top_xy - 1;
457 topright_xy= top_xy + 1;
458 left_xy[1] = left_xy[0] = mb_xy-1;
468 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
469 const int top_pair_xy = pair_xy - s->mb_stride;
470 const int topleft_pair_xy = top_pair_xy - 1;
471 const int topright_pair_xy = top_pair_xy + 1;
472 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
473 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
474 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
475 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
476 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
477 const int bottom = (s->mb_y & 1);
478 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
480 ? !curr_mb_frame_flag // bottom macroblock
481 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
483 top_xy -= s->mb_stride;
486 ? !curr_mb_frame_flag // bottom macroblock
487 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
489 topleft_xy -= s->mb_stride;
492 ? !curr_mb_frame_flag // bottom macroblock
493 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
495 topright_xy -= s->mb_stride;
497 if (left_mb_frame_flag != curr_mb_frame_flag) {
498 left_xy[1] = left_xy[0] = pair_xy - 1;
499 if (curr_mb_frame_flag) {
520 left_xy[1] += s->mb_stride;
533 h->top_mb_xy = top_xy;
534 h->left_mb_xy[0] = left_xy[0];
535 h->left_mb_xy[1] = left_xy[1];
537 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
538 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
539 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
540 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
541 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
543 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
544 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
545 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
546 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
547 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
550 if(IS_INTRA(mb_type)){
551 h->topleft_samples_available=
552 h->top_samples_available=
553 h->left_samples_available= 0xFFFF;
554 h->topright_samples_available= 0xEEEA;
556 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
557 h->topleft_samples_available= 0xB3FF;
558 h->top_samples_available= 0x33FF;
559 h->topright_samples_available= 0x26EA;
562 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
563 h->topleft_samples_available&= 0xDF5F;
564 h->left_samples_available&= 0x5F5F;
568 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
569 h->topleft_samples_available&= 0x7FFF;
571 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
572 h->topright_samples_available&= 0xFBFF;
574 if(IS_INTRA4x4(mb_type)){
575 if(IS_INTRA4x4(top_type)){
576 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
577 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
578 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
579 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
582 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
587 h->intra4x4_pred_mode_cache[4+8*0]=
588 h->intra4x4_pred_mode_cache[5+8*0]=
589 h->intra4x4_pred_mode_cache[6+8*0]=
590 h->intra4x4_pred_mode_cache[7+8*0]= pred;
593 if(IS_INTRA4x4(left_type[i])){
594 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
595 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
598 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
603 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
604 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
619 //FIXME constrained_intra_pred & partitioning & nnz (let's hope this is just a typo in the spec)
621 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
622 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
623 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
624 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
626 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
627 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
629 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
630 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
633 h->non_zero_count_cache[4+8*0]=
634 h->non_zero_count_cache[5+8*0]=
635 h->non_zero_count_cache[6+8*0]=
636 h->non_zero_count_cache[7+8*0]=
638 h->non_zero_count_cache[1+8*0]=
639 h->non_zero_count_cache[2+8*0]=
641 h->non_zero_count_cache[1+8*3]=
642 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
646 for (i=0; i<2; i++) {
648 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
649 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
650 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
651 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
653 h->non_zero_count_cache[3+8*1 + 2*8*i]=
654 h->non_zero_count_cache[3+8*2 + 2*8*i]=
655 h->non_zero_count_cache[0+8*1 + 8*i]=
656 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
663 h->top_cbp = h->cbp_table[top_xy];
664 } else if(IS_INTRA(mb_type)) {
671 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
672 } else if(IS_INTRA(mb_type)) {
678 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
681 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
686 //FIXME direct mb can skip much of this
687 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
689 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
690 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
691 /*if(!h->mv_cache_clean[list]){
692 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
693 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
694 h->mv_cache_clean[list]= 1;
698 h->mv_cache_clean[list]= 0;
700 if(IS_INTER(top_type)){
701 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
702 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
703 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
704 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
705 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
706 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
707 h->ref_cache[list][scan8[0] + 0 - 1*8]=
708 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
709 h->ref_cache[list][scan8[0] + 2 - 1*8]=
710 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
712 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
713 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
714 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
715 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
716 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
719 //FIXME unify cleanup or sth
720 if(IS_INTER(left_type[0])){
721 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
722 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
723 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
724 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
725 h->ref_cache[list][scan8[0] - 1 + 0*8]=
726 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
728 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
729 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
730 h->ref_cache[list][scan8[0] - 1 + 0*8]=
731 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
734 if(IS_INTER(left_type[1])){
735 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
736 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
737 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
739 h->ref_cache[list][scan8[0] - 1 + 2*8]=
740 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
742 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
743 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
744 h->ref_cache[list][scan8[0] - 1 + 2*8]=
745 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
746 assert((!left_type[0]) == (!left_type[1]));
749 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
752 if(IS_INTER(topleft_type)){
753 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
754 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
756 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
758 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
759 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
762 if(IS_INTER(topright_type)){
763 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
764 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
765 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
766 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
768 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
769 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
773 h->ref_cache[list][scan8[5 ]+1] =
774 h->ref_cache[list][scan8[7 ]+1] =
775 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
776 h->ref_cache[list][scan8[4 ]] =
777 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
778 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
779 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
780 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
781 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
782 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
785 /* XXX beurk, Load mvd */
786 if(IS_INTER(topleft_type)){
787 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
788 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
790 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
793 if(IS_INTER(top_type)){
794 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
795 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
796 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
797 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
798 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
800 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
801 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
802 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
803 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
805 if(IS_INTER(left_type[0])){
806 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
807 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
810 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
811 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
813 if(IS_INTER(left_type[1])){
814 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
815 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
816 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
818 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
819 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
821 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
822 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
823 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
824 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
825 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
827 if(h->slice_type == B_TYPE){
828 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
830 if(IS_DIRECT(top_type)){
831 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
832 }else if(IS_8X8(top_type)){
833 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
834 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
835 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
837 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
841 if(IS_DIRECT(left_type[0])){
842 h->direct_cache[scan8[0] - 1 + 0*8]=
843 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
844 }else if(IS_8X8(left_type[0])){
845 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
846 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
847 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
849 h->direct_cache[scan8[0] - 1 + 0*8]=
850 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
858 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/*
 * Store part of the intra 4x4 prediction-mode cache into the per-macroblock
 * table h->intra4x4_pred_mode, in the row selected by the current macroblock
 * address (mb_x + mb_y*mb_stride).
 *
 * Cache indices are written as column + 8*row. Seven entries are saved:
 *   table[0..3] <- column 7 of cache rows 1..4
 *   table[4..6] <- columns 4..6 of cache row 4
 * fill_caches() reads table entries 4, 5, 6 and 3 of the top neighbour back
 * into cache row 0, columns 4..7, so these appear to be the right-most column
 * and bottom row of the current macroblock -- NOTE(review): confirm against
 * the scan8 table, which is not visible here.
 */
861 static inline void write_back_intra_pred_mode(H264Context *h){
862 MpegEncContext * const s = &h->s;
/* linear address of the current macroblock */
863 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* column 7, rows 1..4 */
865 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
866 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
867 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
868 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
/* row 4, columns 4..6 */
869 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
870 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
871 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
875 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
877 static inline int check_intra4x4_pred_mode(H264Context *h){
878 MpegEncContext * const s = &h->s;
879 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
880 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
883 if(!(h->top_samples_available&0x8000)){
885 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
887 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
890 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
895 if(!(h->left_samples_available&0x8000)){
897 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
899 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
902 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
908 } //FIXME cleanup like next
911 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
913 static inline int check_intra_pred_mode(H264Context *h, int mode){
914 MpegEncContext * const s = &h->s;
915 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
916 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
918 if(mode < 0 || mode > 6) {
919 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
923 if(!(h->top_samples_available&0x8000)){
926 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
931 if(!(h->left_samples_available&0x8000)){
934 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
943 * gets the predicted intra4x4 prediction mode.
/*
 * Derive the predicted intra 4x4 mode for block n from its cached neighbours.
 *
 * h  decoder context holding intra4x4_pred_mode_cache
 * n  block index; translated to a cache position through scan8[]
 *
 * The left neighbour is the cache entry one position before the block and the
 * top neighbour the entry eight positions before it. The smaller of the two
 * values is computed, and DC_PRED is returned when that minimum is negative.
 * NOTE(review): the return statement for the non-negative case is not visible
 * in this listing.
 */
945 static inline int pred_intra_mode(H264Context *h, int n){
946 const int index8= scan8[n];
947 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
948 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
949 const int min= FFMIN(left, top);
/* trace output only */
951 tprintf("mode:%d %d min:%d\n", left ,top, min);
/* a negative cached mode on either side forces DC prediction */
953 if(min<0) return DC_PRED;
/*
 * Store part of the non-zero-coefficient-count cache into the per-macroblock
 * table h->non_zero_count, in the row selected by the current macroblock
 * address (mb_x + mb_y*mb_stride).
 *
 * Cache indices are written as column + 8*row. Thirteen entries are saved:
 *   table[0..3]   <- column 7 of cache rows 1..4
 *   table[4..6]   <- columns 4..6 of cache row 4
 *   table[7..9]   <- (col 2,row 1), (col 2,row 2), (col 1,row 2)
 *   table[10..12] <- (col 2,row 4), (col 2,row 5), (col 1,row 5)
 * fill_caches() later reads entries 4,5,6,3 and 9,8 and 12,11 of the top
 * neighbour from this table.
 * NOTE(review): the two three-entry groups are presumably the two chroma
 * planes -- confirm against the scan8 table, which is not visible here.
 */
957 static inline void write_back_non_zero_count(H264Context *h){
958 MpegEncContext * const s = &h->s;
/* linear address of the current macroblock */
959 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* column 7, rows 1..4 */
961 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
962 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
963 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
964 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
/* row 4, columns 4..6 */
965 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
966 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
967 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* first small group: cache columns 1..2, rows 1..2 */
969 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
970 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
971 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
/* second small group: cache columns 1..2, rows 4..5 */
973 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
974 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
975 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
979 * gets the predicted number of non zero coefficients.
980 * @param n block index
982 static inline int pred_non_zero_count(H264Context *h, int n){
983 const int index8= scan8[n];
984 const int left= h->non_zero_count_cache[index8 - 1];
985 const int top = h->non_zero_count_cache[index8 - 8];
988 if(i<64) i= (i+1)>>1;
990 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * Select the diagonal neighbour motion vector for a partition.
 *
 * h           decoder context holding mv_cache / ref_cache
 * C           output: set to point at the chosen motion vector in mv_cache
 * i           cache index of the partition
 * list        reference list whose caches are consulted
 * part_width  offset, in cache columns, from i to the top-right neighbour
 *
 * The entry one row up (i - 8) and part_width columns to the right is tried
 * first; when its reference is not PART_NOT_AVAILABLE, *C is set to that
 * vector and its reference is returned.
 * The remaining visible statements set *C to the entry one row up and one
 * column to the left and return that entry's reference.
 * NOTE(review): the line joining the two halves is not visible in this
 * listing; presumably the second half is the else branch -- confirm.
 */
995 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
996 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
998 if(topright_ref != PART_NOT_AVAILABLE){
999 *C= h->mv_cache[list][ i - 8 + part_width ];
1000 return topright_ref;
1002 tprintf("topright MV not available\n");
/* use the top-left entry instead */
1004 *C= h->mv_cache[list][ i - 8 - 1 ];
1005 return h->ref_cache[list][ i - 8 - 1 ];
1010 * gets the predicted MV.
1011 * @param n the block index
1012 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1013 * @param mx the x component of the predicted motion vector
1014 * @param my the y component of the predicted motion vector
/*
 * Predicts the motion vector of block n from three neighbours:
 * A (left, index8-1), B (top, index8-8) and C (diagonal, chosen by
 * fetch_diagonal_mv()). The result is written to *mx / *my.
 * NOTE(review): the declaration of C and the bodies of several branches are
 * not visible in this listing.
 */
1016 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1017 const int index8= scan8[n];
1018 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1019 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1020 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1021 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1023 int diagonal_ref, match_count;
1025 assert(part_width==1 || part_width==2 || part_width==4);
1035 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many of the three neighbours use the same reference as this block */
1036 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1037 tprintf("pred_motion match_count=%d\n", match_count);
/* two or more matches: component-wise median of A, B and C */
1038 if(match_count > 1){ //most common
1039 *mx= mid_pred(A[0], B[0], C[0]);
1040 *my= mid_pred(A[1], B[1], C[1]);
/* exactly one match: presumably the matching neighbour's MV is used directly (branch bodies not visible) */
1041 }else if(match_count==1){
1045 }else if(top_ref==ref){
/* no match: special case when only the left neighbour is available (its body is not visible), otherwise the median again */
1053 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1057 *mx= mid_pred(A[0], B[0], C[0]);
1058 *my= mid_pred(A[1], B[1], C[1]);
1062 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1066 * gets the directionally predicted 16x8 MV.
1067 * @param n the block index
1068 * @param mx the x component of the predicted motion vector
1069 * @param my the y component of the predicted motion vector
/*
 * Directional MV prediction for a 16x8 partition.
 * Two candidates are visible: the top neighbour of block 0 (B) and the left
 * neighbour of block 8 (A). If neither applies, the generic pred_motion()
 * with part_width 4 is used.
 * NOTE(review): the condition selecting between the two candidates and the
 * statements storing *mx / *my are not visible in this listing.
 */
1071 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1073 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1074 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1076 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1084 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1085 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1087 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1089 if(left_ref == ref){
/* fallback: generic prediction */
1097 pred_motion(h, n, 4, list, ref, mx, my);
1101 * gets the directionally predicted 8x16 MV.
1102 * @param n the block index
1103 * @param mx the x component of the predicted motion vector
1104 * @param my the y component of the predicted motion vector
/*
 * Directional MV prediction for an 8x16 partition.
 * Two candidates are visible: the left neighbour of block 0 (A) and the
 * diagonal neighbour of block 4 obtained through fetch_diagonal_mv() (C).
 * If neither applies, the generic pred_motion() with part_width 2 is used.
 * NOTE(review): the condition selecting between the two candidates, the
 * declarations of C / diagonal_ref and the statements storing *mx / *my are
 * not visible in this listing.
 */
1106 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1108 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1109 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1111 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1113 if(left_ref == ref){
1122 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1124 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1126 if(diagonal_ref == ref){
/* fallback: generic prediction */
1134 pred_motion(h, n, 2, list, ref, mx, my);
/*
 * Predicts the motion vector of a P-skip macroblock (list 0, ref 0).
 * NOTE(review): the body of the special case below is not visible in this
 * listing; presumably it sets *mx and *my to 0 -- confirm.
 */
1137 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1138 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1139 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1141 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* special case: a neighbour is unavailable, or a neighbour uses ref 0 with an all-zero MV (both components tested with one 32-bit load) */
1143 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1144 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1145 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
/* otherwise: regular prediction for the whole macroblock */
1151 pred_motion(h, 0, 4, 0, 0, mx, my);
/*
 * Fills h->dist_scale_factor[] with one scale factor per list-0 reference,
 * derived from picture order counts (POC) of the current picture, the first
 * list-1 reference (poc1) and each list-0 reference (poc0).
 */
1156 static inline void direct_dist_scale_factor(H264Context * const h){
1157 const int poc = h->s.current_picture_ptr->poc;
1158 const int poc1 = h->ref_list[1][0].poc;
1160 for(i=0; i<h->ref_count[0]; i++){
1161 int poc0 = h->ref_list[0][i].poc;
/* td: POC distance between the two references, clipped to [-128,127] */
1162 int td = clip(poc1 - poc0, -128, 127);
/* zero distance: use 256, which also avoids the division by td below */
1163 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1164 h->dist_scale_factor[i] = 256;
/* tb: POC distance from the list-0 reference to the current picture */
1166 int tb = clip(poc - poc0, -128, 127);
/* tx: 16384/td with the numerator biased by |td|/2 before the division */
1167 int tx = (16384 + (ABS(td) >> 1)) / td;
1168 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/*
 * Records the current reference lists (counts and POCs) in the current
 * picture, then builds h->map_col_to_list0[][], which maps each reference of
 * the first list-1 picture (ref1) to the index of the current reference with
 * the same POC, or PART_NOT_AVAILABLE when none matches.
 */
1172 static inline void direct_ref_list_init(H264Context * const h){
1173 MpegEncContext * const s = &h->s;
1174 Picture * const ref1 = &h->ref_list[1][0];
1175 Picture * const cur = s->current_picture_ptr;
/* NOTE(review): as far as the visible lines show, these two zero
 * assignments are overwritten unconditionally by the loop that follows */
1177 if(cur->pict_type == I_TYPE)
1178 cur->ref_count[0] = 0;
1179 if(cur->pict_type != B_TYPE)
1180 cur->ref_count[1] = 0;
1181 for(list=0; list<2; list++){
1182 cur->ref_count[list] = h->ref_count[list];
1183 for(j=0; j<h->ref_count[list]; j++)
1184 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the statement guarded by this test is not visible; presumably an early
 * return when the mapping is not needed -- confirm */
1186 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1188 for(list=0; list<2; list++){
1189 for(i=0; i<ref1->ref_count[list]; i++){
1190 const int poc = ref1->ref_poc[list][i];
1191 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1192 for(j=0; j<h->ref_count[list]; j++)
1193 if(h->ref_list[list][j].poc == poc){
1194 h->map_col_to_list0[list][i] = j;
/*
 * Derives reference indices and motion vectors for a direct-mode macroblock
 * (or its direct 8x8 sub-blocks) and stores them in h->ref_cache and
 * h->mv_cache. *mb_type and h->sub_mb_type[] are updated to the partitioning
 * actually used. Data of the co-located macroblock is read from the first
 * list-1 reference picture (mb type, list-0 MVs, ref indices of both lists).
 * Two modes are handled: spatial (h->direct_spatial_mv_pred set) and temporal.
 * NOTE(review): several declarations, conditions and `continue` statements
 * are not visible in this listing.
 */
1201 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1202 MpegEncContext * const s = &h->s;
1203 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1204 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1205 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1206 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1207 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1208 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1209 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1210 const int is_b8x8 = IS_8X8(*mb_type);
/* choose the partitioning from the co-located macroblock's type */
1214 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1215 /* FIXME save sub mb types from previous frames (or derive from MVs)
1216 * so we know exactly what block size to use */
1217 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1218 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1219 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1220 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1221 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1223 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1224 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1227 *mb_type |= MB_TYPE_DIRECT2;
1229 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
1231 if(h->direct_spatial_mv_pred){
1236 /* ref = min(neighbors) */
1237 for(list=0; list<2; list++){
1238 int refa = h->ref_cache[list][scan8[0] - 1];
1239 int refb = h->ref_cache[list][scan8[0] - 8];
1240 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1242 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1244 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1246 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no usable neighbour reference in either list: ref 0 and zero MVs for both */
1252 if(ref[0] < 0 && ref[1] < 0){
1253 ref[0] = ref[1] = 0;
1254 mv[0][0] = mv[0][1] =
1255 mv[1][0] = mv[1][1] = 0;
1257 for(list=0; list<2; list++){
1259 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1261 mv[list][0] = mv[list][1] = 0;
/* drop the prediction flag of a list that ended up without a reference */
1266 *mb_type &= ~MB_TYPE_P0L1;
1267 sub_mb_type &= ~MB_TYPE_P0L1;
1268 }else if(ref[0] < 0){
1269 *mb_type &= ~MB_TYPE_P0L0;
1270 sub_mb_type &= ~MB_TYPE_P0L0;
1273 if(IS_16X16(*mb_type)){
1274 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1275 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
/* co-located block is inter, uses ref 0 and has an MV within +-1: some MVs are written as zero (the selecting conditions are not visible) */
1276 if(!IS_INTRA(mb_type_col) && l1ref0[0] == 0 &&
1277 ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1){
1279 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1281 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1283 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1285 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1287 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1288 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* 8x8 partitioning: handle each of the four sub-blocks */
1291 for(i8=0; i8<4; i8++){
1292 const int x8 = i8&1;
1293 const int y8 = i8>>1;
1295 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1297 h->sub_mb_type[i8] = sub_mb_type;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1302 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
1305 if(!IS_INTRA(mb_type_col) && l1ref0[x8 + y8*h->b8_stride] == 0){
1306 for(i4=0; i4<4; i4++){
1307 const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1308 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1310 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1312 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1318 }else{ /* direct temporal mv pred */
1319 if(IS_16X16(*mb_type)){
1320 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
/* intra co-located macroblock: ref 0 and zero MVs in both lists */
1321 if(IS_INTRA(mb_type_col)){
1322 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1323 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1324 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
/* map the co-located reference to a current list-0 index, preferring its list-0 ref and falling back to its list-1 ref */
1326 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1327 : h->map_col_to_list0[1][l1ref1[0]];
1328 const int dist_scale_factor = h->dist_scale_factor[ref0];
1329 const int16_t *mv_col = l1mv0[0];
/* list-0 MV = co-located MV scaled by dist_scale_factor/256 (rounded); list-1 MV = list-0 MV minus co-located MV */
1331 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1332 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1333 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1334 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1335 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1338 for(i8=0; i8<4; i8++){
1339 const int x8 = i8&1;
1340 const int y8 = i8>>1;
1341 int ref0, dist_scale_factor;
1343 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1345 h->sub_mb_type[i8] = sub_mb_type;
1346 if(IS_INTRA(mb_type_col)){
1347 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1348 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1349 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1350 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1354 ref0 = l1ref0[x8 + y8*h->b8_stride];
1356 ref0 = h->map_col_to_list0[0][ref0];
1358 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1359 dist_scale_factor = h->dist_scale_factor[ref0];
1361 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1362 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* same scaling as the 16x16 case, applied per 4x4 block */
1363 for(i4=0; i4<4; i4++){
1364 const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1365 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1366 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1367 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1368 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1369 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/*
 * Copies the motion data of the current macroblock from the per-macroblock
 * caches into the picture-wide tables (motion_val, ref_index and, with
 * CABAC, mvd_table / direct_table).
 * NOTE(review): the loops over `y` and several braces are not visible in
 * this listing.
 */
1376 static inline void write_back_motion(H264Context *h, int mb_type){
1377 MpegEncContext * const s = &h->s;
1378 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1379 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1382 for(list=0; list<2; list++){
/* list not used by this macroblock: clear MVs (and MV differences with CABAC) and mark the refs LIST_NOT_USED */
1384 if(!USES_LIST(mb_type, list)){
1385 if(1){ //FIXME skip or never read if mb_type doesn't use it
/* each 64-bit store covers two adjacent int16 MV pairs */
1387 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1388 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1390 if( h->pps.cabac ) {
1391 /* FIXME needed ? */
1393 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1394 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1398 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1399 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list used: copy MVs, MV differences (CABAC only) and ref indices from the caches */
1406 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1407 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1409 if( h->pps.cabac ) {
1411 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1412 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1416 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1417 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
/* B slices with CABAC: store a direct flag for 8x8 sub-blocks 1..3 */
1421 if(h->slice_type == B_TYPE && h->pps.cabac){
1422 if(IS_8X8(mb_type)){
1423 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1424 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1425 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1431 * Decodes a network abstraction layer unit.
1432 * @param consumed is the number of bytes used as input
1433 * @param length is the length of the array
1434 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1435 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the NAL header byte (nal_ref_idc, nal_unit_type) and removes
 * escape sequences from the payload.
 * A first scan looks for a 00 00 0x (x <= 3) pattern; when none is found the
 * input is used as is, otherwise the payload is unescaped into
 * h->rbsp_buffer.
 * NOTE(review): several statements (pointer advance past the header,
 * declarations, returns, the copy loop header) are not visible in this
 * listing.
 */
1437 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1441 // src[0]&0x80; //forbidden bit
1442 h->nal_ref_idc= src[0]>>5;
1443 h->nal_unit_type= src[0]&0x1F;
1447 for(i=0; i<length; i++)
1448 printf("%2X ", src[i]);
/* scan two bytes at a time: only a zero byte can start or continue an escape/start code */
1450 for(i=0; i+1<length; i+=2){
1451 if(src[i]) continue;
1452 if(i>0 && src[i-1]==0) i--;
1453 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1455 /* startcode, so we must be past the end */
1462 if(i>=length-1){ //no escaped 0
1463 *dst_length= length;
1464 *consumed= length+1; //+1 for the header
/* NOTE(review): no NULL check on the av_fast_realloc() result is visible before it is used as dst -- confirm */
1468 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1469 dst= h->rbsp_buffer;
1471 //printf("decoding esc\n");
1474 //remove escapes (very rare 1:2^22)
1475 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1476 if(src[si+2]==3){ //escape
1481 }else //next start code
1485 dst[di++]= src[si++];
1489 *consumed= si + 1;//+1 for the header
1490 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1496 * @param src the data which should be escaped
1497 * @param dst the target buffer, dst+1 == src is allowed as a special case
1498 * @param length the length of the src data
1499 * @param dst_length the length of the dst array
1500 * @returns length of escaped data in bytes or -1 if an error occurred
/*
 * Writes the NAL header byte to dst[0] followed by the payload from src,
 * with escapes inserted wherever the payload contains 00 00 0x (x <= 3).
 * When no escape is needed the payload is copied unchanged; otherwise the
 * escaped payload is built in h->rbsp_buffer and then copied to dst+1.
 * NOTE(review): the escape counting statement, the inserted escape byte,
 * the return statements and the copy loop header are not visible in this
 * listing.
 */
1502 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1503 int i, escape_count, si, di;
1507 assert(dst_length>0);
1509 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
/* empty payload: only the header byte is produced */
1511 if(length==0) return 1;
/* first pass: look for byte patterns that need escaping */
1514 for(i=0; i<length; i+=2){
1515 if(src[i]) continue;
1516 if(i>0 && src[i-1]==0)
1518 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1524 if(escape_count==0){
1526 memcpy(dst+1, src, length);
/* header byte + payload + escapes must fit into dst */
1530 if(length + escape_count + 1> dst_length)
1533 //this should be damn rare (hopefully)
/* NOTE(review): no NULL check on the av_fast_realloc() result is visible before it is used as temp -- confirm */
1535 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1536 temp= h->rbsp_buffer;
1537 //printf("encoding esc\n");
1542 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1543 temp[di++]= 0; si++;
1544 temp[di++]= 0; si++;
1546 temp[di++]= src[si++];
1549 temp[di++]= src[si++];
1551 memcpy(dst+1, temp, length+escape_count);
1553 assert(di == length+escape_count);
1559 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of MPEG-4
/*
 * Pads the bitstream in pb with zero bits up to the next byte boundary.
 * NOTE(review): the statement writing the leading 1 bit and the declaration
 * of `length` are not visible in this listing.
 */
1561 static void encode_rbsp_trailing(PutBitContext *pb){
/* number of bits missing to the next multiple of 8 */
1564 length= (-put_bits_count(pb))&7;
1565 if(length) put_bits(pb, length, 0);
1570 * identifies the exact end of the bitstream
1571 * @return the length of the trailing, or 0 if damaged
/*
 * NOTE(review): only the signature and the trace call are visible in this
 * listing; how `v` is read from src and how the result is computed cannot be
 * told from here.
 */
1573 static int decode_rbsp_trailing(uint8_t *src){
1577 tprintf("rbsp trailing %X\n", v);
1587 * IDCT-transforms the 16 DC values and dequantizes them.
1588 * @param qp quantization parameter
/*
 * In-place two-pass butterfly transform of the 16 luma DC coefficients
 * stored in `block`, followed by scaling with qmul = dequant_coeff[qp][0]
 * and a rounding shift right by 2.
 * The first pass reads `block` (positions from y_offset[]), the second pass
 * reads the intermediate `temp[]` and writes back to `block` (positions from
 * x_offset[]).
 * NOTE(review): the definition of `stride`, the loop headers and the
 * statements filling temp[] are not visible in this listing.
 */
1590 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1591 const int qmul= dequant_coeff[qp][0];
1594 int temp[16]; //FIXME check if this is a good idea
1595 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1596 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1598 //memset(block, 64, 2*256);
/* first pass: sums and differences of coefficient pairs from block */
1601 const int offset= y_offset[i];
1602 const int z0= block[offset+stride*0] + block[offset+stride*4];
1603 const int z1= block[offset+stride*0] - block[offset+stride*4];
1604 const int z2= block[offset+stride*1] - block[offset+stride*5];
1605 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass over temp[], then dequantize: (value*qmul + 2) >> 2 */
1614 const int offset= x_offset[i];
1615 const int z0= temp[4*0+i] + temp[4*2+i];
1616 const int z1= temp[4*0+i] - temp[4*2+i];
1617 const int z2= temp[4*1+i] - temp[4*3+i];
1618 const int z3= temp[4*1+i] + temp[4*3+i];
1620 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1621 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1622 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1623 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1629 * DCT-transforms the 16 DC values.
1630 * @param qp quantization parameter ??? FIXME
/*
 * In-place two-pass butterfly transform of the 16 luma DC values stored in
 * `block`; the second-pass results are halved (>>1) and no quantization is
 * applied (the qp parameter is commented out).
 * Uses the same y_offset[]/x_offset[] addressing as
 * h264_luma_dc_dequant_idct_c().
 * NOTE(review): the definition of `stride`, the loop headers and the
 * statements filling temp[] are not visible in this listing.
 */
1632 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1633 // const int qmul= dequant_coeff[qp][0];
1635 int temp[16]; //FIXME check if this is a good idea
1636 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1637 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: sums and differences of coefficient pairs from block */
1640 const int offset= y_offset[i];
1641 const int z0= block[offset+stride*0] + block[offset+stride*4];
1642 const int z1= block[offset+stride*0] - block[offset+stride*4];
1643 const int z2= block[offset+stride*1] - block[offset+stride*5];
1644 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass over temp[]; results are halved when stored */
1653 const int offset= x_offset[i];
1654 const int z0= temp[4*0+i] + temp[4*2+i];
1655 const int z1= temp[4*0+i] - temp[4*2+i];
1656 const int z2= temp[4*1+i] - temp[4*3+i];
1657 const int z3= temp[4*1+i] + temp[4*3+i];
1659 block[stride*0 +offset]= (z0 + z3)>>1;
1660 block[stride*2 +offset]= (z1 + z2)>>1;
1661 block[stride*8 +offset]= (z1 - z2)>>1;
1662 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * In-place 2x2 transform of the four chroma DC coefficients in `block`
 * (positions 0, 16, 32 and 48), scaled by qmul = dequant_coeff[qp][0] and
 * shifted right by 1.
 * NOTE(review): the declarations of a..e and the statements that combine
 * them between the loads and the stores are not visible in this listing.
 */
1670 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1671 const int qmul= dequant_coeff[qp][0];
1672 const int stride= 16*2;
1673 const int xStride= 16;
/* load the 2x2 DC values */
1676 a= block[stride*0 + xStride*0];
1677 b= block[stride*0 + xStride*1];
1678 c= block[stride*1 + xStride*0];
1679 d= block[stride*1 + xStride*1];
/* store the transformed, dequantized values */
1686 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1687 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1688 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1689 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
/*
 * In-place 2x2 transform of the four chroma DC values in `block`
 * (positions 0, 16, 32 and 48), without any scaling.
 * NOTE(review): the declarations of a..e and the statements that combine
 * them between the loads and the stores are not visible in this listing.
 */
1693 static void chroma_dc_dct_c(DCTELEM *block){
1694 const int stride= 16*2;
1695 const int xStride= 16;
/* load the 2x2 DC values */
1698 a= block[stride*0 + xStride*0];
1699 b= block[stride*0 + xStride*1];
1700 c= block[stride*1 + xStride*0];
1701 d= block[stride*1 + xStride*1];
/* store the transformed values */
1708 block[stride*0 + xStride*0]= (a+c);
1709 block[stride*0 + xStride*1]= (e+b);
1710 block[stride*1 + xStride*0]= (a-c);
1711 block[stride*1 + xStride*1]= (e-b);
1716 * gets the chroma qp.
/*
 * Looks up the chroma quantizer in the chroma_qp[] table.
 * @param chroma_qp_index_offset offset added to qscale before the lookup
 * @param qscale                 luma quantizer
 * @return chroma_qp[] entry for qscale + chroma_qp_index_offset, with the
 *         index clipped to 0..51
 */
1718 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1720 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/*
 * Computes the 4x4 difference src1 - src2 and applies a two-pass integer
 * transform to it, leaving the 16 coefficients in `block`.
 * @param block  receives the coefficients (4 per row, 16 in total)
 * @param src1   first 4x4 pixel block
 * @param src2   second 4x4 pixel block, subtracted from src1
 * @param stride distance in bytes between rows of src1 and src2
 * NOTE(review): the two loop headers over `i` are not visible in this
 * listing.
 */
1725 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1727 //FIXME try int temp instead of block
/* first pass: pixel differences of row i, then a 4-point butterfly across the row */
1730 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1731 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1732 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1733 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1734 const int z0= d0 + d3;
1735 const int z3= d0 - d3;
1736 const int z1= d1 + d2;
1737 const int z2= d1 - d2;
1739 block[0 + 4*i]= z0 + z1;
1740 block[1 + 4*i]= 2*z3 + z2;
1741 block[2 + 4*i]= z0 - z1;
1742 block[3 + 4*i]= z3 - 2*z2;
/* second pass: the same butterfly down column i of block */
1746 const int z0= block[0*4 + i] + block[3*4 + i];
1747 const int z3= block[0*4 + i] - block[3*4 + i];
1748 const int z1= block[1*4 + i] + block[2*4 + i];
1749 const int z2= block[1*4 + i] - block[2*4 + i];
1751 block[0*4 + i]= z0 + z1;
1752 block[1*4 + i]= 2*z3 + z2;
1753 block[2*4 + i]= z0 - z1;
1754 block[3*4 + i]= z3 - 2*z2;
1759 //FIXME need to check that this doesn't overflow signed 32 bit for low qp, I am not sure, it's very close
1760 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/*
 * Quantizes the coefficients of `block` with quant_coeff[qscale], visiting
 * them in `scantable` order, and returns last_non_zero.
 * The rounding bias is 1/3 of the quantization step for intra blocks and 1/6
 * otherwise. A coefficient is tested with a single unsigned comparison:
 * (unsigned)(level+threshold1) > threshold2 holds exactly when level lies
 * outside [-threshold1, threshold1].
 * Two DC variants are visible, one shifting by QUANT_SHIFT-2 and one by
 * QUANT_SHIFT+1.
 * NOTE(review): the conditions selecting the DC variants (presumably
 * seperate_dc and qscale), the loop header, the zeroing of small
 * coefficients and the updates of last_non_zero are not visible in this
 * listing.
 */
1761 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1763 const int * const quant_table= quant_coeff[qscale];
1764 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1765 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1766 const unsigned int threshold2= (threshold1<<1);
/* DC variant using the table entry 18 steps above qscale and a QUANT_SHIFT-2 shift */
1772 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1773 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1774 const unsigned int dc_threshold2= (dc_threshold1<<1);
1776 int level= block[0]*quant_coeff[qscale+18][0];
1777 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1779 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1782 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1785 // last_non_zero = i;
/* DC variant using the regular table and a QUANT_SHIFT+1 shift */
1790 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1791 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1792 const unsigned int dc_threshold2= (dc_threshold1<<1);
1794 int level= block[0]*quant_table[0];
1795 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1797 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1800 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1803 // last_non_zero = i;
/* remaining coefficients, in scan order */
1816 const int j= scantable[i];
1817 int level= block[j]*quant_table[j];
1819 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1820 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1821 if(((unsigned)(level+threshold1))>threshold2){
1823 level= (bias + level)>>QUANT_SHIFT;
1826 level= (bias - level)>>QUANT_SHIFT;
1835 return last_non_zero;
/*
 * 4x4 vertical intra prediction: copies the 4 pixels of the row above src
 * into each of the 4 rows of the block, one 32-bit store per row.
 */
1838 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
/* the four pixels directly above the block, read as one 32-bit word */
1839 const uint32_t a= ((uint32_t*)(src-stride))[0];
1840 ((uint32_t*)(src+0*stride))[0]= a;
1841 ((uint32_t*)(src+1*stride))[0]= a;
1842 ((uint32_t*)(src+2*stride))[0]= a;
1843 ((uint32_t*)(src+3*stride))[0]= a;
/*
 * 4x4 horizontal intra prediction: fills each row with the pixel to its
 * left. Multiplying the byte by 0x01010101 replicates it into all four bytes
 * of the 32-bit store.
 */
1846 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
1847 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
1848 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
1849 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
1850 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
/*
 * 4x4 DC intra prediction: fills the block with the rounded mean of the
 * 4 pixels above and the 4 pixels to the left ((sum + 4) >> 3).
 */
1853 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
1854 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
1855 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
/* replicate dc into all 4 bytes and store it to every row */
1857 ((uint32_t*)(src+0*stride))[0]=
1858 ((uint32_t*)(src+1*stride))[0]=
1859 ((uint32_t*)(src+2*stride))[0]=
1860 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/*
 * 4x4 DC intra prediction from the left edge only: fills the block with the
 * rounded mean of the 4 pixels to the left ((sum + 2) >> 2).
 */
1863 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
1864 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
1866 ((uint32_t*)(src+0*stride))[0]=
1867 ((uint32_t*)(src+1*stride))[0]=
1868 ((uint32_t*)(src+2*stride))[0]=
1869 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/*
 * 4x4 DC intra prediction from the top edge only: fills the block with the
 * rounded mean of the 4 pixels above ((sum + 2) >> 2).
 */
1872 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
1873 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
1875 ((uint32_t*)(src+0*stride))[0]=
1876 ((uint32_t*)(src+1*stride))[0]=
1877 ((uint32_t*)(src+2*stride))[0]=
1878 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
/*
 * 4x4 DC intra prediction without neighbours: fills the block with the
 * constant value 128.
 */
1881 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
1882 ((uint32_t*)(src+0*stride))[0]=
1883 ((uint32_t*)(src+1*stride))[0]=
1884 ((uint32_t*)(src+2*stride))[0]=
1885 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
/*
 * Edge-loading helpers for the 4x4 predictors. Each macro declares four
 * const ints in the enclosing scope and expects `src`, `stride` and (for the
 * top-right edge) `topright` to be in scope:
 *   LOAD_TOP_RIGHT_EDGE  t4..t7 = topright[0..3]
 *   LOAD_LEFT_EDGE       l0..l3 = the pixel left of rows 0..3
 *   LOAD_TOP_EDGE        t0..t3 = the pixels above columns 0..3
 * Every body line ends in a line continuation, including the last one.
 */
1889 #define LOAD_TOP_RIGHT_EDGE\
1890 const int t4= topright[0];\
1891 const int t5= topright[1];\
1892 const int t6= topright[2];\
1893 const int t7= topright[3];\
1895 #define LOAD_LEFT_EDGE\
1896 const int l0= src[-1+0*stride];\
1897 const int l1= src[-1+1*stride];\
1898 const int l2= src[-1+2*stride];\
1899 const int l3= src[-1+3*stride];\
1901 #define LOAD_TOP_EDGE\
1902 const int t0= src[ 0-1*stride];\
1903 const int t1= src[ 1-1*stride];\
1904 const int t2= src[ 2-1*stride];\
1905 const int t3= src[ 3-1*stride];\
/*
 * 4x4 diagonal down-right intra prediction: each visible store writes a
 * 3-tap (1,2,1)/4 filtered value of the left (l*), top-left (lt) and top
 * (t*) neighbours.
 * NOTE(review): nothing visible here defines l0..l3 / t0..t3 (presumably
 * LOAD_LEFT_EDGE / LOAD_TOP_EDGE), and gaps in the listing suggest further
 * assignment targets are missing -- confirm against the full source.
 */
1907 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
/* top-left corner neighbour */
1908 const int lt= src[-1-1*stride];
1912 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1914 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1917 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1921 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1924 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1926 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1927 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/*
 * 4x4 diagonal down-left intra prediction: each visible store writes a
 * 3-tap (1,2,1)/4 filtered value of the top (t0..t3) and top-right (t4..t7)
 * neighbours; the last pixel uses (t6 + 3*t7 + 2) >> 2.
 * NOTE(review): nothing visible here defines t0..t7 (presumably
 * LOAD_TOP_EDGE / LOAD_TOP_RIGHT_EDGE), and gaps in the listing suggest
 * further assignment targets are missing -- confirm against the full source.
 */
1930 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1935 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1937 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1940 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1944 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1947 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1949 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1950 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
/*
 * 4x4 vertical-right intra prediction: visible stores write either a 2-tap
 * average ((a + b + 1) >> 1) or a 3-tap (1,2,1)/4 filtered value of the
 * left, top-left and top neighbours.
 * NOTE(review): nothing visible here defines l0..l3 / t0..t3 (presumably
 * LOAD_LEFT_EDGE / LOAD_TOP_EDGE), and gaps in the listing suggest further
 * assignment targets are missing -- confirm against the full source.
 */
1953 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1954 const int lt= src[-1-1*stride];
/* l3 is not used by any visible store; this dummy reference carries the unused attribute */
1957 const __attribute__((unused)) int unu= l3;
1960 src[1+2*stride]=(lt + t0 + 1)>>1;
1962 src[2+2*stride]=(t0 + t1 + 1)>>1;
1964 src[3+2*stride]=(t1 + t2 + 1)>>1;
1965 src[3+0*stride]=(t2 + t3 + 1)>>1;
1967 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1969 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1971 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1972 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1973 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1974 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/*
 * 4x4 vertical-left intra prediction: visible stores write either a 2-tap
 * average ((a + b + 1) >> 1) or a 3-tap (1,2,1)/4 filtered value of the top
 * (t0..t3) and top-right (t4..t6) neighbours.
 * NOTE(review): nothing visible here defines t0..t7 (presumably
 * LOAD_TOP_EDGE / LOAD_TOP_RIGHT_EDGE), and gaps in the listing suggest
 * further assignment targets are missing -- confirm against the full source.
 */
1977 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
/* t7 is not used by any visible store; this dummy reference carries the unused attribute */
1980 const __attribute__((unused)) int unu= t7;
1982 src[0+0*stride]=(t0 + t1 + 1)>>1;
1984 src[0+2*stride]=(t1 + t2 + 1)>>1;
1986 src[1+2*stride]=(t2 + t3 + 1)>>1;
1988 src[2+2*stride]=(t3 + t4+ 1)>>1;
1989 src[3+2*stride]=(t4 + t5+ 1)>>1;
1990 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1992 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1994 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1996 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1997 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/*
 * 4x4 horizontal-up intra prediction: visible stores write 2-tap averages
 * and 3-tap (1,2,1)/4 filtered values of the left neighbours l0..l3; at the
 * bottom of the edge l3 is used twice.
 * NOTE(review): nothing visible here defines l0..l3 (presumably
 * LOAD_LEFT_EDGE), and gaps in the listing suggest further assignment
 * targets are missing -- confirm against the full source.
 */
2000 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2003 src[0+0*stride]=(l0 + l1 + 1)>>1;
2004 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2006 src[0+1*stride]=(l1 + l2 + 1)>>1;
2008 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2010 src[0+2*stride]=(l2 + l3 + 1)>>1;
2012 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/*
 * 4x4 horizontal-down intra prediction: visible stores write either a 2-tap
 * average ((a + b + 1) >> 1) or a 3-tap (1,2,1)/4 filtered value of the
 * left, top-left and top neighbours.
 * NOTE(review): nothing visible here defines l0..l3 / t0..t3 (presumably
 * LOAD_LEFT_EDGE / LOAD_TOP_EDGE), and gaps in the listing suggest further
 * assignment targets are missing -- confirm against the full source.
 */
2021 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2022 const int lt= src[-1-1*stride];
/* t3 is not used by any visible store; this dummy reference carries the unused attribute */
2025 const __attribute__((unused)) int unu= t3;
2028 src[2+1*stride]=(lt + l0 + 1)>>1;
2030 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2031 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2032 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2034 src[2+2*stride]=(l0 + l1 + 1)>>1;
2036 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2038 src[2+3*stride]=(l1 + l2+ 1)>>1;
2040 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2041 src[0+3*stride]=(l2 + l3 + 1)>>1;
2042 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
/*
 * 16x16 vertical intra prediction: copies the 16 pixels of the row above
 * src (read as four 32-bit words) into each of the 16 rows of the block.
 */
2045 static void pred16x16_vertical_c(uint8_t *src, int stride){
2047 const uint32_t a= ((uint32_t*)(src-stride))[0];
2048 const uint32_t b= ((uint32_t*)(src-stride))[1];
2049 const uint32_t c= ((uint32_t*)(src-stride))[2];
2050 const uint32_t d= ((uint32_t*)(src-stride))[3];
2052 for(i=0; i<16; i++){
2053 ((uint32_t*)(src+i*stride))[0]= a;
2054 ((uint32_t*)(src+i*stride))[1]= b;
2055 ((uint32_t*)(src+i*stride))[2]= c;
2056 ((uint32_t*)(src+i*stride))[3]= d;
/*
 * 16x16 horizontal intra prediction: fills each of the 16 rows with the
 * pixel to its left, replicated into four 32-bit stores.
 */
2060 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2063 for(i=0; i<16; i++){
2064 ((uint32_t*)(src+i*stride))[0]=
2065 ((uint32_t*)(src+i*stride))[1]=
2066 ((uint32_t*)(src+i*stride))[2]=
2067 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
/*
 * 16x16 DC intra prediction: fills the block with (dc + 16) >> 5, where dc
 * accumulates neighbouring pixels.
 * NOTE(review): only the left-column accumulation is visible in this
 * listing; the divisor of 32 suggests the top row is added on lines that are
 * not shown -- confirm.
 */
2071 static void pred16x16_dc_c(uint8_t *src, int stride){
2075 dc+= src[-1+i*stride];
/* rounded mean, replicated into all four bytes of a 32-bit word */
2082 dc= 0x01010101*((dc + 16)>>5);
2084 for(i=0; i<16; i++){
2085 ((uint32_t*)(src+i*stride))[0]=
2086 ((uint32_t*)(src+i*stride))[1]=
2087 ((uint32_t*)(src+i*stride))[2]=
2088 ((uint32_t*)(src+i*stride))[3]= dc;
/*
 * 16x16 DC intra prediction from the left edge only: fills the block with
 * (dc + 8) >> 4, where dc accumulates the pixels left of the block.
 * NOTE(review): the accumulation loop header and the declarations are not
 * visible in this listing.
 */
2092 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2096 dc+= src[-1+i*stride];
2099 dc= 0x01010101*((dc + 8)>>4);
2101 for(i=0; i<16; i++){
2102 ((uint32_t*)(src+i*stride))[0]=
2103 ((uint32_t*)(src+i*stride))[1]=
2104 ((uint32_t*)(src+i*stride))[2]=
2105 ((uint32_t*)(src+i*stride))[3]= dc;
/*
 * 16x16 DC intra prediction: fills the block with (dc + 8) >> 4.
 * NOTE(review): the accumulation of dc is not visible in this listing;
 * judging by the function name it sums the 16 pixels above the block --
 * confirm.
 */
2109 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2115 dc= 0x01010101*((dc + 8)>>4);
2117 for(i=0; i<16; i++){
2118 ((uint32_t*)(src+i*stride))[0]=
2119 ((uint32_t*)(src+i*stride))[1]=
2120 ((uint32_t*)(src+i*stride))[2]=
2121 ((uint32_t*)(src+i*stride))[3]= dc;
/*
 * 16x16 DC intra prediction without neighbours: fills all 16 rows with the
 * constant value 128.
 */
2125 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2128 for(i=0; i<16; i++){
2129 ((uint32_t*)(src+i*stride))[0]=
2130 ((uint32_t*)(src+i*stride))[1]=
2131 ((uint32_t*)(src+i*stride))[2]=
2132 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
/*
 * 16x16 plane intra prediction. Weighted gradients H (along the row above)
 * and V (along the column to the left) are computed, scaled, and used to
 * fill the block with a linear ramp clipped through the cm[] table.
 * NOTE(review): two different scalings of H and V are visible; presumably
 * the `svq3` argument selects between them, but the condition is not visible
 * in this listing. The per-row updates of `a`, `b` and `src` are not visible
 * either.
 */
2136 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2139 uint8_t *cm = cropTbl + MAX_NEG_CROP;
/* src0: centre of the row above; src1/src2: the left column, one row below / one row above its centre */
2140 const uint8_t * const src0 = src+7-stride;
2141 const uint8_t *src1 = src+8*stride-1;
2142 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2143 int H = src0[1] - src0[-1];
2144 int V = src1[0] - src2[ 0];
/* accumulate differences of pixel pairs at distance k from the centre, weighted by k */
2145 for(k=2; k<=8; ++k) {
2146 src1 += stride; src2 -= stride;
2147 H += k*(src0[k] - src0[-k]);
2148 V += k*(src1[0] - src2[ 0]);
2151 H = ( 5*(H/4) ) / 16;
2152 V = ( 5*(V/4) ) / 16;
2154 /* required for 100% accuracy */
2155 i = H; H = V; V = i;
2157 H = ( 5*H+32 ) >> 6;
2158 V = ( 5*V+32 ) >> 6;
/* base value of the ramp; after the loop above src1 and src2 have been moved 7 rows apart from their start */
2161 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2162 for(j=16; j>0; --j) {
/* four pixels per iteration, each clipped through cm[] */
2165 for(i=-16; i<0; i+=4) {
2166 src[16+i] = cm[ (b ) >> 5 ];
2167 src[17+i] = cm[ (b+ H) >> 5 ];
2168 src[18+i] = cm[ (b+2*H) >> 5 ];
2169 src[19+i] = cm[ (b+3*H) >> 5 ];
/* 16x16 plane intra prediction: pred16x16_plane_compat_c() with svq3 = 0. */
2176 static void pred16x16_plane_c(uint8_t *src, int stride){
2177 pred16x16_plane_compat_c(src, stride, 0);
/*
 * 8x8 vertical intra prediction: copies the 8 pixels of the row above src
 * (read as two 32-bit words) into the rows of the block.
 * NOTE(review): the loop header over `i` is not visible in this listing.
 */
2180 static void pred8x8_vertical_c(uint8_t *src, int stride){
2182 const uint32_t a= ((uint32_t*)(src-stride))[0];
2183 const uint32_t b= ((uint32_t*)(src-stride))[1];
2186 ((uint32_t*)(src+i*stride))[0]= a;
2187 ((uint32_t*)(src+i*stride))[1]= b;
/*
 * 8x8 horizontal intra prediction: fills row i with the pixel to its left,
 * replicated into two 32-bit stores.
 * NOTE(review): the loop header over `i` is not visible in this listing.
 */
2191 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2195 ((uint32_t*)(src+i*stride))[0]=
2196 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
/*
 * 8x8 DC intra prediction without neighbours: fills row i with the constant
 * value 128.
 * NOTE(review): the loop header over `i` is not visible in this listing.
 */
2200 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2204 ((uint32_t*)(src+i*stride))[0]=
2205 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
/*
 * 8x8 DC intra prediction from the left edge only. Two means are computed:
 * dc0 from the left pixels of rows i, dc2 from the left pixels of rows i+4
 * (each (sum + 2) >> 2). One group of rows is filled with dc0, another with
 * dc2.
 * NOTE(review): the loop headers are not visible in this listing;
 * presumably dc0 fills rows 0..3 and dc2 rows 4..7 -- confirm.
 */
2209 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2215 dc0+= src[-1+i*stride];
2216 dc2+= src[-1+(i+4)*stride];
2218 dc0= 0x01010101*((dc0 + 2)>>2);
2219 dc2= 0x01010101*((dc2 + 2)>>2);
2222 ((uint32_t*)(src+i*stride))[0]=
2223 ((uint32_t*)(src+i*stride))[1]= dc0;
2226 ((uint32_t*)(src+i*stride))[0]=
2227 ((uint32_t*)(src+i*stride))[1]= dc2;
/*
 * 8x8 DC intra prediction from the top edge only. dc0 is the mean of the
 * pixels above columns i, dc1 the mean of the pixels above columns 4+i (each
 * (sum + 2) >> 2). The left half of every visible row store gets dc0, the
 * right half dc1.
 * NOTE(review): the loop headers are not visible in this listing; both
 * visible store groups write the same dc0/dc1 pair.
 */
2231 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2237 dc0+= src[i-stride];
2238 dc1+= src[4+i-stride];
2240 dc0= 0x01010101*((dc0 + 2)>>2);
2241 dc1= 0x01010101*((dc1 + 2)>>2);
2244 ((uint32_t*)(src+i*stride))[0]= dc0;
2245 ((uint32_t*)(src+i*stride))[1]= dc1;
2248 ((uint32_t*)(src+i*stride))[0]= dc0;
2249 ((uint32_t*)(src+i*stride))[1]= dc1;
/*
 * 8x8 DC intra prediction with a separate DC value per 4x4 quadrant:
 *   dc0 = mean of the left pixels of rows i and the top pixels of columns i
 *   dc1 = mean of the top pixels of columns 4+i
 *   dc2 = mean of the left pixels of rows i+4
 *   dc3 = mean of the dc1 and dc2 sums together
 * One group of rows is stored as dc0|dc1, another as dc2|dc3.
 * NOTE(review): the loop headers and the initialisation of the sums are not
 * visible in this listing; presumably rows 0..3 get dc0|dc1 and rows 4..7
 * get dc2|dc3 -- confirm.
 */
2254 static void pred8x8_dc_c(uint8_t *src, int stride){
2256 int dc0, dc1, dc2, dc3;
2260 dc0+= src[-1+i*stride] + src[i-stride];
2261 dc1+= src[4+i-stride];
2262 dc2+= src[-1+(i+4)*stride];
/* dc3 must be computed first: it needs the raw dc1/dc2 sums before they are overwritten */
2264 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2265 dc0= 0x01010101*((dc0 + 4)>>3);
2266 dc1= 0x01010101*((dc1 + 2)>>2);
2267 dc2= 0x01010101*((dc2 + 2)>>2);
2270 ((uint32_t*)(src+i*stride))[0]= dc0;
2271 ((uint32_t*)(src+i*stride))[1]= dc1;
2274 ((uint32_t*)(src+i*stride))[0]= dc2;
2275 ((uint32_t*)(src+i*stride))[1]= dc3;
2279 static void pred8x8_plane_c(uint8_t *src, int stride){
2282 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2283 const uint8_t * const src0 = src+3-stride;
2284 const uint8_t *src1 = src+4*stride-1;
2285 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2286 int H = src0[1] - src0[-1];
2287 int V = src1[0] - src2[ 0];
2288 for(k=2; k<=4; ++k) {
2289 src1 += stride; src2 -= stride;
2290 H += k*(src0[k] - src0[-k]);
2291 V += k*(src1[0] - src2[ 0]);
2293 H = ( 17*H+16 ) >> 5;
2294 V = ( 17*V+16 ) >> 5;
2296 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2297 for(j=8; j>0; --j) {
2300 src[0] = cm[ (b ) >> 5 ];
2301 src[1] = cm[ (b+ H) >> 5 ];
2302 src[2] = cm[ (b+2*H) >> 5 ];
2303 src[3] = cm[ (b+3*H) >> 5 ];
2304 src[4] = cm[ (b+4*H) >> 5 ];
2305 src[5] = cm[ (b+5*H) >> 5 ];
2306 src[6] = cm[ (b+6*H) >> 5 ];
2307 src[7] = cm[ (b+7*H) >> 5 ];
// Helper macros for the pred8x8l_* functions. They all expect `src`, `stride`,
// `has_topleft` and `has_topright` to be in scope at the point of use.
// SRC(x,y): pixel at column x, row y relative to src; negative indices reach
// the neighbouring row above / column to the left.
2312 #define SRC(x,y) src[(x)+(y)*stride]
2314 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
// PREDICT_8x8_LOAD_LEFT: declares l0..l7, the left neighbour column smoothed
// with a rounded (1,2,1)/4 filter. l0 uses the top-left pixel when has_topleft
// is set and otherwise repeats SRC(-1,0); l7 has no pixel below it and uses
// (1,3)/4 weights instead.
2315 #define PREDICT_8x8_LOAD_LEFT \
2316 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2317 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2318 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2319 const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2322 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// PREDICT_8x8_LOAD_TOP: declares t0..t7, the row above the block smoothed with
// the same (1,2,1)/4 filter. t0 falls back to SRC(0,-1) when has_topleft is
// clear; t7 falls back to SRC(7,-1) when has_topright is clear.
2323 #define PREDICT_8x8_LOAD_TOP \
2324 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2325 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2326 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2327 const int t7 = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2328 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2331 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
// PREDICT_8x8_LOAD_TOPRIGHT: declares t8..t15. With has_topright they are the
// filtered pixels SRC(8..15,-1) (t15 with (1,3)/4 weights); without it they are
// all set to the unfiltered pixel SRC(7,-1).
2332 #define PREDICT_8x8_LOAD_TOPRIGHT \
2333 int t8, t9, t10, t11, t12, t13, t14, t15; \
2334 if(has_topright) { \
2335 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2336 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2337 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
// PREDICT_8x8_LOAD_TOPLEFT: declares lt, the top-left corner pixel filtered
// with its lower and right neighbours. It reads SRC(-1,-1) unconditionally.
2339 #define PREDICT_8x8_LOAD_TOPLEFT \
2340 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
// PREDICT_8x8_DC(v): loops over 8 rows, storing the 32-bit value v into both
// 4-byte halves of the row at src.
2342 #define PREDICT_8x8_DC(v) \
2344 for( y = 0; y < 8; y++ ) { \
2345 ((uint32_t*)src)[0] = \
2346 ((uint32_t*)src)[1] = v; \
/**
 * 8x8 luma DC prediction without usable neighbours: passes 0x80808080
 * (four bytes of 128) to PREDICT_8x8_DC, which stores it across the block.
 * has_topleft and has_topright are not referenced in the visible body.
 */
2350 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2352 PREDICT_8x8_DC(0x80808080);
2354 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2356 PREDICT_8x8_LOAD_LEFT;
2357 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2360 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2362 PREDICT_8x8_LOAD_TOP;
2363 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2366 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2368 PREDICT_8x8_LOAD_LEFT;
2369 PREDICT_8x8_LOAD_TOP;
2370 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2371 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2374 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2376 PREDICT_8x8_LOAD_LEFT;
2377 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2378 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2379 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2382 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2385 PREDICT_8x8_LOAD_TOP;
2394 for( y = 1; y < 8; y++ )
2395 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
/**
 * 8x8 luma diagonal down-left prediction.
 * Every pixel on an anti-diagonal (x+y constant) gets the same value: a
 * rounded (1,2,1)/4 blend of three consecutive filtered top / top-right
 * neighbours t[x+y], t[x+y+1], t[x+y+2]. The last pixel SRC(7,7) uses
 * (1,3)/4 weights on t14, t15.
 */
2397 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2399 PREDICT_8x8_LOAD_TOP;
2400 PREDICT_8x8_LOAD_TOPRIGHT;
2401 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2402 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2403 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2404 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2405 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2406 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2407 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2408 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2409 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2410 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2411 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2412 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2413 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2414 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2415 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
/**
 * 8x8 luma diagonal down-right prediction.
 * Every pixel on a diagonal (x-y constant) gets the same value: a rounded
 * (1,2,1)/4 blend of three consecutive entries of the neighbour sequence
 * l7 .. l0, lt, t0 .. t7. The main diagonal is centred on lt, diagonals
 * below it use the left neighbours and diagonals above it the top ones.
 */
2417 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2419 PREDICT_8x8_LOAD_TOP;
2420 PREDICT_8x8_LOAD_LEFT;
2421 PREDICT_8x8_LOAD_TOPLEFT;
2422 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2423 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2424 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2425 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2426 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2427 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2428 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2429 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2430 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2431 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2432 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2433 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2434 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2435 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2436 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
/**
 * 8x8 luma vertical-right prediction.
 * Pixels sharing the same value lie on lines that step one column right for
 * every two rows down. Lines that start on an even row of the top edge use
 * a rounded 2-tap average of adjacent top neighbours, the others a rounded
 * (1,2,1)/4 blend; lines entering from the left edge blend the filtered
 * left neighbours (and lt) instead.
 */
2439 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2441 PREDICT_8x8_LOAD_TOP;
2442 PREDICT_8x8_LOAD_LEFT;
2443 PREDICT_8x8_LOAD_TOPLEFT;
2444 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2445 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2446 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2447 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2448 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2449 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2450 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2451 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2452 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2453 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2454 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2455 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2456 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2457 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2458 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2459 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2460 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2461 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2462 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2463 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2464 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2465 SRC(7,0)= (t6 + t7 + 1) >> 1;
/**
 * 8x8 luma horizontal-down prediction.
 * Pixels sharing the same value lie on lines that step two columns right
 * for every row down. Lines starting in an even column of the left edge use
 * a rounded 2-tap average of adjacent left neighbours, the others a rounded
 * (1,2,1)/4 blend; lines entering from the top edge blend the filtered top
 * neighbours (and lt) instead.
 */
2467 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2469 PREDICT_8x8_LOAD_TOP;
2470 PREDICT_8x8_LOAD_LEFT;
2471 PREDICT_8x8_LOAD_TOPLEFT;
2472 SRC(0,7)= (l6 + l7 + 1) >> 1;
2473 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2474 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2475 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2476 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2477 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2478 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2479 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2480 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2481 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2482 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2483 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2484 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2485 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2486 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2487 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2488 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2489 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2490 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2491 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2492 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2493 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
/**
 * 8x8 luma vertical-left prediction.
 * Uses only the filtered top and top-right neighbours. Pixels sharing the
 * same value lie on lines that step one column left for every two rows
 * down; even rows use a rounded 2-tap average of adjacent neighbours, odd
 * rows a rounded (1,2,1)/4 blend. The highest neighbour referenced is t12.
 */
2495 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2497 PREDICT_8x8_LOAD_TOP;
2498 PREDICT_8x8_LOAD_TOPRIGHT;
2499 SRC(0,0)= (t0 + t1 + 1) >> 1;
2500 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2501 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2502 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2503 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2504 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2505 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2506 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2507 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2508 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2509 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2510 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2511 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2512 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2513 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2514 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2515 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2516 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2517 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2518 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2519 SRC(7,6)= (t10 + t11 + 1) >> 1;
2520 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
/**
 * 8x8 luma horizontal-up prediction.
 * Uses only the filtered left neighbours. Pixels sharing the same value lie
 * on lines that step two columns right for every row up; even columns use a
 * rounded 2-tap average of adjacent left neighbours, odd columns a rounded
 * (1,2,1)/4 blend. Positions that would need neighbours below l7 are all
 * set to l7.
 */
2522 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2524 PREDICT_8x8_LOAD_LEFT;
2525 SRC(0,0)= (l0 + l1 + 1) >> 1;
2526 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2527 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2528 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2529 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2530 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2531 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2532 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2533 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2534 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2535 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2536 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2537 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2538 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2539 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2540 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2541 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2542 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
// The neighbour-loading helper macros are only meant for the pred8x8l_*
// functions above; drop them so they cannot leak into later code.
2544 #undef PREDICT_8x8_LOAD_LEFT
2545 #undef PREDICT_8x8_LOAD_TOP
2546 #undef PREDICT_8x8_LOAD_TOPLEFT
2547 #undef PREDICT_8x8_LOAD_TOPRIGHT
2548 #undef PREDICT_8x8_DC
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * The motion vector is read from h->mv_cache[list][scan8[n]] and offset by
 * src_x_offset / src_y_offset (multiplied by 8). The luma source is addressed
 * with mx>>2, my>>2 and the low two bits of each component select the
 * interpolation function via luma_xy; chroma is addressed with mx>>3, my>>3
 * and mx&7, my&7 are handed to chroma_op.
 *
 * @param pic          reference picture to predict from
 * @param n            block index used to look up the motion vector
 * @param delta        byte offset between the two luma calls of a partition
 * @param dest_y, dest_cb, dest_cr destination planes
 * @param qpix_op      table of luma interpolation functions, indexed by luma_xy
 * @param chroma_op    chroma interpolation function
 */
2554 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2555 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2556 int src_x_offset, int src_y_offset,
2557 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2558 MpegEncContext * const s = &h->s;
2559 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2560 const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2561 const int luma_xy= (mx&3) + ((my&3)<<2);
2562 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2563 uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2564 uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
// Margin by which the source block may reach outside the picture before edge
// emulation is needed: 0 when CODEC_FLAG_EMU_EDGE is set, 16 otherwise.
2565 int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2566 int extra_height= extra_width;
2568 const int full_mx= mx>>2;
2569 const int full_my= my>>2;
2571 assert(pic->data[0]);
// Fractional vectors shrink the usable margin by 3 pixels.
2573 if(mx&7) extra_width -= 3;
2574 if(my&7) extra_height -= 3;
// Source block leaves the allowed area: build a padded 21x21 copy in
// edge_emu_buffer (2 extra pixels on the top/left) and read luma from there.
2576 if( full_mx < 0-extra_width
2577 || full_my < 0-extra_height
2578 || full_mx + 16/*FIXME*/ > s->width + extra_width
2579 || full_my + 16/*FIXME*/ > s->height + extra_height){
2580 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
2581 src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2585 qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2587 qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
// Luma-only decoding: skip both chroma planes.
2590 if(s->flags&CODEC_FLAG_GRAY) return;
// Chroma: each plane optionally goes through a 9x9 edge-emulated copy first.
2593 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2594 src_cb= s->edge_emu_buffer;
2596 chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2599 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2600 src_cr= s->edge_emu_buffer;
2602 chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition.
 * Starts with the "put" functions and has "avg" variants available for a
 * second prediction, so that two predictions can be averaged in place.
 *
 * x_offset / y_offset are given in chroma pixel units relative to the
 * macroblock: luma destinations use twice the offset, and the macroblock
 * position (8*mb_x, 8*mb_y) is added before they are passed on as source
 * offsets to mc_dir_part().
 */
2605 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2606 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2607 int x_offset, int y_offset,
2608 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2609 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2610 int list0, int list1){
2611 MpegEncContext * const s = &h->s;
2612 qpel_mc_func *qpix_op= qpix_put;
2613 h264_chroma_mc_func chroma_op= chroma_put;
2615 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2616 dest_cb += x_offset + y_offset*s->uvlinesize;
2617 dest_cr += x_offset + y_offset*s->uvlinesize;
2618 x_offset += 8*s->mb_x;
2619 y_offset += 8*s->mb_y;
// Prediction from reference list 0.
2622 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2623 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2624 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2625 qpix_op, chroma_op);
// Switch to the averaging chroma function for whatever is predicted next.
2628 chroma_op= chroma_avg;
// Prediction from reference list 1.
2632 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2633 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2634 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2635 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition.
 *
 * Bi-predicted path: list 0 is predicted into the destination and list 1
 * into scratch buffers carved out of s->obmc_scratchpad, then both are
 * combined by the biweight functions. With h->use_weight == 2 the weights
 * come from h->implicit_weight (weight1 = 64 - weight0, log2 denominator 5,
 * zero offsets); otherwise the explicit luma/chroma weights and offsets
 * stored in the context are used.
 *
 * Single-list path: the prediction is written with the "put" functions and
 * then scaled in place by luma_weight_op, and by chroma_weight_op when
 * h->use_weight_chroma is set.
 */
2639 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2640 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2641 int x_offset, int y_offset,
2642 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2643 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2644 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2645 int list0, int list1){
2646 MpegEncContext * const s = &h->s;
2648 dest_y += 2*x_offset + 2*y_offset*s-> linesize;
2649 dest_cb += x_offset + y_offset*s->uvlinesize;
2650 dest_cr += x_offset + y_offset*s->uvlinesize;
2651 x_offset += 8*s->mb_x;
2652 y_offset += 8*s->mb_y;
2655 /* don't optimize for luma-only case, since B-frames usually
2656 * use implicit weights => chroma too. */
// Scratch layout: 8 chroma rows for Cb, 8 chroma rows for Cr, then luma.
2657 uint8_t *tmp_cb = s->obmc_scratchpad;
2658 uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2659 uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize;
2660 int refn0 = h->ref_cache[0][ scan8[n] ];
2661 int refn1 = h->ref_cache[1][ scan8[n] ];
2663 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2664 dest_y, dest_cb, dest_cr,
2665 x_offset, y_offset, qpix_put, chroma_put);
2666 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2667 tmp_y, tmp_cb, tmp_cr,
2668 x_offset, y_offset, qpix_put, chroma_put);
2670 if(h->use_weight == 2){
2671 int weight0 = h->implicit_weight[refn0][refn1];
2672 int weight1 = 64 - weight0;
2673 luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0);
2674 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2675 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
2677 luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2678 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2679 h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2680 chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2681 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2682 h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2683 chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2684 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2685 h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
// Single-direction prediction: list 1 if requested, list 0 otherwise.
2688 int list = list1 ? 1 : 0;
2689 int refn = h->ref_cache[list][ scan8[n] ];
2690 Picture *ref= &h->ref_list[list][refn];
2691 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2692 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2693 qpix_put, chroma_put);
2695 luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2696 h->luma_weight[list][refn], h->luma_offset[list][refn]);
2697 if(h->use_weight_chroma){
2698 chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2699 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2700 chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2701 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Chooses between weighted and unweighted motion compensation for one
 * partition.
 *
 * mc_part_weighted() is used when h->use_weight is 1, or when it is 2, both
 * lists are used and the implicit weight for this reference pair differs
 * from 32. mc_part_std() is the other call.
 *
 * The weighted path receives entry 0 of weight_op / weight_avg for luma and
 * entry 3 for chroma, relative to the pointers passed in by the caller.
 */
2706 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2707 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2708 int x_offset, int y_offset,
2709 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2710 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2711 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2712 int list0, int list1){
2713 if((h->use_weight==2 && list0 && list1
2714 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2715 || h->use_weight==1)
2716 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2717 x_offset, y_offset, qpix_put, chroma_put,
2718 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2720 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2721 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Performs inter prediction for the current macroblock.
 *
 * Looks up the macroblock type of (s->mb_x, s->mb_y) and issues one
 * mc_part() call per partition. For every partition shape it selects the
 * matching entry of the qpix / chroma function tables and of the weight
 * tables, plus the chroma offset of the partition inside the macroblock:
 *  - 16x16: one call
 *  - 16x8:  two calls, second at y_offset 4
 *  - 8x16:  two calls, second at x_offset 4
 *  - 8x8:   per sub-macroblock, again split into 8x8, 8x4, 4x8 or 4x4
 * IS_DIR() tells mc_part() which reference lists a partition uses.
 */
2724 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2725 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2726 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2727 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2728 MpegEncContext * const s = &h->s;
2729 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2730 const int mb_type= s->current_picture.mb_type[mb_xy];
2732 assert(IS_INTER(mb_type));
2734 if(IS_16X16(mb_type)){
2735 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2736 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2737 &weight_op[0], &weight_avg[0],
2738 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2739 }else if(IS_16X8(mb_type)){
2740 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2741 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2742 &weight_op[1], &weight_avg[1],
2743 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2744 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2745 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2746 &weight_op[1], &weight_avg[1],
2747 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2748 }else if(IS_8X16(mb_type)){
2749 mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2750 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2751 &weight_op[2], &weight_avg[2],
2752 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2753 mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2754 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2755 &weight_op[2], &weight_avg[2],
2756 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2760 assert(IS_8X8(mb_type));
2763 const int sub_mb_type= h->sub_mb_type[i];
// Chroma-unit offset of sub-macroblock i inside the macroblock (0 or 4).
2765 int x_offset= (i&1)<<2;
2766 int y_offset= (i&2)<<1;
2768 if(IS_SUB_8X8(sub_mb_type)){
2769 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2770 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2771 &weight_op[3], &weight_avg[3],
2772 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2773 }else if(IS_SUB_8X4(sub_mb_type)){
2774 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2775 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2776 &weight_op[4], &weight_avg[4],
2777 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2778 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2779 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2780 &weight_op[4], &weight_avg[4],
2781 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2782 }else if(IS_SUB_4X8(sub_mb_type)){
2783 mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2784 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2785 &weight_op[5], &weight_avg[5],
2786 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2787 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2788 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2789 &weight_op[5], &weight_avg[5],
2790 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2793 assert(IS_SUB_4X4(sub_mb_type));
// 4x4 blocks: j selects the block inside the sub-macroblock (2 chroma units each).
2795 int sub_x_offset= x_offset + 2*(j&1);
2796 int sub_y_offset= y_offset + (j&2);
2797 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2798 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2799 &weight_op[6], &weight_avg[6],
2800 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/**
 * Builds the VLC decoding tables used for residual parsing: coefficient
 * tokens (chroma DC and regular), total_zeros (chroma DC and regular) and
 * run_before (run_vlc plus run7_vlc, which is built from entry 6 of the
 * run tables with a larger table size).
 * A function-local static flag `done` is kept across calls.
 * The h argument is not referenced in the visible body.
 */
2807 static void decode_init_vlc(H264Context *h){
2808 static int done = 0;
2814 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2815 &chroma_dc_coeff_token_len [0], 1, 1,
2816 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2819 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2820 &coeff_token_len [i][0], 1, 1,
2821 &coeff_token_bits[i][0], 1, 1, 1);
2825 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2826 &chroma_dc_total_zeros_len [i][0], 1, 1,
2827 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2829 for(i=0; i<15; i++){
2830 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2831 &total_zeros_len [i][0], 1, 1,
2832 &total_zeros_bits[i][0], 1, 1, 1);
2836 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2837 &run_len [i][0], 1, 1,
2838 &run_bits[i][0], 1, 1, 1);
2840 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2841 &run_len [6][0], 1, 1,
2842 &run_bits[6][0], 1, 1, 1);
2847 * Sets the intra prediction function pointers.
// Fills the four dispatch tables of the context with the C implementations:
// pred4x4 and pred8x8l are indexed by the luma prediction mode constants
// (*_PRED), pred8x8 and pred16x16 by the *_PRED8x8 constants.
2849 static void init_pred_ptrs(H264Context *h){
2850 // MpegEncContext * const s = &h->s;
2852 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
2853 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
2854 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
2855 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2856 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2857 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
2858 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
2859 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
2860 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
2861 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
2862 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
2863 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
// 8x8 luma predictors (take has_topleft / has_topright in addition to src, stride)
2865 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
2866 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
2867 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
2868 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2869 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2870 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
2871 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
2872 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
2873 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
2874 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
2875 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
2876 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
2878 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
2879 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
2880 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
2881 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
2882 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2883 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2884 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
// The 16x16 table shares the *_PRED8x8 index constants with the 8x8 table.
2886 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
2887 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
2888 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
2889 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
2890 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2891 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2892 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
/**
 * Releases the per-context tables through av_freep() on each table pointer,
 * including s.obmc_scratchpad.
 * h->slice_table points into slice_table_base, so it is only reset to NULL
 * rather than freed on its own.
 */
2895 static void free_tables(H264Context *h){
2896 av_freep(&h->intra4x4_pred_mode);
2897 av_freep(&h->chroma_pred_mode_table);
2898 av_freep(&h->cbp_table);
2899 av_freep(&h->mvd_table[0]);
2900 av_freep(&h->mvd_table[1]);
2901 av_freep(&h->direct_table);
2902 av_freep(&h->non_zero_count);
2903 av_freep(&h->slice_table_base);
2904 av_freep(&h->top_borders[1]);
2905 av_freep(&h->top_borders[0]);
2906 h->slice_table= NULL;
2908 av_freep(&h->mb2b_xy);
2909 av_freep(&h->mb2b8_xy);
2911 av_freep(&h->dequant4_coeff);
2912 av_freep(&h->dequant8_coeff);
2914 av_freep(&h->s.obmc_scratchpad);
2919 * needs width/height
// Allocates the per-macroblock side tables of the context. Most are sized from
// big_mb_num = mb_stride * (mb_height + 1); the two top_borders buffers hold
// (16+8+8) bytes per macroblock column. Also fills the macroblock -> block
// index maps and the dequantisation tables.
// NOTE(review): the failure behaviour of CHECKED_ALLOCZ is defined by that
// macro, outside this function; confirm there.
2921 static int alloc_tables(H264Context *h){
2922 MpegEncContext * const s = &h->s;
2923 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2926 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2928 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2929 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
2930 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2931 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2932 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// These tables are only allocated for CABAC streams.
2934 if( h->pps.cabac ) {
2935 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2936 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2937 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2938 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// Every slice_table entry starts as 0xFF. slice_table itself is offset by one
// row plus one entry into the base buffer, so indices one row up / one column
// left of macroblock 0 stay inside the allocation.
2941 memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
2942 h->slice_table= h->slice_table_base + s->mb_stride + 1;
2944 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2945 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// Precompute, for each macroblock, the index of its first 4x4 block (b_xy)
// and of its first 8x8 block (b8_xy).
2946 for(y=0; y<s->mb_height; y++){
2947 for(x=0; x<s->mb_width; x++){
2948 const int mb_xy= x + y*s->mb_stride;
2949 const int b_xy = 4*x + 4*y*h->b_stride;
2950 const int b8_xy= 2*x + 2*y*h->b8_stride;
2952 h->mb2b_xy [mb_xy]= b_xy;
2953 h->mb2b8_xy[mb_xy]= b8_xy;
// Dequantisation tables for 52 qp values: the 4x4 table is copied from
// dequant_coeff, the 8x8 table is derived from dequant8_coeff_init.
2957 CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
2958 CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
2959 memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
2960 for(q=0; q<52; q++){
2961 int shift = div6[q];
2963 if(shift >= 2) // qp<12 are shifted during dequant
2966 h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
2967 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;
// With transform bypass enabled in the SPS, the qp 0 entries are overridden
// with the constants 1 (4x4) and 1<<2 (8x8).
2969 if(h->sps.transform_bypass){
2971 h->dequant4_coeff[0][x] = 1;
2973 h->dequant8_coeff[0][x] = 1<<2;
// The scratchpad is allocated later, in frame_start().
2976 s->obmc_scratchpad = NULL;
/**
 * Shared initialisation of the embedded MpegEncContext: copies width, height
 * and codec id from the AVCodecContext and sets the unrestricted_mv and
 * decode flags.
 */
2984 static void common_init(H264Context *h){
2985 MpegEncContext * const s = &h->s;
2987 s->width = s->avctx->width;
2988 s->height = s->avctx->height;
2989 s->codec_id= s->avctx->codec->id;
2993 s->unrestricted_mv=1;
2994 s->decode=1; //FIXME
/**
 * Decoder initialisation callback.
 * Resets the MpegEncContext to decoder defaults, marks the output format as
 * H.264, copies the workaround_bugs setting and fixes the pixel format to
 * YUV 4:2:0 planar. Extradata whose first byte equals 1 is treated specially.
 * NOTE(review): what is done for such extradata is not visible in the lines
 * kept here; confirm against the full function.
 */
2997 static int decode_init(AVCodecContext *avctx){
2998 H264Context *h= avctx->priv_data;
2999 MpegEncContext * const s = &h->s;
3001 MPV_decode_defaults(s);
3006 s->out_format = FMT_H264;
3007 s->workaround_bugs= avctx->workaround_bugs;
3010 // s->decode_mb= ff_h263_decode_mb;
3012 avctx->pix_fmt= PIX_FMT_YUV420P;
3016 if(avctx->extradata_size > 0 && avctx->extradata &&
3017 *(char *)avctx->extradata == 1){
/**
 * Per-frame setup: starts the frame in the generic MPEG video and error
 * resilience layers, recomputes the block offset table for the current line
 * sizes and lazily allocates the bi-prediction scratch buffer.
 *
 * block_offset[] holds byte offsets derived from scan8[]. Entries 0..15 use
 * 4*linesize per block row and the chroma entries from 16 use 4*uvlinesize.
 * A second set, starting at index 24, uses 8*linesize / 8*uvlinesize per
 * block row instead.
 */
3027 static void frame_start(H264Context *h){
3028 MpegEncContext * const s = &h->s;
3031 MPV_frame_start(s, s->avctx);
3032 ff_er_frame_start(s);
3034 assert(s->linesize && s->uvlinesize);
3036 for(i=0; i<16; i++){
3037 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3038 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3041 h->block_offset[16+i]=
3042 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3043 h->block_offset[24+16+i]=
3044 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3047 /* can't be in alloc_tables because linesize isn't known there.
3048 * FIXME: redo bipred weight to not require extra buffer? */
// Size: 16 luma rows plus 2 x 8 chroma rows.
// NOTE(review): the av_malloc() result is not checked in the visible lines.
3049 if(!s->obmc_scratchpad)
3050 s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3052 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves the border pixels of the current macroblock.
 *
 * left_border[] receives the macroblock's right-most luma column (index 0
 * comes from the previously saved top border) and, unless CODEC_FLAG_GRAY is
 * set, the right-most Cb column from index 17 and Cr column from index 17+9.
 * top_borders[0][mb_x] receives one luma row (bytes 0..15) followed by one
 * Cb row (16..23) and one Cr row (24..31).
 */
3055 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3056 MpegEncContext * const s = &h->s;
3060 src_cb -= uvlinesize;
3061 src_cr -= uvlinesize;
3063 // There are two lines saved, the line above the top macroblock of a pair,
3064 // and the line above the bottom macroblock
3065 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3066 for(i=1; i<17; i++){
3067 h->left_border[i]= src_y[15+i* linesize];
3070 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3071 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3073 if(!(s->flags&CODEC_FLAG_GRAY)){
3074 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3075 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3077 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3078 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3080 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3081 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
3085 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3086 MpegEncContext * const s = &h->s;
3089 int deblock_left = (s->mb_x > 0);
3090 int deblock_top = (s->mb_y > 0);
3092 src_y -= linesize + 1;
3093 src_cb -= uvlinesize + 1;
3094 src_cr -= uvlinesize + 1;
3096 #define XCHG(a,b,t,xchg)\
3103 for(i = !deblock_top; i<17; i++){
3104 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3109 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3110 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3111 if(s->mb_x < s->mb_width){
3112 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3116 if(!(s->flags&CODEC_FLAG_GRAY)){
3118 for(i = !deblock_top; i<9; i++){
3119 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3120 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3124 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3125 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Saves the border pixels of a vertical macroblock pair (32 luma rows,
 * 16 chroma rows).
 *
 * left_border[] receives the pair's right-most luma column in entries
 * 0..33 (entries 0 and 1 come from the two previously saved top borders),
 * followed by the Cb column from index 34 and the Cr column from index
 * 34+18. The last two rows of the pair are stored in top_borders[0] and
 * top_borders[1] respectively, as luma (bytes 0..15), Cb (16..23) and
 * Cr (24..31). Chroma is skipped when CODEC_FLAG_GRAY is set.
 */
3130 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3131 MpegEncContext * const s = &h->s;
// Move two rows up so that row index i=2 below addresses the pair's first row.
3134 src_y -= 2 * linesize;
3135 src_cb -= 2 * uvlinesize;
3136 src_cr -= 2 * uvlinesize;
3138 // There are two lines saved, the line above the top macroblock of a pair,
3139 // and the line above the bottom macroblock
3140 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3141 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3142 for(i=2; i<34; i++){
3143 h->left_border[i]= src_y[15+i* linesize];
3146 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3147 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3148 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3149 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3151 if(!(s->flags&CODEC_FLAG_GRAY)){
3152 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3153 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3154 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3155 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3156 for(i=2; i<18; i++){
3157 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3158 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3160 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3161 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3162 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3163 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * Macroblock-pair counterpart of xchg_mb_border(): exchanges the samples
 * cached in h->left_border and h->top_borders[0/1][] with the two rows above
 * and the column left of the pair addressed by src_y/src_cb/src_cr.
 * NOTE(review): what the xchg flag changes is decided by the XCHG macro
 * body - confirm there.
 */
3167 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3168 MpegEncContext * const s = &h->s;
// A left/top neighbour exists only away from the first column/row.
3171 int deblock_left = (s->mb_x > 0);
3172 int deblock_top = (s->mb_y > 0);
3174 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// Step back two rows and one sample: indices 0 and 1 address the two corner samples.
3176 src_y -= 2 * linesize + 1;
3177 src_cb -= 2 * uvlinesize + 1;
3178 src_cr -= 2 * uvlinesize + 1;
3180 #define XCHG(a,b,t,xchg)\
3187 for(i = (!deblock_top)<<1; i<34; i++){
// Luma left column: 2 corner rows (skipped when there is no row above) plus 32 rows.
3188 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Two saved luma rows: top_borders[0] maps to the upper row, top_borders[1] to the row below it.
3193 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3194 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3195 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3196 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// Chroma is skipped in gray-only decoding.
3199 if(!(s->flags&CODEC_FLAG_GRAY)){
3201 for(i = (!deblock_top) << 1; i<18; i++){
// Chroma left columns: Cb at left_border[34..51], Cr at left_border[52..69].
3202 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3203 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
// Two saved chroma rows: Cb at byte offset 16, Cr at byte offset 24.
3207 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3208 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3209 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3210 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: copies raw PCM samples, or runs intra prediction /
 * motion compensation and adds the residual from h->mb, then, when
 * h->deblocking_filter is set, saves the border samples and calls filter_mb().
 * Both the H.264 and the SVQ3 residual paths are handled, selected by
 * s->codec_id.
 */
3215 static void hl_decode_mb(H264Context *h){
3216 MpegEncContext * const s = &h->s;
3217 const int mb_x= s->mb_x;
3218 const int mb_y= s->mb_y;
3219 const int mb_xy= mb_x + mb_y*s->mb_stride;
3220 const int mb_type= s->current_picture.mb_type[mb_xy];
3221 uint8_t *dest_y, *dest_cb, *dest_cr;
3222 int linesize, uvlinesize /*dct_offset*/;
3224 int *block_offset = &h->block_offset[0];
// 1 on odd macroblock rows; used below to run the MBAFF deblocking only once per pair.
3225 const unsigned int bottom = mb_y & 1;
// Bypass applies only at qscale 0 and only when the SPS enables it.
3226 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3227 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
// Top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma).
3232 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3233 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3234 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Field macroblock: rows are interleaved, so strides double and the second
// block_offset table (starting at entry 24) is used.
3236 if (h->mb_field_decoding_flag) {
3237 linesize = s->linesize * 2;
3238 uvlinesize = s->uvlinesize * 2;
3239 block_offset = &h->block_offset[24];
// An odd-row macroblock is moved up 15 luma / 7 chroma rows, i.e. onto the
// second picture row of the pair.
3240 if(mb_y&1){ //FIXME move out of this func?
3241 dest_y -= s->linesize*15;
3242 dest_cb-= s->uvlinesize*7;
3243 dest_cr-= s->uvlinesize*7;
// Frame-macroblock strides (presumably the else branch of the field test above).
3246 linesize = s->linesize;
3247 uvlinesize = s->uvlinesize;
3248 // dct_offset = s->linesize * 16;
// Residual add routine: plain pixel add under transform bypass, IDCT
// otherwise; 8x8 or 4x4 variant according to the macroblock type.
3251 idct_add = transform_bypass
3252 ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
3253 : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
// PCM macroblock: h->mb holds the samples themselves, no prediction or transform.
3255 if (IS_INTRA_PCM(mb_type)) {
3258 // The pixels are stored in h->mb array in the same order as levels,
3259 // copy them in output in the correct order.
// 16 luma 4x4 blocks.
3260 for(i=0; i<16; i++) {
3261 for (y=0; y<4; y++) {
3262 for (x=0; x<4; x++) {
3263 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
// 4 Cb 4x4 blocks (block indices 16..19).
3267 for(i=16; i<16+4; i++) {
3268 for (y=0; y<4; y++) {
3269 for (x=0; x<4; x++) {
3270 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// 4 Cr 4x4 blocks (block indices 20..23).
3274 for(i=20; i<20+4; i++) {
3275 for (y=0; y<4; y++) {
3276 for (x=0; x<4; x++) {
3277 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: prediction reads neighbouring samples from the picture.
3282 if(IS_INTRA(mb_type)){
// With deblocking on, the border samples saved before filtering are
// exchanged into the picture (xchg=1) for the duration of the prediction.
3283 if(h->deblocking_filter) {
3284 if (h->mb_aff_frame) {
3286 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3288 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
// Chroma prediction, same mode for Cb and Cr.
3292 if(!(s->flags&CODEC_FLAG_GRAY)){
3293 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3294 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3297 if(IS_INTRA4x4(mb_type)){
// 8x8 transform: four 8x8 luma blocks, each predicted and then reconstructed
// before the next one is predicted.
3299 if(IS_8x8DCT(mb_type)){
3300 for(i=0; i<16; i+=4){
3301 uint8_t * const ptr= dest_y + block_offset[i];
3302 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3303 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3304 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3305 if(h->non_zero_count_cache[ scan8[i] ])
3306 idct_add(ptr, h->mb + i*16, linesize);
// 4x4 transform: sixteen 4x4 luma blocks.
3309 for(i=0; i<16; i++){
3310 uint8_t * const ptr= dest_y + block_offset[i];
3312 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// These two modes read the samples above-right of the block.
3315 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3316 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3317 assert(mb_y || linesize <= block_offset[i]);
// Unavailable top-right: replicate the last sample of the row above into
// all four bytes of tr and predict from that.
3318 if(!topright_avail){
3319 tr= ptr[3 - linesize]*0x01010101;
3320 topright= (uint8_t*) &tr;
3322 topright= ptr + 4 - linesize;
3326 h->pred4x4[ dir ](ptr, topright, linesize);
3327 if(h->non_zero_count_cache[ scan8[i] ]){
// H.264 uses the selected idct_add; the other call is the SVQ3 transform.
3328 if(s->codec_id == CODEC_ID_H264)
3329 idct_add(ptr, h->mb + i*16, linesize);
3331 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: one prediction for the whole luma block, then the luma DC
// coefficients are dequantised/transformed in place in h->mb.
3336 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3337 if(s->codec_id == CODEC_ID_H264){
3338 if(!transform_bypass)
3339 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
3341 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Prediction done: exchange the borders again, this time with xchg=0.
3343 if(h->deblocking_filter) {
3344 if (h->mb_aff_frame) {
// Pair variant works from the macroblock one row up (mb_y-1).
3346 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3347 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3348 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3350 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3354 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock (H.264): motion compensation with the put/avg/weight function tables.
3357 }else if(s->codec_id == CODEC_ID_H264){
3358 hl_motion(h, dest_y, dest_cb, dest_cr,
3359 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3360 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3361 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Luma residual for everything except intra4x4 (whose residual was already
// added block by block above).
3365 if(!IS_INTRA4x4(mb_type)){
3366 if(s->codec_id == CODEC_ID_H264){
// Step 4 visits one entry per 8x8 block, step 1 one per 4x4 block.
3367 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3368 for(i=0; i<16; i+=di){
// A block is processed if it has coded coefficients or a non-zero first
// coefficient (which the DC transform above may have produced).
3369 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3370 uint8_t * const ptr= dest_y + block_offset[i];
3371 idct_add(ptr, h->mb + i*16, linesize);
// SVQ3 luma residual.
3375 for(i=0; i<16; i++){
3376 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3377 uint8_t * const ptr= dest_y + block_offset[i];
3378 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual (skipped in gray-only decoding); chroma always uses the 4x4 routine.
3384 if(!(s->flags&CODEC_FLAG_GRAY)){
3385 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
// Chroma DC for Cb (h->mb + 16*16) and Cr (h->mb + 16*16 + 4*16).
3386 if(!transform_bypass){
3387 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
3388 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
3390 if(s->codec_id == CODEC_ID_H264){
3391 for(i=16; i<16+4; i++){
3392 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3393 uint8_t * const ptr= dest_cb + block_offset[i];
3394 idct_add(ptr, h->mb + i*16, uvlinesize);
3397 for(i=20; i<20+4; i++){
3398 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3399 uint8_t * const ptr= dest_cr + block_offset[i];
3400 idct_add(ptr, h->mb + i*16, uvlinesize);
// SVQ3 chroma residual; the quantiser is looked up through the chroma_qp table.
3404 for(i=16; i<16+4; i++){
3405 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3406 uint8_t * const ptr= dest_cb + block_offset[i];
3407 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3410 for(i=20; i<20+4; i++){
3411 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3412 uint8_t * const ptr= dest_cr + block_offset[i];
3413 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// In-loop deblocking of the reconstructed macroblock.
3419 if(h->deblocking_filter) {
3420 if (h->mb_aff_frame) {
// NOTE: this mb_y (and mb_xy below) shadow the function-level constants and
// refer to the macroblock one row up.
3421 const int mb_y = s->mb_y - 1;
3422 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3423 const int mb_xy= mb_x + mb_y*s->mb_stride;
3424 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3425 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// Debug sentinel: one chroma sample, compared after each filter call below.
3426 uint8_t tmp = s->current_picture.data[1][384];
// Filtering is deferred until the odd-row macroblock of the pair is reconstructed.
3427 if (!bottom) return;
3428 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3429 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3430 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Save the pair's border samples before they are filtered.
3432 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3433 // TODO deblock a pair
// Upper macroblock of the pair.
3436 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3437 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3438 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3439 if (tmp != s->current_picture.data[1][384]) {
3440 tprintf("modified pixel 8,1 (1)\n");
// Lower macroblock of the pair.
3444 tprintf("call mbaff filter_mb\n");
3445 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3446 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3447 if (tmp != s->current_picture.data[1][384]) {
3448 tprintf("modified pixel 8,1 (2)\n");
// Non-MBAFF: save this macroblock's borders, then filter it.
3451 tprintf("call filter_mb\n");
3452 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3453 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3454 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3460 * fills the default_ref_list.
/*
 * Builds h->default_ref_list[0] (and [1] for B slices) from h->short_ref[]
 * and h->long_ref[]. Entries are copied by value (Picture structs), and
 * pic_id is set to frame_num for short-term and to the slot index for
 * long-term pictures. Unused tail entries up to h->ref_count[] are zeroed.
 */
3462 static int fill_default_ref_list(H264Context *h){
3463 MpegEncContext * const s = &h->s;
// Index into sorted_short_ref of the first picture whose POC is >= the
// current POC; -1 while none has been found.
3465 int smallest_poc_greater_than_current = -1;
3466 Picture sorted_short_ref[32];
3468 if(h->slice_type==B_TYPE){
3472 /* sort frame according to poc in B slice */
// Selection sort: each pass picks the smallest POC above the running limit.
3473 for(out_i=0; out_i<h->short_ref_count; out_i++){
3475 int best_poc=INT_MAX;
3477 for(i=0; i<h->short_ref_count; i++){
3478 const int poc= h->short_ref[i]->poc;
3479 if(poc > limit && poc < best_poc){
3485 assert(best_i != INT_MIN);
3488 sorted_short_ref[out_i]= *h->short_ref[best_i];
3489 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3490 if (-1 == smallest_poc_greater_than_current) {
3491 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3492 smallest_poc_greater_than_current = out_i;
// Frame decoding path.
3498 if(s->picture_structure == PICT_FRAME){
3499 if(h->slice_type==B_TYPE){
3501 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3503 // find the largest poc
3504 for(list=0; list<2; list++){
// List 0 walks sorted_short_ref with step +1, list 1 with step -1.
3507 int step= list ? -1 : 1;
3509 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
// Running off either end of the sorted array re-seeds j next to the
// current-POC split point.
3510 while(j<0 || j>= h->short_ref_count){
3511 if(j != -99 && step == (list ? -1 : 1))
3514 j= smallest_poc_greater_than_current + (step>>1);
// Only pictures with reference == 3 are listed.
3516 if(sorted_short_ref[j].reference != 3) continue;
3517 h->default_ref_list[list][index ]= sorted_short_ref[j];
3518 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term pictures follow, in slot order.
3521 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3522 if(h->long_ref[i] == NULL) continue;
3523 if(h->long_ref[i]->reference != 3) continue;
3525 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3526 h->default_ref_list[ list ][index++].pic_id= i;;
3529 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3530 // swap the two first elements of L1 when
3531 // L0 and L1 are identical
3532 Picture temp= h->default_ref_list[1][0];
3533 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3534 h->default_ref_list[1][1] = temp;
// Zero the entries that could not be filled.
3537 if(index < h->ref_count[ list ])
3538 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// Non-B slice: short-term pictures in h->short_ref[] order, then long-term.
// NOTE(review): unlike the B-slice loops above, these two loops show no
// index < h->ref_count[0] bound - confirm default_ref_list[0] is large enough.
3542 for(i=0; i<h->short_ref_count; i++){
3543 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field shit
3544 h->default_ref_list[0][index ]= *h->short_ref[i];
3545 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3547 for(i = 0; i < 16; i++){
3548 if(h->long_ref[i] == NULL) continue;
3549 if(h->long_ref[i]->reference != 3) continue;
3550 h->default_ref_list[0][index ]= *h->long_ref[i];
3551 h->default_ref_list[0][index++].pic_id= i;;
3553 if(index < h->ref_count[0])
3554 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3557 if(h->slice_type==B_TYPE){
3559 //FIXME second field balh
// Debug dump of the resulting lists.
3563 for (i=0; i<h->ref_count[0]; i++) {
3564 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3566 if(h->slice_type==B_TYPE){
3567 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): the last argument reads default_ref_list[0] although this
// line reports list 1 - looks like a copy/paste slip; confirm.
3568 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
// Forward declarations: both debug printers are defined further down in this
// file, and print_short_term() is already called by decode_ref_pic_list_reordering().
3575 static void print_short_term(H264Context *h);
3576 static void print_long_term(H264Context *h);
/*
 * Parses the reference picture list reordering syntax from s->gb and builds
 * h->ref_list[] from h->default_ref_list[]. Does nothing for I/SI slices.
 * Afterwards any entry without picture data is replaced by the current
 * picture, and the direct-mode helpers are run.
 */
3578 static int decode_ref_pic_list_reordering(H264Context *h){
3579 MpegEncContext * const s = &h->s;
3582 print_short_term(h);
3584 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3586 for(list=0; list<2; list++){
// Start from the default order.
3587 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// One flag bit per list says whether reordering commands follow.
3589 if(get_bits1(&s->gb)){
// Picture number predictor, starts at the current picture number.
3590 int pred= h->curr_pic_num;
3592 for(index=0; ; index++){
3593 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3596 Picture *ref = NULL;
// idc 3 ends the command list.
3598 if(reordering_of_pic_nums_idc==3)
3601 if(index >= h->ref_count[list]){
3602 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3606 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term picture, addressed by a difference to the predictor.
3607 if(reordering_of_pic_nums_idc<2){
3608 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3610 if(abs_diff_pic_num >= h->max_pic_num){
3611 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3615 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3616 else pred+= abs_diff_pic_num;
// Wrap modulo max_pic_num (the mask form assumes max_pic_num is a power of two).
3617 pred &= h->max_pic_num - 1;
// Search the short-term list from the end for a matching frame_num.
3619 for(i= h->short_ref_count-1; i>=0; i--){
3620 ref = h->short_ref[i];
3621 assert(ref->reference == 3);
3622 assert(!ref->long_ref);
3623 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3627 ref->pic_id= ref->frame_num;
// idc 2: long-term picture, addressed directly by its index.
// NOTE(review): pic_id comes straight from the bitstream and no range or
// NULL check is visible before h->long_ref[pic_id] is indexed and
// dereferenced - confirm this cannot be reached with pic_id >= 16 or an
// empty slot.
3629 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3630 ref = h->long_ref[pic_id];
3631 ref->pic_id= pic_id;
3632 assert(ref->reference == 3);
3633 assert(ref->long_ref);
// No matching picture: log and blank the entry.
3638 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3639 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Locate the picture's old position, shift the entries between index and
// that position down by one, and insert the picture at index.
3641 for(i=index; i+1<h->ref_count[list]; i++){
3642 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3645 for(; i > index; i--){
3646 h->ref_list[list][i]= h->ref_list[list][i-1];
3648 h->ref_list[list][index]= *ref;
3651 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Only B slices have a second list.
3657 if(h->slice_type!=B_TYPE) break;
// Fill holes (entries without picture data) with the current picture.
3659 for(list=0; list<2; list++){
3660 for(index= 0; index < h->ref_count[list]; index++){
3661 if(!h->ref_list[list][index].data[0])
3662 h->ref_list[list][index]= s->current_picture;
3664 if(h->slice_type!=B_TYPE) break;
// Temporal direct prediction needs the distance scale factors.
3667 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3668 direct_dist_scale_factor(h);
3669 direct_ref_list_init(h);
/*
 * Parses the explicit weighted-prediction table from s->gb into
 * h->luma_weight/offset and h->chroma_weight/offset for every active
 * reference of list 0 (and list 1 for B slices), and derives
 * h->use_weight / h->use_weight_chroma.
 */
3673 static int pred_weight_table(H264Context *h){
3674 MpegEncContext * const s = &h->s;
3676 int luma_def, chroma_def;
3679 h->use_weight_chroma= 0;
3680 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3681 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weights, i.e. a weight of 1.0 at the signalled denominators.
// NOTE(review): the denominators are used as shift counts without a visible
// range check - confirm the bitstream cannot supply values >= 31.
3682 luma_def = 1<<h->luma_log2_weight_denom;
3683 chroma_def = 1<<h->chroma_log2_weight_denom;
3685 for(list=0; list<2; list++){
3686 for(i=0; i<h->ref_count[list]; i++){
3687 int luma_weight_flag, chroma_weight_flag;
// Explicit luma weight/offset when flagged, defaults otherwise.
3689 luma_weight_flag= get_bits1(&s->gb);
3690 if(luma_weight_flag){
3691 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3692 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3693 if( h->luma_weight[list][i] != luma_def
3694 || h->luma_offset[list][i] != 0)
3697 h->luma_weight[list][i]= luma_def;
3698 h->luma_offset[list][i]= 0;
// Same for chroma, per component j.
3701 chroma_weight_flag= get_bits1(&s->gb);
3702 if(chroma_weight_flag){
3705 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3706 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
// Any non-default value turns chroma weighting on.
3707 if( h->chroma_weight[list][i][j] != chroma_def
3708 || h->chroma_offset[list][i][j] != 0)
3709 h->use_weight_chroma= 1;
3714 h->chroma_weight[list][i][j]= chroma_def;
3715 h->chroma_offset[list][i][j]= 0;
// Only B slices carry a table for list 1.
3719 if(h->slice_type != B_TYPE) break;
3721 h->use_weight= h->use_weight || h->use_weight_chroma;
/*
 * Derives the implicit bi-prediction weights h->implicit_weight[ref0][ref1]
 * from the POC distances between the current picture and each pair of
 * list 0 / list 1 references.
 */
3725 static void implicit_weight_table(H264Context *h){
3726 MpegEncContext * const s = &h->s;
3728 int cur_poc = s->current_picture_ptr->poc;
// Special case: a single reference in each list, with the current picture
// exactly midway between them in POC.
3730 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3731 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3733 h->use_weight_chroma= 0;
// General case: weights use a fixed denominator of 2^5.
3738 h->use_weight_chroma= 2;
3739 h->luma_log2_weight_denom= 5;
3740 h->chroma_log2_weight_denom= 5;
3743 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3744 int poc0 = h->ref_list[0][ref0].poc;
3745 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3746 int poc1 = h->ref_list[1][ref1].poc;
// td: POC distance between the two references; tb: distance from the
// list 0 reference to the current picture. Both are clipped to 8 bits.
3747 int td = clip(poc1 - poc0, -128, 127);
3749 int tb = clip(cur_poc - poc0, -128, 127);
// NOTE(review): this divides by td - presumably guarded by a non-zero
// test on td; confirm.
3750 int tx = (16384 + (ABS(td) >> 1)) / td;
3751 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range scale factors fall back to the equal weight 32.
3752 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3753 h->implicit_weight[ref0][ref1] = 32;
3755 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3757 h->implicit_weight[ref0][ref1] = 32;
/*
 * Called when pic stops being used as a reference (see idr() and
 * execute_ref_pic_marking()). Tests whether pic is h->delayed_output_pic or
 * is still queued in the NULL-terminated h->delayed_pic[] list.
 * NOTE(review): presumably a picture that is still pending output is
 * treated differently from one that is not - confirm against the statements
 * controlled by these tests.
 */
3762 static inline void unreference_pic(H264Context *h, Picture *pic){
3765 if(pic == h->delayed_output_pic)
3768 for(i = 0; h->delayed_pic[i]; i++)
3769 if(pic == h->delayed_pic[i]){
3777 * instantaneous decoder refresh.
// Drops every reference picture: all 16 long-term slots and all short-term
// entries are passed to unreference_pic() and cleared, and both counters
// are reset to 0.
3779 static void idr(H264Context *h){
3782 for(i=0; i<16; i++){
3783 if (h->long_ref[i] != NULL) {
3784 unreference_pic(h, h->long_ref[i]);
3785 h->long_ref[i]= NULL;
3788 h->long_ref_count=0;
3790 for(i=0; i<h->short_ref_count; i++){
3791 unreference_pic(h, h->short_ref[i]);
3792 h->short_ref[i]= NULL;
3794 h->short_ref_count=0;
3797 /* forget old pics after a seek */
// Clears the delayed-output bookkeeping (h->delayed_pic[] entries and
// h->delayed_output_pic) and, if a current picture exists, clears its
// reference flag.
3798 static void flush_dpb(AVCodecContext *avctx){
3799 H264Context *h= avctx->priv_data;
3802 h->delayed_pic[i]= NULL;
3803 h->delayed_output_pic= NULL;
3805 if(h->s.current_picture_ptr)
3806 h->s.current_picture_ptr->reference= 0;
3811 * @return the removed picture or NULL if an error occurs
// Searches h->short_ref[] for the picture whose frame_num matches and takes
// it out of the list; with FF_DEBUG_MMCO set, the search is logged.
3813 static Picture * remove_short(H264Context *h, int frame_num){
3814 MpegEncContext * const s = &h->s;
3817 if(s->avctx->debug&FF_DEBUG_MMCO)
3818 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3820 for(i=0; i<h->short_ref_count; i++){
3821 Picture *pic= h->short_ref[i];
3822 if(s->avctx->debug&FF_DEBUG_MMCO)
3823 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3824 if(pic->frame_num == frame_num){
// Close the gap: move the following entries down by one slot.
3825 h->short_ref[i]= NULL;
3826 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3827 h->short_ref_count--;
3836 * @return the removed picture or NULL if an error occurs
// Empties long-term slot i; long_ref_count is decremented only if the slot
// actually held a picture.
3838 static Picture * remove_long(H264Context *h, int i){
3841 pic= h->long_ref[i];
3842 h->long_ref[i]= NULL;
3843 if(pic) h->long_ref_count--;
3849 * print short term list
// Debug helper: with FF_DEBUG_MMCO set, logs index, frame_num, poc and data
// pointer of every short-term reference.
3851 static void print_short_term(H264Context *h) {
3853 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3854 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3855 for(i=0; i<h->short_ref_count; i++){
3856 Picture *pic= h->short_ref[i];
3857 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3863 * print long term list
// Debug helper: with FF_DEBUG_MMCO set, walks the 16 long-term slots and
// logs index, frame_num, poc and data pointer.
3865 static void print_long_term(H264Context *h) {
3867 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3868 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3869 for(i = 0; i < 16; i++){
3870 Picture *pic= h->long_ref[i];
3872 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3879 * Executes the reference picture marking (memory management control operations).
// Applies mmco_count operations from mmco[] to h->short_ref[] / h->long_ref[],
// then, unless the current picture became a long-term reference, inserts it
// at the head of the short-term list.
3881 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3882 MpegEncContext * const s = &h->s;
3884 int current_is_long=0;
3887 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3888 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3890 for(i=0; i<mmco_count; i++){
// NOTE(review): this log reads h->mmco[i] while the switch below uses the
// mmco parameter - they match only when the caller passes h->mmco; confirm.
3891 if(s->avctx->debug&FF_DEBUG_MMCO)
3892 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3894 switch(mmco[i].opcode){
// Drop one short-term reference, identified by frame_num.
3895 case MMCO_SHORT2UNUSED:
3896 pic= remove_short(h, mmco[i].short_frame_num);
3898 unreference_pic(h, pic);
3899 else if(s->avctx->debug&FF_DEBUG_MMCO)
3900 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
// Move a short-term reference into long-term slot long_index, evicting any
// picture already in that slot.
3902 case MMCO_SHORT2LONG:
3903 pic= remove_long(h, mmco[i].long_index);
3904 if(pic) unreference_pic(h, pic);
// NOTE(review): remove_short() is documented as returning NULL on error,
// but its result is stored and dereferenced here without a check - confirm.
3906 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3907 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3908 h->long_ref_count++;
// Drop one long-term reference, identified by slot index.
3910 case MMCO_LONG2UNUSED:
3911 pic= remove_long(h, mmco[i].long_index);
3913 unreference_pic(h, pic);
3914 else if(s->avctx->debug&FF_DEBUG_MMCO)
3915 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
// The current picture becomes the long-term reference in slot long_index
// (presumably the MMCO_LONG case - confirm).
3918 pic= remove_long(h, mmco[i].long_index);
3919 if(pic) unreference_pic(h, pic);
3921 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3922 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3923 h->long_ref_count++;
3927 case MMCO_SET_MAX_LONG:
3928 assert(mmco[i].long_index <= 16);
3929 // just remove the long term which index is greater than new max
3930 for(j = mmco[i].long_index; j<16; j++){
3931 pic = remove_long(h, j);
3932 if (pic) unreference_pic(h, pic);
// Drop every short-term and every long-term reference (presumably the
// reset opcode - confirm).
3936 while(h->short_ref_count){
3937 pic= remove_short(h, h->short_ref[0]->frame_num);
3938 unreference_pic(h, pic);
3940 for(j = 0; j < 16; j++) {
3941 pic= remove_long(h, j);
3942 if(pic) unreference_pic(h, pic);
// Current picture stays short-term: an existing entry with the same
// frame_num is removed first and reported as an illegal buffer state.
3949 if(!current_is_long){
3950 pic= remove_short(h, s->current_picture_ptr->frame_num);
3952 unreference_pic(h, pic);
3953 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Shift the list up by one and put the current picture at index 0.
// NOTE(review): needs room for short_ref_count+1 entries - confirm the
// array size against the maximum count.
3956 if(h->short_ref_count)
3957 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3959 h->short_ref[0]= s->current_picture_ptr;
3960 h->short_ref[0]->long_ref=0;
3961 h->short_ref_count++;
3964 print_short_term(h);
/*
 * Parses the reference picture marking syntax from s->gb into h->mmco[].
 * IDR slices carry two flag bits; other slices either carry an explicit
 * list of operations or get one generated for the sliding-window case.
 */
3969 static int decode_ref_pic_marking(H264Context *h){
3970 MpegEncContext * const s = &h->s;
3973 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// Each flag bit is mapped to 0 (set) or -1 (clear).
3974 s->broken_link= get_bits1(&s->gb) -1;
3975 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
3976 if(h->mmco[0].long_index == -1)
3979 h->mmco[0].opcode= MMCO_LONG;
3983 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
// Explicit operations, at most MAX_MMCO_COUNT of them.
3984 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3985 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
3987 h->mmco[i].opcode= opcode;
// Short-term operand: coded as a difference to the current frame_num,
// wrapped to log2_max_frame_num bits.
3988 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3989 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3990 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
3991 fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
3995 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3996 h->mmco[i].long_index= get_ue_golomb(&s->gb);
3997 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
3998 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
// Opcodes above MMCO_LONG are rejected.
4003 if(opcode > MMCO_LONG){
4004 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4007 if(opcode == MMCO_END)
// Sliding window: when the reference buffer is full, schedule removal of
// the last entry of the short-term list.
4012 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4014 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4015 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
// NOTE(review): if the buffer is full of long-term pictures only,
// short_ref_count is 0 and this indexes short_ref[-1] - confirm that
// state cannot occur.
4016 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/*
 * Computes the picture order count of the current picture for the SPS
 * poc_type in use and stores it in s->current_picture_ptr (field_poc[] and,
 * for frames, poc). Also maintains h->frame_num_offset.
 */
4026 static int init_poc(H264Context *h){
4027 MpegEncContext * const s = &h->s;
4028 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset restarts at IDR and grows by max_frame_num each time
// frame_num wraps around.
4031 if(h->nal_unit_type == NAL_IDR_SLICE){
4032 h->frame_num_offset= 0;
4034 if(h->frame_num < h->prev_frame_num)
4035 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4037 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: POC = msb + lsb, where only the lsb is transmitted.
4040 if(h->sps.poc_type==0){
4041 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4043 if(h->nal_unit_type == NAL_IDR_SLICE){
// Detect lsb wrap-around in either direction relative to the previous
// picture and adjust the msb accordingly.
4048 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4049 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4050 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4051 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4053 h->poc_msb = h->prev_poc_msb;
4054 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4056 field_poc[1] = h->poc_msb + h->poc_lsb;
4057 if(s->picture_structure == PICT_FRAME)
4058 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC is predicted from frame_num using the SPS offset cycle.
4059 }else if(h->sps.poc_type==1){
4060 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4063 if(h->sps.poc_cycle_length != 0)
4064 abs_frame_num = h->frame_num_offset + h->frame_num;
4068 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// Sum of all offsets in one cycle.
4071 expected_delta_per_poc_cycle = 0;
4072 for(i=0; i < h->sps.poc_cycle_length; i++)
4073 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Whole cycles elapsed plus the offsets of the partial cycle.
4075 if(abs_frame_num > 0){
4076 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4077 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4079 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4080 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4081 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
// Non-reference pictures get an extra SPS offset.
4085 if(h->nal_ref_idc == 0)
4086 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4088 field_poc[0] = expectedpoc + h->delta_poc[0];
4089 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4091 if(s->picture_structure == PICT_FRAME)
4092 field_poc[1] += h->delta_poc[1];
// Remaining poc_type: POC follows decoding order, 2 per frame, with
// non-reference pictures one below.
4095 if(h->nal_unit_type == NAL_IDR_SLICE){
4098 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4099 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Store only the field POCs that belong to the current picture structure;
// a frame's POC is the smaller of its two field POCs.
4105 if(s->picture_structure != PICT_BOTTOM_FIELD)
4106 s->current_picture_ptr->field_poc[0]= field_poc[0];
4107 if(s->picture_structure != PICT_TOP_FIELD)
4108 s->current_picture_ptr->field_poc[1]= field_poc[1];
4109 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4110 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4116 * decodes a slice header.
4117 * this will also call MPV_common_init() and frame_start() as needed
4119 static int decode_slice_header(H264Context *h){
4120 MpegEncContext * const s = &h->s;
4121 int first_mb_in_slice, pps_id;
4122 int num_ref_idx_active_override_flag;
4123 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4125 int default_ref_list_done = 0;
4127 s->current_picture.reference= h->nal_ref_idc != 0;
4128 s->dropable= h->nal_ref_idc == 0;
4130 first_mb_in_slice= get_ue_golomb(&s->gb);
4132 slice_type= get_ue_golomb(&s->gb);
4134 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4139 h->slice_type_fixed=1;
4141 h->slice_type_fixed=0;
4143 slice_type= slice_type_map[ slice_type ];
4144 if (slice_type == I_TYPE
4145 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4146 default_ref_list_done = 1;
4148 h->slice_type= slice_type;
4150 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4152 pps_id= get_ue_golomb(&s->gb);
4154 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4157 h->pps= h->pps_buffer[pps_id];
4158 if(h->pps.slice_group_count == 0){
4159 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4163 h->sps= h->sps_buffer[ h->pps.sps_id ];
4164 if(h->sps.log2_max_frame_num == 0){
4165 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4169 s->mb_width= h->sps.mb_width;
4170 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4172 h->b_stride= s->mb_width*4 + 1;
4173 h->b8_stride= s->mb_width*2 + 1;
4175 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4176 if(h->sps.frame_mbs_only_flag)
4177 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4179 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4181 if (s->context_initialized
4182 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4186 if (!s->context_initialized) {
4187 if (MPV_common_init(s) < 0)
4190 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4191 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4192 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4195 for(i=0; i<16; i++){
4196 #define T(x) (x>>2) | ((x<<2) & 0xF)
4197 h->zigzag_scan[i] = T(zigzag_scan[i]);
4198 h-> field_scan[i] = T( field_scan[i]);
4201 if(h->sps.transform_bypass){ //FIXME same ugly
4202 h->zigzag_scan_q0 = zigzag_scan;
4203 h->field_scan_q0 = field_scan;
4205 h->zigzag_scan_q0 = h->zigzag_scan;
4206 h->field_scan_q0 = h->field_scan;
4211 s->avctx->width = s->width;
4212 s->avctx->height = s->height;
4213 s->avctx->sample_aspect_ratio= h->sps.sar;
4214 if(!s->avctx->sample_aspect_ratio.den)
4215 s->avctx->sample_aspect_ratio.den = 1;
4217 if(h->sps.timing_info_present_flag){
4218 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4222 if(h->slice_num == 0){
4226 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4227 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4229 h->mb_aff_frame = 0;
4230 if(h->sps.frame_mbs_only_flag){
4231 s->picture_structure= PICT_FRAME;
4233 if(get_bits1(&s->gb)) { //field_pic_flag
4234 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4236 s->picture_structure= PICT_FRAME;
4237 first_mb_in_slice <<= h->sps.mb_aff;
4238 h->mb_aff_frame = h->sps.mb_aff;
4242 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4243 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4244 if(s->mb_y >= s->mb_height){
4248 if(s->picture_structure==PICT_FRAME){
4249 h->curr_pic_num= h->frame_num;
4250 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4252 h->curr_pic_num= 2*h->frame_num;
4253 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4256 if(h->nal_unit_type == NAL_IDR_SLICE){
4257 get_ue_golomb(&s->gb); /* idr_pic_id */
4260 if(h->sps.poc_type==0){
4261 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4263 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4264 h->delta_poc_bottom= get_se_golomb(&s->gb);
4268 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4269 h->delta_poc[0]= get_se_golomb(&s->gb);
4271 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4272 h->delta_poc[1]= get_se_golomb(&s->gb);
4277 if(h->pps.redundant_pic_cnt_present){
4278 h->redundant_pic_count= get_ue_golomb(&s->gb);
4281 //set defaults, might be overriden a few line later
4282 h->ref_count[0]= h->pps.ref_count[0];
4283 h->ref_count[1]= h->pps.ref_count[1];
4285 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4286 if(h->slice_type == B_TYPE){
4287 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4289 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4291 if(num_ref_idx_active_override_flag){
4292 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4293 if(h->slice_type==B_TYPE)
4294 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4296 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4297 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4303 if(!default_ref_list_done){
4304 fill_default_ref_list(h);
4307 if(decode_ref_pic_list_reordering(h) < 0)
4310 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4311 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4312 pred_weight_table(h);
4313 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4314 implicit_weight_table(h);
4318 if(s->current_picture.reference)
4319 decode_ref_pic_marking(h);
4321 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4322 h->cabac_init_idc = get_ue_golomb(&s->gb);
4324 h->last_qscale_diff = 0;
4325 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4326 if(s->qscale<0 || s->qscale>51){
4327 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4330 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4331 //FIXME qscale / qp ... stuff
4332 if(h->slice_type == SP_TYPE){
4333 get_bits1(&s->gb); /* sp_for_switch_flag */
4335 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4336 get_se_golomb(&s->gb); /* slice_qs_delta */
4339 h->deblocking_filter = 1;
4340 h->slice_alpha_c0_offset = 0;
4341 h->slice_beta_offset = 0;
4342 if( h->pps.deblocking_filter_parameters_present ) {
4343 h->deblocking_filter= get_ue_golomb(&s->gb);
4344 if(h->deblocking_filter < 2)
4345 h->deblocking_filter^= 1; // 1<->0
4347 if( h->deblocking_filter ) {
4348 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4349 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4352 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4353 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4354 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4355 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4356 h->deblocking_filter= 0;
4359 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4360 slice_group_change_cycle= get_bits(&s->gb, ?);
4365 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4366 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4368 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4370 av_get_pict_type_char(h->slice_type),
4371 pps_id, h->frame_num,
4372 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4373 h->ref_count[0], h->ref_count[1],
4375 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4377 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads the prefix part of a level code from the bit reader.
 * The number of bits consumed (log) is derived from the position of the
 * highest set bit in the cached bits: log = 32 - av_log2(buf).
 * NOTE(review): the local declarations and the return statement are not among
 * the lines shown here; the debug message prints log-1 as the prefix value, so
 * that is presumably what is returned - confirm.
 */
4387 static inline int get_level_prefix(GetBitContext *gb){
4391 OPEN_READER(re, gb);
4392 UPDATE_CACHE(re, gb);
4393 buf=GET_CACHE(re, gb);
// assumes buf is a 32 bit window of the upcoming bits - TODO confirm against GET_CACHE
4395 log= 32 - av_log2(buf);
// trace output only: dumps the top log bits of the cache and the derived values
4397 print_bin(buf>>(32-log), log);
4398 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// skip the log bits that were just examined
4401 LAST_SKIP_BITS(re, gb, log);
4402 CLOSE_READER(re, gb);
/**
 * Inspects h->sub_mb_type[] to decide whether the 8x8 transform may be used
 * for a macroblock that is split into sub-macroblocks.
 * NOTE(review): the loop over i and the return statements are not among the
 * lines shown here; the visible test looks like the disqualifying condition
 * - confirm.
 */
4407 static inline int get_dct8x8_allowed(H264Context *h){
// matches a sub-macroblock that is not an 8x8 partition, or one that is
// direct while the SPS has direct_8x8_inference_flag cleared
4410 if(!IS_SUB_8X8(h->sub_mb_type[i])
4411 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4418 * decodes a residual block.
4419 * @param n block index
4420 * @param scantable scantable
4421 * @param max_coeff number of coefficients in the block
4422 * @return <0 if an error occurred
4424 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
// gb is the bit reader the coefficients are parsed from, block receives the
// decoded values and qmul is the per position multiplier used in the last
// loop of this function (block[j]= level[i] * qmul[j]).
4425 MpegEncContext * const s = &h->s;
// maps a predicted non zero count (0..16) to one of the coeff_token_vlc tables
4426 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
// parsed levels and zero runs, one entry per coded coefficient
4427 int level[16], run[16];
4428 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
4430 //FIXME put trailing_onex into the context
// coeff_token packs total_coeff in the upper bits and trailing_ones in the low 2 bits.
// Chroma DC has a dedicated VLC table.
4432 if(n == CHROMA_DC_BLOCK_INDEX){
4433 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4434 total_coeff= coeff_token>>2;
// Luma DC selects its table from the count predicted for block 0 and does
// not update non_zero_count_cache.
4436 if(n == LUMA_DC_BLOCK_INDEX){
4437 total_coeff= pred_non_zero_count(h, 0);
4438 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4439 total_coeff= coeff_token>>2;
// Any other block: table chosen from the count predicted for block n, and the
// decoded count is stored in the cache for later predictions.
4441 total_coeff= pred_non_zero_count(h, n);
4442 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4443 total_coeff= coeff_token>>2;
4444 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4448 //FIXME set last_non_zero?
4453 trailing_ones= coeff_token&3;
4454 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4455 assert(total_coeff<=16);
// trailing ones carry only a sign bit: 0 gives +1, 1 gives -1
4457 for(i=0; i<trailing_ones; i++){
4458 level[i]= 1 - 2*get_bits1(gb);
// initial suffix length is 1 only when more than 10 coefficients are coded
// and fewer than 3 of them are trailing ones, otherwise 0
4461 suffix_length= total_coeff > 10 && trailing_ones < 3;
// remaining levels: prefix from get_level_prefix plus a suffix whose size
// depends on the prefix and on suffix_length
4463 for(; i<total_coeff; i++){
4464 const int prefix= get_level_prefix(gb);
4465 int level_code, mask;
4467 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4469 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4471 level_code= (prefix<<suffix_length); //part
4472 }else if(prefix==14){
4474 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4476 level_code= prefix + get_bits(gb, 4); //part
4477 }else if(prefix==15){
// escape: fixed 12 bit suffix
4478 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4479 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
// any larger prefix is reported as an error
4481 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
// the first level after fewer than 3 trailing ones is offset by 2
4485 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
// map level_code to a signed level: even codes become positive, odd codes
// negative, with magnitude (2+level_code)>>1
4487 mask= -(level_code&1);
4488 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4490 if(suffix_length==0) suffix_length=1; //FIXME split first iteration
// grow the suffix length (up to 6) once a level exceeds the current threshold
4493 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
// NOTE(review): the parentheses on the next line are unbalanced; it looks like
// a disabled alternative to the ABS() form above - confirm it is excluded
// from compilation.
4495 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4496 /* ? == prefix > 2 or sth */
4498 tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
// a completely filled block is handled separately from the total_zeros parse
// below (the statement it guards is not shown)
4501 if(total_coeff == max_coeff)
// total number of zeros preceding the last coefficient, from the table that
// belongs to this total_coeff
4504 if(n == CHROMA_DC_BLOCK_INDEX)
4505 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4507 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// one run per coefficient except the last; the VLC table depends on how many
// zeros are still left, with a single table once 7 or more remain
4510 for(i=0; i<total_coeff-1; i++){
4513 else if(zeros_left < 7){
4514 run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4516 run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4518 zeros_left -= run[i];
4522 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4526 for(; i<total_coeff-1; i++){
// Two placement loops follow; both walk the levels from last to first and
// advance coeff_num by run+1 per coefficient. Only the second one multiplies
// by qmul. NOTE(review): the condition choosing between them is not shown.
4534 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4537 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4538 j= scantable[ coeff_num ];
4543 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4546 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4547 j= scantable[ coeff_num ];
4549 block[j]= level[i] * qmul[j];
4550 // printf("%d %d ", block[j], qmul[j]);
4557 * decodes a P_SKIP or B_SKIP macroblock
4559 static void decode_mb_skip(H264Context *h){
4560 MpegEncContext * const s = &h->s;
// index of the current macroblock in the per picture tables
4561 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// a skipped macroblock has no coefficients: clear its 16 stored counts and
// 8*5 bytes of the count cache starting at offset 8
4564 memset(h->non_zero_count[mb_xy], 0, 16);
4565 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// the field decoding flag is read here only for MBAFF frames, when the skip
// run has reached 0 and the macroblock is on an even row
4567 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4568 h->mb_field_decoding_flag= get_bits1(&s->gb);
4570 if(h->mb_field_decoding_flag)
4571 mb_type|= MB_TYPE_INTERLACED;
// B slices: motion comes from direct prediction
4573 if( h->slice_type == B_TYPE )
4575 // just for fill_caches. pred_direct_motion will set the real mb_type
4576 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4578 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4579 pred_direct_motion(h, &mb_type);
// motion vector differences are zeroed for both lists
4581 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4582 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
// other slices: one 16x16 list 0 partition with reference 0 and the vector
// produced by pred_pskip_motion
4588 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4590 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4591 pred_pskip_motion(h, &mx, &my);
4592 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4593 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4595 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
// store the motion data and the per macroblock bookkeeping for this picture
4598 write_back_motion(h, mb_type);
4599 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4600 s->current_picture.qscale_table[mb_xy]= s->qscale;
4601 h->slice_table[ mb_xy ]= h->slice_num;
// remembered so that decode_mb_cavlc can tell the previous macroblock was skipped
4602 h->prev_mb_skipped= 1;
4606 * decodes a macroblock
4607 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4609 static int decode_mb_cavlc(H264Context *h){
4610 MpegEncContext * const s = &h->s;
// index of the current macroblock in the per picture tables
4611 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4612 int mb_type, partition_count, cbp;
// starts from the PPS setting and is narrowed further down for sub partitioned
// and direct macroblocks
4613 int dct8x8_allowed= h->pps.transform_8x8_mode;
4615 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4617 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4618 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// skip runs exist only outside I and SI slices; -1 means no run is pending
4620 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4621 if(s->mb_skip_run==-1)
4622 s->mb_skip_run= get_ue_golomb(&s->gb);
// a non zero remaining run means this macroblock is skipped
4624 if (s->mb_skip_run--) {
// MBAFF: the field flag is read on even rows or after a skipped macroblock,
// otherwise it follows the picture structure
4629 if(h->mb_aff_frame){
4630 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4631 h->mb_field_decoding_flag = get_bits1(&s->gb);
4633 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4635 h->prev_mb_skipped= 0;
// raw mb_type index, translated through the table of the slice type
4637 mb_type= get_ue_golomb(&s->gb);
4638 if(h->slice_type == B_TYPE){
4640 partition_count= b_mb_type_info[mb_type].partition_count;
4641 mb_type= b_mb_type_info[mb_type].type;
4644 goto decode_intra_mb;
4646 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4648 partition_count= p_mb_type_info[mb_type].partition_count;
4649 mb_type= p_mb_type_info[mb_type].type;
4652 goto decode_intra_mb;
4655 assert(h->slice_type == I_TYPE);
4658 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// intra types carry cbp and the 16x16 prediction mode in the table entry
4662 cbp= i_mb_type_info[mb_type].cbp;
4663 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4664 mb_type= i_mb_type_info[mb_type].type;
4667 if(h->mb_field_decoding_flag)
4668 mb_type |= MB_TYPE_INTERLACED;
4670 h->slice_table[ mb_xy ]= h->slice_num;
// PCM macroblocks: raw 8 bit samples are read straight into h->mb
4672 if(IS_INTRA_PCM(mb_type)){
4675 // we assume these blocks are very rare so we don't optimize it
4676 align_get_bits(&s->gb);
4678 // The pixels are stored in the same order as levels in h->mb array.
4679 for(y=0; y<16; y++){
4680 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4681 for(x=0; x<16; x++){
4682 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4683 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// first chroma plane is stored from offset 256
4687 const int index= 256 + 4*(y&3) + 32*(y>>2);
4689 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4690 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// second chroma plane is stored from offset 256 + 64
4694 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4696 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4697 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4701 // In deblocking, the quantizer is 0
4702 s->current_picture.qscale_table[mb_xy]= 0;
4703 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4704 // All coeffs are present
4705 memset(h->non_zero_count[mb_xy], 16, 16);
4707 s->current_picture.mb_type[mb_xy]= mb_type;
4711 fill_caches(h, mb_type, 0);
4714 if(IS_INTRA(mb_type)){
4715 // init_top_left_availability(h);
4716 if(IS_INTRA4x4(mb_type)){
// optional one bit flag selecting the 8x8 transform
4719 if(dct8x8_allowed && get_bits1(&s->gb)){
4720 mb_type |= MB_TYPE_8x8DCT;
4724 // fill_intra4x4_pred_table(h);
// per block prediction mode: one bit tells whether a mode is coded; a coded
// mode is a 3 bit value compared against the predicted mode, otherwise the
// predicted mode is used
4725 for(i=0; i<16; i+=di){
4726 const int mode_coded= !get_bits1(&s->gb);
4727 const int predicted_mode= pred_intra_mode(h, i);
4731 const int rem_mode= get_bits(&s->gb, 3);
4732 if(rem_mode<predicted_mode)
4737 mode= predicted_mode;
4741 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4743 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4745 write_back_intra_pred_mode(h);
4746 if( check_intra4x4_pred_mode(h) < 0)
// a negative result from check_intra_pred_mode is treated as an error
4749 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4750 if(h->intra16x16_pred_mode < 0)
4753 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4755 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4756 if(h->chroma_pred_mode < 0)
// four sub-macroblocks, each with its own type
4758 }else if(partition_count==4){
4759 int i, j, sub_partition_count[4], list, ref[2][4];
4761 if(h->slice_type == B_TYPE){
4763 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
// values of 13 and above are rejected for B slices
4764 if(h->sub_mb_type[i] >=13){
4765 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4768 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4769 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// direct prediction runs once if any of the four sub-macroblocks is direct
4771 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4772 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4773 pred_direct_motion(h, &mb_type);
4775 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4777 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
// values of 4 and above are rejected for P slices
4778 if(h->sub_mb_type[i] >=4){
4779 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4782 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4783 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices for every list, before any motion vector is read
4787 for(list=0; list<2; list++){
4788 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4789 if(ref_count == 0) continue;
4790 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4794 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4795 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4796 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4805 dct8x8_allowed = get_dct8x8_allowed(h);
// motion vectors: prediction plus the coded difference per sub partition
4807 for(list=0; list<2; list++){
4808 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4809 if(ref_count == 0) continue;
4812 if(IS_DIRECT(h->sub_mb_type[i])) continue;
// the reference of sub-macroblock i covers its 2x2 cells in the cache
4813 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4814 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4816 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4817 const int sub_mb_type= h->sub_mb_type[i];
4818 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4819 for(j=0; j<sub_partition_count[i]; j++){
4821 const int index= 4*i + block_width*j;
4822 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4823 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4824 mx += get_se_golomb(&s->gb);
4825 my += get_se_golomb(&s->gb);
4826 tprintf("final mv:%d %d\n", mx, my);
// replicate the vector over the cache cells covered by this sub partition
4828 if(IS_SUB_8X8(sub_mb_type)){
4829 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4830 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4831 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4832 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4833 }else if(IS_SUB_8X4(sub_mb_type)){
4834 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4835 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4836 }else if(IS_SUB_4X8(sub_mb_type)){
4837 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4838 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4840 assert(IS_SUB_4X4(sub_mb_type));
4841 mv_cache[ 0 ][0]= mx;
4842 mv_cache[ 0 ][1]= my;
4846 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4852 }else if(IS_DIRECT(mb_type)){
4853 pred_direct_motion(h, &mb_type);
// the 8x8 transform stays allowed for direct macroblocks only with
// direct_8x8_inference_flag set
4854 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4856 int list, mx, my, i;
4857 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// 16x16: one reference and one vector per used list
4858 if(IS_16X16(mb_type)){
4859 for(list=0; list<2; list++){
4860 if(h->ref_count[list]>0){
4861 if(IS_DIR(mb_type, 0, list)){
4862 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4863 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4865 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4868 for(list=0; list<2; list++){
4869 if(IS_DIR(mb_type, 0, list)){
4870 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4871 mx += get_se_golomb(&s->gb);
4872 my += get_se_golomb(&s->gb);
4873 tprintf("final mv:%d %d\n", mx, my);
4875 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4877 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
// 16x8: partition i occupies the cache rows starting at scan8[0] + 16*i
4880 else if(IS_16X8(mb_type)){
4881 for(list=0; list<2; list++){
4882 if(h->ref_count[list]>0){
4884 if(IS_DIR(mb_type, i, list)){
4885 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4886 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4888 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4892 for(list=0; list<2; list++){
4894 if(IS_DIR(mb_type, i, list)){
4895 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4896 mx += get_se_golomb(&s->gb);
4897 my += get_se_golomb(&s->gb);
4898 tprintf("final mv:%d %d\n", mx, my);
4900 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4902 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
// 8x16: partition i occupies the cache columns starting at scan8[0] + 2*i
4906 assert(IS_8X16(mb_type));
4907 for(list=0; list<2; list++){
4908 if(h->ref_count[list]>0){
4910 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4911 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4912 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4914 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
4918 for(list=0; list<2; list++){
4920 if(IS_DIR(mb_type, i, list)){
4921 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4922 mx += get_se_golomb(&s->gb);
4923 my += get_se_golomb(&s->gb);
4924 tprintf("final mv:%d %d\n", mx, my);
4926 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
4928 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
4934 if(IS_INTER(mb_type))
4935 write_back_motion(h, mb_type);
// intra 16x16 already got its cbp from the mb_type table; every other type
// codes an index that is mapped through an intra or inter table
4937 if(!IS_INTRA16x16(mb_type)){
4938 cbp= get_ue_golomb(&s->gb);
4940 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
4944 if(IS_INTRA4x4(mb_type))
4945 cbp= golomb_to_intra4x4_cbp[cbp];
4947 cbp= golomb_to_inter_cbp[cbp];
// non intra macroblocks with luma coefficients may signal the 8x8 transform here
4950 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4951 if(get_bits1(&s->gb))
4952 mb_type |= MB_TYPE_8x8DCT;
4954 s->current_picture.mb_type[mb_xy]= mb_type;
// residual data is present when cbp is non zero or for intra 16x16
4956 if(cbp || IS_INTRA16x16(mb_type)){
4957 int i8x8, i4x4, chroma_idx;
4958 int chroma_qp, dquant;
// intra and inter residuals are read through separate bit reader pointers
4959 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4960 const uint8_t *scan, *dc_scan;
4962 // fill_non_zero_count_cache(h);
// field or zigzag scan depending on the interlaced flag; the _q0 tables are
// used when qscale is 0
4964 if(IS_INTERLACED(mb_type)){
4965 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4966 dc_scan= luma_dc_field_scan;
4968 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4969 dc_scan= luma_dc_zigzag_scan;
4972 dquant= get_se_golomb(&s->gb);
// accepted range for the quantizer delta is -26..25
4974 if( dquant > 25 || dquant < -26 ){
4975 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// wrap the updated quantizer back into 0..51
4979 s->qscale += dquant;
4980 if(((unsigned)s->qscale) > 51){
4981 if(s->qscale<0) s->qscale+= 52;
4982 else s->qscale-= 52;
4985 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
// intra 16x16: a 16 coefficient luma DC block, then AC blocks of 15
// coefficients that use the scan table from its second entry on
4986 if(IS_INTRA16x16(mb_type)){
4987 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){
4988 return -1; //FIXME continue if partitioned and other return -1 too
4991 assert((cbp&15) == 0 || (cbp&15) == 15);
4994 for(i8x8=0; i8x8<4; i8x8++){
4995 for(i4x4=0; i4x4<4; i4x4++){
4996 const int index= i4x4 + 4*i8x8;
4997 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){
5003 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// other types: luma is coded per 8x8 quadrant as signalled by the low cbp bits
5006 for(i8x8=0; i8x8<4; i8x8++){
5007 if(cbp & (1<<i8x8)){
5008 if(IS_8x8DCT(mb_type)){
5009 DCTELEM *buf = &h->mb[64*i8x8];
// the 8x8 block is read as four 16 coefficient blocks, each with its own
// 16 entry slice of zigzag_scan8x8_cavlc, all written into the same buffer
5011 for(i4x4=0; i4x4<4; i4x4++){
5012 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5013 h->dequant8_coeff[s->qscale], 16) <0 )
// rounded division of the coefficients by 4
5019 buf[i] = (buf[i] + 2) >> 2;
// merge the four per block counts into the first cache cell of the quadrant
5021 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5022 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5024 for(i4x4=0; i4x4<4; i4x4++){
5025 const int index= i4x4 + 4*i8x8;
5027 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){
// quadrant without coded coefficients: clear its four count cells
5033 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5034 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC: one 4 coefficient block per chroma plane
5040 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5041 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){
// chroma AC: four 15 coefficient blocks per plane, block indices 16..23
5047 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5048 for(i4x4=0; i4x4<4; i4x4++){
5049 const int index= 16 + 4*chroma_idx + i4x4;
5050 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){
// no chroma AC coded: clear the chroma count cells
5056 uint8_t * const nnz= &h->non_zero_count_cache[0];
5057 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5058 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// no residual at all: clear the luma and chroma count cells
5061 uint8_t * const nnz= &h->non_zero_count_cache[0];
5062 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5063 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5064 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5066 s->current_picture.qscale_table[mb_xy]= s->qscale;
5067 write_back_non_zero_count(h);
/**
 * Decodes the field decoding flag with CABAC.
 * The context is chosen from the macroblock pairs to the left and above: a
 * neighbour is considered when it belongs to the current slice and is
 * interlaced.
 * @return the decoded bin, read with context state 70 + ctx
 */
5072 static int decode_cabac_field_decoding_flag(H264Context *h) {
5073 MpegEncContext * const s = &h->s;
5074 const int mb_x = s->mb_x;
// clearing the low bit addresses the even row of the current pair
5075 const int mb_y = s->mb_y & ~1;
// left neighbour, and the even row two macroblock rows above
5076 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5077 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5079 unsigned int ctx = 0;
// NOTE(review): the statements guarded by the next two tests are not shown;
// presumably each one increments ctx - confirm.
5081 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5084 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5088 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra macroblock type index with CABAC.
 * @param ctx_base index of the first context state to use in h->cabac_state
 * @param intra_slice added to some of the state indices below; the callers in
 *        this file pass 1 for I slices and 0 when an intra type is decoded
 *        inside a P or B slice
 * @return 0 for I4x4, 25 for PCM, otherwise an I16x16 index that starts at 1
 *         and is increased by 12 when cbp_luma != 0 and by 4 or 8 for
 *         cbp_chroma 1 or 2 (further additions are not shown here)
 */
5091 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5092 uint8_t *state= &h->cabac_state[ctx_base];
5096 MpegEncContext * const s = &h->s;
5097 const int mba_xy = h->left_mb_xy[0];
5098 const int mbb_xy = h->top_mb_xy;
// neighbours in the same slice that are not I4x4 influence the context of the
// first bin (the guarded statements are not shown)
5100 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5102 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5104 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5105 return 0; /* I4x4 */
// alternative first bin that always uses state[0]
5108 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5109 return 0; /* I4x4 */
5112 if( get_cabac_terminate( &h->cabac ) )
5113 return 25; /* PCM */
5115 mb_type = 1; /* I16x16 */
5116 if( get_cabac( &h->cabac, &state[1] ) )
5117 mb_type += 12; /* cbp_luma != 0 */
5119 if( get_cabac( &h->cabac, &state[2] ) ) {
5120 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5121 mb_type += 4 * 2; /* cbp_chroma == 2 */
5123 mb_type += 4 * 1; /* cbp_chroma == 1 */
// two more bins follow; NOTE(review): what they add to mb_type is not shown,
// presumably the prediction mode bits - confirm.
5125 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5127 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
/**
 * Decodes the macroblock type index with CABAC for I, P and B slices.
 * Intra types inside P and B slices are delegated to
 * decode_cabac_intra_mb_type and offset by 5 and 23 respectively.
 * @return the mb_type index for the current slice type
 */
5132 static int decode_cabac_mb_type( H264Context *h ) {
5133 MpegEncContext * const s = &h->s;
5135 if( h->slice_type == I_TYPE ) {
5136 return decode_cabac_intra_mb_type(h, 3, 1);
5137 } else if( h->slice_type == P_TYPE ) {
// first bin 0 selects the inter types, decoded with states 15 to 17
5138 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5140 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5141 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5142 return 0; /* P_L0_D16x16; */
5144 return 3; /* P_8x8; */
5146 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5147 return 2; /* P_L0_D8x16; */
5149 return 1; /* P_L0_D16x8; */
// intra type inside a P slice
5152 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5154 } else if( h->slice_type == B_TYPE ) {
5155 const int mba_xy = h->left_mb_xy[0];
5156 const int mbb_xy = h->top_mb_xy;
// neighbours in the same slice that are neither skipped nor direct influence
// the context of the first bin (the guarded statements are not shown)
5160 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5161 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5163 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5164 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5167 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5168 return 0; /* B_Direct_16x16 */
5170 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5171 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// four more bins are collected most significant first
5174 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5175 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5176 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5177 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
// NOTE(review): the condition guarding the next return is not shown
5179 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5180 else if( bits == 13 ) {
// intra type inside a B slice
5181 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5182 } else if( bits == 14 )
5183 return 11; /* B_L1_L0_8x16 */
5184 else if( bits == 15 )
5185 return 22; /* B_8x8 */
// remaining patterns take one more bin
5187 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5188 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5190 /* TODO SI/SP frames? */
/**
 * Decodes the macroblock skip flag with CABAC.
 * The context depends on the left and top neighbours: a neighbour is
 * considered when it belongs to the current slice and is not skipped.
 * @return the decoded bin; states start at 11 for P and SP slices and at 24
 *         otherwise
 */
5195 static int decode_cabac_mb_skip( H264Context *h) {
5196 MpegEncContext * const s = &h->s;
5197 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
// left neighbour and the macroblock one row above
5198 const int mba_xy = mb_xy - 1;
5199 const int mbb_xy = mb_xy - s->mb_stride;
// NOTE(review): the statements guarded by the next two tests are not shown;
// presumably each one increments ctx - confirm.
5202 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5204 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5207 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5208 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5210 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 * @param pred_mode the predicted mode the decoded value is compared against
 * NOTE(review): the statements guarded by the tests below and the return
 * statements are not shown; the shape suggests one flag bin (state 68)
 * followed by three mode bins (state 69) - confirm.
 */
5213 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5216 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5219 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5221 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5223 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
// decoded values at or above the predicted mode are treated specially
5225 if( mode >= pred_mode )
/**
 * Decodes the chroma prediction mode with CABAC.
 * The context of the first bin depends on whether the left and top
 * neighbours belong to the current slice and have a non zero entry in
 * h->chroma_pred_mode_table. Up to two more bins use state 64+3.
 * NOTE(review): the return statements are not shown - confirm the values.
 */
5231 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5232 const int mba_xy = h->left_mb_xy[0];
5233 const int mbb_xy = h->top_mb_xy;
5237 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5238 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5241 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5244 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5247 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5249 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Lookup tables that translate a 4x4 block index (0..15) into a column (x)
// and a row (y), both in 0..3. Four consecutive indices cover one 2x2 group
// of blocks; decode_cabac_mb_cbp_luma reads them at index 4*i8x8.
5255 static const uint8_t block_idx_x[16] = {
5256 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5258 static const uint8_t block_idx_y[16] = {
5259 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
// indexed as [x][y]; NOTE(review): the entries are not shown, presumably the
// inverse mapping from coordinates back to the block index - confirm.
5261 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the luma part of the coded block pattern with CABAC, one bin per
 * 8x8 block. The context of each bin depends on whether the 8x8 blocks to
 * the left and above are coded; blocks outside the macroblock are looked up
 * in h->left_cbp and, per the trace message below, the top cbp.
 * NOTE(review): several branches and the return statement are not shown.
 */
5268 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5269 MpegEncContext * const s = &h->s;
5274 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
// coordinates of the first 4x4 block of this 8x8 block
5280 x = block_idx_x[4*i8x8];
5281 y = block_idx_y[4*i8x8];
// left neighbour macroblock is usable only inside the picture and the same slice
5285 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5286 cbp_a = h->left_cbp;
5287 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
// same test for the macroblock above
5292 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5294 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5297 /* No need to test for skip as we put 0 for skip block */
5298 /* No need to test for IPCM as we put 1 for IPCM block */
// 8x8 index of the block to the left; x-1 wraps into the neighbouring macroblock
5300 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5301 if( ((cbp_a >> i8x8a)&0x01) == 0 )
// 8x8 index of the block above; y-1 wraps into the neighbouring macroblock
5306 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5307 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5311 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma part of the coded block pattern with CABAC.
 * @return 1 or 2 on the visible return path, from a second bin read with
 *         state 77 + ctx
 * NOTE(review): the declarations, the initial values of ctx and the return
 * taken when the first bin is 0 are not shown.
 */
5317 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// chroma cbp of the neighbours is kept in bits 4 and 5 of their cbp
5321 cbp_a = (h->left_cbp>>4)&0x03;
5322 cbp_b = (h-> top_cbp>>4)&0x03;
// first bin: context from neighbours that have any chroma coefficients
5325 if( cbp_a > 0 ) ctx++;
5326 if( cbp_b > 0 ) ctx += 2;
5327 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// second bin: context from neighbours whose chroma cbp equals 2
5331 if( cbp_a == 2 ) ctx++;
5332 if( cbp_b == 2 ) ctx += 2;
5333 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes the quantizer delta of a macroblock with CABAC.
 * Bins are read with states starting at 60 until a zero bin is found.
 * NOTE(review): the declarations, the loop body and all but one return
 * statement are not shown.
 */
5335 static int decode_cabac_mb_dqp( H264Context *h) {
5336 MpegEncContext * const s = &h->s;
// previous macroblock: either the left neighbour ...
5342 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
// ... or the last macroblock of the row above (the selecting condition is
// not shown, presumably mb_x > 0 - confirm)
5344 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
// the context depends on a non zero previous delta combined with the previous
// macroblock being intra 16x16 or having any of its low 6 cbp bits set
5346 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5349 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
// negative branch of the mapping from the bin count to a signed delta
5360 return -(val + 1)/2;
/**
 * Decodes a sub-macroblock type index with CABAC using states 21 to 23
 * (the name indicates P slices).
 * NOTE(review): the return statements are not shown - confirm the values.
 */
5362 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5363 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5365 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5367 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a sub-macroblock type index with CABAC using states 36 to 39
 * (the name indicates B slices).
 * @return 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 / B_L1_8x8, 11 or 12 for
 *         B_L1_4x4 / B_Bi_4x4; the remaining values are built up in type
 * NOTE(review): the initial value of type and the final return are not shown.
 */
5371 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5373 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5374 return 0; /* B_Direct_8x8 */
5375 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5376 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5378 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5379 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5380 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// two more bins, most significant first, are added to type
5383 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5384 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes the transform size flag with CABAC.
 * @return the decoded bin, read with state 399 + h->neighbor_transform_size
 */
5388 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5389 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC.
 * @param list reference list whose ref_cache is consulted
 * @param n    block index (translated through scan8[]) the reference belongs to
 * Bins are read from h->cabac_state[54 + ctx] until a zero bin is seen.
 */
5392 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
/* references of the left (scan8 - 1) and top (scan8 - 8) neighbouring blocks */
5393 int refa = h->ref_cache[list][scan8[n] - 1];
5394 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices a neighbour is only considered if it has ref > 0 and is not flagged in direct_cache */
5398 if( h->slice_type == B_TYPE) {
5399 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5401 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5410 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one component of a motion vector difference with CABAC.
 * @param list reference list whose mvd_cache is consulted
 * @param n    block index (translated through scan8[])
 * @param l    component: 0 selects context base 40, anything else base 47
 * @returns the signed difference; the sign is the last bypass bin read
 */
5420 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* sum of the absolute differences of the left and top neighbouring blocks, used to pick the first context */
5421 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5422 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5423 int ctxbase = (l == 0) ? 40 : 47;
5428 else if( amvd > 32 )
5433 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* context coded bins while mvd is below 9 */
5438 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* larger values continue with bypass coded bins */
5446 while( get_cabac_bypass( &h->cabac ) ) {
5451 if( get_cabac_bypass( &h->cabac ) )
5455 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Computes the context index of the coded block flag.
 * @param cat block category, same numbering as in decode_cabac_residual()
 * @param idx block index within the category
 * @returns ctx + 4 * cat, where ctx is derived from the left (nza) and
 *          top (nzb) neighbour information gathered below
 */
5459 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* first branch (presumably cat == 0 -- TODO confirm): bit 0x100 of the neighbours' cbp */
5464 nza = h->left_cbp&0x100;
5465 nzb = h-> top_cbp&0x100;
5466 } else if( cat == 1 || cat == 2 ) {
/* luma blocks: non zero counts of the left (-1) and top (-8) cache entries */
5467 nza = h->non_zero_count_cache[scan8[idx] - 1];
5468 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5469 } else if( cat == 3 ) {
/* chroma DC: bit 6 + idx of the neighbours' cbp */
5470 nza = (h->left_cbp>>(6+idx))&0x01;
5471 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* remaining categories: non zero counts of the cache entries starting at scan8[16] */
5474 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5475 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5484 return ctx + 4 * cat;
/**
 * Decodes the residual coefficients of one block with CABAC.
 * @param block     destination; levels are stored at block[scantable[pos]]
 * @param cat       block category, 0..5 (see the table inside the function)
 * @param n         block index, its meaning depends on cat
 * @param scantable maps a scan position to an index in block
 * @param qmul      dequantization factors, applied to every category except
 *                  0 and 3 whose levels are stored unscaled
 * @param max_coeff number of coefficients of the block
 * Besides block, h->non_zero_count_cache and bits 0x100 / 0x40 << n of
 * h->cbp_table[mb_xy] are updated.
 * @returns callers treat a negative value as an error
 */
5487 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
5488 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context bases of the two flag kinds, indexed by h->mb_field_decoding_flag */
5489 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5490 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
/* offsets added to the bases above, indexed by cat */
5491 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5492 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
/* first context of the level decoding, indexed by cat */
5493 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* per position context offset: scan position itself ... */
5494 static const int identity[15] = {
5495 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
/* ... or the dedicated 8x8 tables (presumably selected for cat 5 -- TODO confirm) */
5497 static const int significant_coeff_flag_offset_8x8[63] = {
5498 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5499 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5500 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5501 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5503 static const int last_coeff_flag_offset_8x8[63] = {
5504 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5505 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5506 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5507 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5513 int coeff_count = 0;
5516 int abslevelgt1 = 0;
5518 const int* significant_coeff_ctx_offset;
5519 const int* last_coeff_ctx_offset;
5520 const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat]
5521 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5522 const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat]
5523 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5525 /* cat: 0-> DC 16x16 n = 0
5526 * 1-> AC 16x16 n = luma4x4idx
5527 * 2-> Luma4x4 n = luma4x4idx
5528 * 3-> DC Chroma n = iCbCr
5529 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5530 * 5-> Luma8x8 n = 4 * luma8x8idx
5533 /* read coded block flag */
5535 significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8;
5536 last_coeff_ctx_offset = last_coeff_flag_offset_8x8;
/* coded block flag is 0: record an empty block in the non zero count cache */
5538 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5539 if( cat == 1 || cat == 2 )
5540 h->non_zero_count_cache[scan8[n]] = 0;
5542 h->non_zero_count_cache[scan8[16+n]] = 0;
5547 significant_coeff_ctx_offset =
5548 last_coeff_ctx_offset = identity;
/* significance map: for each position but the last one read a 'significant' bin and,
 * if it is set, remember the position and read the 'last' bin */
5551 for(last= 0; last < max_coeff - 1; last++) {
5552 int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last];
5553 if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) {
5554 int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last];
5555 index[coeff_count++] = last;
5556 if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) {
/* the loop ran to the end: the final position is taken as significant without reading a bin */
5562 if( last == max_coeff -1 ) {
5563 index[coeff_count++] = last;
5565 assert(coeff_count > 0);
/* record that this block has coefficients, in the cbp table or the non zero count cache */
5568 h->cbp_table[mb_xy] |= 0x100;
5569 else if( cat == 1 || cat == 2 )
5570 h->non_zero_count_cache[scan8[n]] = coeff_count;
5572 h->cbp_table[mb_xy] |= 0x40 << n;
5574 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5577 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* levels are decoded from the last significant position back to the first */
5580 for( i = coeff_count - 1; i >= 0; i-- ) {
5581 int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat];
5582 int j= scantable[index[i]];
/* first bin 0: the level has magnitude 1, only the sign (bypass bin) follows */
5584 if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) {
5585 if( cat == 0 || cat == 3 ) {
5586 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5589 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
5590 else block[j] = qmul[j];
/* larger magnitude: context coded bins while coeff_abs is below 15 ... */
5596 ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
5597 while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) {
/* ... followed by a bypass coded suffix adding powers of two */
5601 if( coeff_abs >= 15 ) {
5603 while( get_cabac_bypass( &h->cabac ) ) {
5604 coeff_abs += 1 << j;
5609 if( get_cabac_bypass( &h->cabac ) )
5610 coeff_abs += 1 << j ;
/* sign bin; categories 0 and 3 are stored without dequantization */
5614 if( cat == 0 || cat == 3 ) {
5615 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5616 else block[j] = coeff_abs;
5618 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
5619 else block[j] = coeff_abs * qmul[j];
/**
 * Computes the indices of the top and left neighbouring macroblocks of the
 * current macroblock and stores them in h->top_mb_xy and h->left_mb_xy[0].
 * For h->mb_aff_frame the indices are adjusted depending on the frame/field
 * state of the current macroblock and of the neighbouring pairs.
 */
5628 void inline compute_mb_neighboors(H264Context *h)
5630 MpegEncContext * const s = &h->s;
5631 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
/* default: the macroblocks directly above and to the left */
5632 h->top_mb_xy = mb_xy - s->mb_stride;
5633 h->left_mb_xy[0] = mb_xy - 1;
5634 if(h->mb_aff_frame){
/* pair_xy: index of the top macroblock of the pair containing the current one (mb_y with bit 0 cleared) */
5635 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5636 const int top_pair_xy = pair_xy - s->mb_stride;
5637 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5638 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5639 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5640 const int bottom = (s->mb_y & 1);
5642 ? !curr_mb_frame_flag // bottom macroblock
5643 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
/* in the cases selected above the top neighbour is one row further up */
5645 h->top_mb_xy -= s->mb_stride;
/* left pair has a different frame/field type: use the top macroblock of the left pair */
5647 if (left_mb_frame_flag != curr_mb_frame_flag) {
5648 h->left_mb_xy[0] = pair_xy - 1;
5655 * decodes a macroblock
5656 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * Parses one CABAC coded macroblock at (s->mb_x, s->mb_y): skip flag, field
 * decoding flag, macroblock type, prediction information (intra modes or
 * references and motion vectors), coded block pattern, quantizer delta and
 * residual coefficients. Results are stored in the per macroblock tables
 * (mb_type, cbp_table, qscale_table, ...) and in the caches of h.
 * The returns shown here yield -1 when an intra prediction mode check fails.
 */
5658 static int decode_mb_cabac(H264Context *h) {
5659 MpegEncContext * const s = &h->s;
5660 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5661 int mb_type, partition_count, cbp = 0;
5662 int dct8x8_allowed= h->pps.transform_8x8_mode;
5664 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5666 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5667 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5668 /* read skip flags */
5669 if( decode_cabac_mb_skip( h ) ) {
/* skipped macroblock: clear the per macroblock state that later macroblocks read */
5672 h->cbp_table[mb_xy] = 0;
5673 h->chroma_pred_mode_table[mb_xy] = 0;
5674 h->last_qscale_diff = 0;
/* MBAFF: the field flag is decoded for the top macroblock of a pair, or after a skipped macroblock */
5680 if(h->mb_aff_frame){
5681 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5682 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* otherwise (presumably the non MBAFF case) the flag follows the picture structure */
5684 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5686 h->prev_mb_skipped = 0;
5688 compute_mb_neighboors(h);
5689 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5690 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the decoded index into partition count and MB_TYPE_* flags via the slice specific table */
5694 if( h->slice_type == B_TYPE ) {
5696 partition_count= b_mb_type_info[mb_type].partition_count;
5697 mb_type= b_mb_type_info[mb_type].type;
5700 goto decode_intra_mb;
5702 } else if( h->slice_type == P_TYPE ) {
5704 partition_count= p_mb_type_info[mb_type].partition_count;
5705 mb_type= p_mb_type_info[mb_type].type;
5708 goto decode_intra_mb;
5711 assert(h->slice_type == I_TYPE);
/* intra types additionally supply the cbp and the 16x16 prediction mode */
5713 partition_count = 0;
5714 cbp= i_mb_type_info[mb_type].cbp;
5715 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5716 mb_type= i_mb_type_info[mb_type].type;
5718 if(h->mb_field_decoding_flag)
5719 mb_type |= MB_TYPE_INTERLACED;
5721 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: the samples are stored as raw bytes inside the CABAC bytestream */
5723 if(IS_INTRA_PCM(mb_type)) {
5727 // We assume these blocks are very rare so we dont optimize it.
5728 // FIXME The two following lines get the bitstream position in the cabac
5729 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5730 ptr= h->cabac.bytestream;
5731 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
/* NOTE(review): no check against h->cabac.bytestream_end is visible before the samples are read -- confirm */
5733 // The pixels are stored in the same order as levels in h->mb array.
5734 for(y=0; y<16; y++){
5735 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5736 for(x=0; x<16; x++){
5737 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5738 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
/* the two chroma planes follow, stored from h->mb[256] and h->mb[256 + 64] */
5742 const int index= 256 + 4*(y&3) + 32*(y>>2);
5744 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5745 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5749 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5751 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5752 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart the CABAC decoder right after the raw samples */
5756 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5758 // All blocks are present
5759 h->cbp_table[mb_xy] = 0x1ef;
5760 h->chroma_pred_mode_table[mb_xy] = 0;
5761 // In deblocking, the quantizer is 0
5762 s->current_picture.qscale_table[mb_xy]= 0;
5763 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5764 // All coeffs are present
5765 memset(h->non_zero_count[mb_xy], 16, 16);
5766 s->current_picture.mb_type[mb_xy]= mb_type;
5770 fill_caches(h, mb_type, 0);
/* intra macroblock: prediction modes */
5772 if( IS_INTRA( mb_type ) ) {
5774 if( IS_INTRA4x4( mb_type ) ) {
/* with the 8x8 transform one mode is decoded per 8x8 block (i advances by 4) and
 * written to the cache with fill_rectangle */
5775 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5776 mb_type |= MB_TYPE_8x8DCT;
5777 for( i = 0; i < 16; i+=4 ) {
5778 int pred = pred_intra_mode( h, i );
5779 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5780 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
/* otherwise one mode per 4x4 block */
5783 for( i = 0; i < 16; i++ ) {
5784 int pred = pred_intra_mode( h, i );
5785 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5787 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5790 write_back_intra_pred_mode(h);
5791 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5793 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5794 if( h->intra16x16_pred_mode < 0 ) return -1;
/* the chroma mode is stored in the table as decoded, before check_intra_pred_mode() replaces it */
5796 h->chroma_pred_mode_table[mb_xy] =
5797 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5799 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5800 if( h->chroma_pred_mode < 0 ) return -1;
/* four 8x8 partitions: sub types first, then all references, then all motion vectors */
5801 } else if( partition_count == 4 ) {
5802 int i, j, sub_partition_count[4], list, ref[2][4];
5804 if( h->slice_type == B_TYPE ) {
5805 for( i = 0; i < 4; i++ ) {
5806 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5807 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5808 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5810 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5811 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5812 pred_direct_motion(h, &mb_type);
/* flag direct 8x8 blocks in direct_cache, which decode_cabac_mb_ref() consults */
5813 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5814 for( i = 0; i < 4; i++ )
5815 if( IS_DIRECT(h->sub_mb_type[i]) )
5816 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5820 for( i = 0; i < 4; i++ ) {
5821 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5822 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5823 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices of the non direct 8x8 blocks, per list */
5827 for( list = 0; list < 2; list++ ) {
5828 if( h->ref_count[list] > 0 ) {
5829 for( i = 0; i < 4; i++ ) {
5830 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5831 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5832 if( h->ref_count[list] > 1 )
5833 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5839 h->ref_cache[list][ scan8[4*i]+1 ]=
5840 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5846 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors: prediction plus decoded difference for every sub partition */
5848 for(list=0; list<2; list++){
5850 if(IS_DIRECT(h->sub_mb_type[i])){
5851 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5854 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5856 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5857 const int sub_mb_type= h->sub_mb_type[i];
5858 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5859 for(j=0; j<sub_partition_count[i]; j++){
5862 const int index= 4*i + block_width*j;
5863 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5864 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5865 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5867 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5868 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5869 tprintf("final mv:%d %d\n", mx, my);
/* replicate mv and mvd over the cache entries covered by the sub partition shape */
5871 if(IS_SUB_8X8(sub_mb_type)){
5872 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5873 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5874 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5875 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5877 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5878 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5879 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5880 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5881 }else if(IS_SUB_8X4(sub_mb_type)){
5882 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5883 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5885 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5886 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5887 }else if(IS_SUB_4X8(sub_mb_type)){
5888 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5889 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5891 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5892 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5894 assert(IS_SUB_4X4(sub_mb_type));
5895 mv_cache[ 0 ][0]= mx;
5896 mv_cache[ 0 ][1]= my;
5898 mvd_cache[ 0 ][0]= mx - mpx;
5899 mvd_cache[ 0 ][1]= my - mpy;
/* zero mv and mvd of the 8x8 block; each 32 bit store clears both components of one entry */
5903 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5904 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5905 p[0] = p[1] = p[8] = p[9] = 0;
5906 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* direct macroblock: motion is predicted, no differences are coded */
5910 } else if( IS_DIRECT(mb_type) ) {
5911 pred_direct_motion(h, &mb_type);
5912 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5913 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5914 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16, 16x8 and 8x16 partitions: references of all partitions first, then motion vectors */
5916 int list, mx, my, i, mpx, mpy;
5917 if(IS_16X16(mb_type)){
5918 for(list=0; list<2; list++){
5919 if(IS_DIR(mb_type, 0, list)){
5920 if(h->ref_count[list] > 0 ){
5921 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5922 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5925 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
5927 for(list=0; list<2; list++){
5928 if(IS_DIR(mb_type, 0, list)){
5929 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5931 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5932 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5933 tprintf("final mv:%d %d\n", mx, my);
5935 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5936 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5938 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5941 else if(IS_16X8(mb_type)){
5942 for(list=0; list<2; list++){
5943 if(h->ref_count[list]>0){
5945 if(IS_DIR(mb_type, i, list)){
5946 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5947 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5949 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5953 for(list=0; list<2; list++){
5955 if(IS_DIR(mb_type, i, list)){
5956 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5957 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5958 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5959 tprintf("final mv:%d %d\n", mx, my);
5961 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5962 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5964 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5965 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5970 assert(IS_8X16(mb_type));
5971 for(list=0; list<2; list++){
5972 if(h->ref_count[list]>0){
5974 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5975 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5976 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5978 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5982 for(list=0; list<2; list++){
5984 if(IS_DIR(mb_type, i, list)){
5985 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5986 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5987 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5989 tprintf("final mv:%d %d\n", mx, my);
5990 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5991 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5993 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5994 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6001 if( IS_INTER( mb_type ) ) {
6002 h->chroma_pred_mode_table[mb_xy] = 0;
6003 write_back_motion( h, mb_type );
/* the cbp is coded explicitly for everything but intra 16x16: luma in bits 0-3, chroma from bit 4 */
6006 if( !IS_INTRA16x16( mb_type ) ) {
6007 cbp = decode_cabac_mb_cbp_luma( h );
6008 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6011 h->cbp_table[mb_xy] = cbp;
/* non intra macroblocks with luma coefficients may signal the 8x8 transform here */
6013 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6014 if( decode_cabac_mb_transform_size( h ) )
6015 mb_type |= MB_TYPE_8x8DCT;
6017 s->current_picture.mb_type[mb_xy]= mb_type;
/* residual data */
6019 if( cbp || IS_INTRA16x16( mb_type ) ) {
6020 const uint8_t *scan, *dc_scan;
/* scan tables depend on field/frame coding; the _q0 variants are used when qscale is 0 */
6023 if(IS_INTERLACED(mb_type)){
6024 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6025 dc_scan= luma_dc_field_scan;
6027 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6028 dc_scan= luma_dc_zigzag_scan;
6031 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
/* wrap qscale back into 0..51 */
6033 if(((unsigned)s->qscale) > 51){
6034 if(s->qscale<0) s->qscale+= 52;
6035 else s->qscale-= 52;
6037 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
/* intra 16x16: one DC block (cat 0) followed by 16 AC blocks (cat 1) of 15 coefficients */
6039 if( IS_INTRA16x16( mb_type ) ) {
6041 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6042 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0)
6045 for( i = 0; i < 16; i++ ) {
6046 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6047 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 )
6051 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other types: per 8x8 block whose cbp bit is set, one 8x8 block (cat 5) or four 4x4 blocks (cat 2) */
6055 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6056 if( cbp & (1<<i8x8) ) {
6057 if( IS_8x8DCT(mb_type) ) {
6058 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6059 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
/* rounding rescale of the 8x8 block coefficients: (x + 2) >> 2 */
6064 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6067 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6068 const int index = 4*i8x8 + i4x4;
6069 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6070 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 )
/* clear the four non zero count cache entries of the 8x8 block */
6074 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6075 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cat 3), 4 coefficients per plane */
6082 for( c = 0; c < 2; c++ ) {
6083 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6084 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->dequant4_coeff[h->chroma_qp], 4) < 0)
/* chroma AC (cat 4), 4 blocks of 15 coefficients per plane */
6091 for( c = 0; c < 2; c++ ) {
6092 for( i = 0; i < 4; i++ ) {
6093 const int index = 16 + 4 * c + i;
6094 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6095 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0)
/* clear the chroma non zero count cache entries */
6100 uint8_t * const nnz= &h->non_zero_count_cache[0];
6101 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6102 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* clear the luma and chroma non zero count cache entries */
6105 uint8_t * const nnz= &h->non_zero_count_cache[0];
6106 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6107 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6108 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6111 s->current_picture.qscale_table[mb_xy]= s->qscale;
6112 write_back_non_zero_count(h);
/**
 * Deblocks one vertical luma edge.
 * @param pix    first sample right of the edge; negative offsets address
 *               the samples left of it
 * @param stride passed on to the dsp filter function
 * @param bS     boundary strengths of the edge
 * @param qp     quantizer used to look up the alpha, beta and tc thresholds
 * Entries with bS below 4 are handled through
 * h->s.dsp.h264_h_loop_filter_luma(); the loop below implements the bS 4 case.
 */
6118 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
/* table indices are qp plus the slice level offset, clipped to 0..51 */
6120 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6121 const int alpha = alpha_table[index_a];
6122 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* tc per bS entry, -1 where bS is 0 */
6127 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6128 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6130 /* 16px edge length, because bS=4 is triggered by being at
6131 * the edge of an intra MB, so all 4 bS are the same */
6132 for( d = 0; d < 16; d++ ) {
6133 const int p0 = pix[-1];
6134 const int p1 = pix[-2];
6135 const int p2 = pix[-3];
6137 const int q0 = pix[0];
6138 const int q1 = pix[1];
6139 const int q2 = pix[2];
/* filter only where the sample differences are below the alpha / beta thresholds */
6141 if( ABS( p0 - q0 ) < alpha &&
6142 ABS( p1 - p0 ) < beta &&
6143 ABS( q1 - q0 ) < beta ) {
/* small step across the edge: each side may get the long filter, which modifies three samples */
6145 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6146 if( ABS( p2 - p0 ) < beta)
6148 const int p3 = pix[-4];
6150 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6151 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6152 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6155 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6157 if( ABS( q2 - q0 ) < beta)
6159 const int q3 = pix[3];
6161 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6162 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6163 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6166 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only p0 and q0 are modified */
6170 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6171 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6173 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge.
 * Thresholds are looked up like in filter_mb_edgev(); the filtering itself is
 * delegated to h->s.dsp.h264_h_loop_filter_chroma() (with tc values) or
 * h->s.dsp.h264_h_loop_filter_chroma_intra() (without).
 */
6179 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6181 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6182 const int alpha = alpha_table[index_a];
6183 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc is the table value plus 1, 0 where bS is 0 */
6188 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6189 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6191 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the left luma edge of a macroblock in an MBAFF frame, one row at
 * a time (16 rows).
 * @param bS boundary strengths, selected per row through bS_index
 * @param qp two quantizers, selected per row through qp_index
 * Thresholds are recomputed for every row because both bS and qp can change
 * from row to row.
 */
6195 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6197 for( i = 0; i < 16; i++, pix += stride) {
6203 int bS_index = (i >> 1);
6204 if (h->mb_field_decoding_flag) {
6206 bS_index |= (i & 1);
6209 if( bS[bS_index] == 0 ) {
/* field macroblocks alternate between the two qp per row, frame macroblocks switch after 8 rows */
6213 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6214 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6215 alpha = alpha_table[index_a];
6216 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* bS 1..3: filter with clipping to tc */
6219 if( bS[bS_index] < 4 ) {
6220 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6221 /* 4px edge length */
6222 const int p0 = pix[-1];
6223 const int p1 = pix[-2];
6224 const int p2 = pix[-3];
6225 const int q0 = pix[0];
6226 const int q1 = pix[1];
6227 const int q2 = pix[2];
6229 if( ABS( p0 - q0 ) < alpha &&
6230 ABS( p1 - p0 ) < beta &&
6231 ABS( q1 - q0 ) < beta ) {
/* p1 / q1 are adjusted (clipped to tc0) only when the side is smooth */
6235 if( ABS( p2 - p0 ) < beta ) {
6236 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6239 if( ABS( q2 - q0 ) < beta ) {
6240 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
/* p0 / q0 move towards each other by a delta clipped to tc */
6244 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6245 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6246 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6247 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS 4: same strong filter as in filter_mb_edgev() */
6250 /* 4px edge length */
6251 const int p0 = pix[-1];
6252 const int p1 = pix[-2];
6253 const int p2 = pix[-3];
6255 const int q0 = pix[0];
6256 const int q1 = pix[1];
6257 const int q2 = pix[2];
6259 if( ABS( p0 - q0 ) < alpha &&
6260 ABS( p1 - p0 ) < beta &&
6261 ABS( q1 - q0 ) < beta ) {
6263 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6264 if( ABS( p2 - p0 ) < beta)
6266 const int p3 = pix[-4];
6268 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6269 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6270 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6273 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6275 if( ABS( q2 - q0 ) < beta)
6277 const int q3 = pix[3];
6279 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6280 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6281 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6284 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6288 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6289 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6291 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the left chroma edge of a macroblock in an MBAFF frame, one row
 * at a time (8 rows). Only p0 and q0 are modified.
 * @param bS boundary strengths, selected per row through bS_index
 * @param qp two chroma quantizers, selected per row through qp_index
 * NOTE(review): the parameter is declared as bS[4] although the caller in
 * filter_mb() passes the array it fills for 8 entries -- harmless in C,
 * but confirm which entries are meant to be used here.
 */
6296 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6298 for( i = 0; i < 8; i++, pix += stride) {
6306 if( bS[bS_index] == 0 ) {
/* NOTE(review): i only runs from 0 to 7 here, so (i >> 3) is always 0 and qp[1] is
 * never selected for frame macroblocks -- looks copied from the 16 row luma
 * version, confirm whether (i >> 2) was intended */
6310 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6311 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6312 alpha = alpha_table[index_a];
6313 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
/* bS 1..3: delta clipped to tc (table value plus 1) */
6314 if( bS[bS_index] < 4 ) {
6315 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6316 /* 2px edge length (because we use same bS than the one for luma) */
6317 const int p0 = pix[-1];
6318 const int p1 = pix[-2];
6319 const int q0 = pix[0];
6320 const int q1 = pix[1];
6322 if( ABS( p0 - q0 ) < alpha &&
6323 ABS( p1 - p0 ) < beta &&
6324 ABS( q1 - q0 ) < beta ) {
6325 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6327 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6328 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6329 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS 4: unclipped three tap filter */
6332 const int p0 = pix[-1];
6333 const int p1 = pix[-2];
6334 const int q0 = pix[0];
6335 const int q1 = pix[1];
6337 if( ABS( p0 - q0 ) < alpha &&
6338 ABS( p1 - p0 ) < beta &&
6339 ABS( q1 - q0 ) < beta ) {
6341 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6342 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6343 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge.
 * @param pix    first sample below the edge; samples above it are addressed
 *               with negative multiples of stride
 * @param stride distance between vertically adjacent samples
 * @param bS     boundary strengths of the edge
 * @param qp     quantizer used to look up the alpha, beta and tc thresholds
 * Entries with bS below 4 are handled through
 * h->s.dsp.h264_v_loop_filter_luma(); the loop below implements the bS 4 case.
 */
6349 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6351 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6352 const int alpha = alpha_table[index_a];
6353 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6354 const int pix_next = stride;
/* tc per bS entry, -1 where bS is 0 */
6359 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6360 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6362 /* 16px edge length, see filter_mb_edgev */
6363 for( d = 0; d < 16; d++ ) {
6364 const int p0 = pix[-1*pix_next];
6365 const int p1 = pix[-2*pix_next];
6366 const int p2 = pix[-3*pix_next];
6367 const int q0 = pix[0];
6368 const int q1 = pix[1*pix_next];
6369 const int q2 = pix[2*pix_next];
6371 if( ABS( p0 - q0 ) < alpha &&
6372 ABS( p1 - p0 ) < beta &&
6373 ABS( q1 - q0 ) < beta ) {
6375 const int p3 = pix[-4*pix_next];
6376 const int q3 = pix[ 3*pix_next];
/* small step across the edge: each side may get the long filter, which modifies three samples */
6378 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6379 if( ABS( p2 - p0 ) < beta) {
6381 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6382 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6383 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6386 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6388 if( ABS( q2 - q0 ) < beta) {
6390 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6391 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6392 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6395 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* otherwise only p0 and q0 are modified */
6399 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6400 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6402 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge.
 * Thresholds are looked up like in filter_mb_edgeh(); the filtering itself is
 * delegated to h->s.dsp.h264_v_loop_filter_chroma() (with tc values) or
 * h->s.dsp.h264_v_loop_filter_chroma_intra() (without).
 */
6409 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6411 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6412 const int alpha = alpha_table[index_a];
6413 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* chroma tc is the table value plus 1, 0 where bS is 0 */
6418 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6419 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6421 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Deblocking driver for the macroblock at (mb_x, mb_y).
// For each edge it derives the boundary strengths bS[], averages the luma
// and chroma quantizers of the two macroblocks sharing the edge, and calls
// the edge filter helpers on img_y, img_cb and img_cr with linesize and
// uvlinesize as strides.
// NOTE(review): this listing omits some original lines (declarations,
// closing braces, a few statements); the comments below describe only what
// the visible lines show.
6425 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6426 MpegEncContext * const s = &h->s;
6427 const int mb_xy= mb_x + mb_y*s->mb_stride;
// Set once the special MBAFF left edge has been filtered, so the generic
// loop further down can take that into account.
6428 int first_vertical_edge_done = 0;
6430 /* FIXME: A given frame may occupy more than one position in
6431 * the reference list. So ref2frm should be populated with
6432 * frame numbers, not indices. */
// Lookup used as ref2frm[ref + 2]: ref values -2 and -1 both map to -1,
// non-negative refs map to themselves.
6433 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6436 // left mb is in picture
6437 && h->slice_table[mb_xy-1] != 255
6438 // and current and left pair do not have the same interlaced type
6439 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6440 // and left mb is in the same slice if deblocking_filter == 2
6441 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6442 /* First vertical edge is different in MBAFF frames
6443 * There are 8 different bS to compute and 2 different Qp
6450 first_vertical_edge_done = 1;
6451 for( i = 0; i < 8; i++ ) {
6453 int b_idx= 8 + 4 + 8*y;
6454 int bn_idx= b_idx - 1;
6456 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6458 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6459 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6461 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6462 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6463 h->non_zero_count_cache[bn_idx] != 0 ) {
6468 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6469 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6470 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6471 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6478 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6479 // Do not use s->qscale as luma quantizer because it has not the same
6480 // value in IPCM macroblocks.
6481 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6482 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6483 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6484 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6485 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6486 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6489 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6490 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6491 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6492 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6493 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6496 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6497 for( dir = 0; dir < 2; dir++ )
// mbm_xy is the macroblock across edge 0: the left one for vertical edges,
// h->top_mb_xy for horizontal edges.
6500 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
// Begin at edge 1 when that neighbour has slice_table value 255.
6501 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6503 if (first_vertical_edge_done) {
6505 first_vertical_edge_done = 0;
6508 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6512 for( edge = start; edge < 4; edge++ ) {
6513 /* mbn_xy: neighbor macroblock */
6514 int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
// Odd edges of an 8x8-transform macroblock get special handling; the
// statement taken for this condition is not visible in this listing.
6518 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
6521 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6522 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
6523 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
6525 // This is a special case in the norm where the filtering must
6526 // be done twice (one each of the field) even if we are in a
6527 // frame macroblock.
// Doubled strides address every second row; the first pass below starts at
// row 0 and the second pass at row 1 (img_y[linesize]).
6529 unsigned int tmp_linesize = 2 * linesize;
6530 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6531 int mbn_xy = mb_xy - 2 * s->mb_stride;
6535 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6536 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6537 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6543 // Do not use s->qscale as luma quantizer because it has not the same
6544 // value in IPCM macroblocks.
6545 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6546 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6547 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6548 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6549 chroma_qp = ( h->chroma_qp +
6550 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6551 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6552 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
// Second pass: move to the macroblock one row below the previous neighbour.
6555 mbn_xy += s->mb_stride;
6556 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6557 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6558 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6564 // Do not use s->qscale as luma quantizer because it has not the same
6565 // value in IPCM macroblocks.
6566 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6567 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6568 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6569 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6570 chroma_qp = ( h->chroma_qp +
6571 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6572 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6573 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6576 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6577 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6580 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
6581 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6590 bS[0] = bS[1] = bS[2] = bS[3] = value;
6593 for( i = 0; i < 4; i++ ) {
6594 int x = dir == 0 ? edge : i;
6595 int y = dir == 0 ? i : edge;
// b_idx is the cache slot of block (x, y); bn_idx is the slot one column to
// the left (dir 0) or one row above (dir 1).
6596 int b_idx= 8 + 4 + x + 8*y;
6597 int bn_idx= b_idx - (dir ? 8:1);
6599 if( h->non_zero_count_cache[b_idx] != 0 ||
6600 h->non_zero_count_cache[bn_idx] != 0 ) {
// Two lists are inspected for B slices, one otherwise. The condition is
// true when the references differ or either motion vector component
// differs by 4 or more.
6607 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6608 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6609 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6610 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6618 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6623 // Do not use s->qscale as luma quantizer because it has not the same
6624 // value in IPCM macroblocks.
6625 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6626 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6627 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6628 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
// Luma edges are 4 samples apart. Chroma is filtered only on even edges,
// at 2 samples per edge index.
6630 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6631 if( (edge&1) == 0 ) {
6632 int chroma_qp = ( h->chroma_qp +
6633 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6634 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6635 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6638 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6639 if( (edge&1) == 0 ) {
6640 int chroma_qp = ( h->chroma_qp +
6641 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6642 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6643 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
// Decodes the macroblocks of the current slice.
// The visible code has a CABAC section guarded by h->pps.cabac and a CAVLC
// section that calls decode_mb_cavlc. Both advance s->mb_x / s->mb_y and
// report the decoded or damaged macroblock range via ff_er_add_slice().
// NOTE(review): return statements and several braces are not visible in
// this listing, so the exact return values are not documented here.
6650 static int decode_slice(H264Context *h){
6651 MpegEncContext * const s = &h->s;
// For partitioned frames only the AC_END and AC_ERROR bits survive the
// mask applied to every status passed to ff_er_add_slice().
6652 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6656 if( h->pps.cabac ) {
// Align the bit reader, then start the CABAC decoder on the remaining
// whole bytes of the slice data.
6660 align_get_bits( &s->gb );
6663 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6664 ff_init_cabac_decoder( &h->cabac,
6665 s->gb.buffer + get_bits_count(&s->gb)/8,
6666 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6667 /* calculate pre-state */
// 460 context states: pre is derived from the I table or the P/B table
// selected by h->cabac_init_idc, scaled by s->qscale and clipped to 1..126.
6668 for( i= 0; i < 460; i++ ) {
6670 if( h->slice_type == I_TYPE )
6671 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6673 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6676 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6678 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6682 int ret = decode_mb_cabac(h);
6685 if(ret>=0) hl_decode_mb(h);
6687 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6688 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6691 if(ret>=0) ret = decode_mb_cabac(h);
6696 eos = get_cabac_terminate( &h->cabac );
// A decode error, or a CABAC read pointer more than one byte past the end
// of the data, marks the range as damaged.
6698 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6699 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6700 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6704 if( ++s->mb_x >= s->mb_width ) {
6706 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6708 if(h->mb_aff_frame) {
6713 if( eos || s->mb_y >= s->mb_height ) {
6714 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6722 int ret = decode_mb_cavlc(h);
6724 if(ret>=0) hl_decode_mb(h);
6726 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6728 ret = decode_mb_cavlc(h);
6730 if(ret>=0) hl_decode_mb(h);
6735 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6736 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6741 if(++s->mb_x >= s->mb_width){
6743 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6745 if(h->mb_aff_frame) {
6748 if(s->mb_y >= s->mb_height){
6749 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6751 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6752 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6756 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// Bitstream exhausted with no pending skip run: an exact match of the bit
// position ends the slice cleanly, anything else is reported as an error.
6763 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6764 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6765 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6766 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6770 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// NOTE(review): the remaining lines call decode_mb(), use fprintf and pass
// s->gb by value, and one condition contains stray question marks. This
// looks like disabled legacy code (the final return is marked not reached);
// confirm against the full file before touching it.
6779 for(;s->mb_y < s->mb_height; s->mb_y++){
6780 for(;s->mb_x < s->mb_width; s->mb_x++){
6781 int ret= decode_mb(h);
6786 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6787 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6792 if(++s->mb_x >= s->mb_width){
6794 if(++s->mb_y >= s->mb_height){
6795 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6796 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6800 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6807 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6808 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6809 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6813 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6820 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6823 return -1; //not reached
// Reads an HRD parameter structure from s->gb and discards it.
// Every field is consumed only to advance the bit position; nothing is
// written to sps in the visible lines.
6826 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6827 MpegEncContext * const s = &h->s;
// NOTE(review): cpb_count comes straight from the bitstream and only bounds
// the skip loop below; no range check is visible in this listing.
6829 cpb_count = get_ue_golomb(&s->gb) + 1;
6830 get_bits(&s->gb, 4); /* bit_rate_scale */
6831 get_bits(&s->gb, 4); /* cpb_size_scale */
6832 for(i=0; i<cpb_count; i++){
6833 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6834 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6835 get_bits1(&s->gb); /* cbr_flag */
6837 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6838 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6839 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6840 get_bits(&s->gb, 5); /* time_offset_length */
// Parses the VUI parameters that follow a sequence parameter set.
// Stored into sps: sar, timing_info_present_flag, num_units_in_tick,
// time_scale, fixed_frame_rate_flag, bitstream_restriction_flag and
// num_reorder_frames. All other fields are read and dropped.
// NOTE(review): the return statements are not visible in this listing.
6843 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6844 MpegEncContext * const s = &h->s;
6845 int aspect_ratio_info_present_flag, aspect_ratio_idc;
6846 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6848 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6850 if( aspect_ratio_info_present_flag ) {
6851 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit 16 bit numerator and denominator; idc
// values below 16 are looked up in the pixel_aspect table instead.
6852 if( aspect_ratio_idc == EXTENDED_SAR ) {
6853 sps->sar.num= get_bits(&s->gb, 16);
6854 sps->sar.den= get_bits(&s->gb, 16);
6855 }else if(aspect_ratio_idc < 16){
6856 sps->sar= pixel_aspect[aspect_ratio_idc];
6858 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6865 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6867 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6868 get_bits1(&s->gb); /* overscan_appropriate_flag */
6871 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6872 get_bits(&s->gb, 3); /* video_format */
6873 get_bits1(&s->gb); /* video_full_range_flag */
6874 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6875 get_bits(&s->gb, 8); /* colour_primaries */
6876 get_bits(&s->gb, 8); /* transfer_characteristics */
6877 get_bits(&s->gb, 8); /* matrix_coefficients */
6881 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6882 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6883 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6886 sps->timing_info_present_flag = get_bits1(&s->gb);
6887 if(sps->timing_info_present_flag){
6888 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6889 sps->time_scale = get_bits_long(&s->gb, 32);
6890 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD blocks share one parser; low_delay_hrd_flag follows only
// when at least one of them was present.
6893 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6894 if(nal_hrd_parameters_present_flag)
6895 decode_hrd_parameters(h, sps);
6896 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6897 if(vcl_hrd_parameters_present_flag)
6898 decode_hrd_parameters(h, sps);
6899 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6900 get_bits1(&s->gb); /* low_delay_hrd_flag */
6901 get_bits1(&s->gb); /* pic_struct_present_flag */
6903 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6904 if(sps->bitstream_restriction_flag){
6905 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6906 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6907 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6908 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6909 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6910 sps->num_reorder_frames = get_ue_golomb(&s->gb);
6911 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
// Parses a sequence parameter set from s->gb and stores it in
// h->sps_buffer[sps_id], where sps_id is read from the bitstream.
// Logs the parsed values when FF_DEBUG_PICT_INFO is set.
// NOTE(review): the return statements and error exits are not visible in
// this listing.
6917 static inline int decode_seq_parameter_set(H264Context *h){
6918 MpegEncContext * const s = &h->s;
6919 int profile_idc, level_idc;
6923 profile_idc= get_bits(&s->gb, 8);
6924 get_bits1(&s->gb); //constraint_set0_flag
6925 get_bits1(&s->gb); //constraint_set1_flag
6926 get_bits1(&s->gb); //constraint_set2_flag
6927 get_bits1(&s->gb); //constraint_set3_flag
6928 get_bits(&s->gb, 4); // reserved
6929 level_idc= get_bits(&s->gb, 8);
// NOTE(review): sps_id indexes h->sps_buffer below and no range check is
// visible in this listing; confirm it is bounded (presumably by
// MAX_SPS_COUNT) before the array access.
6930 sps_id= get_ue_golomb(&s->gb);
6932 sps= &h->sps_buffer[ sps_id ];
6933 sps->profile_idc= profile_idc;
6934 sps->level_idc= level_idc;
6936 if(sps->profile_idc >= 100){ //high profile
6937 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
6938 get_bits1(&s->gb); //residual_color_transform_flag
6939 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6940 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6941 sps->transform_bypass = get_bits1(&s->gb);
6942 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag
6943 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
6948 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6949 sps->poc_type= get_ue_golomb(&s->gb);
6951 if(sps->poc_type == 0){ //FIXME #define
6952 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6953 } else if(sps->poc_type == 1){//FIXME #define
6954 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6955 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6956 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
// NOTE(review): poc_cycle_length bounds the writes to offset_for_ref_frame
// in the loop below; no check against that array size is visible here.
6957 sps->poc_cycle_length= get_ue_golomb(&s->gb);
6959 for(i=0; i<sps->poc_cycle_length; i++)
6960 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6962 if(sps->poc_type > 2){
6963 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6967 sps->ref_frame_count= get_ue_golomb(&s->gb);
6968 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
6969 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6971 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
6972 sps->mb_width= get_ue_golomb(&s->gb) + 1;
6973 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guards the later 16*mb_width and 16*mb_height products against overflow
// and defers to avcodec_check_dimensions; the statement executed on failure
// is not visible in this listing.
6974 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
6975 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
6978 sps->frame_mbs_only_flag= get_bits1(&s->gb);
6979 if(!sps->frame_mbs_only_flag)
6980 sps->mb_aff= get_bits1(&s->gb);
6984 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
6986 sps->crop= get_bits1(&s->gb);
6988 sps->crop_left = get_ue_golomb(&s->gb);
6989 sps->crop_right = get_ue_golomb(&s->gb);
6990 sps->crop_top = get_ue_golomb(&s->gb);
6991 sps->crop_bottom= get_ue_golomb(&s->gb);
6992 if(sps->crop_left || sps->crop_top){
6993 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
6999 sps->crop_bottom= 0;
// The result of decode_vui_parameters is ignored here.
7002 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7003 if( sps->vui_parameters_present_flag )
7004 decode_vui_parameters(h, sps);
7006 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7007 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7008 sps_id, sps->profile_idc, sps->level_idc,
7010 sps->ref_frame_count,
7011 sps->mb_width, sps->mb_height,
7012 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7013 sps->direct_8x8_inference_flag ? "8B8" : "",
7014 sps->crop_left, sps->crop_right,
7015 sps->crop_top, sps->crop_bottom,
7016 sps->vui_parameters_present_flag ? "VUI" : ""
// Parses a picture parameter set from s->gb into h->pps_buffer[pps_id].
// bit_length is compared against get_bits_count(): the trailing fields
// (transform_8x8_mode, scaling matrix flag, second chroma qp offset) are
// read only while the read position is still below it.
// NOTE(review): the return statements are not visible in this listing.
7022 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7023 MpegEncContext * const s = &h->s;
// NOTE(review): pps_id is taken from the bitstream and used as an array
// index on the very next line with no bounds check. Presumably pps_buffer
// holds MAX_PPS_COUNT entries; a larger id would index out of bounds.
7024 int pps_id= get_ue_golomb(&s->gb);
7025 PPS *pps= &h->pps_buffer[pps_id];
7027 pps->sps_id= get_ue_golomb(&s->gb);
7028 pps->cabac= get_bits1(&s->gb);
7029 pps->pic_order_present= get_bits1(&s->gb);
7030 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7031 if(pps->slice_group_count > 1 ){
7032 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7033 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7034 switch(pps->mb_slice_group_map_type){
// The rows below are fragments of a slice group syntax table, not C
// statements; presumably they sit inside a disabled region of the full file.
7037 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7038 | run_length[ i ] |1 |ue(v) |
7043 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7045 | top_left_mb[ i ] |1 |ue(v) |
7046 | bottom_right_mb[ i ] |1 |ue(v) |
7054 | slice_group_change_direction_flag |1 |u(1) |
7055 | slice_group_change_rate_minus1 |1 |ue(v) |
7060 | slice_group_id_cnt_minus1 |1 |ue(v) |
7061 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7063 | slice_group_id[ i ] |1 |u(v) |
7068 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7069 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7070 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7071 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7075 pps->weighted_pred= get_bits1(&s->gb);
7076 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// init_qp and init_qs are stored with the +26 bias already applied.
7077 pps->init_qp= get_se_golomb(&s->gb) + 26;
7078 pps->init_qs= get_se_golomb(&s->gb) + 26;
7079 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7080 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7081 pps->constrained_intra_pred= get_bits1(&s->gb);
7082 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7084 if(get_bits_count(&s->gb) < bit_length){
7085 pps->transform_8x8_mode= get_bits1(&s->gb);
7086 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag
7087 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7090 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7093 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7094 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7095 pps_id, pps->sps_id,
7096 pps->cabac ? "CABAC" : "CAVLC",
7097 pps->slice_group_count,
7098 pps->ref_count[0], pps->ref_count[1],
7099 pps->weighted_pred ? "weighted" : "",
7100 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7101 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7102 pps->constrained_intra_pred ? "CONSTR" : "",
7103 pps->redundant_pic_cnt_present ? "REDU" : "",
7104 pps->transform_8x8_mode ? "8x8DCT" : ""
7112 * finds the end of the current frame in the bitstream.
7113 * @return the position of the first byte of the next frame, or -1
// Scans buf byte by byte, keeping the last four bytes in state, and uses
// pc->frame_start_found to remember across calls that a slice start code
// was already seen. Returns END_NOT_FOUND when the loop finishes without
// locating a boundary.
// NOTE(review): the return statements for a found boundary, and the
// initialisation of state, are not visible in this listing.
7115 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7118 ParseContext *pc = &(h->s.parse_context);
7119 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7120 // mb_addr= pc->mb_addr - 1;
7122 for(i=0; i<=buf_size; i++){
// Matches bytes 00 00 01 followed by a byte whose low five bits are 1, 2
// or 5; the mask ignores the top three bits of that last byte.
7123 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7124 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7125 if(pc->frame_start_found){
7126 // If there isn't one more byte in the buffer
7127 // the test on first_mb_in_slice cannot be done yet
7128 // do it at next call.
7129 if (i >= buf_size) break;
7130 if (buf[i] & 0x80) {
7131 // first_mb_in_slice is 0, probably the first nal of a new
7133 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7135 pc->frame_start_found= 0;
7139 pc->frame_start_found = 1;
// Same start code test for low five bits 7, 8 or 9: once a frame has
// started, one of these also clears frame_start_found.
7141 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7142 if(pc->frame_start_found){
7144 pc->frame_start_found= 0;
7149 state= (state<<8) | buf[i];
7153 return END_NOT_FOUND;
// Parser entry point: locates the end of the current frame in buf with
// find_frame_end, lets ff_combine_frame merge it with earlier buffered
// data, and hands the resulting buffer back through *poutbuf and
// *poutbuf_size.
// NOTE(review): the handling of a negative ff_combine_frame result and the
// return value are not visible in this listing.
7156 static int h264_parse(AVCodecParserContext *s,
7157 AVCodecContext *avctx,
7158 uint8_t **poutbuf, int *poutbuf_size,
7159 const uint8_t *buf, int buf_size)
7161 H264Context *h = s->priv_data;
7162 ParseContext *pc = &h->s.parse_context;
7165 next= find_frame_end(h, buf, buf_size);
// buf and buf_size may be redirected by ff_combine_frame, hence the cast
// that drops const on the local pointer.
7167 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7173 *poutbuf = (uint8_t *)buf;
7174 *poutbuf_size = buf_size;
// Scans buf for start codes while shifting bytes into state. state starts
// as all ones, so no start code can match before real bytes are shifted in.
// A start code whose last byte has low five bits equal to 7 is tested
// first; later any 00 00 01 start code whose type is not 7, 8 or 9 is
// tested, after which zero bytes preceding position i-4 are stepped over.
// NOTE(review): the statements executed for these tests and the return
// values are not visible in this listing.
7178 static int h264_split(AVCodecContext *avctx,
7179 const uint8_t *buf, int buf_size)
7182 uint32_t state = -1;
7185 for(i=0; i<=buf_size; i++){
7186 if((state&0xFFFFFF1F) == 0x107)
7188 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7190 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7192 while(i>4 && buf[i-5]==0) i--;
7197 state= (state<<8) | buf[i];
// Walks buf one NAL unit at a time.
// When h->is_avc is set, decode_nal is given nalsize, which is assembled
// big-endian from h->nal_length_size bytes; a 00 00 01 start code search is
// also visible (presumably the non-AVC path). Each unit is passed through
// decode_nal and dispatched on h->nal_unit_type to the slice, SPS or PPS
// parsers. After the loop the current picture type, key_frame flag,
// frame_num and POC history are updated, and reference marking is run for
// reference pictures.
// Returns buf_index early when no picture was produced; other return
// statements are not visible in this listing.
7203 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7204 MpegEncContext * const s = &h->s;
7205 AVCodecContext * const avctx= s->avctx;
7209 for(i=0; i<32; i++){
7210 printf("%X ", buf[i]);
7214 s->current_picture_ptr= NULL;
7223 if(buf_index >= buf_size) break;
// NOTE(review): no check is visible that nal_length_size bytes remain in
// buf before they are read here.
7225 for(i = 0; i < h->nal_length_size; i++)
7226 nalsize = (nalsize << 8) | buf[buf_index++];
7228 // start code prefix search
7229 for(; buf_index + 3 < buf_size; buf_index++){
7230 // this should always succeed in the first iteration
7231 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7235 if(buf_index+3 >= buf_size) break;
// NOTE(review): ptr[dst_length - 1] is read on the line right after
// decode_nal with no check that ptr is non-NULL or dst_length is positive.
// Whether decode_nal can return NULL or a zero length is not visible here;
// confirm and guard if so.
7240 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7241 if(ptr[dst_length - 1] == 0) dst_length--;
7242 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7244 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7245 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7248 if (h->is_avc && (nalsize != consumed))
7249 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7251 buf_index += consumed;
// Units with nal_ref_idc == 0 match this test when hurry_up is 1 or
// skip_frame is at least AVDISCARD_NONREF; the statement taken is not
// visible in this listing.
7253 if( (s->hurry_up == 1 && h->nal_ref_idc == 0)
7254 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7257 switch(h->nal_unit_type){
7259 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7261 init_get_bits(&s->gb, ptr, bit_length);
7263 h->inter_gb_ptr= &s->gb;
7264 s->data_partitioning = 0;
7266 if(decode_slice_header(h) < 0){
7267 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7270 if(h->redundant_pic_count==0 && s->hurry_up < 5
7271 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7272 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7273 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7274 && avctx->skip_frame < AVDISCARD_ALL)
// Data partitioning: the header is parsed from s->gb, while separate bit
// readers intra_gb and inter_gb are initialised below.
7278 init_get_bits(&s->gb, ptr, bit_length);
7280 h->inter_gb_ptr= NULL;
7281 s->data_partitioning = 1;
7283 if(decode_slice_header(h) < 0){
7284 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7288 init_get_bits(&h->intra_gb, ptr, bit_length);
7289 h->intra_gb_ptr= &h->intra_gb;
7292 init_get_bits(&h->inter_gb, ptr, bit_length);
7293 h->inter_gb_ptr= &h->inter_gb;
7295 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
7297 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
7298 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
7299 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7300 && avctx->skip_frame < AVDISCARD_ALL)
// The return value of decode_seq_parameter_set is ignored here.
7306 init_get_bits(&s->gb, ptr, bit_length);
7307 decode_seq_parameter_set(h);
7309 if(s->flags& CODEC_FLAG_LOW_DELAY)
7312 if(avctx->has_b_frames < 2)
7313 avctx->has_b_frames= !s->low_delay;
7316 init_get_bits(&s->gb, ptr, bit_length);
7318 decode_picture_parameter_set(h, bit_length);
7321 case NAL_PICTURE_DELIMITER:
7323 case NAL_FILTER_DATA:
7326 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7330 if(!s->current_picture_ptr) return buf_index; //no frame
7332 s->current_picture_ptr->pict_type= s->pict_type;
7333 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7335 h->prev_frame_num_offset= h->frame_num_offset;
7336 h->prev_frame_num= h->frame_num;
// POC history and reference marking are updated only for pictures that are
// kept as references.
7337 if(s->current_picture_ptr->reference){
7338 h->prev_poc_msb= h->poc_msb;
7339 h->prev_poc_lsb= h->poc_lsb;
7341 if(s->current_picture_ptr->reference)
7342 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7352 * returns the number of bytes consumed for building the current frame
// Converts the decoder position pos into a consumed byte count.
// With CODEC_FLAG_TRUNCATED, pos is made relative to
// parse_context.last_index and clamped at zero. The remaining adjustments
// bump a zero result to 1 and round a position within 10 bytes of the end
// up to buf_size.
// NOTE(review): the else and return lines are not visible in this listing,
// so which branch the last two adjustments belong to should be confirmed.
7354 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7355 if(s->flags&CODEC_FLAG_TRUNCATED){
7356 pos -= s->parse_context.last_index;
7357 if(pos<0) pos=0; // FIXME remove (unneeded?)
7361 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7362 if(pos+10>buf_size) pos=buf_size; // oops ;)
// Decoder entry point for one input buffer.
// Steps visible in this listing:
//  - copy flags from avctx; with CODEC_FLAG_TRUNCATED, reassemble a frame
//    via find_frame_end and ff_combine_frame;
//  - on first use of AVC input, parse the SPS and PPS entries from the avcC
//    extradata, then store the real nal_length_size;
//  - for non-AVC input, decode extradata while picture_number is 0;
//  - decode the NAL units in buf;
//  - queue the picture in h->delayed_pic and choose the output picture by
//    POC, growing avctx->has_b_frames when more delay turns out to be needed;
//  - copy the chosen picture to data and return the consumed byte count.
// NOTE(review): several return statements and conditions are not visible
// in this listing.
7368 static int decode_frame(AVCodecContext *avctx,
7369 void *data, int *data_size,
7370 uint8_t *buf, int buf_size)
7372 H264Context *h = avctx->priv_data;
7373 MpegEncContext *s = &h->s;
7374 AVFrame *pict = data;
7377 s->flags= avctx->flags;
7378 s->flags2= avctx->flags2;
7380 /* no supplementary picture */
7381 if (buf_size == 0) {
7385 if(s->flags&CODEC_FLAG_TRUNCATED){
7386 int next= find_frame_end(h, buf, buf_size);
7388 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7390 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7393 if(h->is_avc && !h->got_avcC) {
7394 int i, cnt, nalsize;
7395 unsigned char *p = avctx->extradata;
// NOTE(review): only a minimum size of 7 bytes is checked. The per entry
// lengths read with BE_16 below are not compared against extradata_size in
// the visible lines; confirm that malformed avcC data cannot be read out of
// bounds.
7396 if(avctx->extradata_size < 7) {
7397 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7401 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7404 /* sps and pps in the avcC always have length coded with 2 bytes,
7405 so put a fake nal_length_size = 2 while parsing them */
7406 h->nal_length_size = 2;
7407 // Decode sps from avcC
7408 cnt = *(p+5) & 0x1f; // Number of sps
7410 for (i = 0; i < cnt; i++) {
// Entry size is the 2 byte length prefix plus the payload it announces.
7411 nalsize = BE_16(p) + 2;
7412 if(decode_nal_units(h, p, nalsize) != nalsize) {
7413 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7418 // Decode pps from avcC
7419 cnt = *(p++); // Number of pps
7420 for (i = 0; i < cnt; i++) {
7421 nalsize = BE_16(p) + 2;
7422 if(decode_nal_units(h, p, nalsize) != nalsize) {
7423 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7428 // Now store right nal length size, that will be used to parse all other nals
7429 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7430 // Do not reparse avcC
7434 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7435 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7439 buf_index=decode_nal_units(h, buf, buf_size);
7443 //FIXME do something with unavailable reference frames
7445 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7446 if(!s->current_picture_ptr){
7447 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7452 Picture *out = s->current_picture_ptr;
7453 #if 0 //decode order
7454 *data_size = sizeof(AVFrame);
7456 /* Sort B-frames into display order */
7457 Picture *cur = s->current_picture_ptr;
7458 Picture *prev = h->delayed_output_pic;
7463 int dropped_frame = 0;
// Raise the reorder delay to the value signalled in the SPS when the
// stream carries bitstream restriction info.
7466 if(h->sps.bitstream_restriction_flag
7467 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7468 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Append the current picture after the last non-NULL entry of delayed_pic.
7472 while(h->delayed_pic[pics]) pics++;
7473 h->delayed_pic[pics++] = cur;
7474 if(cur->reference == 0)
7477 for(i=0; h->delayed_pic[i]; i++)
7478 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
// Pick the lowest POC among the queued pictures, scanning from the front
// and stopping at the first key frame after index 0.
7481 out = h->delayed_pic[0];
7482 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7483 if(h->delayed_pic[i]->poc < out->poc){
7484 out = h->delayed_pic[i];
7488 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7489 if(prev && pics <= s->avctx->has_b_frames)
7491 else if((out_of_order && pics-1 == s->avctx->has_b_frames)
7493 ((!cross_idr && prev && out->poc > prev->poc + 2)
7494 || cur->pict_type == B_TYPE)))
7497 s->avctx->has_b_frames++;
7500 else if(out_of_order)
// Remove the output slot from the queue by shifting later entries down.
7503 if(out_of_order || pics > s->avctx->has_b_frames){
7504 dropped_frame = (out != h->delayed_pic[out_idx]);
7505 for(i=out_idx; h->delayed_pic[i]; i++)
7506 h->delayed_pic[i] = h->delayed_pic[i+1];
7509 if(prev == out && !dropped_frame)
7512 *data_size = sizeof(AVFrame);
7513 if(prev && prev != out && prev->reference == 1)
7514 prev->reference = 0;
7515 h->delayed_output_pic = out;
7518 *pict= *(AVFrame*)out;
7521 assert(pict->data[0]);
7522 ff_print_debug_info(s, pict);
7523 //printf("out %d\n", (int)pict->data[0]);
7526 /* Return the Picture timestamp as the frame number */
7527 /* we subtract 1 because it is added on utils.c */
7528 avctx->frame_number = s->picture_number - 1;
7530 return get_consumed_bytes(s, buf_index, buf_size);
// Fills h->mb_avail[] for the current macroblock (s->mb_x, s->mb_y).
// A neighbour counts as available when its slice_table entry equals
// h->slice_num. Indices: 0 top-left, 1 top, 2 top-right, 3 left. Entries 4
// and 5 are constants.
// NOTE(review): lines between the top row and the left neighbour are
// missing from this listing; presumably they cover the case where no row
// above exists.
7533 static inline void fill_mb_avail(H264Context *h){
7534 MpegEncContext * const s = &h->s;
7535 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// The mb_x tests keep the left and right column lookups inside the row.
7538 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7539 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7540 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7546 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7547 h->mb_avail[4]= 1; //FIXME move out
7548 h->mb_avail[5]= 0; //FIXME move out
7554 #define SIZE (COUNT*40)
7560 // int int_temp[10000];
7562 AVCodecContext avctx;
7564 dsputil_init(&dsp, &avctx);
7566 init_put_bits(&pb, temp, SIZE);
7567 printf("testing unsigned exp golomb\n");
7568 for(i=0; i<COUNT; i++){
7570 set_ue_golomb(&pb, i);
7571 STOP_TIMER("set_ue_golomb");
7573 flush_put_bits(&pb);
7575 init_get_bits(&gb, temp, 8*SIZE);
7576 for(i=0; i<COUNT; i++){
7579 s= show_bits(&gb, 24);
7582 j= get_ue_golomb(&gb);
7584 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7587 STOP_TIMER("get_ue_golomb");
// Signed Exp-Golomb round trip: the values i - COUNT/2 for i in 0..COUNT-1
// are written with set_se_golomb(), the writer is flushed, and the same
// buffer is read back with get_se_golomb().
7591 init_put_bits(&pb, temp, SIZE);
7592 printf("testing signed exp golomb\n");
7593 for(i=0; i<COUNT; i++){
7595 set_se_golomb(&pb, i - COUNT/2);
7596 STOP_TIMER("set_se_golomb");
7598 flush_put_bits(&pb);
7600 init_get_bits(&gb, temp, 8*SIZE);
7601 for(i=0; i<COUNT; i++){
// s holds the next 24 bits peeked before the value is consumed; it is only
// used in the diagnostic message below.
7604 s= show_bits(&gb, 24);
7607 j= get_se_golomb(&gb);
7608 if(j != i - COUNT/2){
// The expected value is i - COUNT/2 (what was written above), not i, so the
// message reports the same value the comparison uses.
7609 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
7612 STOP_TIMER("get_se_golomb");
// 4x4 transform round trip, repeated COUNT times:
//  - src[] and ref[] (16 samples each) are filled with random()%255;
//  - h264_diff_dct_c(block, src, ref, 4) is called (presumably a forward
//    transform of the src/ref difference into block -- confirm);
//  - every block[j] is multiplied by 4, and entries with bit 0 of j set and
//    entries with bit 2 of j set are each additionally scaled by 4/5 with
//    rounding, (x*4 + 2)/5;
//  - s->dsp.h264_idct_add(ref, block, 4) is applied, then ref[] is compared
//    with src[] via the absolute difference, tracking the maximum in max_error.
// The final message prints error/COUNT/16 and max_error.
// NOTE(review): the statement that accumulates into error is not visible in
// this excerpt.
7615 printf("testing 4x4 (I)DCT\n");
7618 uint8_t src[16], ref[16];
7619 uint64_t error= 0, max_error=0;
7621 for(i=0; i<COUNT; i++){
7623 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7624 for(j=0; j<16; j++){
7625 ref[j]= random()%255;
7626 src[j]= random()%255;
7629 h264_diff_dct_c(block, src, ref, 4);
7632 for(j=0; j<16; j++){
7633 // printf("%d ", block[j]);
7634 block[j]= block[j]*4;
7635 if(j&1) block[j]= (block[j]*4 + 2)/5;
7636 if(j&4) block[j]= (block[j]*4 + 2)/5;
7640 s->dsp.h264_idct_add(ref, block, 4);
7641 /* for(j=0; j<16; j++){
7642 printf("%d ", ref[j]);
7646 for(j=0; j<16; j++){
7647 int diff= ABS(src[j] - ref[j]);
7650 max_error= FFMAX(max_error, diff);
7653 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// Quantizer test: qp iterates over 0..51, and src1_block[i] / src2_block[i]
// receive the same random()%255 value. The loop over i and the rest of the
// test are not visible in this excerpt.
7655 printf("testing quantizer\n");
7656 for(qp=0; qp<52; qp++){
7658 src1_block[i]= src2_block[i]= random()%255;
// NAL round trip, repeated COUNT times on a zeroed H264Context:
//  - bitstream[] (COUNT bytes) is filled with (random() % 255) + 1, i.e.
//    values in 1..255, so no byte starts out as zero;
//  - a loop over `zeros` picks random positions (presumably to place zero
//    bytes there -- the assignment itself is not visible in this excerpt);
//  - encode_nal() writes into nal[] (COUNT*2 bytes) and decode_nal() reads it
//    back; the decoded length, the consumed length and the payload bytes are
//    each checked against the input.
7662 printf("Testing NAL layer\n");
7664 uint8_t bitstream[COUNT];
7665 uint8_t nal[COUNT*2];
7667 memset(&h, 0, sizeof(H264Context));
7669 for(i=0; i<COUNT; i++){
7677 for(j=0; j<COUNT; j++){
7678 bitstream[j]= (random() % 255) + 1;
7681 for(j=0; j<zeros; j++){
7682 int pos= random() % COUNT;
// Loops while the chosen position already holds a zero byte.
7683 while(bitstream[pos] == 0){
7692 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7694 printf("encoding failed\n");
7698 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
// The decoded payload must have the original length ...
7702 if(out_length != COUNT){
7703 printf("incorrect length %d %d\n", out_length, COUNT);
// ... the decoder must report having consumed the whole encoded NAL ...
7707 if(consumed != nal_length){
7708 printf("incorrect consumed length %d %d\n", nal_length, consumed);
// ... and the decoded bytes must equal the input bytes.
7712 if(memcmp(bitstream, out, COUNT)){
7713 printf("missmatch\n");
7718 printf("Testing RBSP\n");
// Tear-down for the decoder context stored in avctx->priv_data: the visible
// work is a call to free_tables(h). Presumably registered as the codec's
// close callback -- confirm against the AVCodec initializer.
// NOTE(review): s is declared here but its use, the function braces and the
// return statement are not visible in this excerpt.
7726 static int decode_end(AVCodecContext *avctx)
7728 H264Context *h = avctx->priv_data;
7729 MpegEncContext *s = &h->s;
7731 free_tables(h); //FIXME cleanup init stuff perhaps
7734 // memset(h, 0, sizeof(H264Context));
// Codec descriptor for the H.264 decoder. Only part of the initializer is
// visible in this excerpt: sizeof(H264Context) (presumably the private
// context size -- confirm against the AVCodec field order) and the capability
// flags CODEC_CAP_DR1, CODEC_CAP_TRUNCATED and CODEC_CAP_DELAY, with
// CODEC_CAP_DRAW_HORIZ_BAND commented out.
7740 AVCodec h264_decoder = {
7744 sizeof(H264Context),
7749 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7753 AVCodecParser h264_parser = {
7755 sizeof(H264Context),