2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
// Rename identifiers whose semantics here differ from their usual meaning
// (presumably MpegEncContext fields — the poisoned names make accidental
// use fail to compile; see the replacement names for the rationale).
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
// Block indices of the separate luma / chroma DC transform blocks.
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
// Lookup-table bit widths used when building the CAVLC VLC tables
// (coeff_token_vlc, total_zeros_vlc, run_vlc, ... declared further below).
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
// Capacity limits: sizes of the parameter-set buffers (sps_buffer[],
// pps_buffer[]) and of the per-slice MMCO command buffer (mmco[]).
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
65 int poc_type; ///< pic_order_cnt_type
66 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
67 int delta_pic_order_always_zero_flag;
68 int offset_for_non_ref_pic;
69 int offset_for_top_to_bottom_field;
70 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
71 int ref_frame_count; ///< num_ref_frames
72 int gaps_in_frame_num_allowed_flag;
73 int mb_width; ///< frame_width_in_mbs_minus1 + 1
74 int mb_height; ///< frame_height_in_mbs_minus1 + 1
75 int frame_mbs_only_flag;
76 int mb_aff; ///<mb_adaptive_frame_field_flag
77 int direct_8x8_inference_flag;
78 int crop; ///< frame_cropping_flag
79 int crop_left; ///< frame_cropping_rect_left_offset
80 int crop_right; ///< frame_cropping_rect_right_offset
81 int crop_top; ///< frame_cropping_rect_top_offset
82 int crop_bottom; ///< frame_cropping_rect_bottom_offset
83 int vui_parameters_present_flag;
85 int timing_info_present_flag;
86 uint32_t num_units_in_tick;
88 int fixed_frame_rate_flag;
89 short offset_for_ref_frame[256]; //FIXME dyn aloc?
90 int bitstream_restriction_flag;
91 int num_reorder_frames;
95 * Picture parameter set
99 int cabac; ///< entropy_coding_mode_flag
100 int pic_order_present; ///< pic_order_present_flag
101 int slice_group_count; ///< num_slice_groups_minus1 + 1
102 int mb_slice_group_map_type;
103 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
104 int weighted_pred; ///< weighted_pred_flag
105 int weighted_bipred_idc;
106 int init_qp; ///< pic_init_qp_minus26 + 26
107 int init_qs; ///< pic_init_qs_minus26 + 26
108 int chroma_qp_index_offset;
109 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
110 int constrained_intra_pred; ///< constrained_intra_pred_flag
111 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
112 int transform_8x8_mode; ///< transform_8x8_mode_flag
116 * Memory management control operation opcode.
118 typedef enum MMCOOpcode{
129 * Memory management control operation.
140 typedef struct H264Context{
148 #define NAL_IDR_SLICE 5
152 #define NAL_PICTURE_DELIMITER 9
153 #define NAL_FILTER_DATA 10
154 uint8_t *rbsp_buffer;
155 int rbsp_buffer_size;
158 * Used to parse AVC variant of h264
160 int is_avc; ///< this flag is != 0 if codec is avc1
161 int got_avcC; ///< flag used to parse avcC data only once
162 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
166 int prev_mb_skipped; //FIXME remove (IMHO not used)
169 int chroma_pred_mode;
170 int intra16x16_pred_mode;
175 int8_t intra4x4_pred_mode_cache[5*8];
176 int8_t (*intra4x4_pred_mode)[8];
177 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
178 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
179 void (*pred8x8 [4+3])(uint8_t *src, int stride);
180 void (*pred16x16[4+3])(uint8_t *src, int stride);
181 unsigned int topleft_samples_available;
182 unsigned int top_samples_available;
183 unsigned int topright_samples_available;
184 unsigned int left_samples_available;
185 uint8_t (*top_borders[2])[16+2*8];
186 uint8_t left_border[2*(17+2*9)];
189 * non zero coeff count cache.
190 * is 64 if not available.
192 uint8_t non_zero_count_cache[6*8] __align8;
193 uint8_t (*non_zero_count)[16];
196 * Motion vector cache.
198 int16_t mv_cache[2][5*8][2] __align8;
199 int8_t ref_cache[2][5*8] __align8;
200 #define LIST_NOT_USED -1 //FIXME rename?
201 #define PART_NOT_AVAILABLE -2
204 * is 1 if the specific list MV&references are set to 0,0,-2.
206 int mv_cache_clean[2];
209 * number of neighbors (top and/or left) that used 8x8 dct
211 int neighbor_transform_size;
214 * block_offset[ 0..23] for frame macroblocks
215 * block_offset[24..47] for field macroblocks
217 int block_offset[2*(16+8)];
219 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
221 int b_stride; //FIXME use s->b4_stride
227 int unknown_svq3_flag;
228 int next_slice_index;
230 SPS sps_buffer[MAX_SPS_COUNT];
231 SPS sps; ///< current sps
233 PPS pps_buffer[MAX_PPS_COUNT];
237 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
239 uint16_t (*dequant8_coeff)[64];
242 uint8_t *slice_table_base;
243 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
245 int slice_type_fixed;
247 //interlacing specific flags
249 int mb_field_decoding_flag;
256 int delta_poc_bottom;
259 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
260 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
261 int frame_num_offset; ///< for POC type 2
262 int prev_frame_num_offset; ///< for POC type 2
263 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
266 * frame_num for frames or 2*frame_num for field pics.
271 * max_frame_num or 2*max_frame_num for field pics.
275 //Weighted pred stuff
277 int use_weight_chroma;
278 int luma_log2_weight_denom;
279 int chroma_log2_weight_denom;
280 int luma_weight[2][16];
281 int luma_offset[2][16];
282 int chroma_weight[2][16][2];
283 int chroma_offset[2][16][2];
284 int implicit_weight[16][16];
287 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
288 int slice_alpha_c0_offset;
289 int slice_beta_offset;
291 int redundant_pic_count;
293 int direct_spatial_mv_pred;
294 int dist_scale_factor[16];
295 int map_col_to_list0[2][16];
298 * num_ref_idx_l0/1_active_minus1 + 1
300 int ref_count[2];// FIXME split for AFF
301 Picture *short_ref[32];
302 Picture *long_ref[32];
303 Picture default_ref_list[2][32];
304 Picture ref_list[2][32]; //FIXME size?
305 Picture field_ref_list[2][32]; //FIXME size?
306 Picture *delayed_pic[16]; //FIXME size?
307 Picture *delayed_output_pic;
310 * memory management control operations buffer.
312 MMCO mmco[MAX_MMCO_COUNT];
315 int long_ref_count; ///< number of actual long term references
316 int short_ref_count; ///< number of actual short term references
319 GetBitContext intra_gb;
320 GetBitContext inter_gb;
321 GetBitContext *intra_gb_ptr;
322 GetBitContext *inter_gb_ptr;
324 DCTELEM mb[16*24] __align8;
330 uint8_t cabac_state[460];
333 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
337 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
338 uint8_t *chroma_pred_mode_table;
339 int last_qscale_diff;
340 int16_t (*mvd_table[2])[2];
341 int16_t mvd_cache[2][5*8][2] __align8;
342 uint8_t *direct_table;
343 uint8_t direct_cache[5*8];
345 uint8_t zigzag_scan[16];
346 uint8_t field_scan[16];
// CAVLC code tables (one VLC per table variant defined by the standard);
// file-scope statics, so they are shared by all decoder instances.
349 static VLC coeff_token_vlc[4];
350 static VLC chroma_dc_coeff_token_vlc;
352 static VLC total_zeros_vlc[15];
353 static VLC chroma_dc_total_zeros_vlc[3];
355 static VLC run_vlc[6];
// Forward declarations — SVQ3-specific dequant/IDCT helpers and the per-MB
// deblocking filter, presumably defined later in this file.
358 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
359 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
360 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
362 static inline uint32_t pack16to32(int a, int b){
363 #ifdef WORDS_BIGENDIAN
364 return (b&0xFFFF) + (a<<16);
366 return (a&0xFFFF) + (b<<16);
372 * @param h height of the rectangle, should be a constant
373 * @param w width of the rectangle, should be a constant
374 * @param size the size of val (1 or 4), should be a constant
376 static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
377 uint8_t *p= (uint8_t*)vp;
378 assert(size==1 || size==4);
383 assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
384 assert((stride&(w-1))==0);
385 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
388 *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
389 }else if(w==2 && h==4){
390 *(uint16_t*)(p + 0*stride)=
391 *(uint16_t*)(p + 1*stride)=
392 *(uint16_t*)(p + 2*stride)=
393 *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
394 }else if(w==4 && h==1){
395 *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
396 }else if(w==4 && h==2){
397 *(uint32_t*)(p + 0*stride)=
398 *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
399 }else if(w==4 && h==4){
400 *(uint32_t*)(p + 0*stride)=
401 *(uint32_t*)(p + 1*stride)=
402 *(uint32_t*)(p + 2*stride)=
403 *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
404 }else if(w==8 && h==1){
406 *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
407 }else if(w==8 && h==2){
408 *(uint32_t*)(p + 0 + 0*stride)=
409 *(uint32_t*)(p + 4 + 0*stride)=
410 *(uint32_t*)(p + 0 + 1*stride)=
411 *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101;
412 }else if(w==8 && h==4){
413 *(uint64_t*)(p + 0*stride)=
414 *(uint64_t*)(p + 1*stride)=
415 *(uint64_t*)(p + 2*stride)=
416 *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
417 }else if(w==16 && h==2){
418 *(uint64_t*)(p + 0+0*stride)=
419 *(uint64_t*)(p + 8+0*stride)=
420 *(uint64_t*)(p + 0+1*stride)=
421 *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
422 }else if(w==16 && h==4){
423 *(uint64_t*)(p + 0+0*stride)=
424 *(uint64_t*)(p + 8+0*stride)=
425 *(uint64_t*)(p + 0+1*stride)=
426 *(uint64_t*)(p + 8+1*stride)=
427 *(uint64_t*)(p + 0+2*stride)=
428 *(uint64_t*)(p + 8+2*stride)=
429 *(uint64_t*)(p + 0+3*stride)=
430 *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
435 static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
436 MpegEncContext * const s = &h->s;
437 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
438 int topleft_xy, top_xy, topright_xy, left_xy[2];
439 int topleft_type, top_type, topright_type, left_type[2];
443 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
444 // the actual condition is whether we're on the edge of a slice,
445 // and even then the intra and nnz parts are unnecessary.
446 if(for_deblock && h->slice_num == 1)
449 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
451 top_xy = mb_xy - s->mb_stride;
452 topleft_xy = top_xy - 1;
453 topright_xy= top_xy + 1;
454 left_xy[1] = left_xy[0] = mb_xy-1;
464 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
465 const int top_pair_xy = pair_xy - s->mb_stride;
466 const int topleft_pair_xy = top_pair_xy - 1;
467 const int topright_pair_xy = top_pair_xy + 1;
468 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
469 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
470 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
471 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
472 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
473 const int bottom = (s->mb_y & 1);
474 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
476 ? !curr_mb_frame_flag // bottom macroblock
477 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
479 top_xy -= s->mb_stride;
482 ? !curr_mb_frame_flag // bottom macroblock
483 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
485 topleft_xy -= s->mb_stride;
488 ? !curr_mb_frame_flag // bottom macroblock
489 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
491 topright_xy -= s->mb_stride;
493 if (left_mb_frame_flag != curr_mb_frame_flag) {
494 left_xy[1] = left_xy[0] = pair_xy - 1;
495 if (curr_mb_frame_flag) {
516 left_xy[1] += s->mb_stride;
529 h->top_mb_xy = top_xy;
530 h->left_mb_xy[0] = left_xy[0];
531 h->left_mb_xy[1] = left_xy[1];
533 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
534 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
535 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
536 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
537 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
539 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
540 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
541 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
542 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
543 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
546 if(IS_INTRA(mb_type)){
547 h->topleft_samples_available=
548 h->top_samples_available=
549 h->left_samples_available= 0xFFFF;
550 h->topright_samples_available= 0xEEEA;
552 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
553 h->topleft_samples_available= 0xB3FF;
554 h->top_samples_available= 0x33FF;
555 h->topright_samples_available= 0x26EA;
558 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
559 h->topleft_samples_available&= 0xDF5F;
560 h->left_samples_available&= 0x5F5F;
564 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
565 h->topleft_samples_available&= 0x7FFF;
567 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
568 h->topright_samples_available&= 0xFBFF;
570 if(IS_INTRA4x4(mb_type)){
571 if(IS_INTRA4x4(top_type)){
572 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
573 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
574 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
575 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
578 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
583 h->intra4x4_pred_mode_cache[4+8*0]=
584 h->intra4x4_pred_mode_cache[5+8*0]=
585 h->intra4x4_pred_mode_cache[6+8*0]=
586 h->intra4x4_pred_mode_cache[7+8*0]= pred;
589 if(IS_INTRA4x4(left_type[i])){
590 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
591 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
594 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
599 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
600 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
615 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
617 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
618 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
619 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
620 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
622 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
623 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
625 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
626 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
629 h->non_zero_count_cache[4+8*0]=
630 h->non_zero_count_cache[5+8*0]=
631 h->non_zero_count_cache[6+8*0]=
632 h->non_zero_count_cache[7+8*0]=
634 h->non_zero_count_cache[1+8*0]=
635 h->non_zero_count_cache[2+8*0]=
637 h->non_zero_count_cache[1+8*3]=
638 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
642 for (i=0; i<2; i++) {
644 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
645 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
646 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
647 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
649 h->non_zero_count_cache[3+8*1 + 2*8*i]=
650 h->non_zero_count_cache[3+8*2 + 2*8*i]=
651 h->non_zero_count_cache[0+8*1 + 8*i]=
652 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
659 h->top_cbp = h->cbp_table[top_xy];
660 } else if(IS_INTRA(mb_type)) {
667 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
668 } else if(IS_INTRA(mb_type)) {
674 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
677 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
682 //FIXME direct mb can skip much of this
683 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
685 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
686 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
687 /*if(!h->mv_cache_clean[list]){
688 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
689 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
690 h->mv_cache_clean[list]= 1;
694 h->mv_cache_clean[list]= 0;
696 if(IS_INTER(top_type)){
697 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
698 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
699 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
700 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
701 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
702 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
703 h->ref_cache[list][scan8[0] + 0 - 1*8]=
704 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
705 h->ref_cache[list][scan8[0] + 2 - 1*8]=
706 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
708 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
709 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
710 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
711 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
712 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
715 //FIXME unify cleanup or sth
716 if(IS_INTER(left_type[0])){
717 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
718 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
719 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
720 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
721 h->ref_cache[list][scan8[0] - 1 + 0*8]=
722 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
724 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
725 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
726 h->ref_cache[list][scan8[0] - 1 + 0*8]=
727 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
730 if(IS_INTER(left_type[1])){
731 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
732 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
733 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
734 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
735 h->ref_cache[list][scan8[0] - 1 + 2*8]=
736 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
738 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
739 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
740 h->ref_cache[list][scan8[0] - 1 + 2*8]=
741 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
742 assert((!left_type[0]) == (!left_type[1]));
745 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
748 if(IS_INTER(topleft_type)){
749 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
750 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
751 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
752 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
754 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
755 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
758 if(IS_INTER(topright_type)){
759 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
760 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
761 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
762 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
764 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
765 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
769 h->ref_cache[list][scan8[5 ]+1] =
770 h->ref_cache[list][scan8[7 ]+1] =
771 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
772 h->ref_cache[list][scan8[4 ]] =
773 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
774 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
775 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
776 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
777 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
778 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
781 /* XXX beurk, Load mvd */
782 if(IS_INTER(topleft_type)){
783 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
784 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
786 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
789 if(IS_INTER(top_type)){
790 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
791 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
792 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
793 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
794 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
796 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
797 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
798 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
799 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
801 if(IS_INTER(left_type[0])){
802 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
803 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
804 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
806 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
807 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
809 if(IS_INTER(left_type[1])){
810 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
811 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
812 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
814 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
815 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
817 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
818 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
819 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
820 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
821 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
823 if(h->slice_type == B_TYPE){
824 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
826 if(IS_DIRECT(top_type)){
827 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
828 }else if(IS_8X8(top_type)){
829 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
830 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
831 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
833 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
837 if(IS_DIRECT(left_type[0])){
838 h->direct_cache[scan8[0] - 1 + 0*8]=
839 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
840 }else if(IS_8X8(left_type[0])){
841 int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
842 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
843 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
845 h->direct_cache[scan8[0] - 1 + 0*8]=
846 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
854 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
857 static inline void write_back_intra_pred_mode(H264Context *h){
858 MpegEncContext * const s = &h->s;
859 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
861 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
862 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
863 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
864 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
865 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
866 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
867 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
871 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
873 static inline int check_intra4x4_pred_mode(H264Context *h){
874 MpegEncContext * const s = &h->s;
875 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
876 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
879 if(!(h->top_samples_available&0x8000)){
881 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
883 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
886 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
891 if(!(h->left_samples_available&0x8000)){
893 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
895 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
898 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
904 } //FIXME cleanup like next
907 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
909 static inline int check_intra_pred_mode(H264Context *h, int mode){
910 MpegEncContext * const s = &h->s;
911 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
912 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
914 if(mode < 0 || mode > 6) {
915 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
919 if(!(h->top_samples_available&0x8000)){
922 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
927 if(!(h->left_samples_available&0x8000)){
930 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
939 * gets the predicted intra4x4 prediction mode.
941 static inline int pred_intra_mode(H264Context *h, int n){
942 const int index8= scan8[n];
943 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
944 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
945 const int min= FFMIN(left, top);
947 tprintf("mode:%d %d min:%d\n", left ,top, min);
949 if(min<0) return DC_PRED;
953 static inline void write_back_non_zero_count(H264Context *h){
954 MpegEncContext * const s = &h->s;
955 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
957 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
958 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
959 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
960 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
961 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
962 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
963 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
965 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
966 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
967 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
969 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
970 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
971 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
975 * gets the predicted number of non zero coefficients.
976 * @param n block index
978 static inline int pred_non_zero_count(H264Context *h, int n){
979 const int index8= scan8[n];
980 const int left= h->non_zero_count_cache[index8 - 1];
981 const int top = h->non_zero_count_cache[index8 - 8];
984 if(i<64) i= (i+1)>>1;
986 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
991 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
992 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
994 if(topright_ref != PART_NOT_AVAILABLE){
995 *C= h->mv_cache[list][ i - 8 + part_width ];
998 tprintf("topright MV not available\n");
1000 *C= h->mv_cache[list][ i - 8 - 1 ];
1001 return h->ref_cache[list][ i - 8 - 1 ];
1006 * gets the predicted MV.
1007 * @param n the block index
1008 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1009 * @param mx the x component of the predicted motion vector
1010 * @param my the y component of the predicted motion vector
1012 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1013 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour in the prediction caches
   (cache offsets: -1 is to the left, -8 is the row above) */
1014 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1015 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1016 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1017 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1019 int diagonal_ref, match_count;
1021 assert(part_width==1 || part_width==2 || part_width==4);
/* C = top-right candidate (or top-left fallback), see fetch_diagonal_mv() */
1031 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count the neighbours that use the same reference picture as this block */
1032 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1033 tprintf("pred_motion match_count=%d\n", match_count);
1034 if(match_count > 1){ //most common
1035 *mx= mid_pred(A[0], B[0], C[0]);
1036 *my= mid_pred(A[1], B[1], C[1]);
1037 }else if(match_count==1){
/* exactly one neighbour matches: take that neighbour's MV directly */
1041 }else if(top_ref==ref){
/* no match: median-predict, except the left-only availability special case */
1049 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1053 *mx= mid_pred(A[0], B[0], C[0]);
1054 *my= mid_pred(A[1], B[1], C[1]);
1058 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1062 * gets the directionally predicted 16x8 MV.
1063 * @param n the block index
1064 * @param mx the x component of the predicted motion vector
1065 * @param my the y component of the predicted motion vector
1067 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* top 16x8 partition: prefer the MV of the block above when it used the
   same reference picture */
1069 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1070 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1072 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* bottom 16x8 partition: prefer the MV of the block to the left */
1080 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1081 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1083 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1085 if(left_ref == ref){
/* directional shortcut did not apply: fall back to the median predictor */
1093 pred_motion(h, n, 4, list, ref, mx, my);
1097 * gets the directionally predicted 8x16 MV.
1098 * @param n the block index
1099 * @param mx the x component of the predicted motion vector
1100 * @param my the y component of the predicted motion vector
1102 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* left 8x16 partition: prefer the left neighbour when it used the same ref */
1104 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1105 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1107 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1109 if(left_ref == ref){
/* right 8x16 partition: prefer the diagonal (top-right) neighbour */
1118 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1120 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1122 if(diagonal_ref == ref){
/* shortcut not applicable: use the generic median predictor */
1130 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P_Skip macroblocks: the MV is zero when a neighbour is
 * unavailable or is a zero-MV ref-0 block, otherwise the 16x16 median
 * prediction is used.
 */
1133 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1134 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1135 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1137 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1139 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1140 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1141 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1147 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factor for every list-0
 * reference from POC distances (tb/td), per the H.264 spec derivation.
 */
1152 static inline void direct_dist_scale_factor(H264Context * const h){
1153 const int poc = h->s.current_picture_ptr->poc;
1154 const int poc1 = h->ref_list[1][0].poc;
1156 for(i=0; i<h->ref_count[0]; i++){
1157 int poc0 = h->ref_list[0][i].poc;
1158 int td = clip(poc1 - poc0, -128, 127);
1159 if(td == 0 /* FIXME || pic0 is a long-term ref */){
/* equal POCs: neutral scale, 256 == 1.0 in the 8-bit fixed-point used below */
1160 h->dist_scale_factor[i] = 256;
1162 int tb = clip(poc - poc0, -128, 127);
1163 int tx = (16384 + (ABS(td) >> 1)) / td;
1164 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Stores this picture's reference counts/POCs in the Picture struct and
 * builds map_col_to_list0, which maps the co-located (list-1) picture's
 * reference indices onto our list 0 for temporal direct prediction.
 */
1168 static inline void direct_ref_list_init(H264Context * const h){
1169 MpegEncContext * const s = &h->s;
1170 Picture * const ref1 = &h->ref_list[1][0];
1171 Picture * const cur = s->current_picture_ptr;
1173 if(cur->pict_type == I_TYPE)
1174 cur->ref_count[0] = 0;
1175 if(cur->pict_type != B_TYPE)
1176 cur->ref_count[1] = 0;
1177 for(list=0; list<2; list++){
1178 cur->ref_count[list] = h->ref_count[list];
1179 for(j=0; j<h->ref_count[list]; j++)
1180 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
/* the mapping below is only needed for temporal direct mode */
1182 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1184 for(list=0; list<2; list++){
1185 for(i=0; i<ref1->ref_count[list]; i++){
1186 const int poc = ref1->ref_poc[list][i];
/* default: no list-0 picture of ours matches this POC */
1187 h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
1188 for(j=0; j<h->ref_count[list]; j++)
1189 if(h->ref_list[list][j].poc == poc){
1190 h->map_col_to_list0[list][i] = j;
/**
 * Derives motion vectors and reference indices for B_Direct (spatial or
 * temporal) macroblocks/partitions, filling mv_cache/ref_cache and
 * adjusting *mb_type / sub_mb_type accordingly.
 * l1mv0/l1ref0/l1ref1 point at the co-located data of the first list-1
 * reference picture.
 */
1197 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1198 MpegEncContext * const s = &h->s;
1199 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1200 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1201 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1202 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1203 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1204 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1205 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1206 const int is_b8x8 = IS_8X8(*mb_type);
/* pick the partition size to predict with, based on the co-located MB type */
1210 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1211 /* FIXME save sub mb types from previous frames (or derive from MVs)
1212 * so we know exactly what block size to use */
1213 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1214 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1215 }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1216 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1217 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1219 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1220 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1223 *mb_type |= MB_TYPE_DIRECT2;
1225 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct: refs/MVs come from this picture's own neighbours ---- */
1227 if(h->direct_spatial_mv_pred){
1232 /* ref = min(neighbors) */
1233 for(list=0; list<2; list++){
1234 int refa = h->ref_cache[list][scan8[0] - 1];
1235 int refb = h->ref_cache[list][scan8[0] - 8];
1236 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1238 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1240 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1242 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* no valid neighbour ref in either list: direct-zero prediction */
1248 if(ref[0] < 0 && ref[1] < 0){
1249 ref[0] = ref[1] = 0;
1250 mv[0][0] = mv[0][1] =
1251 mv[1][0] = mv[1][1] = 0;
1253 for(list=0; list<2; list++){
1255 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1257 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list from the (sub-)MB type */
1262 *mb_type &= ~MB_TYPE_P0L1;
1263 sub_mb_type &= ~MB_TYPE_P0L1;
1264 }else if(ref[0] < 0){
1265 *mb_type &= ~MB_TYPE_P0L0;
1266 sub_mb_type &= ~MB_TYPE_P0L0;
/* 16x16 spatial direct: one ref/MV pair for the whole macroblock, zeroed
   where the co-located block is a (near-)stationary ref-0 block */
1269 if(IS_16X16(*mb_type)){
1270 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
1271 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
1272 if(!IS_INTRA(mb_type_col) && l1ref0[0] == 0 &&
1273 ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1){
1275 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1277 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1279 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1281 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1283 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1284 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
/* per-8x8 spatial direct: same ref/MV pair, but 4x4 sub-blocks are zeroed
   individually where the co-located MVs are (near-)zero */
1287 for(i8=0; i8<4; i8++){
1288 const int x8 = i8&1;
1289 const int y8 = i8>>1;
1291 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1293 h->sub_mb_type[i8] = sub_mb_type;
1295 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1296 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
1298 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
1301 if(!IS_INTRA(mb_type_col) && l1ref0[x8 + y8*h->b8_stride] == 0){
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1304 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1306 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1308 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1314 }else{ /* direct temporal mv pred */
/* temporal direct: scale the co-located list-0 MVs by dist_scale_factor;
   the list-1 MV is the scaled MV minus the co-located MV */
1315 if(IS_16X16(*mb_type)){
1316 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1317 if(IS_INTRA(mb_type_col)){
1318 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1319 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1320 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1322 const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1323 : h->map_col_to_list0[1][l1ref1[0]];
1324 const int dist_scale_factor = h->dist_scale_factor[ref0];
1325 const int16_t *mv_col = l1mv0[0];
1327 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1328 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1329 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1330 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1331 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
/* per-8x8 temporal direct, scaling each co-located 4x4 MV separately */
1334 for(i8=0; i8<4; i8++){
1335 const int x8 = i8&1;
1336 const int y8 = i8>>1;
1337 int ref0, dist_scale_factor;
1339 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1341 h->sub_mb_type[i8] = sub_mb_type;
1342 if(IS_INTRA(mb_type_col)){
1343 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1344 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1345 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1346 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1350 ref0 = l1ref0[x8 + y8*h->b8_stride];
1352 ref0 = h->map_col_to_list0[0][ref0];
1354 ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1355 dist_scale_factor = h->dist_scale_factor[ref0];
1357 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1358 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1359 for(i4=0; i4<4; i4++){
1360 const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1361 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1362 mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1363 mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1364 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1365 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock mv/ref caches (and CABAC mvd cache) back into
 * the frame-wide motion_val/ref_index/mvd/direct tables.
 */
1372 static inline void write_back_motion(H264Context *h, int mb_type){
1373 MpegEncContext * const s = &h->s;
1374 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1375 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1378 for(list=0; list<2; list++){
/* list unused by this MB: zero the tables so later prediction reads are safe */
1380 if(!USES_LIST(mb_type, list)){
1381 if(1){ //FIXME skip or never read if mb_type doesn't use it
1383 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
1384 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
1386 if( h->pps.cabac ) {
1387 /* FIXME needed ? */
1389 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
1390 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
1394 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
1395 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
/* list in use: copy MVs (8 bytes == two MVs at a time) and refs from cache */
1402 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1403 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1405 if( h->pps.cabac ) {
1407 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1408 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1412 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
1413 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
/* CABAC B-slices additionally record which 8x8 partitions used direct mode */
1417 if(h->slice_type == B_TYPE && h->pps.cabac){
1418 if(IS_8X8(mb_type)){
1419 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1420 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1421 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1427 * Decodes a network abstraction layer unit.
1428 * @param consumed is the number of bytes used as input
1429 * @param length is the length of the array
1430 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1431 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header byte, then removes 00 00 03 emulation-prevention
 * escapes; returns src+1 directly when no escapes are present. */
1433 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1437 // src[0]&0x80; //forbidden bit
1438 h->nal_ref_idc= src[0]>>5;
1439 h->nal_unit_type= src[0]&0x1F;
1443 for(i=0; i<length; i++)
1444 printf("%2X ", src[i]);
/* scan two bytes at a time for a 00 00 (01..03) pattern */
1446 for(i=0; i+1<length; i+=2){
1447 if(src[i]) continue;
1448 if(i>0 && src[i-1]==0) i--;
1449 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1451 /* startcode, so we must be past the end */
1458 if(i>=length-1){ //no escaped 0
1459 *dst_length= length;
1460 *consumed= length+1; //+1 for the header
1464 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1465 dst= h->rbsp_buffer;
1467 //printf("decoding esc\n");
1470 //remove escapes (very rare 1:2^22)
1471 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1472 if(src[si+2]==3){ //escape
1477 }else //next start code
1481 dst[di++]= src[si++];
1485 *consumed= si + 1;//+1 for the header
1486 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1492 * @param src the data which should be escaped
1493 * @param dst the target buffer, dst+1 == src is allowed as a special case
1494 * @param length the length of the src data
1495 * @param dst_length the length of the dst array
1496 * @returns length of escaped data in bytes or -1 if an error occurred
/* Writes the NAL header byte, then copies src into dst+1 while inserting
 * 00 00 03 emulation-prevention escapes where needed. */
1498 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1499 int i, escape_count, si, di;
1503 assert(dst_length>0);
/* NAL header: nal_ref_idc (2 bits) + nal_unit_type (5 bits) */
1505 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1507 if(length==0) return 1;
/* first pass: count how many escapes will be needed */
1510 for(i=0; i<length; i+=2){
1511 if(src[i]) continue;
1512 if(i>0 && src[i-1]==0)
1514 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
/* common case: nothing to escape, plain copy */
1520 if(escape_count==0){
1522 memcpy(dst+1, src, length);
1526 if(length + escape_count + 1> dst_length)
1529 //this should be damn rare (hopefully)
1531 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1532 temp= h->rbsp_buffer;
1533 //printf("encoding esc\n");
/* second pass: copy, emitting an escape before each 00 00 (00..03) run
   (NOTE(review): the line writing the 0x03 byte itself is elided here) */
1538 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1539 temp[di++]= 0; si++;
1540 temp[di++]= 0; si++;
1542 temp[di++]= src[si++];
1545 temp[di++]= src[si++];
1547 memcpy(dst+1, temp, length+escape_count);
1549 assert(di == length+escape_count);
1555 * write 1,10,100,1000,... for alignment; yes, it's exactly the inverse of MPEG-4
1557 static void encode_rbsp_trailing(PutBitContext *pb){
/* NOTE(review): the stop-bit write (a single 1 bit) is elided in this
   chunk; only the padding to the next byte boundary is visible. */
1560 length= (-put_bits_count(pb))&7;
1561 if(length) put_bits(pb, length, 0); /* pad with zero bits up to a byte */
1566 * identifies the exact end of the bitstream
1567 * @return the length of the trailing, or 0 if damaged
1569 static int decode_rbsp_trailing(uint8_t *src){
/* NOTE(review): body largely elided in this chunk; it scans the final
   byte (v) for the rbsp stop bit. */
1573 tprintf("rbsp trailing %X\n", v);
1583 * IDCT transforms the 16 DC values and dequantizes them.
1584 * @param qp quantization parameter
1586 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1587 const int qmul= dequant_coeff[qp][0];
1590 int temp[16]; //FIXME check if this is a good idea
/* the 16 luma DC coefficients are stored strided inside block; these
   tables give the in-block offsets of the 4x4 DC grid */
1591 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1592 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1594 //memset(block, 64, 2*256);
/* horizontal pass of the 4x4 Hadamard transform into temp[] */
1597 const int offset= y_offset[i];
1598 const int z0= block[offset+stride*0] + block[offset+stride*4];
1599 const int z1= block[offset+stride*0] - block[offset+stride*4];
1600 const int z2= block[offset+stride*1] - block[offset+stride*5];
1601 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass, then dequantize with rounding and store back */
1610 const int offset= x_offset[i];
1611 const int z0= temp[4*0+i] + temp[4*2+i];
1612 const int z1= temp[4*0+i] - temp[4*2+i];
1613 const int z2= temp[4*1+i] - temp[4*3+i];
1614 const int z3= temp[4*1+i] + temp[4*3+i];
1616 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1617 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1618 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1619 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1625 * DCT transforms the 16 DC values.
1626 * @param qp quantization parameter ??? FIXME
1628 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1629 // const int qmul= dequant_coeff[qp][0];
1631 int temp[16]; //FIXME check if this is a good idea
/* in-block offsets of the strided 4x4 luma DC grid (see the idct variant) */
1632 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1633 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* horizontal Hadamard pass into temp[] */
1636 const int offset= y_offset[i];
1637 const int z0= block[offset+stride*0] + block[offset+stride*4];
1638 const int z1= block[offset+stride*0] - block[offset+stride*4];
1639 const int z2= block[offset+stride*1] - block[offset+stride*5];
1640 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* vertical pass; forward transform halves the result (>>1) */
1649 const int offset= x_offset[i];
1650 const int z0= temp[4*0+i] + temp[4*2+i];
1651 const int z1= temp[4*0+i] - temp[4*2+i];
1652 const int z2= temp[4*1+i] - temp[4*3+i];
1653 const int z3= temp[4*1+i] + temp[4*3+i];
1655 block[stride*0 +offset]= (z0 + z3)>>1;
1656 block[stride*2 +offset]= (z1 + z2)>>1;
1657 block[stride*8 +offset]= (z1 - z2)>>1;
1658 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 Hadamard transform + dequantization of the chroma DC coefficients
 * (stored at 16-pixel xStride / 32-pixel stride inside block). */
1666 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1667 const int qmul= dequant_coeff[qp][0];
1668 const int stride= 16*2;
1669 const int xStride= 16;
/* load the four chroma DC coefficients */
1672 a= block[stride*0 + xStride*0];
1673 b= block[stride*0 + xStride*1];
1674 c= block[stride*1 + xStride*0];
1675 d= block[stride*1 + xStride*1];
/* NOTE(review): the butterfly forming e (and re-forming a,b,c from the
   first pass) is elided in this chunk. */
1682 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1683 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1684 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1685 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
/* Forward 2x2 Hadamard transform of the chroma DC coefficients (encoder
 * counterpart of chroma_dc_dequant_idct_c, no quantization here). */
1689 static void chroma_dc_dct_c(DCTELEM *block){
1690 const int stride= 16*2;
1691 const int xStride= 16;
/* load the four chroma DC coefficients */
1694 a= block[stride*0 + xStride*0];
1695 b= block[stride*0 + xStride*1];
1696 c= block[stride*1 + xStride*0];
1697 d= block[stride*1 + xStride*1];
/* NOTE(review): the intermediate butterfly (forming e from a..d) is
   elided in this chunk. */
1704 block[stride*0 + xStride*0]= (a+c);
1705 block[stride*0 + xStride*1]= (e+b);
1706 block[stride*1 + xStride*0]= (a-c);
1707 block[stride*1 + xStride*1]= (e-b);
1712 * gets the chroma qp.
/**
 * Maps the luma qscale plus the per-PPS chroma offset to the chroma QP via
 * the chroma_qp[] lookup table, clipping the index to the valid 0..51 range.
 */
1714 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
1716 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
/* Forward 4x4 H.264 integer transform of the difference src1 - src2,
 * written into block in row-major order. */
1721 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
1723 //FIXME try int temp instead of block
/* horizontal pass: 4-point butterfly of the pixel differences per row */
1726 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
1727 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
1728 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
1729 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
1730 const int z0= d0 + d3;
1731 const int z3= d0 - d3;
1732 const int z1= d1 + d2;
1733 const int z2= d1 - d2;
1735 block[0 + 4*i]= z0 + z1;
1736 block[1 + 4*i]= 2*z3 + z2;
1737 block[2 + 4*i]= z0 - z1;
1738 block[3 + 4*i]= z3 - 2*z2;
/* vertical pass: same butterfly applied down each column, in place */
1742 const int z0= block[0*4 + i] + block[3*4 + i];
1743 const int z3= block[0*4 + i] - block[3*4 + i];
1744 const int z1= block[1*4 + i] + block[2*4 + i];
1745 const int z2= block[1*4 + i] - block[2*4 + i];
1747 block[0*4 + i]= z0 + z1;
1748 block[1*4 + i]= 2*z3 + z2;
1749 block[2*4 + i]= z0 - z1;
1750 block[3*4 + i]= z3 - 2*z2;
1755 //FIXME need to check that this doesn't overflow signed 32 bit for low qp, I am not sure, it's very close
1756 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
/**
 * Quantizes a 4x4 block of transform coefficients in scantable order.
 * @param intra selects the larger intra rounding bias (1/3 vs 1/6)
 * @param seperate_dc when set, block[0] is quantized as a DC coefficient
 *        with its own shift/bias (two ranges, presumably split on qscale
 *        to avoid overflow -- the branch condition is elided here)
 * @return index of the last non-zero coefficient
 */
1757 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1759 const int * const quant_table= quant_coeff[qscale];
1760 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* thresholds let one unsigned compare detect |level| above the dead zone */
1761 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1762 const unsigned int threshold2= (threshold1<<1);
/* DC path, variant with reduced shift */
1768 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1769 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1770 const unsigned int dc_threshold2= (dc_threshold1<<1);
1772 int level= block[0]*quant_coeff[qscale+18][0];
1773 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1775 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1778 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1781 // last_non_zero = i;
/* DC path, variant with increased shift */
1786 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1787 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1788 const unsigned int dc_threshold2= (dc_threshold1<<1);
1790 int level= block[0]*quant_table[0];
1791 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1793 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1796 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1799 // last_non_zero = i;
/* AC coefficients, walked in scan order */
1812 const int j= scantable[i];
1813 int level= block[j]*quant_table[j];
1815 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1816 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1817 if(((unsigned)(level+threshold1))>threshold2){
1819 level= (bias + level)>>QUANT_SHIFT;
1822 level= (bias - level)>>QUANT_SHIFT;
1831 return last_non_zero;
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    /* Vertical 4x4 prediction: replicate the row of four reconstructed
     * pixels directly above the block into every row. */
    int x, y;
    for(y=0; y<4; y++){
        for(x=0; x<4; x++)
            src[x + y*stride]= src[x - stride];
    }
}
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* Horizontal 4x4 prediction: fill each row with the reconstructed
     * pixel immediately to its left. */
    int x, y;
    for(y=0; y<4; y++){
        const uint8_t left= src[-1 + y*stride];
        for(x=0; x<4; x++)
            src[x + y*stride]= left;
    }
}
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC 4x4 prediction: every pixel gets the rounded mean of the four
     * pixels above and the four pixels to the left of the block. */
    int sum= 4, x, y;
    uint8_t dc;
    for(x=0; x<4; x++)
        sum += src[x - stride] + src[-1 + x*stride];
    dc= sum >> 3;
    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= dc;
}
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC 4x4 prediction from the left edge only (top unavailable). */
    const uint8_t dc= (src[-1+0*stride] + src[-1+1*stride]
                     + src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
    int x, y;
    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= dc;
}
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* DC 4x4 prediction from the top edge only (left unavailable). */
    const uint8_t dc= (src[-stride] + src[1-stride]
                     + src[2-stride] + src[3-stride] + 2) >> 2;
    int x, y;
    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= dc;
}
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    /* Flat mid-grey DC prediction, used when no neighbours are available. */
    int x, y;
    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= 128;
}
/* Neighbour-pixel load helpers shared by the 4x4 spatial predictors:
 * each declares const ints holding the reconstructed edge pixels around
 * the current 4x4 block (t0..t3 above, t4..t7 above-right, l0..l3 left). */
1885 #define LOAD_TOP_RIGHT_EDGE\
1886 const int t4= topright[0];\
1887 const int t5= topright[1];\
1888 const int t6= topright[2];\
1889 const int t7= topright[3];\
1891 #define LOAD_LEFT_EDGE\
1892 const int l0= src[-1+0*stride];\
1893 const int l1= src[-1+1*stride];\
1894 const int l2= src[-1+2*stride];\
1895 const int l3= src[-1+3*stride];\
1897 #define LOAD_TOP_EDGE\
1898 const int t0= src[ 0-1*stride];\
1899 const int t1= src[ 1-1*stride];\
1900 const int t2= src[ 2-1*stride];\
1901 const int t3= src[ 3-1*stride];\
/* Diagonal down-right 4x4 prediction: each anti-diagonal is filled with a
 * (1,2,1)-filtered edge pixel from the left/top-left/top neighbours. */
1903 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1904 const int lt= src[-1-1*stride];
/* NOTE(review): the LOAD_TOP_EDGE/LOAD_LEFT_EDGE invocations and half of
   the diagonal writes are elided in this chunk. */
1908 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
1910 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
1913 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
1917 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1920 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1922 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1923 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
/* Diagonal down-left 4x4 prediction: diagonals filled from the (1,2,1)
 * filtered top and top-right edges (t0..t7). */
1926 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
/* NOTE(review): LOAD_TOP_EDGE/LOAD_TOP_RIGHT_EDGE invocations and several
   of the diagonal writes are elided in this chunk. */
1931 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1933 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1936 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1940 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1943 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1945 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1946 src[3+3*stride]=(t6 + 3*t7 + 2)>>2; /* last pixel: only t6/t7 exist */
/* Vertical-right 4x4 prediction: half-pel columns from 2-tap averages of
 * the top edge, odd rows from (1,2,1) filtered top/left pixels. */
1949 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1950 const int lt= src[-1-1*stride];
1953 const __attribute__((unused)) int unu= l3; /* l3 is loaded but not used here */
1956 src[1+2*stride]=(lt + t0 + 1)>>1;
1958 src[2+2*stride]=(t0 + t1 + 1)>>1;
1960 src[3+2*stride]=(t1 + t2 + 1)>>1;
1961 src[3+0*stride]=(t2 + t3 + 1)>>1;
1963 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1965 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1967 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1968 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1969 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1970 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
/* Vertical-left 4x4 prediction: even rows from 2-tap averages of the top
 * edge, odd rows from the (1,2,1) filtered top/top-right pixels. */
1973 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1976 const __attribute__((unused)) int unu= t7; /* t7 is loaded but not used here */
1978 src[0+0*stride]=(t0 + t1 + 1)>>1;
1980 src[0+2*stride]=(t1 + t2 + 1)>>1;
1982 src[1+2*stride]=(t2 + t3 + 1)>>1;
1984 src[2+2*stride]=(t3 + t4+ 1)>>1;
1985 src[3+2*stride]=(t4 + t5+ 1)>>1;
1986 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1988 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1990 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1992 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1993 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
/* Horizontal-up 4x4 prediction: interpolates down the left edge (l0..l3);
 * the remaining bottom-right pixels (elided here) replicate l3. */
1996 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1999 src[0+0*stride]=(l0 + l1 + 1)>>1;
2000 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2002 src[0+1*stride]=(l1 + l2 + 1)>>1;
2004 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2006 src[0+2*stride]=(l2 + l3 + 1)>>1;
2008 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
/* Horizontal-down 4x4 prediction: mixes 2-tap averages down the left edge
 * with (1,2,1) filtered top-left/top pixels along the diagonal. */
2017 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2018 const int lt= src[-1-1*stride];
2021 const __attribute__((unused)) int unu= t3; /* t3 is loaded but not used here */
2024 src[2+1*stride]=(lt + l0 + 1)>>1;
2026 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2027 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2028 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2030 src[2+2*stride]=(l0 + l1 + 1)>>1;
2032 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2034 src[2+3*stride]=(l1 + l2+ 1)>>1;
2036 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2037 src[0+3*stride]=(l2 + l3 + 1)>>1;
2038 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
static void pred16x16_vertical_c(uint8_t *src, int stride){
    /* Vertical 16x16 prediction: replicate the 16 pixels above the
     * macroblock into all 16 rows. */
    int x, y;
    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride]= src[x - stride];
}
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    /* Horizontal 16x16 prediction: fill each row with the reconstructed
     * pixel immediately to its left. */
    int x, y;
    for(y=0; y<16; y++){
        const uint8_t left= src[-1 + y*stride];
        for(x=0; x<16; x++)
            src[x + y*stride]= left;
    }
}
static void pred16x16_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction: every pixel gets the rounded mean of the 16
     * left and 16 top neighbours. */
    int sum= 16, x, y;
    uint8_t dc;
    for(y=0; y<16; y++)
        sum += src[-1 + y*stride];
    for(x=0; x<16; x++)
        sum += src[x - stride];
    dc= sum >> 5;
    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride]= dc;
}
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction from the 16 left neighbours only. */
    int sum= 8, x, y;
    uint8_t dc;
    for(y=0; y<16; y++)
        sum += src[-1 + y*stride];
    dc= sum >> 4;
    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride]= dc;
}
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    /* DC 16x16 prediction from the 16 top neighbours only. */
    int sum= 8, x, y;
    uint8_t dc;
    for(x=0; x<16; x++)
        sum += src[x - stride];
    dc= sum >> 4;
    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride]= dc;
}
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    /* Flat mid-grey prediction when no neighbours are available. */
    int x, y;
    for(y=0; y<16; y++)
        for(x=0; x<16; x++)
            src[x + y*stride]= 128;
}
/* 16x16 plane prediction: fits a linear gradient (H,V) from the top and
 * left edges; svq3 selects SVQ3's slightly different scaling with the
 * H/V roles swapped. Output is clipped through the cm crop table. */
2132 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2135 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2136 const uint8_t * const src0 = src+7-stride;
2137 const uint8_t *src1 = src+8*stride-1;
2138 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2139 int H = src0[1] - src0[-1];
2140 int V = src1[0] - src2[ 0];
/* weighted sum of symmetric edge differences around the block centre */
2141 for(k=2; k<=8; ++k) {
2142 src1 += stride; src2 -= stride;
2143 H += k*(src0[k] - src0[-k]);
2144 V += k*(src1[0] - src2[ 0]);
/* SVQ3 gradient scaling (note the swap below) */
2147 H = ( 5*(H/4) ) / 16;
2148 V = ( 5*(V/4) ) / 16;
2150 /* required for 100% accuracy */
2151 i = H; H = V; V = i;
/* standard H.264 gradient scaling */
2153 H = ( 5*H+32 ) >> 6;
2154 V = ( 5*V+32 ) >> 6;
2157 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2158 for(j=16; j>0; --j) {
2161 for(i=-16; i<0; i+=4) {
2162 src[16+i] = cm[ (b ) >> 5 ];
2163 src[17+i] = cm[ (b+ H) >> 5 ];
2164 src[18+i] = cm[ (b+2*H) >> 5 ];
2165 src[19+i] = cm[ (b+3*H) >> 5 ];
/** Standard H.264 16x16 plane prediction (non-SVQ3 variant of the helper). */
2172 static void pred16x16_plane_c(uint8_t *src, int stride){
2173 pred16x16_plane_compat_c(src, stride, 0);
static void pred8x8_vertical_c(uint8_t *src, int stride){
    /* Vertical 8x8 (chroma) prediction: replicate the 8 pixels above the
     * block into all 8 rows. */
    int x, y;
    for(y=0; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= src[x - stride];
}
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    /* Horizontal 8x8 (chroma) prediction: fill each of the 8 rows with
     * its left neighbour pixel. */
    int x, y;
    for(y=0; y<8; y++){
        const uint8_t left= src[-1 + y*stride];
        for(x=0; x<8; x++)
            src[x + y*stride]= left;
    }
}
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    /* Flat mid-grey 8x8 prediction when no neighbours are available. */
    int x, y;
    for(y=0; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= 128;
}
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    /* Two DC values from the left edge: rows 0-3 use the upper four left
     * neighbours, rows 4-7 the lower four. */
    int sum0= 2, sum2= 2, x, y;
    uint8_t dc0, dc2;
    for(y=0; y<4; y++){
        sum0 += src[-1 + y*stride];
        sum2 += src[-1 + (y+4)*stride];
    }
    dc0= sum0 >> 2;
    dc2= sum2 >> 2;
    for(y=0; y<4; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= dc0;
    for(y=4; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= dc2;
}
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    /* Left 4x8 half predicted from the DC of the four top-left
     * neighbours, right half from the four top-right neighbours. */
    int sum0= 2, sum1= 2, x, y;
    uint8_t dc0, dc1;
    for(x=0; x<4; x++){
        sum0 += src[x - stride];
        sum1 += src[4 + x - stride];
    }
    dc0= sum0 >> 2;
    dc1= sum1 >> 2;
    for(y=0; y<8; y++){
        for(x=0; x<4; x++)
            src[x + y*stride]= dc0;
        for(x=4; x<8; x++)
            src[x + y*stride]= dc1;
    }
}
static void pred8x8_dc_c(uint8_t *src, int stride){
    /* Chroma DC prediction with one DC value per 4x4 quadrant: top-left
     * averages its left+top edges, top-right / bottom-left use only their
     * own edge, bottom-right averages the other two edge sums. */
    int sum0= 0, sum1= 0, sum2= 0, x, y;
    uint8_t dc0, dc1, dc2, dc3;
    for(x=0; x<4; x++){
        sum0 += src[-1 + x*stride] + src[x - stride];
        sum1 += src[4 + x - stride];
        sum2 += src[-1 + (x+4)*stride];
    }
    dc0= (sum0 + 4) >> 3;
    dc1= (sum1 + 2) >> 2;
    dc2= (sum2 + 2) >> 2;
    dc3= (sum1 + sum2 + 4) >> 3;
    for(y=0; y<4; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= x<4 ? dc0 : dc1;
    for(y=4; y<8; y++)
        for(x=0; x<8; x++)
            src[x + y*stride]= x<4 ? dc2 : dc3;
}
/* 8x8 (chroma) plane prediction: fits a linear gradient (H,V) from the
 * top and left edges, clipping every output through the cm crop table. */
2275 static void pred8x8_plane_c(uint8_t *src, int stride){
2278 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2279 const uint8_t * const src0 = src+3-stride;
2280 const uint8_t *src1 = src+4*stride-1;
2281 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2282 int H = src0[1] - src0[-1];
2283 int V = src1[0] - src2[ 0];
/* weighted sum of symmetric edge differences around the block centre */
2284 for(k=2; k<=4; ++k) {
2285 src1 += stride; src2 -= stride;
2286 H += k*(src0[k] - src0[-k]);
2287 V += k*(src1[0] - src2[ 0]);
2289 H = ( 17*H+16 ) >> 5;
2290 V = ( 17*V+16 ) >> 5;
2292 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2293 for(j=8; j>0; --j) {
2296 src[0] = cm[ (b ) >> 5 ];
2297 src[1] = cm[ (b+ H) >> 5 ];
2298 src[2] = cm[ (b+2*H) >> 5 ];
2299 src[3] = cm[ (b+3*H) >> 5 ];
2300 src[4] = cm[ (b+4*H) >> 5 ];
2301 src[5] = cm[ (b+5*H) >> 5 ];
2302 src[6] = cm[ (b+6*H) >> 5 ];
2303 src[7] = cm[ (b+7*H) >> 5 ];
// Helper macros for the 8x8 luma prediction functions below.  They load the
// 3-tap-filtered neighbour samples into local constants l0..l7 (left column),
// t0..t15 (top row + top-right) and lt (top-left corner), substituting the
// nearest available sample when has_topleft/has_topright is false.
2308 #define SRC(x,y) src[(x)+(y)*stride]
// PL(y): filtered left-column sample at row y ((above + 2*cur + below + 2)>>2).
2310     const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2311 #define PREDICT_8x8_LOAD_LEFT \
2312     const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2313                      + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2314     PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2315     const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
// PT(x): filtered top-row sample at column x.
2318     const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2319 #define PREDICT_8x8_LOAD_TOP \
2320     const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2321                      + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2322     PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2323     const int t7 = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2324                      + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
// PTR(x): filtered top-right sample; only valid when has_topright.
2327     t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2328 #define PREDICT_8x8_LOAD_TOPRIGHT \
2329     int t8, t9, t10, t11, t12, t13, t14, t15; \
2330     if(has_topright) { \
2331         PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2332         t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2333     } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2335 #define PREDICT_8x8_LOAD_TOPLEFT \
2336     const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
// PREDICT_8x8_DC(v): fill the whole 8x8 block with the 4-byte pattern v.
2338 #define PREDICT_8x8_DC(v) \
2340     for( y = 0; y < 8; y++ ) { \
2341         ((uint32_t*)src)[0] = \
2342         ((uint32_t*)src)[1] = v; \
// 8x8 luma DC prediction with no neighbours: fill with mid-grey (128).
2346 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2348     PREDICT_8x8_DC(0x80808080);
// 8x8 luma DC prediction from the left column only: average the eight
// filtered left samples and fill the block with it.
2350 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2352     PREDICT_8x8_LOAD_LEFT;
2353     const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction from the top row only: average the eight filtered
// top samples and fill the block with it.
2356 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2358     PREDICT_8x8_LOAD_TOP;
2359     const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
// 8x8 luma DC prediction from both neighbours: average all sixteen filtered
// left and top samples and fill the block with it.
2362 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2364     PREDICT_8x8_LOAD_LEFT;
2365     PREDICT_8x8_LOAD_TOP;
2366     const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2367                           +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
// 8x8 luma horizontal prediction: splat each filtered left sample l0..l7
// across its whole row.
2370 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2372     PREDICT_8x8_LOAD_LEFT;
2373 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2374                ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2375     ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
// 8x8 luma vertical prediction: the filtered top row (written to row 0 in
// elided code) is copied down the remaining 7 rows, 8 bytes at a time.
2378 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2381     PREDICT_8x8_LOAD_TOP;
2390     for( y = 1; y < 8; y++ )
2391         *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
// 8x8 luma diagonal-down-left prediction: each anti-diagonal (constant x+y)
// is filled with one 3-tap-filtered value from the top/top-right samples
// t0..t15, per the H.264 spec.
2393 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2395     PREDICT_8x8_LOAD_TOP;
2396     PREDICT_8x8_LOAD_TOPRIGHT;
2397     SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2398     SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2399     SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2400     SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2401     SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2402     SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2403     SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2404     SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2405     SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2406     SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2407     SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2408     SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2409     SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2410     SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2411     SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
// 8x8 luma diagonal-down-right prediction: diagonals parallel to the main
// diagonal (constant x-y) are filled with filtered values walking from the
// left samples through the top-left corner lt into the top samples.
2413 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2415     PREDICT_8x8_LOAD_TOP;
2416     PREDICT_8x8_LOAD_LEFT;
2417     PREDICT_8x8_LOAD_TOPLEFT;
2418     SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2419     SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2420     SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2421     SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2422     SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2423     SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2424     SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2425     SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2426     SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2427     SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2428     SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2429     SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2430     SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2431     SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2432     SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
// 8x8 luma vertical-right prediction: alternating half-pel averages
// ((a+b+1)>>1) and 3-tap filtered values along diagonals sloping down-left
// at a 2:1 ratio, built from left, top-left and top samples.
2435 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2437     PREDICT_8x8_LOAD_TOP;
2438     PREDICT_8x8_LOAD_LEFT;
2439     PREDICT_8x8_LOAD_TOPLEFT;
2440     SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2441     SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2442     SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2443     SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2444     SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2445     SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2446     SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2447     SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2448     SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2449     SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2450     SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2451     SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2452     SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2453     SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2454     SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2455     SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2456     SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2457     SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2458     SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2459     SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2460     SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2461     SRC(7,0)= (t6 + t7 + 1) >> 1;
// 8x8 luma horizontal-down prediction: mirror of vertical-right — half-pel
// averages and 3-tap values along diagonals sloping down-right at a 1:2
// ratio, built from left, top-left and top samples.
2463 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2465     PREDICT_8x8_LOAD_TOP;
2466     PREDICT_8x8_LOAD_LEFT;
2467     PREDICT_8x8_LOAD_TOPLEFT;
2468     SRC(0,7)= (l6 + l7 + 1) >> 1;
2469     SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2470     SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2471     SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2472     SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2473     SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2474     SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2475     SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2476     SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2477     SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2478     SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2479     SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2480     SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2481     SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2482     SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2483     SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2484     SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2485     SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2486     SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2487     SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2488     SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2489     SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
// 8x8 luma vertical-left prediction: alternating half-pel averages and
// 3-tap values from the top and top-right samples t0..t12, along diagonals
// sloping down-right at a 2:1 ratio.
2491 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2493     PREDICT_8x8_LOAD_TOP;
2494     PREDICT_8x8_LOAD_TOPRIGHT;
2495     SRC(0,0)= (t0 + t1 + 1) >> 1;
2496     SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2497     SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2498     SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2499     SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2500     SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2501     SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2502     SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2503     SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2504     SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2505     SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2506     SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2507     SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2508     SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2509     SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2510     SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2511     SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2512     SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2513     SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2514     SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2515     SRC(7,6)= (t10 + t11 + 1) >> 1;
2516     SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
// 8x8 luma horizontal-up prediction: interpolates upward-sloping diagonals
// from the left samples l0..l7 only; positions past the last left sample
// are flat-filled with l7, as required by the spec.
2518 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2520     PREDICT_8x8_LOAD_LEFT;
2521     SRC(0,0)= (l0 + l1 + 1) >> 1;
2522     SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2523     SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2524     SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2525     SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2526     SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2527     SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2528     SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2529     SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2530     SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2531     SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2532     SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2533     SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2534     SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
// Remaining bottom-right triangle: constant l7.
2535     SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2536     SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2537     SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2538     SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
// The 8x8 prediction helper macros are local to the functions above;
// undefine them so they cannot leak into the rest of the file.
2540 #undef PREDICT_8x8_LOAD_LEFT
2541 #undef PREDICT_8x8_LOAD_TOP
2542 #undef PREDICT_8x8_LOAD_TOPLEFT
2543 #undef PREDICT_8x8_LOAD_TOPRIGHT
2544 #undef PREDICT_8x8_DC
// Motion-compensate one partition from one reference picture (one list
// direction).  Computes the quarter-pel luma / eighth-pel chroma source
// positions from the cached motion vector, falls back to
// ff_emulated_edge_mc when the vector points outside the padded frame, then
// applies the given qpel luma op and chroma MC op into dest_y/cb/cr.
2550 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2551                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2552                            int src_x_offset, int src_y_offset,
2553                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2554     MpegEncContext * const s = &h->s;
// MV in quarter-pel units, offset to the partition position.
2555     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2556     const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// luma_xy selects one of the 16 quarter-pel interpolation functions.
2557     const int luma_xy= (mx&3) + ((my&3)<<2);
2558     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2559     uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2560     uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2561     int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2562     int extra_height= extra_width;
2564     const int full_mx= mx>>2;
2565     const int full_my= my>>2;
2567     assert(pic->data[0]);
// Sub-pel interpolation reads 3 extra pixels on each side, so shrink the
// usable area when the MV has a fractional component.
2569     if(mx&7) extra_width -= 3;
2570     if(my&7) extra_height -= 3;
// If the (padded) read would leave the frame, copy through the edge
// emulation buffer instead of reading out of bounds.
2572     if(   full_mx < 0-extra_width
2573        || full_my < 0-extra_height
2574        || full_mx + 16/*FIXME*/ > s->width + extra_width
2575        || full_my + 16/*FIXME*/ > s->height + extra_height){
2576         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
2577             src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
// Non-square partitions (16x8 etc.) need a second qpel call offset by delta.
2581     qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2583         qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2586     if(s->flags&CODEC_FLAG_GRAY) return;
// Chroma planes: same edge-emulation fallback, half resolution.
2589         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2590             src_cb= s->edge_emu_buffer;
2592     chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2595         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
2596             src_cr= s->edge_emu_buffer;
2598     chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
// Standard (unweighted) bi/uni-directional MC for one partition: runs
// mc_dir_part for list 0 with the "put" ops, then (if list1) again for
// list 1 with the ops switched to "avg" so the second prediction is
// averaged into the first.
2601 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2602                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2603                            int x_offset, int y_offset,
2604                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2605                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2606                            int list0, int list1){
2607     MpegEncContext * const s = &h->s;
2608     qpel_mc_func *qpix_op=  qpix_put;
2609     h264_chroma_mc_func chroma_op= chroma_put;
// Advance destinations to this partition; offsets are in chroma (8-pel)
// units, luma uses twice the offset.
2611     dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
2612     dest_cb +=   x_offset +   y_offset*s->uvlinesize;
2613     dest_cr +=   x_offset +   y_offset*s->uvlinesize;
2614     x_offset += 8*s->mb_x;
2615     y_offset += 8*s->mb_y;
2618         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2619         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2620                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
2621                    qpix_op, chroma_op);
// After list 0, switch to averaging ops so list 1 blends with it.
2624         chroma_op= chroma_avg;
2628         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2629         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2630                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
2631                    qpix_op, chroma_op);
// Weighted-prediction MC for one partition.  Bi-directional case: predict
// list 0 into dest and list 1 into the obmc_scratchpad, then combine with
// either implicit weights (use_weight == 2) or explicit per-ref
// weights/offsets.  Uni-directional case: predict, then apply that list's
// explicit weight and offset in place.
2635 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2636                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2637                            int x_offset, int y_offset,
2638                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2639                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2640                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2641                            int list0, int list1){
2642     MpegEncContext * const s = &h->s;
2644     dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
2645     dest_cb +=   x_offset +   y_offset*s->uvlinesize;
2646     dest_cr +=   x_offset +   y_offset*s->uvlinesize;
2647     x_offset += 8*s->mb_x;
2648     y_offset += 8*s->mb_y;
2651         /* don't optimize for luma-only case, since B-frames usually
2652          * use implicit weights => chroma too. */
2653         uint8_t *tmp_cb = s->obmc_scratchpad;
2654         uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2655         uint8_t *tmp_y  = tmp_cr + 8*s->uvlinesize;
2656         int refn0 = h->ref_cache[0][ scan8[n] ];
2657         int refn1 = h->ref_cache[1][ scan8[n] ];
2659         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2660                     dest_y, dest_cb, dest_cr,
2661                     x_offset, y_offset, qpix_put, chroma_put);
2662         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2663                     tmp_y, tmp_cb, tmp_cr,
2664                     x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weight0 + weight1 == 64, log2 denom 5, no offsets.
2666         if(h->use_weight == 2){
2667             int weight0 = h->implicit_weight[refn0][refn1];
2668             int weight1 = 64 - weight0;
2669             luma_weight_avg(  dest_y,  tmp_y,  s->  linesize, 5, weight0, weight1, 0, 0);
2670             chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
2671             chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
// Explicit weighting: per-list, per-reference weights and offsets.
2673             luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2674                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2675                             h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
2676             chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2677                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2678                             h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
2679             chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2680                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2681                             h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
// Uni-directional path: one prediction, weighted in place.
2684         int list = list1 ? 1 : 0;
2685         int refn = h->ref_cache[list][ scan8[n] ];
2686         Picture *ref= &h->ref_list[list][refn];
2687         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
2688                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
2689                     qpix_put, chroma_put);
2691         luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
2692                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
2693         if(h->use_weight_chroma){
2694             chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2695                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2696             chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
2697                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatch MC for one partition: use the weighted path when explicit
// weighting is on (use_weight==1), or when implicit bi-prediction has a
// non-trivial weight (implicit weight != 32, i.e. not a plain average);
// otherwise take the cheaper standard put/avg path.
2702 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
2703                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2704                            int x_offset, int y_offset,
2705                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2706                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2707                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
2708                            int list0, int list1){
2709     if((h->use_weight==2 && list0 && list1
2710         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
2711        || h->use_weight==1)
2712         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2713                          x_offset, y_offset, qpix_put, chroma_put,
2714                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
2716         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
2717                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
// Perform all motion compensation for one inter macroblock: walk the
// macroblock partition tree (16x16 / 16x8 / 8x16 / 8x8 with 8x8 / 8x4 /
// 4x8 / 4x4 sub-partitions) and call mc_part once per partition with the
// size-matched qpel/chroma function tables and weight tables.
2720 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2721                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2722                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
2723                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
2724     MpegEncContext * const s = &h->s;
2725     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2726     const int mb_type= s->current_picture.mb_type[mb_xy];
2728     assert(IS_INTER(mb_type));
2730     if(IS_16X16(mb_type)){
2731         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2732                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
2733                 &weight_op[0], &weight_avg[0],
2734                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// 16x8: two halves stacked vertically (delta 8 = byte offset to 2nd half).
2735     }else if(IS_16X8(mb_type)){
2736         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
2737                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2738                 &weight_op[1], &weight_avg[1],
2739                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2740         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
2741                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
2742                 &weight_op[1], &weight_avg[1],
2743                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x16: two halves side by side (delta 8*linesize = 2nd luma block rows).
2744     }else if(IS_8X16(mb_type)){
2745         mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
2746                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2747                 &weight_op[2], &weight_avg[2],
2748                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2749         mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
2750                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2751                 &weight_op[2], &weight_avg[2],
2752                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8: per-sub-macroblock dispatch (loop header elided in this view).
2756         assert(IS_8X8(mb_type));
2759             const int sub_mb_type= h->sub_mb_type[i];
2761             int x_offset= (i&1)<<2;
2762             int y_offset= (i&2)<<1;
2764             if(IS_SUB_8X8(sub_mb_type)){
2765                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2766                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2767                     &weight_op[3], &weight_avg[3],
2768                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2769             }else if(IS_SUB_8X4(sub_mb_type)){
2770                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2771                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2772                     &weight_op[4], &weight_avg[4],
2773                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2774                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2775                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2776                     &weight_op[4], &weight_avg[4],
2777                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2778             }else if(IS_SUB_4X8(sub_mb_type)){
2779                 mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2780                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2781                     &weight_op[5], &weight_avg[5],
2782                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2783                 mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2784                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2785                     &weight_op[5], &weight_avg[5],
2786                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2789                 assert(IS_SUB_4X4(sub_mb_type));
2791                     int sub_x_offset= x_offset + 2*(j&1);
2792                     int sub_y_offset= y_offset +   (j&2);
2793                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2794                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2795                         &weight_op[6], &weight_avg[6],
2796                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
// Build the static CAVLC VLC tables (coeff_token, total_zeros, run_before,
// and their chroma-DC variants) exactly once; the `done` flag guards
// against re-initialization on subsequent decoder instances.
2803 static void decode_init_vlc(H264Context *h){
2804     static int done = 0;
2810         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
2811                  &chroma_dc_coeff_token_len [0], 1, 1,
2812                  &chroma_dc_coeff_token_bits[0], 1, 1, 1);
2815             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
2816                      &coeff_token_len [i][0], 1, 1,
2817                      &coeff_token_bits[i][0], 1, 1, 1);
2821             init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2822                      &chroma_dc_total_zeros_len [i][0], 1, 1,
2823                      &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
2825         for(i=0; i<15; i++){
2826             init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
2827                      &total_zeros_len [i][0], 1, 1,
2828                      &total_zeros_bits[i][0], 1, 1, 1);
2832             init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2833                      &run_len [i][0], 1, 1,
2834                      &run_bits[i][0], 1, 1, 1);
// run_before for zeros_left > 6 uses a separate, larger table.
2836         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2837                  &run_len [6][0], 1, 1,
2838                  &run_bits[6][0], 1, 1, 1);
2843 * Sets the intra prediction function pointers.
2845 static void init_pred_ptrs(H264Context *h){
// Fill the intra prediction function-pointer tables with the C reference
// implementations: pred4x4 (9 modes + DC fallbacks), pred8x8l (8x8 luma),
// pred8x8 (chroma) and pred16x16, indexed by the prediction-mode enums.
2846 //    MpegEncContext * const s = &h->s;
2848     h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
2849     h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
2850     h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
2851     h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2852     h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2853     h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
2854     h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
2855     h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
2856     h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
2857     h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
2858     h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
2859     h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
2861     h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c;
2862     h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
2863     h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
2864     h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
2865     h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
2866     h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
2867     h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
2868     h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
2869     h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
2870     h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
2871     h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
2872     h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
2874     h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
2875     h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
2876     h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
2877     h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
2878     h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2879     h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2880     h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2882     h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
2883     h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
2884     h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
2885     h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
2886     h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2887     h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2888     h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
// Free every per-stream table allocated by alloc_tables (av_freep also
// NULLs the pointers, so calling this twice is safe) and clear the derived
// slice_table pointer, which aliases into slice_table_base.
2891 static void free_tables(H264Context *h){
2892     av_freep(&h->intra4x4_pred_mode);
2893     av_freep(&h->chroma_pred_mode_table);
2894     av_freep(&h->cbp_table);
2895     av_freep(&h->mvd_table[0]);
2896     av_freep(&h->mvd_table[1]);
2897     av_freep(&h->direct_table);
2898     av_freep(&h->non_zero_count);
2899     av_freep(&h->slice_table_base);
2900     av_freep(&h->top_borders[1]);
2901     av_freep(&h->top_borders[0]);
2902     h->slice_table= NULL;
2904     av_freep(&h->mb2b_xy);
2905     av_freep(&h->mb2b8_xy);
2907     av_freep(&h->dequant8_coeff);
2909     av_freep(&h->s.obmc_scratchpad);
2914 * needs width/height
2916 static int alloc_tables(H264Context *h){
// Allocate all per-stream decoding tables sized from mb_width/mb_height
// (requires s->width/height to be set).  CHECKED_ALLOCZ jumps to an error
// path (elided here) on failure.  Also precomputes the mb->b/b8 index maps
// and the dequant8 tables for all 52 QP values.
2917     MpegEncContext * const s = &h->s;
2918     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2921     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2923     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2924     CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
2925     CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
2926     CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
2927     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
// CABAC-only side tables.
2929     if( h->pps.cabac ) {
2930         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2931         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2932         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2933         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// slice_table entries start at -1 (no slice); slice_table points past the
// one-row/one-column guard band in slice_table_base.
2936     memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t));
2937     h->slice_table= h->slice_table_base + s->mb_stride + 1;
2939     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2940     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2941     for(y=0; y<s->mb_height; y++){
2942         for(x=0; x<s->mb_width; x++){
2943             const int mb_xy= x + y*s->mb_stride;
2944             const int b_xy = 4*x + 4*y*h->b_stride;
2945             const int b8_xy= 2*x + 2*y*h->b8_stride;
2947             h->mb2b_xy [mb_xy]= b_xy;
2948             h->mb2b8_xy[mb_xy]= b8_xy;
// Dequant tables for 8x8 blocks: one 64-entry table per QP 0..51.
2952     CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
2953     for(q=0; q<52; q++){
2954         int shift = div6[q];
2956         if(shift >= 2) // qp<12 are shifted during dequant
2959             h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
2960                 dequant8_coeff_init_scan[(x>>1)&12 | x&3] ] << shift;
2963     s->obmc_scratchpad = NULL;
// Shared decoder/encoder init: copy dimensions and codec id from the
// AVCodecContext and enable unrestricted MVs (H.264 always allows MVs
// pointing outside the picture).
2971 static void common_init(H264Context *h){
2972     MpegEncContext * const s = &h->s;
2974     s->width = s->avctx->width;
2975     s->height = s->avctx->height;
2976     s->codec_id= s->avctx->codec->id;
2980     s->unrestricted_mv=1;
2981     s->decode=1; //FIXME
// AVCodec init callback: set up MpegEncContext defaults, output format and
// scan tables, and detect AVC-style (length-prefixed, extradata starts
// with 1) versus Annex-B bitstreams from the extradata.
2984 static int decode_init(AVCodecContext *avctx){
2985     H264Context *h= avctx->priv_data;
2986     MpegEncContext * const s = &h->s;
2988     MPV_decode_defaults(s);
2993     s->out_format = FMT_H264;
2994     s->workaround_bugs= avctx->workaround_bugs;
2997 //    s->decode_mb= ff_h263_decode_mb;
2999     avctx->pix_fmt= PIX_FMT_YUV420P;
// With the C idct the plain zigzag/field scans are used; otherwise the
// scan order is permuted below to match the idct's coefficient layout.
3001     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3002         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3003         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3006         for(i=0; i<16; i++){
3007 #define T(x) (x>>2) | ((x<<2) & 0xF)
3008             h->zigzag_scan[i] = T(zigzag_scan[i]);
3009             h-> field_scan[i] = T( field_scan[i]);
// Extradata starting with byte 1 marks avcC (AVC configuration record).
3015     if(avctx->extradata_size > 0 && avctx->extradata &&
3016        *(char *)avctx->extradata == 1){
// Per-frame setup: start the MPV frame and error resilience, then compute
// block_offset[] — byte offsets of each 4x4 luma/chroma block inside the
// macroblock (entries 0..23 for frame, 24..47 for field, with doubled row
// stride).  Also lazily allocates the bipred scratchpad, which needs the
// final linesize and therefore cannot be done in alloc_tables.
3026 static void frame_start(H264Context *h){
3027     MpegEncContext * const s = &h->s;
3030     MPV_frame_start(s, s->avctx);
3031     ff_er_frame_start(s);
3033     assert(s->linesize && s->uvlinesize);
3035     for(i=0; i<16; i++){
3036         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3037         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3040         h->block_offset[16+i]=
3041         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3042         h->block_offset[24+16+i]=
3043         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3046     /* can't be in alloc_tables because linesize isn't known there.
3047      * FIXME: redo bipred weight to not require extra buffer? */
3048     if(!s->obmc_scratchpad)
3049         s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3051 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
// Save the border pixels of the just-decoded macroblock before the
// deblocking filter overwrites them: the left column goes into
// h->left_border (index 0 keeps the old top-left corner) and the bottom
// row into h->top_borders[0] for the macroblock row below.
3054 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3055     MpegEncContext * const s = &h->s;
3059     src_cb -= uvlinesize;
3060     src_cr -= uvlinesize;
3062     // There are two lines saved, the line above the top macroblock of a pair,
3063     // and the line above the bottom macroblock
3064     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3065     for(i=1; i<17; i++){
3066         h->left_border[i]= src_y[15+i*  linesize];
// Bottom luma row (16 bytes) saved as two 64-bit stores.
3069     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
3070     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
// Chroma borders live at offsets 17/17+9 (left) and 16/24 (top).
3072     if(!(s->flags&CODEC_FLAG_GRAY)){
3073         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
3074         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3076             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
3077             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3079         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3080         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swap (xchg != 0) or copy the saved border pixels back around the current
// macroblock so intra prediction sees unfiltered neighbours; called again
// after prediction to restore the deblocked values.  Borders are only
// touched where a neighbour exists (deblock_left/deblock_top).
3084 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3085     MpegEncContext * const s = &h->s;
3088     int deblock_left = (s->mb_x > 0);
3089     int deblock_top  = (s->mb_y > 0);
// Step back to the row above / column left of the macroblock.
3091     src_y  -=   linesize + 1;
3092     src_cb -= uvlinesize + 1;
3093     src_cr -= uvlinesize + 1;
3095 #define XCHG(a,b,t,xchg)\
3102         for(i = !deblock_top; i<17; i++){
3103             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
3108         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3109         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
// Top-right neighbour pixels of the next macroblock.
3110         if(s->mb_x < s->mb_width){
3111             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3115     if(!(s->flags&CODEC_FLAG_GRAY)){
3117             for(i = !deblock_top; i<9; i++){
3118                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
3119                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3123             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3124             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// MBAFF variant of backup_mb_border: saves the borders of a macroblock
// PAIR (two stacked macroblocks), so the left border holds 2+32 luma rows
// and two bottom rows go into top_borders[0] and top_borders[1].
3129 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3130     MpegEncContext * const s = &h->s;
3133     src_y  -= 2 *   linesize;
3134     src_cb -= 2 * uvlinesize;
3135     src_cr -= 2 * uvlinesize;
3137     // There are two lines saved, the line above the top macroblock of a pair,
3138     // and the line above the bottom macroblock
3139     h->left_border[0]= h->top_borders[0][s->mb_x][15];
3140     h->left_border[1]= h->top_borders[1][s->mb_x][15];
3141     for(i=2; i<34; i++){
3142         h->left_border[i]= src_y[15+i*  linesize];
// Bottom two luma rows of the pair (rows 32 and 33 relative to src_y).
3145     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
3146     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3147     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
3148     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3150     if(!(s->flags&CODEC_FLAG_GRAY)){
3151         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
3152         h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3153         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
3154         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3155         for(i=2; i<18; i++){
3156             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
3157             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3159         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3160         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3161         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3162         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
// MBAFF counterpart of xchg_mb_border(): exchange/restore the border buffers
// for a whole macroblock pair (two saved top rows, 32-row left column) around
// intra prediction.
3166 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3167 MpegEncContext * const s = &h->s;
3170 int deblock_left = (s->mb_x > 0);
3171 int deblock_top = (s->mb_y > 0);
3173 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
// Step back to the pixel diagonally above-left of the pair (two rows up
// because two border rows are kept for a pair).
3175 src_y -= 2 * linesize + 1;
3176 src_cb -= 2 * uvlinesize + 1;
3177 src_cr -= 2 * uvlinesize + 1;
// Swap a<->b through temporary t.  NOTE(review): macro body not visible in
// this excerpt — confirm exact semantics there.
3179 #define XCHG(a,b,t,xchg)\
// Left luma column: indices 0/1 are the two corner pixels, skipped (shifted
// past by (!deblock_top)<<1) when there is no pair above.
3186 for(i = (!deblock_top)<<1; i<34; i++){
3187 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
// Both saved top luma rows, 8 bytes at a time.
3192 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3193 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3194 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3195 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
// Chroma borders, layout matching backup_pair_border(); skipped for gray-only.
3198 if(!(s->flags&CODEC_FLAG_GRAY)){
3200 for(i = (!deblock_top) << 1; i<18; i++){
3201 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3202 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3206 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3207 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3208 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3209 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
// High-level decode of one macroblock: computes the destination pointers,
// then either copies PCM samples, or runs intra prediction / inter motion
// compensation followed by the residual inverse transforms, and finally
// applies the deblocking filter (with special MBAFF pair handling).
3214 static void hl_decode_mb(H264Context *h){
3215 MpegEncContext * const s = &h->s;
3216 const int mb_x= s->mb_x;
3217 const int mb_y= s->mb_y;
3218 const int mb_xy= mb_x + mb_y*s->mb_stride;
3219 const int mb_type= s->current_picture.mb_type[mb_xy];
3220 uint8_t *dest_y, *dest_cb, *dest_cr;
3221 int linesize, uvlinesize /*dct_offset*/;
3223 int *block_offset = &h->block_offset[0];
// In MBAFF frames, bottom==1 for the bottom macroblock of a pair.
3224 const unsigned int bottom = mb_y & 1;
// Destination of this MB in the current picture (16x16 luma, 8x8 chroma).
3229 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3230 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3231 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
// Field decoding within an MBAFF frame: double the strides so consecutive
// block rows address alternate picture lines, and use the field block
// offsets.  The bottom field starts one line below the top field, hence
// the pointer adjustment for odd mb_y.
3233 if (h->mb_field_decoding_flag) {
3234 linesize = s->linesize * 2;
3235 uvlinesize = s->uvlinesize * 2;
3236 block_offset = &h->block_offset[24];
3237 if(mb_y&1){ //FIXME move out of this func?
3238 dest_y -= s->linesize*15;
3239 dest_cb-= s->uvlinesize*7;
3240 dest_cr-= s->uvlinesize*7;
3243 linesize = s->linesize;
3244 uvlinesize = s->uvlinesize;
3245 // dct_offset = s->linesize * 16;
// I_PCM: raw samples were parsed into h->mb in block order; copy them out.
3248 if (IS_INTRA_PCM(mb_type)) {
3251 // The pixels are stored in h->mb array in the same order as levels,
3252 // copy them in output in the correct order.
3253 for(i=0; i<16; i++) {
3254 for (y=0; y<4; y++) {
3255 for (x=0; x<4; x++) {
3256 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
// Chroma PCM blocks: Cb is blocks 16..19, Cr is blocks 20..23.
3260 for(i=16; i<16+4; i++) {
3261 for (y=0; y<4; y++) {
3262 for (x=0; x<4; x++) {
3263 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3267 for(i=20; i<20+4; i++) {
3268 for (y=0; y<4; y++) {
3269 for (x=0; x<4; x++) {
3270 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
// Intra macroblock: prediction must see the *unfiltered* neighbours, so
// when deblocking is on, swap the saved borders in first (xchg=1).
3275 if(IS_INTRA(mb_type)){
3276 if(h->deblocking_filter) {
3277 if (h->mb_aff_frame) {
3279 xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3281 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3285 if(!(s->flags&CODEC_FLAG_GRAY)){
3286 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3287 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// Intra 4x4 (or 8x8 transform) luma: predict and add residual per block.
3290 if(IS_INTRA4x4(mb_type)){
3292 if(IS_8x8DCT(mb_type)){
3293 for(i=0; i<16; i+=4){
3294 uint8_t * const ptr= dest_y + block_offset[i];
3295 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Availability of top-left/top-right samples is packed into bitmasks,
// tested here via shifts against bit 15.
3296 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3297 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3298 if(h->non_zero_count_cache[ scan8[i] ])
3299 s->dsp.h264_idct8_add(ptr, h->mb + i*16, linesize);
3302 for(i=0; i<16; i++){
3303 uint8_t * const ptr= dest_y + block_offset[i];
3305 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// Diagonal-down-left / vertical-left need the 4 top-right samples; when
// they are unavailable, replicate the last top sample into all four.
3308 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3309 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3310 assert(mb_y || linesize <= block_offset[i]);
3311 if(!topright_avail){
3312 tr= ptr[3 - linesize]*0x01010101;
3313 topright= (uint8_t*) &tr;
3315 topright= ptr + 4 - linesize;
3319 h->pred4x4[ dir ](ptr, topright, linesize);
3320 if(h->non_zero_count_cache[ scan8[i] ]){
3321 if(s->codec_id == CODEC_ID_H264)
3322 s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
// This decoder is shared with SVQ3, which uses its own transform.
3324 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// Intra 16x16: one full-MB prediction, then dequant/transform the luma DC.
3329 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3330 if(s->codec_id == CODEC_ID_H264)
3331 h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
3333 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
// Restore the filtered borders (xchg=0 / backup) now that prediction is done.
3335 if(h->deblocking_filter) {
3336 if (h->mb_aff_frame) {
3338 uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16;
3339 uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3340 uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3342 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3346 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
// Inter macroblock (H.264 only): motion compensation with optional weighting.
3349 }else if(s->codec_id == CODEC_ID_H264){
3350 hl_motion(h, dest_y, dest_cb, dest_cr,
3351 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3352 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3353 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// Luma residual for non-intra4x4 MBs (intra4x4 already added it above).
// di selects 8x8 (step 4) vs 4x4 (step 1) transform granularity.
3357 if(!IS_INTRA4x4(mb_type)){
3358 if(s->codec_id == CODEC_ID_H264){
3359 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3360 void (*idct)(uint8_t *dst, DCTELEM *block, int stride) =
3361 IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
3362 for(i=0; i<16; i+=di){
3363 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3364 uint8_t * const ptr= dest_y + block_offset[i];
3365 idct(ptr, h->mb + i*16, linesize);
3369 for(i=0; i<16; i++){
3370 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3371 uint8_t * const ptr= dest_y + block_offset[i];
3372 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: dequant the 2x2 chroma DC first, then per-block idct-add.
3378 if(!(s->flags&CODEC_FLAG_GRAY)){
3379 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
3380 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
3381 if(s->codec_id == CODEC_ID_H264){
3382 for(i=16; i<16+4; i++){
3383 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3384 uint8_t * const ptr= dest_cb + block_offset[i];
3385 s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
3388 for(i=20; i<20+4; i++){
3389 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3390 uint8_t * const ptr= dest_cr + block_offset[i];
3391 s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
3395 for(i=16; i<16+4; i++){
3396 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3397 uint8_t * const ptr= dest_cb + block_offset[i];
3398 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3401 for(i=20; i<20+4; i++){
3402 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3403 uint8_t * const ptr= dest_cr + block_offset[i];
3404 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
// Deblocking.  For MBAFF the whole pair is filtered at once, only after the
// bottom MB of the pair has been decoded (early return on the top MB).
3410 if(h->deblocking_filter) {
3411 if (h->mb_aff_frame) {
3412 const int mb_y = s->mb_y - 1;
3413 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3414 const int mb_xy= mb_x + mb_y*s->mb_stride;
3415 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3416 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
// Debug canary: snapshot one chroma pixel so the tprintf()s below can
// report whether the filter modified it.
3417 uint8_t tmp = s->current_picture.data[1][384];
3418 if (!bottom) return;
3419 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3420 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3421 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3423 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3424 // TODO deblock a pair
3427 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3428 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3429 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3430 if (tmp != s->current_picture.data[1][384]) {
3431 tprintf("modified pixel 8,1 (1)\n");
3435 tprintf("call mbaff filter_mb\n");
3436 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3437 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3438 if (tmp != s->current_picture.data[1][384]) {
3439 tprintf("modified pixel 8,1 (2)\n");
// Non-MBAFF: back up this MB's borders, then filter it immediately.
3442 tprintf("call filter_mb\n");
3443 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3444 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3445 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3451 * fills the default_ref_list.
 *
 * For B slices, short-term references are first sorted by POC and then laid
 * out around the current picture's POC (past-first for list 0, future-first
 * for list 1), followed by long-term references.  For P/SP slices, list 0 is
 * simply short-term references (most recent first) then long-term ones.
3453 static int fill_default_ref_list(H264Context *h){
3454 MpegEncContext * const s = &h->s;
// Index (into sorted_short_ref) of the first reference whose POC is >= the
// current picture's POC; -1 until found.
3456 int smallest_poc_greater_than_current = -1;
3457 Picture sorted_short_ref[32];
3459 if(h->slice_type==B_TYPE){
3463 /* sort frame according to poc in B slice */
// Selection sort: repeatedly pick the smallest POC above the previous limit.
3464 for(out_i=0; out_i<h->short_ref_count; out_i++){
3466 int best_poc=INT_MAX;
3468 for(i=0; i<h->short_ref_count; i++){
3469 const int poc= h->short_ref[i]->poc;
3470 if(poc > limit && poc < best_poc){
3476 assert(best_i != INT_MIN);
3479 sorted_short_ref[out_i]= *h->short_ref[best_i];
3480 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3481 if (-1 == smallest_poc_greater_than_current) {
3482 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3483 smallest_poc_greater_than_current = out_i;
3489 if(s->picture_structure == PICT_FRAME){
3490 if(h->slice_type==B_TYPE){
3492 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3494 // find the largest poc
// Build both B lists: list 0 walks backwards (step +1 from below current POC),
// list 1 walks forwards; j wraps around the split point when one side runs out.
3495 for(list=0; list<2; list++){
3498 int step= list ? -1 : 1;
3500 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3501 while(j<0 || j>= h->short_ref_count){
3502 if(j != -99 && step == (list ? -1 : 1))
3505 j= smallest_poc_greater_than_current + (step>>1);
// Only frame references (reference==3 means both fields used) qualify here.
3507 if(sorted_short_ref[j].reference != 3) continue;
3508 h->default_ref_list[list][index ]= sorted_short_ref[j];
3509 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
// Long-term references are appended after the short-term ones; their pic_id
// is the long-term index.
3512 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3513 if(h->long_ref[i] == NULL) continue;
3514 if(h->long_ref[i]->reference != 3) continue;
3516 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3517 h->default_ref_list[ list ][index++].pic_id= i;;  // NOTE(review): stray second ';' (harmless)
3520 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3521 // swap the two first elements of L1 when
3522 // L0 and L1 are identical
3523 Picture temp= h->default_ref_list[1][0];
3524 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3525 h->default_ref_list[1][1] = temp;
// Zero any unused tail entries so later code sees cleared pictures.
3528 if(index < h->ref_count[ list ])
3529 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
// P/SP slice: list 0 only — short-term refs in stored order, then long-term.
3533 for(i=0; i<h->short_ref_count; i++){
3534 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
3535 h->default_ref_list[0][index ]= *h->short_ref[i];
3536 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3538 for(i = 0; i < 16; i++){
3539 if(h->long_ref[i] == NULL) continue;
3540 if(h->long_ref[i]->reference != 3) continue;
3541 h->default_ref_list[0][index ]= *h->long_ref[i];
3542 h->default_ref_list[0][index++].pic_id= i;;  // NOTE(review): stray second ';' (harmless)
3544 if(index < h->ref_count[0])
3545 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3548 if(h->slice_type==B_TYPE){
3550 //FIXME second field balh
// Trace the final lists when verbose debugging is compiled in.
3554 for (i=0; i<h->ref_count[0]; i++) {
3555 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3557 if(h->slice_type==B_TYPE){
3558 for (i=0; i<h->ref_count[1]; i++) {
// NOTE(review): prints data[0] from list 0 here, not list 1 — likely a
// copy/paste slip in the debug trace; verify against upstream.
3559 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3566 static void print_short_term(H264Context *h);
3567 static void print_long_term(H264Context *h);
// Parses the ref_pic_list_reordering() syntax from the slice header and
// reorders h->ref_list[] accordingly, starting from the default lists.
// Returns 0 on success, negative on bitstream errors.
3569 static int decode_ref_pic_list_reordering(H264Context *h){
3570 MpegEncContext * const s = &h->s;
3573 print_short_term(h);
// I/SI slices have no reference lists to reorder.
3575 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3577 for(list=0; list<2; list++){
// Start from the default list; the reordering commands then permute it.
3578 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3580 if(get_bits1(&s->gb)){  // ref_pic_list_reordering_flag
// pred tracks the running "predicted" picture number for diff decoding.
3581 int pred= h->curr_pic_num;
3583 for(index=0; ; index++){
3584 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3587 Picture *ref = NULL;
// idc==3 terminates the reordering command list.
3589 if(reordering_of_pic_nums_idc==3)
3592 if(index >= h->ref_count[list]){
3593 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3597 if(reordering_of_pic_nums_idc<3){
// idc 0/1: short-term reference addressed by a +-difference of pic numbers.
3598 if(reordering_of_pic_nums_idc<2){
3599 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3601 if(abs_diff_pic_num >= h->max_pic_num){
3602 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3606 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3607 else pred+= abs_diff_pic_num;
// Wrap modulo max_pic_num (power of two, so masking works).
3608 pred &= h->max_pic_num - 1;
// Search short-term refs newest-first for the matching frame_num.
3610 for(i= h->short_ref_count-1; i>=0; i--){
3611 ref = h->short_ref[i];
3612 assert(ref->reference == 3);
3613 assert(!ref->long_ref);
3614 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3618 ref->pic_id= ref->frame_num;
// idc 2: long-term reference addressed directly by its index.
3620 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3621 ref = h->long_ref[pic_id];
3622 ref->pic_id= pic_id;
3623 assert(ref->reference == 3);
3624 assert(ref->long_ref);
3629 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3630 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Shift the existing entries from 'index' down to the slot where the picked
// reference already was (if any), then insert it at 'index'.
3632 for(i=index; i+1<h->ref_count[list]; i++){
3633 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3636 for(; i > index; i--){
3637 h->ref_list[list][i]= h->ref_list[list][i-1];
3639 h->ref_list[list][index]= *ref;
3642 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Only B slices have a second list.
3648 if(h->slice_type!=B_TYPE) break;
// Replace any still-empty entries with the current picture so later code
// never dereferences a NULL data[0].
3650 for(list=0; list<2; list++){
3651 for(index= 0; index < h->ref_count[list]; index++){
3652 if(!h->ref_list[list][index].data[0])
3653 h->ref_list[list][index]= s->current_picture;
3655 if(h->slice_type!=B_TYPE) break;
// Precompute direct-mode tables now that the lists are final.
3658 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3659 direct_dist_scale_factor(h);
3660 direct_ref_list_init(h);
// Parses the explicit weighted-prediction table (pred_weight_table()) from
// the slice header: per-list, per-reference luma and chroma weights/offsets.
// Sets h->use_weight / h->use_weight_chroma when any non-default entry is
// present.  Returns 0 (hidden by elided lines in this excerpt — verify).
3664 static int pred_weight_table(H264Context *h){
3665 MpegEncContext * const s = &h->s;
3667 int luma_def, chroma_def;
3670 h->use_weight_chroma= 0;
3671 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3672 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is 1.0 in fixed point, i.e. 1 << log2_denom.
3673 luma_def = 1<<h->luma_log2_weight_denom;
3674 chroma_def = 1<<h->chroma_log2_weight_denom;
3676 for(list=0; list<2; list++){
3677 for(i=0; i<h->ref_count[list]; i++){
3678 int luma_weight_flag, chroma_weight_flag;
3680 luma_weight_flag= get_bits1(&s->gb);
3681 if(luma_weight_flag){
3682 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3683 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only flag weighting as "in use" if the entry differs from the default.
3684 if( h->luma_weight[list][i] != luma_def
3685 || h->luma_offset[list][i] != 0)
3688 h->luma_weight[list][i]= luma_def;
3689 h->luma_offset[list][i]= 0;
3692 chroma_weight_flag= get_bits1(&s->gb);
3693 if(chroma_weight_flag){
// j indexes the two chroma planes (Cb/Cr); loop header elided in this excerpt.
3696 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3697 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3698 if( h->chroma_weight[list][i][j] != chroma_def
3699 || h->chroma_offset[list][i][j] != 0)
3700 h->use_weight_chroma= 1;
3705 h->chroma_weight[list][i][j]= chroma_def;
3706 h->chroma_offset[list][i][j]= 0;
// List 1 exists only for B slices.
3710 if(h->slice_type != B_TYPE) break;
3712 h->use_weight= h->use_weight || h->use_weight_chroma;
// Computes the implicit bi-prediction weight table (weighted_bipred_idc==2):
// each (ref0, ref1) pair gets a weight derived from the POC distances of the
// two references to the current picture, clamped to [−64..128] range checks.
3716 static void implicit_weight_table(H264Context *h){
3717 MpegEncContext * const s = &h->s;
3719 int cur_poc = s->current_picture_ptr->poc;
// Special case: a single reference each side, equidistant from the current
// picture — implicit weighting degenerates to plain averaging.
3721 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3722 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3724 h->use_weight_chroma= 0;
// use_weight==2 marks "implicit" mode; denominators fixed at 5 (weights sum to 64).
3729 h->use_weight_chroma= 2;
3730 h->luma_log2_weight_denom= 5;
3731 h->chroma_log2_weight_denom= 5;
3734 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3735 int poc0 = h->ref_list[0][ref0].poc;
3736 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3737 int poc1 = h->ref_list[1][ref1].poc;
// td/tb are the clipped POC distances per the spec's implicit-weight formula.
3738 int td = clip(poc1 - poc0, -128, 127);
// NOTE(review): tx divides by td — a td==0 guard appears to be on an elided
// line just above; confirm it is present in the full file.
3740 int tb = clip(cur_poc - poc0, -128, 127);
3741 int tx = (16384 + (ABS(td) >> 1)) / td;
3742 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range scale factors fall back to the neutral 32/32 split.
3743 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3744 h->implicit_weight[ref0][ref1] = 32;
3746 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3748 h->implicit_weight[ref0][ref1] = 32;
// Drops the decoder's reference claim on a picture, unless it is still
// pending output (delayed_output_pic or in the delayed_pic[] queue), in
// which case releasing it must wait until it has been output.
3753 static inline void unreference_pic(H264Context *h, Picture *pic){
3756 if(pic == h->delayed_output_pic)
3759 for(i = 0; h->delayed_pic[i]; i++)
3760 if(pic == h->delayed_pic[i]){
3768 * instantaneous decoder refresh.
 *
 * Clears the whole DPB reference state: all long-term and short-term
 * references are unreferenced and their slots reset, as required when an
 * IDR slice is decoded.
3770 static void idr(H264Context *h){
3773 for(i=0; i<16; i++){
3774 if (h->long_ref[i] != NULL) {
3775 unreference_pic(h, h->long_ref[i]);
3776 h->long_ref[i]= NULL;
3779 h->long_ref_count=0;
3781 for(i=0; i<h->short_ref_count; i++){
3782 unreference_pic(h, h->short_ref[i]);
3783 h->short_ref[i]= NULL;
3785 h->short_ref_count=0;
3788 /* forget old pics after a seek */
// AVCodec.flush callback: empties the delayed-output queue and drops the
// reference flag on the current picture so decoding can restart cleanly.
3789 static void flush_dpb(AVCodecContext *avctx){
3790 H264Context *h= avctx->priv_data;
3793 h->delayed_pic[i]= NULL;
3794 h->delayed_output_pic= NULL;
3796 if(h->s.current_picture_ptr)
3797 h->s.current_picture_ptr->reference= 0;
3802 * @return the removed picture or NULL if an error occurs
 *
 * Removes the short-term reference with the given frame_num from
 * h->short_ref[], compacting the array and decrementing the count.
3804 static Picture * remove_short(H264Context *h, int frame_num){
3805 MpegEncContext * const s = &h->s;
3808 if(s->avctx->debug&FF_DEBUG_MMCO)
3809 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3811 for(i=0; i<h->short_ref_count; i++){
3812 Picture *pic= h->short_ref[i];
3813 if(s->avctx->debug&FF_DEBUG_MMCO)
3814 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3815 if(pic->frame_num == frame_num){
3816 h->short_ref[i]= NULL;
// Close the gap left by the removed entry (array of pointers, so memmove
// moves pointer-sized elements).
3817 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
3818 h->short_ref_count--;
3827 * @return the removed picture or NULL if an error occurs
 *
 * Removes and returns the long-term reference stored at index i
 * (slot becomes NULL; count only decremented if a picture was there).
3829 static Picture * remove_long(H264Context *h, int i){
3832 pic= h->long_ref[i];
3833 h->long_ref[i]= NULL;
3834 if(pic) h->long_ref_count--;
3840 * print short term list
 *
 * Debug helper: dumps the short-term reference list (frame_num, poc,
 * data pointer) when FF_DEBUG_MMCO logging is enabled.
3842 static void print_short_term(H264Context *h) {
3844 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3845 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3846 for(i=0; i<h->short_ref_count; i++){
3847 Picture *pic= h->short_ref[i];
3848 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3854 * print long term list
 *
 * Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * logging is enabled (empty slots are skipped by an elided NULL check).
3856 static void print_long_term(H264Context *h) {
3858 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3859 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3860 for(i = 0; i < 16; i++){
3861 Picture *pic= h->long_ref[i];
3863 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3870 * Executes the reference picture marking (memory management control operations).
 *
 * Applies each parsed MMCO opcode to the short-/long-term reference arrays,
 * then (unless the current picture was marked long-term) inserts the current
 * picture at the head of the short-term list (sliding window).
3872 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3873 MpegEncContext * const s = &h->s;
// Set when an MMCO marks the current picture long-term, so the sliding-window
// insertion at the bottom is skipped.
3875 int current_is_long=0;
3878 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3879 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3881 for(i=0; i<mmco_count; i++){
3882 if(s->avctx->debug&FF_DEBUG_MMCO)
3883 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
3885 switch(mmco[i].opcode){
// Mark a short-term reference as unused.
3886 case MMCO_SHORT2UNUSED:
3887 pic= remove_short(h, mmco[i].short_frame_num);
3888 if(pic==NULL) return -1;
3889 unreference_pic(h, pic);
// Convert a short-term reference into a long-term one, replacing whatever
// occupied that long-term slot.
3891 case MMCO_SHORT2LONG:
3892 pic= remove_long(h, mmco[i].long_index);
3893 if(pic) unreference_pic(h, pic);
3895 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
3896 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3897 h->long_ref_count++;
// Mark a long-term reference as unused.
3899 case MMCO_LONG2UNUSED:
3900 pic= remove_long(h, mmco[i].long_index);
3901 if(pic==NULL) return -1;
3902 unreference_pic(h, pic);
// MMCO_LONG: store the *current* picture in the given long-term slot.
3905 pic= remove_long(h, mmco[i].long_index);
3906 if(pic) unreference_pic(h, pic);
3908 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
3909 h->long_ref[ mmco[i].long_index ]->long_ref=1;
3910 h->long_ref_count++;
// Set max long-term index: evict all long-term refs at or above the new max.
3914 case MMCO_SET_MAX_LONG:
3915 assert(mmco[i].long_index <= 16);
3916 // just remove the long term which index is greater than new max
3917 for(j = mmco[i].long_index; j<16; j++){
3918 pic = remove_long(h, j);
3919 if (pic) unreference_pic(h, pic);
// MMCO_RESET: flush every short- and long-term reference.
3923 while(h->short_ref_count){
3924 pic= remove_short(h, h->short_ref[0]->frame_num);
3925 unreference_pic(h, pic);
3927 for(j = 0; j < 16; j++) {
3928 pic= remove_long(h, j);
3929 if(pic) unreference_pic(h, pic);
// Sliding window: prepend the current picture to the short-term list.  If a
// picture with the same frame_num is already there, the buffer state is bad.
3936 if(!current_is_long){
3937 pic= remove_short(h, s->current_picture_ptr->frame_num);
3939 unreference_pic(h, pic);
3940 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3943 if(h->short_ref_count)
3944 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3946 h->short_ref[0]= s->current_picture_ptr;
3947 h->short_ref[0]->long_ref=0;
3948 h->short_ref_count++;
3951 print_short_term(h);
// Parses dec_ref_pic_marking() from the slice header into h->mmco[]:
// IDR slices get an implicit marking, otherwise either an explicit list of
// MMCO opcodes or a generated sliding-window SHORT2UNUSED when the DPB is
// full.
3956 static int decode_ref_pic_marking(H264Context *h){
3957 MpegEncContext * const s = &h->s;
3960 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag; stored as 0/-1 via the -1 bias.
3961 s->broken_link= get_bits1(&s->gb) -1;
// long_term_reference_flag: -1 => current IDR picture is short-term only.
3962 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
3963 if(h->mmco[0].long_index == -1)
3966 h->mmco[0].opcode= MMCO_LONG;
3970 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
3971 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3972 MMCOOpcode opcode= get_ue_golomb(&s->gb);;  // NOTE(review): stray second ';' (harmless)
3974 h->mmco[i].opcode= opcode;
3975 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute frame_num, wrapped mod 2^log2_max_frame_num.
3976 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
3977 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
3978 fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
3982 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3983 h->mmco[i].long_index= get_ue_golomb(&s->gb);
3984 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
3985 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3990 if(opcode > MMCO_LONG){
3991 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3994 if(opcode == MMCO_END)
// Sliding-window mode: if the DPB is full, synthesize an MMCO that drops the
// oldest short-term reference.
3999 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4001 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4002 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4003 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Derives the picture order count (POC) for the current picture per the
// spec's three poc_type modes, and stores the per-field and frame POC in
// the current Picture.
4013 static int init_poc(H264Context *h){
4014 MpegEncContext * const s = &h->s;
4015 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates frame_num wraparounds; resets at IDR.
4018 if(h->nal_unit_type == NAL_IDR_SLICE){
4019 h->frame_num_offset= 0;
4021 if(h->frame_num < h->prev_frame_num)
4022 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4024 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: POC = poc_msb + poc_lsb, with poc_msb tracking lsb wraparound
// relative to the previous picture.
4027 if(h->sps.poc_type==0){
4028 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4030 if(h->nal_unit_type == NAL_IDR_SLICE){
4035 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4036 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4037 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4038 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4040 h->poc_msb = h->prev_poc_msb;
4041 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4043 field_poc[1] = h->poc_msb + h->poc_lsb;
4044 if(s->picture_structure == PICT_FRAME)
4045 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC derived from frame_num via the SPS's expected-delta cycle.
4046 }else if(h->sps.poc_type==1){
4047 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4050 if(h->sps.poc_cycle_length != 0)
4051 abs_frame_num = h->frame_num_offset + h->frame_num;
4055 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4058 expected_delta_per_poc_cycle = 0;
4059 for(i=0; i < h->sps.poc_cycle_length; i++)
4060 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// expectedpoc = full cycles' worth of delta + partial-cycle offsets.
4062 if(abs_frame_num > 0){
4063 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4064 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4066 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4067 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4068 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4072 if(h->nal_ref_idc == 0)
4073 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4075 field_poc[0] = expectedpoc + h->delta_poc[0];
4076 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4078 if(s->picture_structure == PICT_FRAME)
4079 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decoding order; non-reference pictures sit one
// below the surrounding reference pictures.
4082 if(h->nal_unit_type == NAL_IDR_SLICE){
4085 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4086 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Publish the derived POCs on the current picture.
4092 if(s->picture_structure != PICT_BOTTOM_FIELD)
4093 s->current_picture_ptr->field_poc[0]= field_poc[0];
4094 if(s->picture_structure != PICT_TOP_FIELD)
4095 s->current_picture_ptr->field_poc[1]= field_poc[1];
4096 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4097 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4103 * decodes a slice header.
4104 * this will allso call MPV_common_init() and frame_start() as needed
4106 static int decode_slice_header(H264Context *h){
4107 MpegEncContext * const s = &h->s;
4108 int first_mb_in_slice, pps_id;
4109 int num_ref_idx_active_override_flag;
4110 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4112 int default_ref_list_done = 0;
4114 s->current_picture.reference= h->nal_ref_idc != 0;
4115 s->dropable= h->nal_ref_idc == 0;
4117 first_mb_in_slice= get_ue_golomb(&s->gb);
4119 slice_type= get_ue_golomb(&s->gb);
4121 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4126 h->slice_type_fixed=1;
4128 h->slice_type_fixed=0;
4130 slice_type= slice_type_map[ slice_type ];
4131 if (slice_type == I_TYPE
4132 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4133 default_ref_list_done = 1;
4135 h->slice_type= slice_type;
4137 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4139 pps_id= get_ue_golomb(&s->gb);
4141 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4144 h->pps= h->pps_buffer[pps_id];
4145 if(h->pps.slice_group_count == 0){
4146 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4150 h->sps= h->sps_buffer[ h->pps.sps_id ];
4151 if(h->sps.log2_max_frame_num == 0){
4152 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4156 s->mb_width= h->sps.mb_width;
4157 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4159 h->b_stride= s->mb_width*4 + 1;
4160 h->b8_stride= s->mb_width*2 + 1;
4162 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4163 if(h->sps.frame_mbs_only_flag)
4164 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4166 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4168 if (s->context_initialized
4169 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4173 if (!s->context_initialized) {
4174 if (MPV_common_init(s) < 0)
4179 s->avctx->width = s->width;
4180 s->avctx->height = s->height;
4181 s->avctx->sample_aspect_ratio= h->sps.sar;
4182 if(!s->avctx->sample_aspect_ratio.den)
4183 s->avctx->sample_aspect_ratio.den = 1;
4185 if(h->sps.timing_info_present_flag){
4186 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
4190 if(h->slice_num == 0){
4194 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4195 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4197 h->mb_aff_frame = 0;
4198 if(h->sps.frame_mbs_only_flag){
4199 s->picture_structure= PICT_FRAME;
4201 if(get_bits1(&s->gb)) { //field_pic_flag
4202 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4204 s->picture_structure= PICT_FRAME;
4205 first_mb_in_slice <<= 1;
4206 h->mb_aff_frame = h->sps.mb_aff;
4210 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4211 s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4213 if(s->picture_structure==PICT_FRAME){
4214 h->curr_pic_num= h->frame_num;
4215 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4217 h->curr_pic_num= 2*h->frame_num;
4218 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4221 if(h->nal_unit_type == NAL_IDR_SLICE){
4222 get_ue_golomb(&s->gb); /* idr_pic_id */
4225 if(h->sps.poc_type==0){
4226 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4228 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4229 h->delta_poc_bottom= get_se_golomb(&s->gb);
4233 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4234 h->delta_poc[0]= get_se_golomb(&s->gb);
4236 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4237 h->delta_poc[1]= get_se_golomb(&s->gb);
4242 if(h->pps.redundant_pic_cnt_present){
4243 h->redundant_pic_count= get_ue_golomb(&s->gb);
4246 //set defaults, might be overriden a few line later
4247 h->ref_count[0]= h->pps.ref_count[0];
4248 h->ref_count[1]= h->pps.ref_count[1];
4250 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4251 if(h->slice_type == B_TYPE){
4252 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4254 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4256 if(num_ref_idx_active_override_flag){
4257 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4258 if(h->slice_type==B_TYPE)
4259 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4261 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4262 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4268 if(!default_ref_list_done){
4269 fill_default_ref_list(h);
4272 decode_ref_pic_list_reordering(h);
4274 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4275 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4276 pred_weight_table(h);
4277 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4278 implicit_weight_table(h);
4282 if(s->current_picture.reference)
4283 decode_ref_pic_marking(h);
4285 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4286 h->cabac_init_idc = get_ue_golomb(&s->gb);
4288 h->last_qscale_diff = 0;
4289 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4290 if(s->qscale<0 || s->qscale>51){
4291 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4294 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4295 //FIXME qscale / qp ... stuff
4296 if(h->slice_type == SP_TYPE){
4297 get_bits1(&s->gb); /* sp_for_switch_flag */
4299 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4300 get_se_golomb(&s->gb); /* slice_qs_delta */
4303 h->deblocking_filter = 1;
4304 h->slice_alpha_c0_offset = 0;
4305 h->slice_beta_offset = 0;
4306 if( h->pps.deblocking_filter_parameters_present ) {
4307 h->deblocking_filter= get_ue_golomb(&s->gb);
4308 if(h->deblocking_filter < 2)
4309 h->deblocking_filter^= 1; // 1<->0
4311 if( h->deblocking_filter ) {
4312 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4313 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4318 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4319 slice_group_change_cycle= get_bits(&s->gb, ?);
4324 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4325 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4327 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4329 av_get_pict_type_char(h->slice_type),
4330 pps_id, h->frame_num,
4331 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4332 h->ref_count[0], h->ref_count[1],
4334 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4336 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/**
 * Reads a CAVLC level_prefix: the run of zero bits preceding the first
 * 1 bit in the bitstream cache.
 * NOTE(review): the local declarations and the return statement are not
 * shown here; presumably the function returns log-1 (the number of
 * leading zeros) -- confirm against the full source.
 */
4346 static inline int get_level_prefix(GetBitContext *gb){
4350 OPEN_READER(re, gb);
4351 UPDATE_CACHE(re, gb);
     // buf now holds the next 32 bits of the stream, MSB-aligned
4352 buf=GET_CACHE(re, gb);
     // position of the highest set bit: log-1 zero bits precede it
4354 log= 32 - av_log2(buf);
4356 print_bin(buf>>(32-log), log);
4357 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
     // consume the zero run plus the terminating 1 bit
4360 LAST_SKIP_BITS(re, gb, log);
4361 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for this macroblock's
 * sub-partitions: a sub-block disqualifies it when it is smaller than
 * 8x8, or when it is DIRECT-predicted without direct_8x8_inference.
 * NOTE(review): `&&` binds tighter than `||` here, so the condition is
 * !IS_SUB_8X8(...) || (!inference_flag && IS_DIRECT(...)) -- that
 * grouping appears intentional, but it is worth parenthesizing.
 */
4366 static inline int get_dct8x8_allowed(H264Context *h){
4369 if(!IS_SUB_8X8(h->sub_mb_type[i])
4370 || !h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i]))
4377 * decodes a residual block.
4378 * @param n block index
4379 * @param scantable scantable
4380 * @param max_coeff number of coefficients in the block
4381 * @return <0 if an error occured
4383 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
4384 MpegEncContext * const s = &h->s;
4385 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4386 int level[16], run[16];
4387 int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
4389 //FIXME put trailing_onex into the context
     // --- coeff_token: total_coeff in the upper bits, trailing_ones in the low 2.
     // Chroma DC uses its own VLC; luma blocks pick a VLC table from the
     // predicted non-zero count of neighbouring blocks.
4391 if(n == CHROMA_DC_BLOCK_INDEX){
4392 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4393 total_coeff= coeff_token>>2;
4395 if(n == LUMA_DC_BLOCK_INDEX){
4396 total_coeff= pred_non_zero_count(h, 0);
4397 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4398 total_coeff= coeff_token>>2;
4400 total_coeff= pred_non_zero_count(h, n);
4401 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4402 total_coeff= coeff_token>>2;
4403 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4407 //FIXME set last_non_zero?
4412 trailing_ones= coeff_token&3;
4413 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4414 assert(total_coeff<=16);
     // --- trailing ones: only a sign bit each (0 -> +1, 1 -> -1)
4416 for(i=0; i<trailing_ones; i++){
4417 level[i]= 1 - 2*get_bits1(gb);
     // initial suffix_length is 1 only for blocks with many coefficients
4420 suffix_length= total_coeff > 10 && trailing_ones < 3;
     // --- remaining levels: level_prefix + suffix, with escape codes at
     // prefix 14 and 15
4422 for(; i<total_coeff; i++){
4423 const int prefix= get_level_prefix(gb);
4424 int level_code, mask;
4426 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4428 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4430 level_code= (prefix<<suffix_length); //part
4431 }else if(prefix==14){
4433 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4435 level_code= prefix + get_bits(gb, 4); //part
4436 }else if(prefix==15){
4437 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4438 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4440 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4444 if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
     // map even/odd level_code to signed level without a branch
4446 mask= -(level_code&1);
4447 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4449 if(suffix_length==0) suffix_length=1; //FIXME split first iteration
4452 if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
     // NOTE(review): the next line has an unbalanced parenthesis and would
     // not compile as-is; it looks like a disabled alternative of the line
     // above (likely fenced by preprocessor conditionals in the full file)
     // -- confirm before touching.
4454 if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
4455 /* ? == prefix > 2 or sth */
4457 tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
     // --- total_zeros: skipped when the block is already full
4460 if(total_coeff == max_coeff)
4463 if(n == CHROMA_DC_BLOCK_INDEX)
4464 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4466 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     // --- run_before for each coefficient except the last one
4469 for(i=0; i<total_coeff-1; i++){
4472 else if(zeros_left < 7){
4473 run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4475 run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4477 zeros_left -= run[i];
4481 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4485 for(; i<total_coeff-1; i++){
     // --- write levels back in scan order; two variants differ only in
     // whether qmul dequantization is applied
4493 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4496 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4497 j= scantable[ coeff_num ];
4502 for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
4505 coeff_num += run[i] + 1; //FIXME add 1 earlier ?
4506 j= scantable[ coeff_num ];
4508 block[j]= level[i] * qmul[j];
4509 // printf("%d %d ", block[j], qmul[j]);
4516 * decodes a P_SKIP or B_SKIP macroblock
4518 static void decode_mb_skip(H264Context *h){
4519 MpegEncContext * const s = &h->s;
4520 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     // skipped MBs carry no residual: clear all non-zero-count state
4523 memset(h->non_zero_count[mb_xy], 0, 16);
4524 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
     // MBAFF: the field/frame decision is read on the top MB of each pair
4526 if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4527 h->mb_field_decoding_flag= get_bits1(&s->gb);
4529 if(h->mb_field_decoding_flag)
4530 mb_type|= MB_TYPE_INTERLACED;
     // B_SKIP: temporary direct mb_type, refined by pred_direct_motion()
4532 if( h->slice_type == B_TYPE )
4534 // just for fill_caches. pred_direct_motion will set the real mb_type
4535 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4537 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4538 pred_direct_motion(h, &mb_type);
     // no mvd is coded for a skipped MB
4540 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
4541 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
     // P_SKIP: motion from the P-skip predictor, reference index 0
4547 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4549 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4550 pred_pskip_motion(h, &mx, &my);
4551 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4552 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4554 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     // commit per-MB state to the picture-level tables
4557 write_back_motion(h, mb_type);
4558 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
4559 s->current_picture.qscale_table[mb_xy]= s->qscale;
4560 h->slice_table[ mb_xy ]= h->slice_num;
4561 h->prev_mb_skipped= 1;
4565 * decodes a macroblock
4566 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4568 static int decode_mb_cavlc(H264Context *h){
4569 MpegEncContext * const s = &h->s;
4570 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4571 int mb_type, partition_count, cbp;
4572 int dct8x8_allowed= h->pps.transform_8x8_mode;
4574 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4576 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4577 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
     // --- mb_skip_run handling (only inter slice types can skip MBs)
4579 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4580 if(s->mb_skip_run==-1)
4581 s->mb_skip_run= get_ue_golomb(&s->gb);
4583 if (s->mb_skip_run--) {
     // MBAFF: read or infer the field-decoding flag for this MB pair
4588 if(h->mb_aff_frame){
4589 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4590 h->mb_field_decoding_flag = get_bits1(&s->gb);
4592 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4594 h->prev_mb_skipped= 0;
     // --- mb_type: per-slice-type lookup tables; intra types in an
     // inter slice fall through to decode_intra_mb
4596 mb_type= get_ue_golomb(&s->gb);
4597 if(h->slice_type == B_TYPE){
4599 partition_count= b_mb_type_info[mb_type].partition_count;
4600 mb_type= b_mb_type_info[mb_type].type;
4603 goto decode_intra_mb;
4605 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4607 partition_count= p_mb_type_info[mb_type].partition_count;
4608 mb_type= p_mb_type_info[mb_type].type;
4611 goto decode_intra_mb;
4614 assert(h->slice_type == I_TYPE);
4617 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4621 cbp= i_mb_type_info[mb_type].cbp;
4622 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4623 mb_type= i_mb_type_info[mb_type].type;
4626 if(h->mb_field_decoding_flag)
4627 mb_type |= MB_TYPE_INTERLACED;
4629 h->slice_table[ mb_xy ]= h->slice_num;
     // --- I_PCM: raw 8-bit samples, byte-aligned, no prediction/residual
4631 if(IS_INTRA_PCM(mb_type)){
4634 // we assume these blocks are very rare so we dont optimize it
4635 align_get_bits(&s->gb);
4637 // The pixels are stored in the same order as levels in h->mb array.
4638 for(y=0; y<16; y++){
4639 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4640 for(x=0; x<16; x++){
4641 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4642 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4646 const int index= 256 + 4*(y&3) + 32*(y>>2);
4648 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4649 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4653 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4655 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4656 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4660 // In deblocking, the quantizer is 0
4661 s->current_picture.qscale_table[mb_xy]= 0;
4662 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
4663 // All coeffs are present
4664 memset(h->non_zero_count[mb_xy], 16, 16);
4666 s->current_picture.mb_type[mb_xy]= mb_type;
4670 fill_caches(h, mb_type, 0);
     // --- intra prediction modes ---
4673 if(IS_INTRA(mb_type)){
4674 // init_top_left_availability(h);
4675 if(IS_INTRA4x4(mb_type)){
     // transform_size_8x8_flag (only when the PPS enables 8x8)
4678 if(dct8x8_allowed && get_bits1(&s->gb)){
4679 mb_type |= MB_TYPE_8x8DCT;
4683 // fill_intra4x4_pred_table(h);
     // each 4x4 (or 8x8) block: either the predicted mode, or a
     // 3-bit remainder that skips over the predicted mode
4684 for(i=0; i<16; i+=di){
4685 const int mode_coded= !get_bits1(&s->gb);
4686 const int predicted_mode= pred_intra_mode(h, i);
4690 const int rem_mode= get_bits(&s->gb, 3);
4691 if(rem_mode<predicted_mode)
4696 mode= predicted_mode;
4700 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4702 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4704 write_back_intra_pred_mode(h);
4705 if( check_intra4x4_pred_mode(h) < 0)
4708 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4709 if(h->intra16x16_pred_mode < 0)
4712 h->chroma_pred_mode= get_ue_golomb(&s->gb);
4714 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
4715 if(h->chroma_pred_mode < 0)
     // --- 8x8 sub-partitioned inter MB ---
4717 }else if(partition_count==4){
4718 int i, j, sub_partition_count[4], list, ref[2][4];
4720 if(h->slice_type == B_TYPE){
4722 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4723 if(h->sub_mb_type[i] >=13){
4724 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4727 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4728 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4730 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4731 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
4732 pred_direct_motion(h, &mb_type);
4734 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4736 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4737 if(h->sub_mb_type[i] >=4){
4738 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4741 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4742 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
     // first pass: reference indices per 8x8 block (REF0 forces ref 0)
4746 for(list=0; list<2; list++){
4747 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4748 if(ref_count == 0) continue;
4749 if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4753 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4754 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4755 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4764 dct8x8_allowed = get_dct8x8_allowed(h);
     // second pass: motion vectors for every sub-partition
4766 for(list=0; list<2; list++){
4767 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4768 if(ref_count == 0) continue;
4771 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4772 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4773 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4775 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4776 const int sub_mb_type= h->sub_mb_type[i];
4777 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4778 for(j=0; j<sub_partition_count[i]; j++){
4780 const int index= 4*i + block_width*j;
4781 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4782 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4783 mx += get_se_golomb(&s->gb);
4784 my += get_se_golomb(&s->gb);
4785 tprintf("final mv:%d %d\n", mx, my);
     // replicate the vector into every 4x4 cell the partition covers
4787 if(IS_SUB_8X8(sub_mb_type)){
4788 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
4789 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4790 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
4791 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4792 }else if(IS_SUB_8X4(sub_mb_type)){
4793 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
4794 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
4795 }else if(IS_SUB_4X8(sub_mb_type)){
4796 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
4797 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
4799 assert(IS_SUB_4X4(sub_mb_type));
4800 mv_cache[ 0 ][0]= mx;
4801 mv_cache[ 0 ][1]= my;
4805 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
     // --- whole-MB direct prediction ---
4811 }else if(IS_DIRECT(mb_type)){
4812 pred_direct_motion(h, &mb_type);
4813 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4815 int list, mx, my, i;
4816 //FIXME we should set ref_idx_l? to 0 if we use that later ...
     // --- 16x16 / 16x8 / 8x16 inter partitions: refs then mvds ---
4817 if(IS_16X16(mb_type)){
4818 for(list=0; list<2; list++){
4819 if(h->ref_count[list]>0){
4820 if(IS_DIR(mb_type, 0, list)){
4821 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4822 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4824 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
4827 for(list=0; list<2; list++){
4828 if(IS_DIR(mb_type, 0, list)){
4829 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4830 mx += get_se_golomb(&s->gb);
4831 my += get_se_golomb(&s->gb);
4832 tprintf("final mv:%d %d\n", mx, my);
4834 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
4836 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
4839 else if(IS_16X8(mb_type)){
4840 for(list=0; list<2; list++){
4841 if(h->ref_count[list]>0){
4843 if(IS_DIR(mb_type, i, list)){
4844 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4845 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4847 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
4851 for(list=0; list<2; list++){
4853 if(IS_DIR(mb_type, i, list)){
4854 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4855 mx += get_se_golomb(&s->gb);
4856 my += get_se_golomb(&s->gb);
4857 tprintf("final mv:%d %d\n", mx, my);
4859 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
4861 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
4865 assert(IS_8X16(mb_type));
4866 for(list=0; list<2; list++){
4867 if(h->ref_count[list]>0){
4869 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4870 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
4871 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4873 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
4877 for(list=0; list<2; list++){
4879 if(IS_DIR(mb_type, i, list)){
4880 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4881 mx += get_se_golomb(&s->gb);
4882 my += get_se_golomb(&s->gb);
4883 tprintf("final mv:%d %d\n", mx, my);
4885 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
4887 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
4893 if(IS_INTER(mb_type))
4894 write_back_motion(h, mb_type);
     // --- coded_block_pattern (not coded for Intra16x16, which carries
     // its cbp inside the mb_type) ---
4896 if(!IS_INTRA16x16(mb_type)){
4897 cbp= get_ue_golomb(&s->gb);
4899 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
4903 if(IS_INTRA4x4(mb_type))
4904 cbp= golomb_to_intra4x4_cbp[cbp];
4906 cbp= golomb_to_inter_cbp[cbp];
     // transform_size_8x8_flag for inter MBs with luma residual
4909 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4910 if(get_bits1(&s->gb))
4911 mb_type |= MB_TYPE_8x8DCT;
4913 s->current_picture.mb_type[mb_xy]= mb_type;
     // --- residual data ---
4915 if(cbp || IS_INTRA16x16(mb_type)){
4916 int i8x8, i4x4, chroma_idx;
4917 int chroma_qp, dquant;
4918 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4919 const uint8_t *scan, *dc_scan;
4921 // fill_non_zero_count_cache(h);
4923 if(IS_INTERLACED(mb_type)){
4924 scan= h->field_scan;
4925 dc_scan= luma_dc_field_scan;
4927 scan= h->zigzag_scan;
4928 dc_scan= luma_dc_zigzag_scan;
     // mb_qp_delta: qscale wraps around modulo 52
4931 dquant= get_se_golomb(&s->gb);
4933 if( dquant > 25 || dquant < -26 ){
4934 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4938 s->qscale += dquant;
4939 if(((unsigned)s->qscale) > 51){
4940 if(s->qscale<0) s->qscale+= 52;
4941 else s->qscale-= 52;
4944 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
     // Intra16x16: separate luma DC block, then 15-coeff AC blocks
4945 if(IS_INTRA16x16(mb_type)){
4946 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, dequant_coeff[s->qscale], 16) < 0){
4947 return -1; //FIXME continue if partitioned and other return -1 too
4950 assert((cbp&15) == 0 || (cbp&15) == 15);
4953 for(i8x8=0; i8x8<4; i8x8++){
4954 for(i4x4=0; i4x4<4; i4x4++){
4955 const int index= i4x4 + 4*i8x8;
4956 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, dequant_coeff[s->qscale], 15) < 0 ){
4962 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
     // non-Intra16x16 luma: 4x4 (or interleaved 8x8) blocks per cbp bit
4965 for(i8x8=0; i8x8<4; i8x8++){
4966 if(cbp & (1<<i8x8)){
4967 if(IS_8x8DCT(mb_type)){
4968 DCTELEM *buf = &h->mb[64*i8x8];
4970 for(i4x4=0; i4x4<4; i4x4++){
4971 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
4972 h->dequant8_coeff[s->qscale], 16) <0 )
4978 buf[i] = (buf[i] + 2) >> 2;
4980 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4981 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
4983 for(i4x4=0; i4x4<4; i4x4++){
4984 const int index= i4x4 + 4*i8x8;
4986 if( decode_residual(h, gb, h->mb + 16*index, index, scan, dequant_coeff[s->qscale], 16) <0 ){
4992 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4993 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
     // chroma DC (2x2 per component), then chroma AC per cbp
4999 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5000 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, dequant_coeff[chroma_qp], 4) < 0){
5006 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5007 for(i4x4=0; i4x4<4; i4x4++){
5008 const int index= 16 + 4*chroma_idx + i4x4;
5009 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, dequant_coeff[chroma_qp], 15) < 0){
5015 uint8_t * const nnz= &h->non_zero_count_cache[0];
5016 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5017 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5020 uint8_t * const nnz= &h->non_zero_count_cache[0];
5021 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5022 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5023 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5025 s->current_picture.qscale_table[mb_xy]= s->qscale;
5026 write_back_non_zero_count(h);
/**
 * Decodes the MBAFF mb_field_decoding_flag with CABAC.
 * Context (0..2) counts how many of the left and above MB pairs in the
 * same slice are field-coded.
 */
5031 static int decode_cabac_field_decoding_flag(H264Context *h) {
5032 MpegEncContext * const s = &h->s;
5033 const int mb_x = s->mb_x;
     // address of the top MB of the current pair
5034 const int mb_y = s->mb_y & ~1;
5035 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5036 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5038 unsigned int ctx = 0;
5040 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5043 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5047 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    base index into cabac_state for this syntax element
 * @param intra_slice nonzero in I slices (neighbour-based context is used
 *                    only there; P/B slices use a fixed context)
 * @return 0 for I4x4, 25 for I_PCM, otherwise an I16x16 type encoding
 *         cbp_luma / cbp_chroma / pred_mode in the value
 */
5050 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5051 uint8_t *state= &h->cabac_state[ctx_base];
5055 MpegEncContext * const s = &h->s;
5056 const int mba_xy = h->left_mb_xy[0];
5057 const int mbb_xy = h->top_mb_xy;
     // context from neighbours: each non-I4x4 neighbour in-slice bumps ctx
5059 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5061 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5063 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5064 return 0; /* I4x4 */
5067 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5068 return 0; /* I4x4 */
     // terminate bin distinguishes PCM from I16x16
5071 if( get_cabac_terminate( &h->cabac ) )
5072 return 25; /* PCM */
5074 mb_type = 1; /* I16x16 */
5075 if( get_cabac( &h->cabac, &state[1] ) )
5076 mb_type += 12; /* cbp_luma != 0 */
5078 if( get_cabac( &h->cabac, &state[2] ) ) {
5079 if( get_cabac( &h->cabac, &state[2+intra_slice] ) )
5080 mb_type += 4 * 2; /* cbp_chroma == 2 */
5082 mb_type += 4 * 1; /* cbp_chroma == 1 */
     // final two bins select the 16x16 luma prediction mode
5084 if( get_cabac( &h->cabac, &state[3+intra_slice] ) )
5086 if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) )
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * I: intra type directly. P: unary-style tree over states 14-17, intra
 * types offset by 5. B: prefix over states 27+ctx, then a bit pattern
 * selecting among the bi-prediction partition types; intra offset by 23.
 */
5091 static int decode_cabac_mb_type( H264Context *h ) {
5092 MpegEncContext * const s = &h->s;
5094 if( h->slice_type == I_TYPE ) {
5095 return decode_cabac_intra_mb_type(h, 3, 1);
5096 } else if( h->slice_type == P_TYPE ) {
5097 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5099 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5100 if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 )
5101 return 0; /* P_L0_D16x16; */
5103 return 3; /* P_8x8; */
5105 if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 )
5106 return 2; /* P_L0_D8x16; */
5108 return 1; /* P_L0_D16x8; */
5111 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5113 } else if( h->slice_type == B_TYPE ) {
5114 const int mba_xy = h->left_mb_xy[0];
5115 const int mbb_xy = h->top_mb_xy;
     // ctx counts in-slice neighbours that are neither skip nor direct
5119 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )
5120 && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5122 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )
5123 && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5126 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5127 return 0; /* B_Direct_16x16 */
5129 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5130 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
     // 4-bit pattern selects the remaining B types
5133 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5134 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5135 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5136 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5138 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5139 else if( bits == 13 ) {
5140 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5141 } else if( bits == 14 )
5142 return 11; /* B_L1_L0_8x16 */
5143 else if( bits == 15 )
5144 return 22; /* B_8x8 */
     // bits 8..12: one extra bin extends the pattern
5146 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5147 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5149 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * Context counts the left and above in-slice neighbours that are NOT
 * skipped; P/SP slices use states 11+ctx, B slices 24+ctx.
 */
5154 static int decode_cabac_mb_skip( H264Context *h) {
5155 MpegEncContext * const s = &h->s;
5156 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5157 const int mba_xy = mb_xy - 1;
5158 const int mbb_xy = mb_xy - s->mb_stride;
5161 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5163 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5166 if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE)
5167 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5169 return get_cabac( &h->cabac, &h->cabac_state[24+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 * State 68 codes prev_intra4x4_pred_mode_flag; otherwise a 3-bit
 * rem_intra4x4_pred_mode is read (state 69), and modes >= pred_mode are
 * shifted up by one so the predicted mode itself is never coded.
 */
5172 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5175 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5178 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5180 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5182 if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
5184 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC.
 * First bin uses a context (64+ctx) based on whether the left/above
 * neighbours use a non-DC chroma mode; remaining bins are a truncated
 * unary code on state 64+3.
 */
5190 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5191 const int mba_xy = h->left_mb_xy[0];
5192 const int mbb_xy = h->top_mb_xy;
5196 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5197 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5200 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5203 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5206 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5208 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// Mapping between raster 4x4-block index (0..15) and its (x,y) position
// inside the macroblock; block_idx_xy is the inverse lookup.
5214 static const uint8_t block_idx_x[16] = {
5215 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5217 static const uint8_t block_idx_y[16] = {
5218 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5220 static const uint8_t block_idx_xy[4][4] = {
/**
 * Decodes the luma coded_block_pattern (4 bits, one per 8x8 block) with
 * CABAC. For each 8x8 block the context depends on whether the 8x8
 * neighbour to its left and above (possibly in the adjacent MB, via
 * left_cbp/top_cbp) had coded coefficients.
 */
5227 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5228 MpegEncContext * const s = &h->s;
5233 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5239 x = block_idx_x[4*i8x8];
5240 y = block_idx_y[4*i8x8];
5244 else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) {
5245 cbp_a = h->left_cbp;
5246 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5251 else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) {
5253 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5256 /* No need to test for skip as we put 0 for skip block */
5257 /* No need to test for IPCM as we put 1 for IPCM block */
     // neighbour 8x8 index computed by wrapping x-1 / y-1 into the MB
5259 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5260 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5265 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5266 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5270 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
/**
 * Decodes the chroma coded_block_pattern (0 = none, 1 = DC only,
 * 2 = DC+AC) with CABAC. Contexts derive from the chroma cbp bits
 * (bits 4-5) of the left and top neighbour MBs.
 */
5276 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5280 cbp_a = (h->left_cbp>>4)&0x03;
5281 cbp_b = (h-> top_cbp>>4)&0x03;
     // first bin: any chroma coefficients at all?
5284 if( cbp_a > 0 ) ctx++;
5285 if( cbp_b > 0 ) ctx += 2;
5286 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
     // second bin: DC-only vs DC+AC, contexts keyed on neighbours == 2
5290 if( cbp_a == 2 ) ctx++;
5291 if( cbp_b == 2 ) ctx += 2;
5292 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC.
 * The context depends on whether the previously decoded MB (in decoding
 * order, wrapping across rows) changed qp and had coefficients. The
 * unary value is folded to a signed delta; NOTE(review): the visible
 * tail returns -(val+1)/2 for one parity -- presumably the other parity
 * returns (val+1)/2, confirm against the full source.
 */
5294 static int decode_cabac_mb_dqp( H264Context *h) {
5295 MpegEncContext * const s = &h->s;
5301 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
     // at the start of a row, fall back to the last MB of the previous row
5303 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5305 if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5308 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5319 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (8x8 sub-partition shape) with CABAC,
 * as a small binary tree over states 21-23.
 */
5321 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5322 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5324 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5326 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type with CABAC, as a binary tree over
 * states 36-39 (state 39 is reused for all the trailing bins).
 */
5330 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5332 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5333 return 0; /* B_Direct_8x8 */
5334 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5335 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5337 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5338 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5339 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
     // two more bins build the remaining type offsets
5342 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5343 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context (0..2) is the
 * precomputed count of neighbouring MBs using the 8x8 transform.
 */
5347 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5348 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC.
 * Context derives from the left/above 4x4 neighbours' reference indices
 * in the cache; in B slices, direct-predicted neighbours do not count.
 * The value itself is a unary code over states 54+ctx.
 */
5351 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5352 int refa = h->ref_cache[list][scan8[n] - 1];
5353 int refb = h->ref_cache[list][scan8[n] - 8];
5357 if( h->slice_type == B_TYPE) {
5358 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5360 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5369 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/**
 * Decodes one motion-vector-difference component with CABAC.
 * @param l 0 = horizontal (context base 40), 1 = vertical (base 47)
 * The initial context depends on the summed |mvd| of the left and above
 * neighbours; magnitudes >= 9 continue with Exp-Golomb bypass bins, and
 * a final bypass bin carries the sign.
 */
5379 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5380 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5381 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5382 int ctxbase = (l == 0) ? 40 : 47;
5387 else if( amvd > 32 )
5392 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
     // unary part, capped at 9 before switching to bypass coding
5397 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5405 while( get_cabac_bypass( &h->cabac ) ) {
5410 if( get_cabac_bypass( &h->cabac ) )
     // sign bit
5414 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
/**
 * Computes the coded_block_flag context for CABAC residual decoding.
 * nza/nzb are the "neighbour has coefficients" indicators, whose source
 * depends on the block category: luma DC uses bit 8 of left/top cbp,
 * luma AC/4x4 use the non-zero-count cache, chroma DC uses per-plane
 * cbp bits, chroma AC uses the cache at the 16+idx offsets.
 * Final context is ctx + 4*cat (4 contexts per category).
 */
5418 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5423 nza = h->left_cbp&0x100;
5424 nzb = h-> top_cbp&0x100;
5425 } else if( cat == 1 || cat == 2 ) {
5426 nza = h->non_zero_count_cache[scan8[idx] - 1];
5427 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5428 } else if( cat == 3 ) {
5429 nza = (h->left_cbp>>(6+idx))&0x01;
5430 nzb = (h-> top_cbp>>(6+idx))&0x01;
5433 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5434 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5443 return ctx + 4 * cat;
/**
 * Decodes one residual coefficient block with CABAC and writes the
 * dequantized levels into 'block' in scantable order.
 * Steps: coded_block_flag -> significance map (significant_coeff_flag /
 * last_significant_coeff_flag) -> coefficient magnitudes and signs,
 * decoded in reverse scan order. Context bases depend on the block
 * category 'cat' (see comment table below) and on
 * h->mb_field_decoding_flag (frame vs field context sets).
 * Updates non_zero_count_cache / cbp_table so the deblocking filter
 * and neighbour prediction see the correct coefficient counts.
 * Returns a negative value on error (per the callers' checks).
 * NOTE(review): several lines are elided in this listing (variable
 * declarations such as index[], abslevel1, coeff_abs, early returns,
 * and some closing braces).
 */
5446 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
5447 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* Context bases: index [0] is for frame decoding, [1] for field. */
5448 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5449 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5450 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5451 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5452 static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* For 4x4 blocks each scan position has its own context (identity map). */
5453 static const int identity[15] = {
5454 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
/* 8x8 blocks share contexts between scan positions via these tables. */
5456 static const int significant_coeff_flag_offset_8x8[63] = {
5457 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5458 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5459 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5460 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5462 static const int last_coeff_flag_offset_8x8[63] = {
5463 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5464 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5465 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5466 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5472 int coeff_count = 0;
5475 int abslevelgt1 = 0;
5477 const int* significant_coeff_ctx_offset;
5478 const int* last_coeff_ctx_offset;
5479 const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat]
5480 + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5481 const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat]
5482 + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
5484 /* cat: 0-> DC 16x16 n = 0
5485 * 1-> AC 16x16 n = luma4x4idx
5486 * 2-> Luma4x4 n = luma4x4idx
5487 * 3-> DC Chroma n = iCbCr
5488 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5489 * 5-> Luma8x8 n = 4 * luma8x8idx
5492 /* read coded block flag */
5494 significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8;
5495 last_coeff_ctx_offset = last_coeff_flag_offset_8x8;
/* coded_block_flag == 0: no coefficients; clear the nnz cache and bail. */
5497 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5498 if( cat == 1 || cat == 2 )
5499 h->non_zero_count_cache[scan8[n]] = 0;
5501 h->non_zero_count_cache[scan8[16+n]] = 0;
5506 significant_coeff_ctx_offset =
5507 last_coeff_ctx_offset = identity;
/* Significance map: record scan positions of non-zero coefficients. */
5510 for(last= 0; last < max_coeff - 1; last++) {
5511 int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last];
5512 if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) {
5513 int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last];
5514 index[coeff_count++] = last;
5515 if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) {
/* The final scan position has no significant_coeff_flag of its own. */
5521 if( last == max_coeff -1 ) {
5522 index[coeff_count++] = last;
5524 assert(coeff_count > 0);
/* Record the coefficient count for neighbour prediction / deblocking. */
5527 h->cbp_table[mb_xy] |= 0x100;
5528 else if( cat == 1 || cat == 2 )
5529 h->non_zero_count_cache[scan8[n]] = coeff_count;
5531 h->cbp_table[mb_xy] |= 0x40 << n;
5533 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5536 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1);
/* Magnitudes and signs, decoded in reverse scan order. */
5539 for( i = coeff_count - 1; i >= 0; i-- ) {
5540 int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat];
5541 int j= scantable[index[i]];
/* coeff_abs_level == 1: only a sign bit follows (DC cats skip dequant). */
5543 if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) {
5544 if( cat == 0 || cat == 3 ) {
5545 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5548 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
5549 else block[j] = qmul[j];
/* coeff_abs_level > 1: truncated unary up to 15, then bypass exp-Golomb. */
5555 ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
5556 while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) {
5560 if( coeff_abs >= 15 ) {
5562 while( get_cabac_bypass( &h->cabac ) ) {
5563 coeff_abs += 1 << j;
5568 if( get_cabac_bypass( &h->cabac ) )
5569 coeff_abs += 1 << j ;
5573 if( cat == 0 || cat == 3 ) {
5574 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5575 else block[j] = coeff_abs;
5577 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
5578 else block[j] = coeff_abs * qmul[j];
/**
 * Computes the macroblock indices of the top and left neighbours
 * (h->top_mb_xy, h->left_mb_xy[0]) for the current macroblock.
 * The simple "-stride" / "-1" values are adjusted for MBAFF frames,
 * where the usable neighbour depends on whether the current MB pair
 * and its neighbours are coded as frame or field pairs.
 * NOTE(review): some lines of the MBAFF adjustment (including the
 * condition guarded by 'bottom') are elided in this listing.
 */
5587 void inline compute_mb_neighboors(H264Context *h)
5589 MpegEncContext * const s = &h->s;
5590 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5591 h->top_mb_xy = mb_xy - s->mb_stride;
5592 h->left_mb_xy[0] = mb_xy - 1;
5593 if(h->mb_aff_frame){
/* pair_xy: address of the top MB of the current (frame/field) MB pair. */
5594 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5595 const int top_pair_xy = pair_xy - s->mb_stride;
5596 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5597 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5598 const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
5599 const int bottom = (s->mb_y & 1);
5601 ? !curr_mb_frame_flag // bottom macroblock
5602 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5604 h->top_mb_xy -= s->mb_stride;
/* Mismatched frame/field coding: the left neighbour is the pair's top MB. */
5606 if (left_mb_frame_flag != curr_mb_frame_flag) {
5607 h->left_mb_xy[0] = pair_xy - 1;
5614 * decodes a macroblock
5615 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5617 static int decode_mb_cabac(H264Context *h) {
5618 MpegEncContext * const s = &h->s;
5619 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5620 int mb_type, partition_count, cbp = 0;
5621 int dct8x8_allowed= h->pps.transform_8x8_mode;
5623 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5625 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* Skipped MBs exist only in P/B slices; on skip, reset the per-MB tables
 * used by deblocking and return early (return line elided in listing). */
5626 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5627 /* read skip flags */
5628 if( decode_cabac_mb_skip( h ) ) {
5631 h->cbp_table[mb_xy] = 0;
5632 h->chroma_pred_mode_table[mb_xy] = 0;
5633 h->last_qscale_diff = 0;
/* MBAFF: the field flag is coded once per MB pair (on the top MB, or
 * after a skipped top MB); otherwise inherit the picture structure. */
5639 if(h->mb_aff_frame){
5640 if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
5641 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5643 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5645 h->prev_mb_skipped = 0;
5647 compute_mb_neighboors(h);
5648 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5649 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Map the raw mb_type through the per-slice-type info tables; intra MBs
 * inside P/B slices jump to the (elided) decode_intra_mb label. */
5653 if( h->slice_type == B_TYPE ) {
5655 partition_count= b_mb_type_info[mb_type].partition_count;
5656 mb_type= b_mb_type_info[mb_type].type;
5659 goto decode_intra_mb;
5661 } else if( h->slice_type == P_TYPE ) {
5663 partition_count= p_mb_type_info[mb_type].partition_count;
5664 mb_type= p_mb_type_info[mb_type].type;
5667 goto decode_intra_mb;
5670 assert(h->slice_type == I_TYPE);
5672 partition_count = 0;
5673 cbp= i_mb_type_info[mb_type].cbp;
5674 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5675 mb_type= i_mb_type_info[mb_type].type;
5677 if(h->mb_field_decoding_flag)
5678 mb_type |= MB_TYPE_INTERLACED;
5680 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: raw pixel values are copied straight from the bytestream and
 * the CABAC decoder is re-initialized after them. */
5682 if(IS_INTRA_PCM(mb_type)) {
5686 // We assume these blocks are very rare so we dont optimize it.
5687 // FIXME The two following lines get the bitstream position in the cabac
5688 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5689 ptr= h->cabac.bytestream;
5690 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
5692 // The pixels are stored in the same order as levels in h->mb array.
5693 for(y=0; y<16; y++){
5694 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5695 for(x=0; x<16; x++){
5696 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr);
5697 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5701 const int index= 256 + 4*(y&3) + 32*(y>>2);
5703 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5704 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5708 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5710 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5711 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5715 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5717 // All blocks are present
5718 h->cbp_table[mb_xy] = 0x1ef;
5719 h->chroma_pred_mode_table[mb_xy] = 0;
5720 // In deblocking, the quantizer is 0
5721 s->current_picture.qscale_table[mb_xy]= 0;
5722 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5723 // All coeffs are present
5724 memset(h->non_zero_count[mb_xy], 16, 16);
5725 s->current_picture.mb_type[mb_xy]= mb_type;
5729 fill_caches(h, mb_type, 0);
/* Intra MBs: decode 4x4/16x16 luma prediction modes and the chroma mode. */
5731 if( IS_INTRA( mb_type ) ) {
5733 if( IS_INTRA4x4( mb_type ) ) {
/* With 8x8 DCT, one prediction mode covers each 8x8 quarter (2x2 cells). */
5734 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5735 mb_type |= MB_TYPE_8x8DCT;
5736 for( i = 0; i < 16; i+=4 ) {
5737 int pred = pred_intra_mode( h, i );
5738 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5739 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5742 for( i = 0; i < 16; i++ ) {
5743 int pred = pred_intra_mode( h, i );
5744 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5746 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5749 write_back_intra_pred_mode(h);
5750 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5752 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5753 if( h->intra16x16_pred_mode < 0 ) return -1;
5755 h->chroma_pred_mode_table[mb_xy] =
5756 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5758 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
5759 if( h->chroma_pred_mode < 0 ) return -1;
/* 8x8 partitions: decode sub-mb types, reference indices and mvds. */
5760 } else if( partition_count == 4 ) {
5761 int i, j, sub_partition_count[4], list, ref[2][4];
5763 if( h->slice_type == B_TYPE ) {
5764 for( i = 0; i < 4; i++ ) {
5765 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5766 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5767 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5769 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5770 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5771 pred_direct_motion(h, &mb_type);
5772 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5773 for( i = 0; i < 4; i++ )
5774 if( IS_DIRECT(h->sub_mb_type[i]) )
5775 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5779 for( i = 0; i < 4; i++ ) {
5780 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5781 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5782 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* ref_idx is only coded when more than one reference is available. */
5786 for( list = 0; list < 2; list++ ) {
5787 if( h->ref_count[list] > 0 ) {
5788 for( i = 0; i < 4; i++ ) {
5789 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5790 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5791 if( h->ref_count[list] > 1 )
5792 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5798 h->ref_cache[list][ scan8[4*i]+1 ]=
5799 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5805 dct8x8_allowed = get_dct8x8_allowed(h);
5807 for(list=0; list<2; list++){
5809 if(IS_DIRECT(h->sub_mb_type[i])){
5810 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5813 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5815 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5816 const int sub_mb_type= h->sub_mb_type[i];
5817 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5818 for(j=0; j<sub_partition_count[i]; j++){
5821 const int index= 4*i + block_width*j;
5822 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5823 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5824 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5826 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5827 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5828 tprintf("final mv:%d %d\n", mx, my);
/* Replicate the decoded mv/mvd into every 4x4 cell of the sub-partition. */
5830 if(IS_SUB_8X8(sub_mb_type)){
5831 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5832 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5833 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5834 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5836 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
5837 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5838 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
5839 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5840 }else if(IS_SUB_8X4(sub_mb_type)){
5841 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5842 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5844 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
5845 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
5846 }else if(IS_SUB_4X8(sub_mb_type)){
5847 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5848 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5850 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
5851 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
5853 assert(IS_SUB_4X4(sub_mb_type));
5854 mv_cache[ 0 ][0]= mx;
5855 mv_cache[ 0 ][1]= my;
5857 mvd_cache[ 0 ][0]= mx - mpx;
5858 mvd_cache[ 0 ][1]= my - mpy;
/* Unused list for this partition: zero the 2x2 mv/mvd cells. */
5862 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5863 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5864 p[0] = p[1] = p[8] = p[9] = 0;
5865 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* B_Direct_16x16: motion comes from spatial/temporal direct prediction. */
5869 } else if( IS_DIRECT(mb_type) ) {
5870 pred_direct_motion(h, &mb_type);
5871 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5872 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5873 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 inter partitions: refs first, then mvds. */
5875 int list, mx, my, i, mpx, mpy;
5876 if(IS_16X16(mb_type)){
5877 for(list=0; list<2; list++){
5878 if(IS_DIR(mb_type, 0, list)){
5879 if(h->ref_count[list] > 0 ){
5880 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5881 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5884 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
5886 for(list=0; list<2; list++){
5887 if(IS_DIR(mb_type, 0, list)){
5888 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5890 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5891 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5892 tprintf("final mv:%d %d\n", mx, my);
5894 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5895 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5897 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5900 else if(IS_16X8(mb_type)){
5901 for(list=0; list<2; list++){
5902 if(h->ref_count[list]>0){
5904 if(IS_DIR(mb_type, i, list)){
5905 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5906 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5908 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5912 for(list=0; list<2; list++){
5914 if(IS_DIR(mb_type, i, list)){
5915 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5916 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5917 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5918 tprintf("final mv:%d %d\n", mx, my);
5920 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5921 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5923 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5924 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5929 assert(IS_8X16(mb_type));
5930 for(list=0; list<2; list++){
5931 if(h->ref_count[list]>0){
5933 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5934 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5935 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5937 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5941 for(list=0; list<2; list++){
5943 if(IS_DIR(mb_type, i, list)){
5944 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5945 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5946 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5948 tprintf("final mv:%d %d\n", mx, my);
5949 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5950 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5952 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5953 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5960 if( IS_INTER( mb_type ) ) {
5961 h->chroma_pred_mode_table[mb_xy] = 0;
5962 write_back_motion( h, mb_type );
/* CBP: coded explicitly except for intra 16x16 (implied by mb_type). */
5965 if( !IS_INTRA16x16( mb_type ) ) {
5966 cbp = decode_cabac_mb_cbp_luma( h );
5967 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5970 h->cbp_table[mb_xy] = cbp;
5972 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5973 if( decode_cabac_mb_transform_size( h ) )
5974 mb_type |= MB_TYPE_8x8DCT;
5976 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residuals: delta-QP, then luma DC/AC or 4x4/8x8, then chroma DC/AC. */
5978 if( cbp || IS_INTRA16x16( mb_type ) ) {
5979 const uint8_t *scan, *dc_scan;
5982 if(IS_INTERLACED(mb_type)){
5983 scan= h->field_scan;
5984 dc_scan= luma_dc_field_scan;
5986 scan= h->zigzag_scan;
5987 dc_scan= luma_dc_zigzag_scan;
5990 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
/* Wrap qscale back into [0,51] after applying the delta. */
5992 if(((unsigned)s->qscale) > 51){
5993 if(s->qscale<0) s->qscale+= 52;
5994 else s->qscale-= 52;
5996 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5998 if( IS_INTRA16x16( mb_type ) ) {
6000 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6001 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, dequant_coeff[s->qscale], 16) < 0)
6004 for( i = 0; i < 16; i++ ) {
6005 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6006 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, dequant_coeff[s->qscale], 15) < 0 )
6010 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6014 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6015 if( cbp & (1<<i8x8) ) {
6016 if( IS_8x8DCT(mb_type) ) {
6017 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6018 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
6023 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6026 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6027 const int index = 4*i8x8 + i4x4;
6028 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6029 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, dequant_coeff[s->qscale], 16) < 0 )
6033 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6034 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6041 for( c = 0; c < 2; c++ ) {
6042 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6043 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, dequant_coeff[h->chroma_qp], 4) < 0)
6050 for( c = 0; c < 2; c++ ) {
6051 for( i = 0; i < 4; i++ ) {
6052 const int index = 16 + 4 * c + i;
6053 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6054 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, dequant_coeff[h->chroma_qp], 15) < 0)
6059 uint8_t * const nnz= &h->non_zero_count_cache[0];
6060 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6061 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6064 uint8_t * const nnz= &h->non_zero_count_cache[0];
6065 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6066 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6067 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6070 s->current_picture.qscale_table[mb_xy]= s->qscale;
6071 write_back_non_zero_count(h);
/**
 * Deblocks one vertical luma edge (16 pixels tall).
 * For bS < 4 the normal filter is delegated to the DSP function with
 * per-4px-segment tc values; bS == 4 (intra MB edge) uses the strong
 * filter implemented inline below, which may rewrite up to 3 pixels
 * on each side of the edge.
 * NOTE(review): the bS<4 / bS==4 branch selection lines are elided in
 * this listing.
 */
6077 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6079 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6080 const int alpha = alpha_table[index_a];
6081 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
/* tc = -1 disables filtering for that 4-pixel segment in the DSP code. */
6086 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6087 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6089 /* 16px edge length, because bS=4 is triggered by being at
6090 * the edge of an intra MB, so all 4 bS are the same */
6091 for( d = 0; d < 16; d++ ) {
6092 const int p0 = pix[-1];
6093 const int p1 = pix[-2];
6094 const int p2 = pix[-3];
6096 const int q0 = pix[0];
6097 const int q1 = pix[1];
6098 const int q2 = pix[2];
6100 if( ABS( p0 - q0 ) < alpha &&
6101 ABS( p1 - p0 ) < beta &&
6102 ABS( q1 - q0 ) < beta ) {
/* Strong filter only when the edge step is well below alpha. */
6104 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6105 if( ABS( p2 - p0 ) < beta)
6107 const int p3 = pix[-4];
6109 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6110 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6111 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6114 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6116 if( ABS( q2 - q0 ) < beta)
6118 const int q3 = pix[3];
6120 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6121 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6122 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6125 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are modified. */
6129 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6130 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6132 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/**
 * Deblocks one vertical chroma edge. Both the normal (bS < 4, with
 * tc = tc0+1) and the intra/strong (bS == 4) cases are delegated to the
 * DSP loop-filter functions. NOTE(review): the branch selection line
 * between the two calls is elided in this listing.
 */
6138 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6140 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6141 const int alpha = alpha_table[index_a];
6142 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6147 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6148 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6150 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the left vertical luma edge of an MBAFF macroblock pair,
 * one row at a time (16 rows). Each row selects its own bS entry
 * (8 values) and qp (2 values, one per left neighbour MB) depending on
 * whether the current MB is field- or frame-coded, then applies either
 * the normal filter (bS < 4, clipped by tc) or the strong intra filter
 * (bS == 4) inline.
 * NOTE(review): some lines (e.g. the tc/i_delta setup in the bS<4 path)
 * are elided in this listing.
 */
6154 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6156 for( i = 0; i < 16; i++, pix += stride) {
6162 int bS_index = (i >> 1);
6163 if (h->mb_field_decoding_flag) {
6165 bS_index |= (i & 1);
6168 if( bS[bS_index] == 0 ) {
/* Per-row parameters: qp alternates by field parity in field mode. */
6172 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6173 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6174 alpha = alpha_table[index_a];
6175 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6178 if( bS[bS_index] < 4 ) {
6179 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6180 /* 4px edge length */
6181 const int p0 = pix[-1];
6182 const int p1 = pix[-2];
6183 const int p2 = pix[-3];
6184 const int q0 = pix[0];
6185 const int q1 = pix[1];
6186 const int q2 = pix[2];
6188 if( ABS( p0 - q0 ) < alpha &&
6189 ABS( p1 - p0 ) < beta &&
6190 ABS( q1 - q0 ) < beta ) {
/* p1/q1 are adjusted (and tc extended) only if the side is smooth. */
6194 if( ABS( p2 - p0 ) < beta ) {
6195 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6198 if( ABS( q2 - q0 ) < beta ) {
6199 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6203 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6204 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6205 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6206 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong filter, same formulas as filter_mb_edgev. */
6209 /* 4px edge length */
6210 const int p0 = pix[-1];
6211 const int p1 = pix[-2];
6212 const int p2 = pix[-3];
6214 const int q0 = pix[0];
6215 const int q1 = pix[1];
6216 const int q2 = pix[2];
6218 if( ABS( p0 - q0 ) < alpha &&
6219 ABS( p1 - p0 ) < beta &&
6220 ABS( q1 - q0 ) < beta ) {
6222 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6223 if( ABS( p2 - p0 ) < beta)
6225 const int p3 = pix[-4];
6227 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6228 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6229 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6232 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6234 if( ABS( q2 - q0 ) < beta)
6236 const int q3 = pix[3];
6238 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6239 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6240 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6243 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6247 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6248 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6250 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the left vertical chroma edge of an MBAFF macroblock pair,
 * one row at a time (8 chroma rows). Row-wise bS/qp selection mirrors
 * filter_mb_mbaff_edgev; chroma only ever modifies p0/q0.
 * NOTE(review): the bS_index computation lines are elided in this
 * listing.
 */
6255 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6257 for( i = 0; i < 8; i++, pix += stride) {
6265 if( bS[bS_index] == 0 ) {
6269 qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6270 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6271 alpha = alpha_table[index_a];
6272 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6273 if( bS[bS_index] < 4 ) {
/* Chroma uses tc0+1 as the clipping bound. */
6274 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6275 /* 2px edge length (because we use same bS than the one for luma) */
6276 const int p0 = pix[-1];
6277 const int p1 = pix[-2];
6278 const int q0 = pix[0];
6279 const int q1 = pix[1];
6281 if( ABS( p0 - q0 ) < alpha &&
6282 ABS( p1 - p0 ) < beta &&
6283 ABS( q1 - q0 ) < beta ) {
6284 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6286 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6287 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6288 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filter (no tc clipping). */
6291 const int p0 = pix[-1];
6292 const int p1 = pix[-2];
6293 const int q0 = pix[0];
6294 const int q1 = pix[1];
6296 if( ABS( p0 - q0 ) < alpha &&
6297 ABS( p1 - p0 ) < beta &&
6298 ABS( q1 - q0 ) < beta ) {
6300 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6301 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6302 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge (16 pixels wide); the horizontal
 * counterpart of filter_mb_edgev. Pixels above/below the edge are
 * addressed via multiples of pix_next (== stride). bS < 4 is delegated
 * to the DSP normal filter; bS == 4 uses the inline strong filter.
 * NOTE(review): the branch selection lines between the two paths are
 * elided in this listing.
 */
6308 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6310 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6311 const int alpha = alpha_table[index_a];
6312 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6313 const int pix_next = stride;
6318 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6319 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6321 /* 16px edge length, see filter_mb_edgev */
6322 for( d = 0; d < 16; d++ ) {
6323 const int p0 = pix[-1*pix_next];
6324 const int p1 = pix[-2*pix_next];
6325 const int p2 = pix[-3*pix_next];
6326 const int q0 = pix[0];
6327 const int q1 = pix[1*pix_next];
6328 const int q2 = pix[2*pix_next];
6330 if( ABS( p0 - q0 ) < alpha &&
6331 ABS( p1 - p0 ) < beta &&
6332 ABS( q1 - q0 ) < beta ) {
6334 const int p3 = pix[-4*pix_next];
6335 const int q3 = pix[ 3*pix_next];
/* Strong filter only when the edge step is well below alpha. */
6337 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6338 if( ABS( p2 - p0 ) < beta) {
6340 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6341 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6342 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6345 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6347 if( ABS( q2 - q0 ) < beta) {
6349 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6350 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6351 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6354 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are modified. */
6358 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6359 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6361 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblocks one horizontal chroma edge; chroma counterpart of
 * filter_mb_edgeh. Both the normal (bS < 4, tc = tc0+1) and the
 * intra/strong (bS == 4) cases are delegated to the DSP functions.
 * NOTE(review): the branch selection line between the two calls is
 * elided in this listing.
 */
6368 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6370 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6371 const int alpha = alpha_table[index_a];
6372 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6377 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6378 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6380 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6384 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6385 MpegEncContext * const s = &h->s;
6386 const int mb_xy= mb_x + mb_y*s->mb_stride;
6387 int first_vertical_edge_done = 0;
6389 /* FIXME: A given frame may occupy more than one position in
6390 * the reference list. So ref2frm should be populated with
6391 * frame numbers, not indices. */
6392 static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6395 // left mb is in picture
6396 && h->slice_table[mb_xy-1] != 255
6397 // and current and left pair do not have the same interlaced type
6398 && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6399 // and left mb is in the same slice if deblocking_filter == 2
6400 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6401 /* First vertical edge is different in MBAFF frames
6402 * There are 8 different bS to compute and 2 different Qp
6409 first_vertical_edge_done = 1;
6410 for( i = 0; i < 8; i++ ) {
6412 int b_idx= 8 + 4 + 8*y;
6413 int bn_idx= b_idx - 1;
6415 int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
6417 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6418 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6420 } else if( h->non_zero_count_cache[b_idx] != 0 ||
6421 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6422 h->non_zero_count_cache[bn_idx] != 0 ) {
6427 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6428 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6429 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6430 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6437 if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
6438 // Do not use s->qscale as luma quantizer because it has not the same
6439 // value in IPCM macroblocks.
6440 qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
6441 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6442 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
6443 qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
6444 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
6445 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
6448 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
6449 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6450 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6451 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
6452 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
6455 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6456 for( dir = 0; dir < 2; dir++ )
6459 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6460 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6462 if (first_vertical_edge_done) {
6464 first_vertical_edge_done = 0;
6467 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6471 for( edge = start; edge < 4; edge++ ) {
6472 /* mbn_xy: neighbor macroblock */
6473 int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6477 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
6480 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6481 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
6482 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
6484 // This is a special case in the norm where the filtering must
6485 // be done twice (one each of the field) even if we are in a
6486 // frame macroblock.
6488 unsigned int tmp_linesize = 2 * linesize;
6489 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6490 int mbn_xy = mb_xy - 2 * s->mb_stride;
6494 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6495 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6496 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6502 // Do not use s->qscale as luma quantizer because it has not the same
6503 // value in IPCM macroblocks.
6504 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6505 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6506 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6507 filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
6508 chroma_qp = ( h->chroma_qp +
6509 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6510 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6511 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
6514 mbn_xy += s->mb_stride;
6515 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6516 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6517 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6523 // Do not use s->qscale as luma quantizer because it has not the same
6524 // value in IPCM macroblocks.
6525 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6526 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6527 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6528 filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
6529 chroma_qp = ( h->chroma_qp +
6530 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6531 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6532 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6535 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
6536 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
6539 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
6540 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6549 bS[0] = bS[1] = bS[2] = bS[3] = value;
6552 for( i = 0; i < 4; i++ ) {
6553 int x = dir == 0 ? edge : i;
6554 int y = dir == 0 ? i : edge;
6555 int b_idx= 8 + 4 + x + 8*y;
6556 int bn_idx= b_idx - (dir ? 8:1);
6558 if( h->non_zero_count_cache[b_idx] != 0 ||
6559 h->non_zero_count_cache[bn_idx] != 0 ) {
6566 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6567 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6568 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6569 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
6577 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6582 // Do not use s->qscale as luma quantizer because it has not the same
6583 // value in IPCM macroblocks.
6584 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6585 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6586 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6587 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
6589 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6590 if( (edge&1) == 0 ) {
6591 int chroma_qp = ( h->chroma_qp +
6592 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6593 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
6594 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
6597 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6598 if( (edge&1) == 0 ) {
6599 int chroma_qp = ( h->chroma_qp +
6600 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6601 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
6602 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
/**
 * Decodes all macroblocks of the current slice, using CABAC or CAVLC
 * entropy decoding depending on the active PPS, and reports the decoded
 * region to the error-resilience layer via ff_er_add_slice().
 *
 * Fix: the byte-stream end-of-slice test below ("6766") was garbled by
 * mis-encoded characters (s->?gb / s->gb?.size_in_bits); restored to match
 * the identical tests at "6722"/"6724" and "6754".
 *
 * NOTE(review): this listing has elided lines; only visible lines are shown.
 * @return 0 on success, -1 on error
 */
6609 static int decode_slice(H264Context *h){
6610 MpegEncContext * const s = &h->s;
6611 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6615 if( h->pps.cabac ) {
/* CABAC path: byte-align the bit reader, then hand the remaining slice
 * bytes to the arithmetic decoder. */
6619 align_get_bits( &s->gb );
6622 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
6623 ff_init_cabac_decoder( &h->cabac,
6624 s->gb.buffer + get_bits_count(&s->gb)/8,
6625 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6626 /* calculate pre-state */
/* Initialize the 460 CABAC context states from the spec's init tables,
 * scaled by the slice QP; the I and P/B slice types use separate tables. */
6627 for( i= 0; i < 460; i++ ) {
6629 if( h->slice_type == I_TYPE )
6630 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6632 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack state as 2*idx + MPS bit (see CABAC state representation). */
6635 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6637 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6641 int ret = decode_mb_cabac(h);
6644 if(ret>=0) hl_decode_mb(h);
6646 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6647 if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6650 if(ret>=0) ret = decode_mb_cabac(h);
/* end_of_slice_flag is itself CABAC coded. */
6655 eos = get_cabac_terminate( &h->cabac );
6657 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
6658 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6659 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6663 if( ++s->mb_x >= s->mb_width ) {
6665 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6667 if(h->mb_aff_frame) {
6672 if( eos || s->mb_y >= s->mb_height ) {
6673 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6674 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path: plain bit-reader based macroblock decoding. */
6681 int ret = decode_mb_cavlc(h);
6683 if(ret>=0) hl_decode_mb(h);
6685 if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
6687 ret = decode_mb_cavlc(h);
6689 if(ret>=0) hl_decode_mb(h);
6694 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6695 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6700 if(++s->mb_x >= s->mb_width){
6702 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6704 if(h->mb_aff_frame) {
6707 if(s->mb_y >= s->mb_height){
6708 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exact end-of-bitstream means a clean slice end; anything else is an error. */
6710 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6711 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6722 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6723 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6724 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6729 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below appears to be disabled legacy code
 * (it calls decode_mb() and uses s->gb without '&') — confirm it sits
 * inside an #if 0 in the full file. */
6738 for(;s->mb_y < s->mb_height; s->mb_y++){
6739 for(;s->mb_x < s->mb_width; s->mb_x++){
6740 int ret= decode_mb(h);
6745 fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6751 if(++s->mb_x >= s->mb_width){
6753 if(++s->mb_y >= s->mb_height){
6754 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6755 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6759 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6766 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6767 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6768 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6772 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6779 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6782 return -1; //not reached
/**
 * Parses HRD (Hypothetical Reference Decoder) parameters, H.264 Annex E.
 * All syntax elements are read only to advance the bit reader; nothing is
 * stored in the SPS.
 */
6785 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6786 MpegEncContext * const s = &h->s;
/* cpb_cnt_minus1 + 1: number of CPB specifications that follow. */
6788 cpb_count = get_ue_golomb(&s->gb) + 1;
6789 get_bits(&s->gb, 4); /* bit_rate_scale */
6790 get_bits(&s->gb, 4); /* cpb_size_scale */
6791 for(i=0; i<cpb_count; i++){
6792 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6793 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6794 get_bits1(&s->gb); /* cbr_flag */
6796 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6797 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6798 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6799 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parses VUI (Video Usability Information) parameters, H.264 Annex E.
 * Stores sample aspect ratio, timing info and bitstream-restriction data
 * in the SPS; most other fields are read and discarded.
 */
6802 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6803 MpegEncContext * const s = &h->s;
6804 int aspect_ratio_info_present_flag, aspect_ratio_idc;
6805 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6807 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6809 if( aspect_ratio_info_present_flag ) {
6810 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16:16-bit numerator/denominator;
 * smaller idc values index a fixed table of common aspect ratios. */
6811 if( aspect_ratio_idc == EXTENDED_SAR ) {
6812 sps->sar.num= get_bits(&s->gb, 16);
6813 sps->sar.den= get_bits(&s->gb, 16);
6814 }else if(aspect_ratio_idc < 16){
6815 sps->sar= pixel_aspect[aspect_ratio_idc];
6817 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6824 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6826 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6827 get_bits1(&s->gb); /* overscan_appropriate_flag */
6830 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6831 get_bits(&s->gb, 3); /* video_format */
6832 get_bits1(&s->gb); /* video_full_range_flag */
6833 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6834 get_bits(&s->gb, 8); /* colour_primaries */
6835 get_bits(&s->gb, 8); /* transfer_characteristics */
6836 get_bits(&s->gb, 8); /* matrix_coefficients */
6840 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6841 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6842 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6845 sps->timing_info_present_flag = get_bits1(&s->gb);
6846 if(sps->timing_info_present_flag){
6847 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6848 sps->time_scale = get_bits_long(&s->gb, 32);
6849 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6852 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6853 if(nal_hrd_parameters_present_flag)
6854 decode_hrd_parameters(h, sps);
6855 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6856 if(vcl_hrd_parameters_present_flag)
6857 decode_hrd_parameters(h, sps);
6858 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6859 get_bits1(&s->gb); /* low_delay_hrd_flag */
6860 get_bits1(&s->gb); /* pic_struct_present_flag */
6862 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6863 if(sps->bitstream_restriction_flag){
6864 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6865 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6866 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6867 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6868 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames bounds the display reordering depth; used by
 * decode_frame() to size the delayed-picture queue. */
6869 sps->num_reorder_frames = get_ue_golomb(&s->gb);
6870 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
/**
 * Parses a Sequence Parameter Set NAL unit into h->sps_buffer[sps_id].
 * @return 0 on success, negative on error (error paths elided in this
 *         listing — NOTE(review): confirm against full file)
 */
6876 static inline int decode_seq_parameter_set(H264Context *h){
6877 MpegEncContext * const s = &h->s;
6878 int profile_idc, level_idc;
6882 profile_idc= get_bits(&s->gb, 8);
6883 get_bits1(&s->gb); //constraint_set0_flag
6884 get_bits1(&s->gb); //constraint_set1_flag
6885 get_bits1(&s->gb); //constraint_set2_flag
6886 get_bits1(&s->gb); //constraint_set3_flag
6887 get_bits(&s->gb, 4); // reserved
6888 level_idc= get_bits(&s->gb, 8);
6889 sps_id= get_ue_golomb(&s->gb);
6891 sps= &h->sps_buffer[ sps_id ];
6892 sps->profile_idc= profile_idc;
6893 sps->level_idc= level_idc;
/* High profile adds chroma format / bit depth / scaling matrix syntax. */
6895 if(sps->profile_idc >= 100){ //high profile
6896 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
6897 get_bits1(&s->gb); //residual_color_transform_flag
6898 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6899 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6900 get_bits1(&s->gb); //qpprime_y_zero_transform_bypass_flag
6901 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag
6902 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
6907 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6908 sps->poc_type= get_ue_golomb(&s->gb);
/* Picture order count: type 0 uses poc_lsb, type 1 uses explicit offsets. */
6910 if(sps->poc_type == 0){ //FIXME #define
6911 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6912 } else if(sps->poc_type == 1){//FIXME #define
6913 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6914 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6915 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6916 sps->poc_cycle_length= get_ue_golomb(&s->gb);
6918 for(i=0; i<sps->poc_cycle_length; i++)
6919 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6921 if(sps->poc_type > 2){
6922 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6926 sps->ref_frame_count= get_ue_golomb(&s->gb);
6927 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
6928 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6930 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
6931 sps->mb_width= get_ue_golomb(&s->gb) + 1;
6932 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Reject dimensions that would overflow 16*mb arithmetic. */
6933 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
6934 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
6937 sps->frame_mbs_only_flag= get_bits1(&s->gb);
6938 if(!sps->frame_mbs_only_flag)
6939 sps->mb_aff= get_bits1(&s->gb);
6943 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
6945 sps->crop= get_bits1(&s->gb);
6947 sps->crop_left = get_ue_golomb(&s->gb);
6948 sps->crop_right = get_ue_golomb(&s->gb);
6949 sps->crop_top = get_ue_golomb(&s->gb);
6950 sps->crop_bottom= get_ue_golomb(&s->gb);
6951 if(sps->crop_left || sps->crop_top){
6952 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
6958 sps->crop_bottom= 0;
6961 sps->vui_parameters_present_flag= get_bits1(&s->gb);
6962 if( sps->vui_parameters_present_flag )
6963 decode_vui_parameters(h, sps);
6965 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
6966 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
6967 sps_id, sps->profile_idc, sps->level_idc,
6969 sps->ref_frame_count,
6970 sps->mb_width, sps->mb_height,
6971 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
6972 sps->direct_8x8_inference_flag ? "8B8" : "",
6973 sps->crop_left, sps->crop_right,
6974 sps->crop_top, sps->crop_bottom,
6975 sps->vui_parameters_present_flag ? "VUI" : ""
/**
 * Parses a Picture Parameter Set NAL unit into h->pps_buffer[pps_id].
 * FMO (slice groups > 1) is recognized but not supported; the slice-group
 * syntax tables below are kept as reference text only.
 * @param bit_length size of the RBSP in bits, used to detect the optional
 *        8x8-transform extension fields at the end
 */
6981 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
6982 MpegEncContext * const s = &h->s;
6983 int pps_id= get_ue_golomb(&s->gb);
6984 PPS *pps= &h->pps_buffer[pps_id];
6986 pps->sps_id= get_ue_golomb(&s->gb);
6987 pps->cabac= get_bits1(&s->gb);
6988 pps->pic_order_present= get_bits1(&s->gb);
6989 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
6990 if(pps->slice_group_count > 1 ){
6991 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
6992 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
6993 switch(pps->mb_slice_group_map_type){
6996 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
6997 | run_length[ i ] |1 |ue(v) |
7002 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7004 | top_left_mb[ i ] |1 |ue(v) |
7005 | bottom_right_mb[ i ] |1 |ue(v) |
7013 | slice_group_change_direction_flag |1 |u(1) |
7014 | slice_group_change_rate_minus1 |1 |ue(v) |
7019 | slice_group_id_cnt_minus1 |1 |ue(v) |
7020 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7022 | slice_group_id[ i ] |1 |u(v) |
7027 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7028 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7029 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7030 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7034 pps->weighted_pred= get_bits1(&s->gb);
7035 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* QP values are coded as signed offsets from 26. */
7036 pps->init_qp= get_se_golomb(&s->gb) + 26;
7037 pps->init_qs= get_se_golomb(&s->gb) + 26;
7038 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7039 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7040 pps->constrained_intra_pred= get_bits1(&s->gb);
7041 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* More data left => High-profile extension fields are present. */
7043 if(get_bits_count(&s->gb) < bit_length){
7044 pps->transform_8x8_mode= get_bits1(&s->gb);
7045 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag
7046 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7049 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7052 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7053 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7054 pps_id, pps->sps_id,
7055 pps->cabac ? "CABAC" : "CAVLC",
7056 pps->slice_group_count,
7057 pps->ref_count[0], pps->ref_count[1],
7058 pps->weighted_pred ? "weighted" : "",
7059 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7060 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7061 pps->constrained_intra_pred ? "CONSTR" : "",
7062 pps->redundant_pic_cnt_present ? "REDU" : "",
7063 pps->transform_8x8_mode ? "8x8DCT" : ""
7071 * finds the end of the current frame in the bitstream.
7072 * @return the position of the first byte of the next frame, or -1
7074 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7077 ParseContext *pc = &(h->s.parse_context);
7078 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7079 // mb_addr= pc->mb_addr - 1;
/* Scan for start codes; 'state' is a sliding window of the last 4 bytes.
 * Masking with 0xFFFFFF1F compares the 00 00 01 prefix plus the 5-bit
 * nal_unit_type: 1 = non-IDR slice, 2 = partition A, 5 = IDR slice. */
7081 for(i=0; i<=buf_size; i++){
7082 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7083 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7084 if(pc->frame_start_found){
7085 // If there isn't one more byte in the buffer
7086 // the test on first_mb_in_slice cannot be done yet
7087 // do it at next call.
7088 if (i >= buf_size) break;
/* MSB set => first_mb_in_slice ue(v) starts with a 1 bit, i.e. value 0:
 * this slice begins a new picture. */
7089 if (buf[i] & 0x80) {
7090 // first_mb_in_slice is 0, probably the first nal of a new
7092 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7094 pc->frame_start_found= 0;
7098 pc->frame_start_found = 1;
7101 state= (state<<8) | buf[i];
7105 return END_NOT_FOUND;
/**
 * AVCodecParser callback: splits the input byte stream into complete
 * frames using find_frame_end() and ff_combine_frame().
 * On success *poutbuf/*poutbuf_size describe one whole frame.
 */
7108 static int h264_parse(AVCodecParserContext *s,
7109 AVCodecContext *avctx,
7110 uint8_t **poutbuf, int *poutbuf_size,
7111 const uint8_t *buf, int buf_size)
7113 H264Context *h = s->priv_data;
7114 ParseContext *pc = &h->s.parse_context;
7117 next= find_frame_end(h, buf, buf_size);
7119 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7125 *poutbuf = (uint8_t *)buf;
7126 *poutbuf_size = buf_size;
/**
 * Iterates over the NAL units in buf and dispatches each by nal_unit_type
 * (slices, SPS, PPS, SEI, ...). Handles both AVC (length-prefixed, using
 * h->nal_length_size) and Annex-B (start-code) framing.
 * @return number of bytes consumed, or -1 on error
 */
7130 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7131 MpegEncContext * const s = &h->s;
7132 AVCodecContext * const avctx= s->avctx;
7136 for(i=0; i<32; i++){
7137 printf("%X ", buf[i]);
7149 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix. */
7151 for(i = 0; i < h->nal_length_size; i++)
7152 nalsize = (nalsize << 8) | buf[buf_index++];
7154 // start code prefix search
7155 for(; buf_index + 3 < buf_size; buf_index++){
7156 // this should allways succeed in the first iteration
7157 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7161 if(buf_index+3 >= buf_size) break;
/* decode_nal() un-escapes emulation-prevention bytes and reports the
 * payload length and bytes consumed. */
7166 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7167 if(ptr[dst_length - 1] == 0) dst_length--;
7168 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7170 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7171 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7174 if (h->is_avc && (nalsize != consumed))
7175 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7177 buf_index += consumed;
/* hurry_up==1: skip non-reference NAL units entirely. */
7179 if( s->hurry_up == 1 && h->nal_ref_idc == 0 )
7182 switch(h->nal_unit_type){
7184 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7186 init_get_bits(&s->gb, ptr, bit_length);
7188 h->inter_gb_ptr= &s->gb;
7189 s->data_partitioning = 0;
7191 if(decode_slice_header(h) < 0) return -1;
7192 if(h->redundant_pic_count==0 && s->hurry_up < 5 )
/* Data partitioning: partition A carries the slice header, B and C the
 * intra and inter residuals in separate bit readers. */
7196 init_get_bits(&s->gb, ptr, bit_length);
7198 h->inter_gb_ptr= NULL;
7199 s->data_partitioning = 1;
7201 if(decode_slice_header(h) < 0) return -1;
7204 init_get_bits(&h->intra_gb, ptr, bit_length);
7205 h->intra_gb_ptr= &h->intra_gb;
7208 init_get_bits(&h->inter_gb, ptr, bit_length);
7209 h->inter_gb_ptr= &h->inter_gb;
7211 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning && s->hurry_up < 5 )
7217 init_get_bits(&s->gb, ptr, bit_length);
7218 decode_seq_parameter_set(h);
7220 if(s->flags& CODEC_FLAG_LOW_DELAY)
7223 if(avctx->has_b_frames < 2)
7224 avctx->has_b_frames= !s->low_delay;
7227 init_get_bits(&s->gb, ptr, bit_length);
7229 decode_picture_parameter_set(h, bit_length);
7232 case NAL_PICTURE_DELIMITER:
7234 case NAL_FILTER_DATA:
7237 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
7241 if(!s->current_picture_ptr) return buf_index; //no frame
/* End-of-access-unit bookkeeping: picture type, keyframe flag and the
 * POC/frame_num history needed by the next picture. */
7243 s->current_picture_ptr->pict_type= s->pict_type;
7244 s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
7246 h->prev_frame_num_offset= h->frame_num_offset;
7247 h->prev_frame_num= h->frame_num;
7248 if(s->current_picture_ptr->reference){
7249 h->prev_poc_msb= h->poc_msb;
7250 h->prev_poc_lsb= h->poc_lsb;
7252 if(s->current_picture_ptr->reference)
7253 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7263 * returns the number of bytes consumed for building the current frame
7265 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* In truncated mode part of the data may still be buffered by the parse
 * context, so it is not counted as consumed yet. */
7266 if(s->flags&CODEC_FLAG_TRUNCATED){
7267 pos -= s->parse_context.last_index;
7268 if(pos<0) pos=0; // FIXME remove (unneeded?)
7272 if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
7273 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: parses avcC extradata on first use,
 * decodes the NAL units of one frame, then reorders decoded pictures into
 * display order through the h->delayed_pic queue.
 * @return bytes consumed, or negative on error
 */
7279 static int decode_frame(AVCodecContext *avctx,
7280 void *data, int *data_size,
7281 uint8_t *buf, int buf_size)
7283 H264Context *h = avctx->priv_data;
7284 MpegEncContext *s = &h->s;
7285 AVFrame *pict = data;
7288 s->flags= avctx->flags;
7289 s->flags2= avctx->flags2;
7291 /* no supplementary picture */
7292 if (buf_size == 0) {
7296 if(s->flags&CODEC_FLAG_TRUNCATED){
7297 int next= find_frame_end(h, buf, buf_size);
7299 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
7301 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* First call with AVC (MP4-style) extradata: parse the avcC box to get
 * the SPS/PPS sets and the real NAL length size. */
7304 if(h->is_avc && !h->got_avcC) {
7305 int i, cnt, nalsize;
7306 unsigned char *p = avctx->extradata;
7307 if(avctx->extradata_size < 7) {
7308 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7312 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7315 /* sps and pps in the avcC always have length coded with 2 bytes,
7316 so put a fake nal_length_size = 2 while parsing them */
7317 h->nal_length_size = 2;
7318 // Decode sps from avcC
7319 cnt = *(p+5) & 0x1f; // Number of sps
7321 for (i = 0; i < cnt; i++) {
7322 nalsize = BE_16(p) + 2;
7323 if(decode_nal_units(h, p, nalsize) != nalsize) {
7324 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7329 // Decode pps from avcC
7330 cnt = *(p++); // Number of pps
7331 for (i = 0; i < cnt; i++) {
7332 nalsize = BE_16(p) + 2;
7333 if(decode_nal_units(h, p, nalsize) != nalsize) {
7334 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7339 // Now store right nal length size, that will be use to parse all other nals
7340 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7341 // Do not reparse avcC
/* Annex-B extradata (e.g. from raw .264) is decoded once up front. */
7345 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
7346 if(0 < decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) )
7350 buf_index=decode_nal_units(h, buf, buf_size);
7354 //FIXME do something with unavailable reference frames
7356 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7357 if(!s->current_picture_ptr){
7358 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
7363 Picture *out = s->current_picture_ptr;
7364 #if 0 //decode order
7365 *data_size = sizeof(AVFrame);
7367 /* Sort B-frames into display order */
7368 Picture *cur = s->current_picture_ptr;
7369 Picture *prev = h->delayed_output_pic;
7374 int dropped_frame = 0;
/* Grow the reorder delay up to what the stream's VUI declares. */
7377 if(h->sps.bitstream_restriction_flag
7378 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7379 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7383 while(h->delayed_pic[pics]) pics++;
7384 h->delayed_pic[pics++] = cur;
7385 if(cur->reference == 0)
7388 for(i=0; h->delayed_pic[i]; i++)
7389 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Pick the queued picture with the smallest POC (up to the next IDR)
 * as the candidate for output. */
7392 out = h->delayed_pic[0];
7393 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7394 if(h->delayed_pic[i]->poc < out->poc){
7395 out = h->delayed_pic[i];
7399 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7400 if(prev && pics <= s->avctx->has_b_frames)
7402 else if((out_of_order && pics-1 == s->avctx->has_b_frames)
7404 ((!cross_idr && prev && out->poc > prev->poc + 2)
7405 || cur->pict_type == B_TYPE)))
/* Heuristically raise the B-frame delay when the stream needs more
 * reordering than currently assumed. */
7408 s->avctx->has_b_frames++;
7411 else if(out_of_order)
7414 if(out_of_order || pics > s->avctx->has_b_frames){
7415 dropped_frame = (out != h->delayed_pic[out_idx]);
7416 for(i=out_idx; h->delayed_pic[i]; i++)
7417 h->delayed_pic[i] = h->delayed_pic[i+1];
7420 if(prev == out && !dropped_frame)
7423 *data_size = sizeof(AVFrame);
7424 if(prev && prev != out && prev->reference == 1)
7425 prev->reference = 0;
7426 h->delayed_output_pic = out;
7429 *pict= *(AVFrame*)out;
7432 assert(pict->data[0]);
7433 ff_print_debug_info(s, pict);
7434 //printf("out %d\n", (int)pict->data[0]);
7437 /* Return the Picture timestamp as the frame number */
7438 /* we substract 1 because it is added on utils.c */
7439 avctx->frame_number = s->picture_number - 1;
7441 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fills h->mb_avail[] with the availability of the current macroblock's
 * neighbors (same slice required): [0]=top-left, [1]=top, [2]=top-right,
 * [3]=left; [4]/[5] are constants.
 */
7444 static inline void fill_mb_avail(H264Context *h){
7445 MpegEncContext * const s = &h->s;
7446 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7449 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7450 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7451 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7457 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7458 h->mb_avail[4]= 1; //FIXME move out
7459 h->mb_avail[5]= 0; //FIXME move out
/* Self-test body (presumably inside #ifdef TEST / main() — the function
 * header is elided in this listing; TODO confirm against full file).
 * Exercises Exp-Golomb coding, the 4x4 (I)DCT round trip, the quantizer
 * and the NAL escape/un-escape layer. */
7465 #define SIZE (COUNT*40)
7471 // int int_temp[10000];
7473 AVCodecContext avctx;
7475 dsputil_init(&dsp, &avctx);
/* Round-trip test: write COUNT unsigned Exp-Golomb codes, read them back. */
7477 init_put_bits(&pb, temp, SIZE);
7478 printf("testing unsigned exp golomb\n");
7479 for(i=0; i<COUNT; i++){
7481 set_ue_golomb(&pb, i);
7482 STOP_TIMER("set_ue_golomb");
7484 flush_put_bits(&pb);
7486 init_get_bits(&gb, temp, 8*SIZE);
7487 for(i=0; i<COUNT; i++){
7490 s= show_bits(&gb, 24);
7493 j= get_ue_golomb(&gb);
7495 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7498 STOP_TIMER("get_ue_golomb");
/* Same round trip for signed Exp-Golomb codes centered on zero. */
7502 init_put_bits(&pb, temp, SIZE);
7503 printf("testing signed exp golomb\n");
7504 for(i=0; i<COUNT; i++){
7506 set_se_golomb(&pb, i - COUNT/2);
7507 STOP_TIMER("set_se_golomb");
7509 flush_put_bits(&pb);
7511 init_get_bits(&gb, temp, 8*SIZE);
7512 for(i=0; i<COUNT; i++){
7515 s= show_bits(&gb, 24);
7518 j= get_se_golomb(&gb);
7519 if(j != i - COUNT/2){
7520 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7523 STOP_TIMER("get_se_golomb");
/* DCT -> crude quantize/dequantize -> IDCT; accumulate reconstruction error. */
7526 printf("testing 4x4 (I)DCT\n");
7529 uint8_t src[16], ref[16];
7530 uint64_t error= 0, max_error=0;
7532 for(i=0; i<COUNT; i++){
7534 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7535 for(j=0; j<16; j++){
7536 ref[j]= random()%255;
7537 src[j]= random()%255;
7540 h264_diff_dct_c(block, src, ref, 4);
7543 for(j=0; j<16; j++){
7544 // printf("%d ", block[j]);
7545 block[j]= block[j]*4;
7546 if(j&1) block[j]= (block[j]*4 + 2)/5;
7547 if(j&4) block[j]= (block[j]*4 + 2)/5;
7551 s->dsp.h264_idct_add(ref, block, 4);
7552 /* for(j=0; j<16; j++){
7553 printf("%d ", ref[j]);
7557 for(j=0; j<16; j++){
7558 int diff= ABS(src[j] - ref[j]);
7561 max_error= FFMAX(max_error, diff);
7564 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7566 printf("testing quantizer\n");
7567 for(qp=0; qp<52; qp++){
7569 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer: encode_nal() must escape start codes so decode_nal()
 * recovers the original bitstream exactly. */
7573 printf("Testing NAL layer\n");
7575 uint8_t bitstream[COUNT];
7576 uint8_t nal[COUNT*2];
7578 memset(&h, 0, sizeof(H264Context));
7580 for(i=0; i<COUNT; i++){
7588 for(j=0; j<COUNT; j++){
7589 bitstream[j]= (random() % 255) + 1;
7592 for(j=0; j<zeros; j++){
7593 int pos= random() % COUNT;
7594 while(bitstream[pos] == 0){
7603 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7605 printf("encoding failed\n");
7609 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7613 if(out_length != COUNT){
7614 printf("incorrect length %d %d\n", out_length, COUNT);
7618 if(consumed != nal_length){
7619 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7623 if(memcmp(bitstream, out, COUNT)){
7624 printf("missmatch\n");
7629 printf("Testing RBSP\n");
/**
 * AVCodec close callback: frees the per-context H.264 tables.
 */
7637 static int decode_end(AVCodecContext *avctx)
7639 H264Context *h = avctx->priv_data;
7640 MpegEncContext *s = &h->s;
7642 free_tables(h); //FIXME cleanup init stuff perhaps
7645 // memset(h, 0, sizeof(H264Context));
/* AVCodec registration entry for the H.264 decoder (several initializer
 * fields are elided in this listing). */
7651 AVCodec h264_decoder = {
7655 sizeof(H264Context),
7660 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
7664 AVCodecParser h264_parser = {
7666 sizeof(H264Context),