X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fh264.h;h=caea7ba7eb0200820c742e672f6121cb926ab6f9;hb=f666276fa61623f4d6fa97b99d6b336ec0eba8c3;hp=ba813b66ba5077bca7b5c73002330a4b2a71df2e;hpb=32e543f866d9d4b450729e93cd81dacd8c457971;p=ffmpeg diff --git a/libavcodec/h264.h b/libavcodec/h264.h index ba813b66ba5..caea7ba7eb0 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -2,25 +2,25 @@ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder * Copyright (c) 2003 Michael Niedermayer * - * This file is part of FFmpeg. + * This file is part of Libav. * - * FFmpeg is free software; you can redistribute it and/or + * Libav is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * FFmpeg is distributed in the hope that it will be useful, + * Libav is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software + * License along with Libav; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** - * @file libavcodec/h264.h + * @file * H.264 / AVC / MPEG4 part10 codec. * @author Michael Niedermayer */ @@ -39,16 +39,6 @@ #define interlaced_dct interlaced_dct_is_a_bad_name #define mb_intra mb_intra_is_not_initialized_see_mb_type -#define LUMA_DC_BLOCK_INDEX 25 -#define CHROMA_DC_BLOCK_INDEX 26 - -#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 -#define COEFF_TOKEN_VLC_BITS 8 -#define TOTAL_ZEROS_VLC_BITS 9 -#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 -#define RUN_VLC_BITS 3 -#define RUN7_VLC_BITS 6 - #define MAX_SPS_COUNT 32 #define MAX_PPS_COUNT 256 @@ -60,8 +50,6 @@ * of progressive decoding by about 2%. */ #define ALLOW_INTERLACE -#define ALLOW_NOCHROMA - #define FMO 0 /** @@ -75,6 +63,10 @@ #define MB_FIELD h->mb_field_decoding_flag #define FRAME_MBAFF h->mb_aff_frame #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) +#define LEFT_MBS 2 +#define LTOP 0 +#define LBOT 1 +#define LEFT(i) (i) #else #define MB_MBAFF 0 #define MB_FIELD 0 @@ -82,19 +74,20 @@ #define FIELD_PICTURE 0 #undef IS_INTERLACED #define IS_INTERLACED(mb_type) 0 +#define LEFT_MBS 1 +#define LTOP 0 +#define LBOT 0 +#define LEFT(i) 0 #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) -#ifdef ALLOW_NOCHROMA -#define CHROMA h->sps.chroma_format_idc -#else -#define CHROMA 1 -#endif - #ifndef CABAC #define CABAC h->pps.cabac #endif +#define CHROMA422 (h->sps.chroma_format_idc == 2) +#define CHROMA444 (h->sps.chroma_format_idc == 3) + #define EXTENDED_SAR 255 #define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit @@ -108,6 +101,7 @@ */ #define DELAYED_PIC_REF 4 +#define QP_MAX_NUM (51 + 2*6) // The maximum supported qp /* NAL unit types */ enum { @@ -197,7 +191,7 @@ typedef struct SPS{ int num_reorder_frames; int scaling_matrix_present; uint8_t scaling_matrix4[6][16]; - uint8_t scaling_matrix8[2][64]; + uint8_t scaling_matrix8[6][64]; int nal_hrd_parameters_present_flag; int vcl_hrd_parameters_present_flag; int pic_struct_present_flag; @@ -209,6 +203,7 @@ typedef struct SPS{ int bit_depth_luma; ///< bit_depth_luma_minus8 + 8 int bit_depth_chroma; ///< bit_depth_chroma_minus8 + 8 int residual_color_transform_flag; ///< residual_colour_transform_flag + int constraint_set_flags; ///< constraint_set[0-3]_flag }SPS; /** @@ -231,7 +226,7 @@ typedef struct PPS{ int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag uint8_t scaling_matrix4[6][16]; - uint8_t scaling_matrix8[2][64]; + uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][64]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table int chroma_qp_diff; }PPS; @@ -264,6 +259,7 @@ typedef struct MMCO{ typedef struct H264Context{ MpegEncContext s; H264DSPContext h264dsp; + int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 int chroma_qp[2]; //QPc int qp_thresh; ///< QP threshold to skip loopfilter @@ -278,12 +274,12 @@ typedef struct H264Context{ int topleft_mb_xy; int top_mb_xy; int topright_mb_xy; - int left_mb_xy[2]; + int left_mb_xy[LEFT_MBS]; int topleft_type; int top_type; int topright_type; - int left_type[2]; + int left_type[LEFT_MBS]; const uint8_t * left_block; int topleft_partition; @@ -295,21 +291,15 @@ typedef struct H264Context{ unsigned int top_samples_available; unsigned int topright_samples_available; unsigned int left_samples_available; - uint8_t (*top_borders[2])[16+2*8]; + uint8_t (*top_borders[2])[(16*3)*2]; /** * non zero coeff count cache. * is 64 if not available. */ - DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8]; + DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8]; - /* - .UU.YYYY - .UU.YYYY - .vv.YYYY - .VV.YYYY - */ - uint8_t (*non_zero_count)[32]; + uint8_t (*non_zero_count)[48]; /** * Motion vector cache. @@ -319,11 +309,6 @@ typedef struct H264Context{ #define LIST_NOT_USED -1 //FIXME rename? #define PART_NOT_AVAILABLE -2 - /** - * is 1 if the specific list MV&references are set to 0,0,-2. - */ - int mv_cache_clean[2]; - /** * number of neighbors (top and/or left) that used 8x8 dct */ @@ -333,7 +318,7 @@ typedef struct H264Context{ * block_offset[ 0..23] for frame macroblocks * block_offset[24..47] for field macroblocks */ - int block_offset[2*(16+8)]; + int block_offset[2*(16*3)]; uint32_t *mb2b_xy; //FIXME are these 4 a good idea? uint32_t *mb2br_xy; @@ -352,10 +337,10 @@ typedef struct H264Context{ */ PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[2][52][64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? + uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[2])[64]; + uint32_t (*dequant8_coeff[6])[64]; int slice_num; uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 @@ -378,7 +363,7 @@ typedef struct H264Context{ //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss int luma_weight[48][2][2]; int chroma_weight[48][2][2][2]; - int implicit_weight[48][48]; + int implicit_weight[48][48][2]; int direct_spatial_mv_pred; int col_parity; @@ -405,14 +390,15 @@ typedef struct H264Context{ GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*24]; - DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb + DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; + DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[460]; + uint8_t cabac_state[1024]; /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ uint16_t *cbp_table; @@ -464,6 +450,7 @@ typedef struct H264Context{ */ int is_avc; ///< this flag is != 0 if codec is avc1 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + int got_first; ///< this flag is != 0 if we've parsed a frame SPS *sps_buffers[MAX_SPS_COUNT]; PPS *pps_buffers[MAX_PPS_COUNT]; @@ -501,7 +488,10 @@ typedef struct H264Context{ Picture *long_ref[32]; Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? + int last_pocs[MAX_DELAYED_PIC_COUNT]; + Picture *next_output_pic; int outputed_poc; + int next_outputed_poc; /** * memory management control operations buffer. @@ -515,7 +505,7 @@ typedef struct H264Context{ int cabac_init_idc; /** - * @defgroup multithreading Members for slice based multithreading + * @name Members for slice based multithreading * @{ */ struct H264Context *thread_context[MAX_THREADS]; @@ -588,20 +578,11 @@ typedef struct H264Context{ int sei_buffering_period_present; ///< Buffering period SEI flag int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs - //SVQ3 specific fields - int halfpel_flag; - int thirdpel_flag; - int unknown_svq3_flag; - int next_slice_index; - uint32_t svq3_watermark_key; + int cur_chroma_format_idc; }H264Context; -extern const uint8_t ff_h264_chroma_qp[52]; - -void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); - -void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); +extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). /** * Decode SEI @@ -613,13 +594,18 @@ int ff_h264_decode_sei(H264Context *h); */ int ff_h264_decode_seq_parameter_set(H264Context *h); +/** + * compute profile from sps + */ +int ff_h264_get_profile(SPS *sps); + /** * Decode PPS */ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); /** - * Decodes a network abstraction layer unit. + * Decode a network abstraction layer unit. * @param consumed is the number of bytes used as input * @param length is the length of the array * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? @@ -628,29 +614,23 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); /** - * identifies the exact end of the bitstream - * @return the length of the trailing, or 0 if damaged - */ -int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src); - -/** - * frees any data that may have been allocated in the H264 context like SPS, PPS etc. + * Free any data that may have been allocated in the H264 context like SPS, PPS etc. */ av_cold void ff_h264_free_context(H264Context *h); /** - * reconstructs bitstream slice_type. + * Reconstruct bitstream slice_type. */ int ff_h264_get_slice_type(const H264Context *h); /** - * allocates tables. + * Allocate tables. * needs width/height */ int ff_h264_alloc_tables(H264Context *h); /** - * fills the default_ref_list. + * Fill the default_ref_list. */ int ff_h264_fill_default_ref_list(H264Context *h); @@ -659,38 +639,40 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h); void ff_h264_remove_all_refs(H264Context *h); /** - * Executes the reference picture marking (memory management control operations). + * Execute the reference picture marking (memory management control operations). */ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count); int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); +void ff_generate_sliding_window_mmcos(H264Context *h); + /** - * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. */ int ff_h264_check_intra4x4_pred_mode(H264Context *h); /** - * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode); -void ff_h264_write_back_intra_pred_mode(H264Context *h); void ff_h264_hl_decode_mb(H264Context *h); int ff_h264_frame_start(H264Context *h); +int ff_h264_decode_extradata(H264Context *h); av_cold int ff_h264_decode_init(AVCodecContext *avctx); av_cold int ff_h264_decode_end(AVCodecContext *avctx); av_cold void ff_h264_decode_init_vlc(void); /** - * decodes a macroblock + * Decode a macroblock * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed */ int ff_h264_decode_mb_cavlc(H264Context *h); /** - * decodes a CABAC coded macroblock + * Decode a CABAC coded macroblock * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed */ int ff_h264_decode_mb_cabac(H264Context *h); @@ -721,16 +703,45 @@ o-o o-o / / / o-o o-o */ + +/* Scan8 organization: + * 0 1 2 3 4 5 6 7 + * 0 DY y y y y y + * 1 y Y Y Y Y + * 2 y Y Y Y Y + * 3 y Y Y Y Y + * 4 y Y Y Y Y + * 5 DU u u u u u + * 6 u U U U U + * 7 u U U U U + * 8 u U U U U + * 9 u U U U U + * 10 DV v v v v v + * 11 v V V V V + * 12 v V V V V + * 13 v V V V V + * 14 v V V V V + * DY/DU/DV are for luma/chroma DC. + */ + +#define LUMA_DC_BLOCK_INDEX 48 +#define CHROMA_DC_BLOCK_INDEX 49 + //This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16 + 2*4]={ - 4+1*8, 5+1*8, 4+2*8, 5+2*8, - 6+1*8, 7+1*8, 6+2*8, 7+2*8, - 4+3*8, 5+3*8, 4+4*8, 5+4*8, - 6+3*8, 7+3*8, 6+4*8, 7+4*8, - 1+1*8, 2+1*8, - 1+2*8, 2+2*8, - 1+4*8, 2+4*8, - 1+5*8, 2+5*8, +static const uint8_t scan8[16*3 + 3]={ + 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, + 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, + 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, + 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, + 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, + 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, + 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, + 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, + 4+11*8, 5+11*8, 4+12*8, 5+12*8, + 6+11*8, 7+11*8, 6+12*8, 7+12*8, + 4+13*8, 5+13*8, 4+14*8, 5+14*8, + 6+13*8, 7+13*8, 6+14*8, 7+14*8, + 0+ 0*8, 0+ 5*8, 0+10*8 }; static av_always_inline uint32_t pack16to32(int a, int b){ @@ -752,637 +763,14 @@ static av_always_inline uint16_t pack8to16(int a, int b){ /** * gets the chroma qp. */ -static inline int get_chroma_qp(H264Context *h, int t, int qscale){ +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){ return h->pps.chroma_qp_table[t][qscale]; } -static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); - -static void fill_decode_neighbors(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int topleft_xy, top_xy, topright_xy, left_xy[2]; - static const uint8_t left_block_options[4][16]={ - {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, - {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, - {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, - {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} - }; - - h->topleft_partition= -1; - - top_xy = mb_xy - (s->mb_stride << MB_FIELD); - - /* Wow, what a mess, why didn't they simplify the interlacing & intra - * stuff, I can't imagine that these complex rules are worth it. */ - - topleft_xy = top_xy - 1; - topright_xy= top_xy + 1; - left_xy[1] = left_xy[0] = mb_xy-1; - h->left_block = left_block_options[0]; - if(FRAME_MBAFF){ - const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); - const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ - if (left_mb_field_flag != curr_mb_field_flag) { - left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; - if (curr_mb_field_flag) { - left_xy[1] += s->mb_stride; - h->left_block = left_block_options[3]; - } else { - topleft_xy += s->mb_stride; - // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition - h->topleft_partition = 0; - h->left_block = left_block_options[1]; - } - } - }else{ - if(curr_mb_field_flag){ - topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); - topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); - top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); - } - if (left_mb_field_flag != curr_mb_field_flag) { - if (curr_mb_field_flag) { - left_xy[1] += s->mb_stride; - h->left_block = left_block_options[3]; - } else { - h->left_block = left_block_options[2]; - } - } - } - } - - h->topleft_mb_xy = topleft_xy; - h->top_mb_xy = top_xy; - h->topright_mb_xy= topright_xy; - h->left_mb_xy[0] = left_xy[0]; - h->left_mb_xy[1] = left_xy[1]; - //FIXME do we need all in the context? - - h->topleft_type = s->current_picture.mb_type[topleft_xy] ; - h->top_type = s->current_picture.mb_type[top_xy] ; - h->topright_type= s->current_picture.mb_type[topright_xy]; - h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ; - h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ; - - if(FMO){ - if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; - }else{ - if(h->slice_table[topleft_xy ] != h->slice_num){ - h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; - } - } - if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; -} - -static void fill_decode_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - int topleft_xy, top_xy, topright_xy, left_xy[2]; - int topleft_type, top_type, topright_type, left_type[2]; - const uint8_t * left_block= h->left_block; - int i; - - topleft_xy = h->topleft_mb_xy ; - top_xy = h->top_mb_xy ; - topright_xy = h->topright_mb_xy; - left_xy[0] = h->left_mb_xy[0] ; - left_xy[1] = h->left_mb_xy[1] ; - topleft_type = h->topleft_type ; - top_type = h->top_type ; - topright_type= h->topright_type ; - left_type[0] = h->left_type[0] ; - left_type[1] = h->left_type[1] ; - - if(!IS_SKIP(mb_type)){ - if(IS_INTRA(mb_type)){ - int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; - h->topleft_samples_available= - h->top_samples_available= - h->left_samples_available= 0xFFFF; - h->topright_samples_available= 0xEEEA; - - if(!(top_type & type_mask)){ - h->topleft_samples_available= 0xB3FF; - h->top_samples_available= 0x33FF; - h->topright_samples_available= 0x26EA; - } - if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ - if(IS_INTERLACED(mb_type)){ - if(!(left_type[0] & type_mask)){ - h->topleft_samples_available&= 0xDFFF; - h->left_samples_available&= 0x5FFF; - } - if(!(left_type[1] & type_mask)){ - h->topleft_samples_available&= 0xFF5F; - h->left_samples_available&= 0xFF5F; - } - }else{ - int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride]; - - assert(left_xy[0] == left_xy[1]); - if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; - } - } - }else{ - if(!(left_type[0] & type_mask)){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; - } - } - - if(!(topleft_type & type_mask)) - h->topleft_samples_available&= 0x7FFF; - - if(!(topright_type & type_mask)) - h->topright_samples_available&= 0xFBFF; - - if(IS_INTRA4x4(mb_type)){ - if(IS_INTRA4x4(top_type)){ - AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); - }else{ - h->intra4x4_pred_mode_cache[4+8*0]= - h->intra4x4_pred_mode_cache[5+8*0]= - h->intra4x4_pred_mode_cache[6+8*0]= - h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); - } - for(i=0; i<2; i++){ - if(IS_INTRA4x4(left_type[i])){ - int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; - }else{ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask); - } - } - } - } - - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - if(top_type){ - AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); - h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; - h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; - - h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; - h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; - }else { - h->non_zero_count_cache[1+8*0]= - h->non_zero_count_cache[2+8*0]= - - h->non_zero_count_cache[1+8*3]= - h->non_zero_count_cache[2+8*3]= - AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); - } - - for (i=0; i<2; i++) { - if(left_type[i]){ - h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; - h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; - h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; - h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; - }else{ - h->non_zero_count_cache[3+8*1 + 2*8*i]= - h->non_zero_count_cache[3+8*2 + 2*8*i]= - h->non_zero_count_cache[0+8*1 + 8*i]= - h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; - } - } - - if( CABAC ) { - // top_cbp - if(top_type) { - h->top_cbp = h->cbp_table[top_xy]; - } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; - } - // left_cbp - if (left_type[0]) { - h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0) - | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2) - | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2); - } else { - h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; - } - } - } - -#if 1 - if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ - int list; - for(list=0; listlist_count; list++){ - if(!USES_LIST(mb_type, list)){ - /*if(!h->mv_cache_clean[list]){ - memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? - memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); - h->mv_cache_clean[list]= 1; - }*/ - continue; - } - assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); - - h->mv_cache_clean[list]= 0; - - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; - AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); - h->ref_cache[list][scan8[0] + 0 - 1*8]= - h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2]; - h->ref_cache[list][scan8[0] + 2 - 1*8]= - h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3]; - }else{ - AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); - AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); - } - - if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ - for(i=0; i<2; i++){ - int cache_idx = scan8[0] - 1 + i*2*8; - if(USES_LIST(left_type[i], list)){ - const int b_xy= h->mb2b_xy[left_xy[i]] + 3; - const int b8_xy= 4*left_xy[i] + 1; - AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); - AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); - h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)]; - h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)]; - }else{ - AV_ZERO32(h->mv_cache [list][cache_idx ]); - AV_ZERO32(h->mv_cache [list][cache_idx+8]); - h->ref_cache[list][cache_idx ]= - h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; - } - } - }else{ - if(USES_LIST(left_type[0], list)){ - const int b_xy= h->mb2b_xy[left_xy[0]] + 3; - const int b8_xy= 4*left_xy[0] + 1; - AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]); - h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)]; - }else{ - AV_ZERO32(h->mv_cache [list][scan8[0] - 1]); - h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; - } - } - - if(USES_LIST(topright_type, list)){ - const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; - AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); - h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2]; - }else{ - AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); - h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; - } - if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){ - if(USES_LIST(topleft_type, list)){ - const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); - const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); - AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); - h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; - }else{ - AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); - h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; - } - } - - if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) - continue; - - if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) { - h->ref_cache[list][scan8[4 ]] = - h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; - AV_ZERO32(h->mv_cache [list][scan8[4 ]]); - AV_ZERO32(h->mv_cache [list][scan8[12]]); - - if( CABAC ) { - /* XXX beurk, Load mvd */ - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2br_xy[top_xy]; - AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); - }else{ - AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); - } - if(USES_LIST(left_type[0], list)){ - const int b_xy= h->mb2br_xy[left_xy[0]] + 6; - AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]); - AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]); - }else{ - AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); - AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); - } - if(USES_LIST(left_type[1], list)){ - const int b_xy= h->mb2br_xy[left_xy[1]] + 6; - AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]); - AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]); - }else{ - AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); - AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); - } - AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); - AV_ZERO16(h->mvd_cache [list][scan8[12]]); - if(h->slice_type_nos == FF_B_TYPE){ - fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); - - if(IS_DIRECT(top_type)){ - AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1)); - }else if(IS_8X8(top_type)){ - int b8_xy = 4*top_xy; - h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2]; - h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3]; - }else{ - AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); - } - - if(IS_DIRECT(left_type[0])) - h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[0])) - h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)]; - else - h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1; - - if(IS_DIRECT(left_type[1])) - h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[1])) - h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)]; - else - h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1; - } - } - } - if(FRAME_MBAFF){ -#define MAP_MVS\ - MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ - MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ - MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ - MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ - MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ - MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) - if(MB_FIELD){ -#define MAP_F2F(idx, mb_type)\ - if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] <<= 1;\ - h->mv_cache[list][idx][1] /= 2;\ - h->mvd_cache[list][idx][1] >>=1;\ - } - MAP_MVS -#undef MAP_F2F - }else{ -#define MAP_F2F(idx, mb_type)\ - if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] >>= 1;\ - h->mv_cache[list][idx][1] <<= 1;\ - h->mvd_cache[list][idx][1] <<= 1;\ - } - MAP_MVS -#undef MAP_F2F - } - } - } - } -#endif - - h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); -} - -/** - * - * @return non zero if the loop filter can be skiped - */ -static int fill_filter_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int top_xy, left_xy[2]; - int top_type, left_type[2]; - - top_xy = mb_xy - (s->mb_stride << MB_FIELD); - - //FIXME deblocking could skip the intra and nnz parts. - - /* Wow, what a mess, why didn't they simplify the interlacing & intra - * stuff, I can't imagine that these complex rules are worth it. */ - - left_xy[1] = left_xy[0] = mb_xy-1; - if(FRAME_MBAFF){ - const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); - const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ - if (left_mb_field_flag != curr_mb_field_flag) { - left_xy[0] -= s->mb_stride; - } - }else{ - if(curr_mb_field_flag){ - top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); - } - if (left_mb_field_flag != curr_mb_field_flag) { - left_xy[1] += s->mb_stride; - } - } - } - - h->top_mb_xy = top_xy; - h->left_mb_xy[0] = left_xy[0]; - h->left_mb_xy[1] = left_xy[1]; - { - //for sufficiently low qp, filtering wouldn't do anything - //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp - int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice - int qp = s->current_picture.qscale_table[mb_xy]; - if(qp <= qp_thresh - && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) - && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ - if(!FRAME_MBAFF) - return 1; - if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) - && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) - return 1; - } - } - - top_type = s->current_picture.mb_type[top_xy] ; - left_type[0] = s->current_picture.mb_type[left_xy[0]]; - left_type[1] = s->current_picture.mb_type[left_xy[1]]; - if(h->deblocking_filter == 2){ - if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; - if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; - }else{ - if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; - if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; - } - h->top_type = top_type ; - h->left_type[0]= left_type[0]; - h->left_type[1]= left_type[1]; - - if(IS_INTRA(mb_type)) - return 0; - - AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); - AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); - AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); - AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); - AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); - - h->cbp= h->cbp_table[mb_xy]; - - { - int list; - for(list=0; listlist_count; list++){ - int8_t *ref; - int y, b_stride; - int16_t (*mv_dst)[2]; - int16_t (*mv_src)[2]; - - if(!USES_LIST(mb_type, list)){ - fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); - AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); - AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); - AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); - AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); - continue; - } - - ref = &s->current_picture.ref_index[list][4*mb_xy]; - { - int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); - AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); - AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); - ref += 2; - AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); - AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); - } - - b_stride = h->b_stride; - mv_dst = &h->mv_cache[list][scan8[0]]; - mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; - for(y=0; y<4; y++){ - AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); - } - - } - } - - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - if(top_type){ - AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); - } - - if(left_type[0]){ - h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; - h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; - h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; - h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; - } - - // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs - if(!CABAC && h->pps.transform_8x8_mode){ - if(IS_8x8DCT(top_type)){ - h->non_zero_count_cache[4+8*0]= - h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; - h->non_zero_count_cache[6+8*0]= - h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; - } - if(IS_8x8DCT(left_type[0])){ - h->non_zero_count_cache[3+8*1]= - h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF - } - if(IS_8x8DCT(left_type[1])){ - h->non_zero_count_cache[3+8*3]= - h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF - } - - if(IS_8x8DCT(mb_type)){ - h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= - h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; - - h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= - h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; - - h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= - h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; - - h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= - h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; - } - } - - if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ - int list; - for(list=0; listlist_count; list++){ - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; - const int b8_xy= 4*top_xy + 2; - int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); - AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); - h->ref_cache[list][scan8[0] + 0 - 1*8]= - h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; - h->ref_cache[list][scan8[0] + 2 - 1*8]= - h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; - }else{ - AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); - AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); - } - - if(!IS_INTERLACED(mb_type^left_type[0])){ - if(USES_LIST(left_type[0], list)){ - const int b_xy= h->mb2b_xy[left_xy[0]] + 3; - const int b8_xy= 4*left_xy[0] + 1; - int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); - AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); - AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); - AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); - AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); - h->ref_cache[list][scan8[0] - 1 + 0 ]= - h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; - h->ref_cache[list][scan8[0] - 1 +16 ]= - h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]]; - }else{ - AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); - AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); - AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); - AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); - h->ref_cache[list][scan8[0] - 1 + 0 ]= - h->ref_cache[list][scan8[0] - 1 + 8 ]= - h->ref_cache[list][scan8[0] - 1 + 16 ]= - h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; - } - } - } - } - - return 0; -} - /** * gets the predicted intra4x4 prediction mode. */ -static inline int pred_intra_mode(H264Context *h, int n){ +static av_always_inline int pred_intra_mode(H264Context *h, int n){ const int index8= scan8[n]; const int left= h->intra4x4_pred_mode_cache[index8 - 1]; const int top = h->intra4x4_pred_mode_cache[index8 - 8]; @@ -1394,62 +782,87 @@ static inline int pred_intra_mode(H264Context *h, int n){ else return min; } -static inline void write_back_non_zero_count(H264Context *h){ +static av_always_inline void write_back_intra_pred_mode(H264Context *h){ + int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + int8_t *i4x4_cache= h->intra4x4_pred_mode_cache; + + AV_COPY32(i4x4, i4x4_cache + 4 + 8*4); + i4x4[4]= i4x4_cache[7+8*3]; + i4x4[5]= i4x4_cache[7+8*2]; + i4x4[6]= i4x4_cache[7+8*1]; +} + +static av_always_inline void write_back_non_zero_count(H264Context *h){ const int mb_xy= h->mb_xy; + uint8_t *nnz = h->non_zero_count[mb_xy]; + uint8_t *nnz_cache = h->non_zero_count_cache; + + AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]); + AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]); + AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]); + AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]); + AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]); + AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]); + AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); + AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); + + if(!h->s.chroma_y_shift){ + AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); + AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); + AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); + AV_COPY32(&nnz[44], &nnz_cache[4+8*14]); + } +} - AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); - AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); - AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); - AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); - AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); +static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride, + int b_xy, int b8_xy, int mb_type, int list ) +{ + int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; + int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; + AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); + AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1); + AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2); + AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3); + if( CABAC ) { + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; + uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + if(IS_SKIP(mb_type)) + AV_ZERO128(mvd_dst); + else{ + AV_COPY64(mvd_dst, mvd_src + 8*3); + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); + } + } + + { + int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy]; + int8_t *ref_cache = h->ref_cache[list]; + ref_index[0+0*2]= ref_cache[scan8[0]]; + ref_index[1+0*2]= ref_cache[scan8[4]]; + ref_index[0+1*2]= ref_cache[scan8[8]]; + ref_index[1+1*2]= ref_cache[scan8[12]]; + } } -static inline void write_back_motion(H264Context *h, int mb_type){ +static av_always_inline void write_back_motion(H264Context *h, int mb_type){ MpegEncContext * const s = &h->s; + const int b_stride = h->b_stride; const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy const int b8_xy= 4*h->mb_xy; - int list; - - if(!USES_LIST(mb_type, 0)) - fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); - for(list=0; listlist_count; list++){ - int y, b_stride; - int16_t (*mv_dst)[2]; - int16_t (*mv_src)[2]; - - if(!USES_LIST(mb_type, list)) - continue; - - b_stride = h->b_stride; - mv_dst = &s->current_picture.motion_val[list][b_xy]; - mv_src = &h->mv_cache[list][scan8[0]]; - for(y=0; y<4; y++){ - AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); - } - if( CABAC ) { - uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; - uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; - if(IS_SKIP(mb_type)) - AV_ZERO128(mvd_dst); - else{ - AV_COPY64(mvd_dst, mvd_src + 8*3); - AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); - AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); - AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); - } - } - - { - int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; - ref_index[0+0*2]= h->ref_cache[list][scan8[0]]; - ref_index[1+0*2]= h->ref_cache[list][scan8[4]]; - ref_index[0+1*2]= h->ref_cache[list][scan8[8]]; - ref_index[1+1*2]= h->ref_cache[list][scan8[12]]; - } + if(USES_LIST(mb_type, 0)){ + write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0); + }else{ + fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy], + 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); + } + if(USES_LIST(mb_type, 1)){ + write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1); } - if(h->slice_type_nos == FF_B_TYPE && CABAC){ + if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){ if(IS_8X8(mb_type)){ uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; direct_table[1] = h->sub_mb_type[1]>>1; @@ -1459,57 +872,11 @@ static inline void write_back_motion(H264Context *h, int mb_type){ } } -static inline int get_dct8x8_allowed(H264Context *h){ +static av_always_inline int get_dct8x8_allowed(H264Context *h){ if(h->sps.direct_8x8_inference_flag) return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); else return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); } -/** - * decodes a P_SKIP or B_SKIP macroblock - */ -static void decode_mb_skip(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int mb_type=0; - - memset(h->non_zero_count[mb_xy], 0, 32); - memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui - - if(MB_FIELD) - mb_type|= MB_TYPE_INTERLACED; - - if( h->slice_type_nos == FF_B_TYPE ) - { - // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; - if(h->direct_spatial_mv_pred){ - fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... - } - ff_h264_pred_direct_motion(h, &mb_type); - mb_type|= MB_TYPE_SKIP; - } - else - { - int mx, my; - mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; - - fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... - pred_pskip_motion(h, &mx, &my); - fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); - } - - write_back_motion(h, mb_type); - s->current_picture.mb_type[mb_xy]= mb_type; - s->current_picture.qscale_table[mb_xy]= s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - h->prev_mb_skipped= 1; -} - -#include "h264_mvpred.h" //For pred_pskip_motion() - #endif /* AVCODEC_H264_H */