X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=libavcodec%2Fh264.c;h=b923d50c368f57be54910bc8ab8971540d3e96b4;hb=f7287c42b2c56e4fbdf1ce56785dbfd9c75d1528;hp=3ce31c92e548514ebfd8e51894f59896629d4d80;hpb=e8b562087db87a426aa895d6c3664bc26949eb54;p=ffmpeg diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 3ce31c92e54..b923d50c368 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -14,7 +14,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ @@ -162,7 +162,7 @@ typedef struct H264Context{ #define NAL_SPS_EXT 13 #define NAL_AUXILIARY_SLICE 19 uint8_t *rbsp_buffer; - int rbsp_buffer_size; + unsigned int rbsp_buffer_size; /** * Used to parse AVC variant of h264 @@ -199,14 +199,14 @@ typedef struct H264Context{ * non zero coeff count cache. * is 64 if not available. */ - uint8_t non_zero_count_cache[6*8] __align8; + DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]); uint8_t (*non_zero_count)[16]; /** * Motion vector cache. */ - int16_t mv_cache[2][5*8][2] __align8; - int8_t ref_cache[2][5*8] __align8; + DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]); + DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]); #define LIST_NOT_USED -1 //FIXME rename? #define PART_NOT_AVAILABLE -2 @@ -335,7 +335,7 @@ typedef struct H264Context{ GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DCTELEM mb[16*24] __align8; + DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); /** * Cabac @@ -352,14 +352,18 @@ typedef struct H264Context{ uint8_t *chroma_pred_mode_table; int last_qscale_diff; int16_t (*mvd_table[2])[2]; - int16_t mvd_cache[2][5*8][2] __align8; + DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); uint8_t *direct_table; uint8_t direct_cache[5*8]; uint8_t zigzag_scan[16]; uint8_t field_scan[16]; + uint8_t zigzag_scan8x8[64]; + uint8_t zigzag_scan8x8_cavlc[64]; const uint8_t *zigzag_scan_q0; const uint8_t *field_scan_q0; + const uint8_t *zigzag_scan8x8_q0; + const uint8_t *zigzag_scan8x8_cavlc_q0; int x264_build; }H264Context; @@ -1202,7 +1206,7 @@ static inline void direct_ref_list_init(H264Context * const h){ for(list=0; list<2; list++){ for(i=0; iref_count[list]; i++){ const int poc = ref1->ref_poc[list][i]; - h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE; + h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */ for(j=0; jref_count[list]; j++) if(h->ref_list[list][j].poc == poc){ h->map_col_to_list0[list][i] = j; @@ -2607,7 +2611,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, const int pic_width = 16*s->mb_width; const int pic_height = 16*s->mb_height; - assert(pic->data[0]); + if(!pic->data[0]) + return; if(mx&7) extra_width -= 3; if(my&7) extra_height -= 3; @@ -2952,6 +2957,7 @@ static void free_tables(H264Context *h){ static void init_dequant8_coeff_table(H264Context *h){ int i,q,x; + const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly h->dequant8_coeff[0] = h->dequant8_buffer[0]; h->dequant8_coeff[1] = h->dequant8_buffer[1]; @@ -2965,8 +2971,9 @@ static void init_dequant8_coeff_table(H264Context *h){ int shift = div6[q]; int idx = rem6[q]; for(x=0; x<64; x++) - h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][ - dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; + h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = + ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * + h->pps.scaling_matrix8[i][x]) << shift; } } } @@ -3314,6 +3321,7 @@ static void hl_decode_mb(H264Context *h){ const unsigned int bottom = mb_y & 1; const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); if(!s->decode) return; @@ -3337,9 +3345,16 @@ static void hl_decode_mb(H264Context *h){ // dct_offset = s->linesize * 16; } - idct_add = transform_bypass - ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4 - : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add; + if(transform_bypass){ + idct_dc_add = + idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; + }else if(IS_8x8DCT(mb_type)){ + idct_dc_add = s->dsp.h264_idct8_dc_add; + idct_add = s->dsp.h264_idct8_add; + }else{ + idct_dc_add = s->dsp.h264_idct_dc_add; + idct_add = s->dsp.h264_idct_add; + } if (IS_INTRA_PCM(mb_type)) { unsigned int x, y; @@ -3389,17 +3404,22 @@ static void hl_decode_mb(H264Context *h){ for(i=0; i<16; i+=4){ uint8_t * const ptr= dest_y + block_offset[i]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + const int nnz = h->non_zero_count_cache[ scan8[i] ]; h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<topright_samples_available<<(i+1))&0x8000, linesize); - if(h->non_zero_count_cache[ scan8[i] ]) - idct_add(ptr, h->mb + i*16, linesize); + if(nnz){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + } } }else for(i=0; i<16; i++){ uint8_t * const ptr= dest_y + block_offset[i]; uint8_t *topright; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; - int tr; + int nnz, tr; if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ const int topright_avail= (h->topright_samples_available<pred4x4[ dir ](ptr, topright, linesize); - if(h->non_zero_count_cache[ scan8[i] ]){ - if(s->codec_id == CODEC_ID_H264) - idct_add(ptr, h->mb + i*16, linesize); - else + nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(s->codec_id == CODEC_ID_H264){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + }else svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); } } @@ -3453,11 +3477,23 @@ static void hl_decode_mb(H264Context *h){ if(!IS_INTRA4x4(mb_type)){ if(s->codec_id == CODEC_ID_H264){ - const int di = IS_8x8DCT(mb_type) ? 4 : 1; - for(i=0; i<16; i+=di){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + block_offset[i]; - idct_add(ptr, h->mb + i*16, linesize); + if(IS_INTRA16x16(mb_type)){ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else if(h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } + }else{ + const int di = IS_8x8DCT(mb_type) ? 4 : 1; + for(i=0; i<16; i+=di){ + int nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(nnz==1 && h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } } } }else{ @@ -3471,34 +3507,26 @@ static void hl_decode_mb(H264Context *h){ } if(!(s->flags&CODEC_FLAG_GRAY)){ - idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add; - if(!transform_bypass){ + uint8_t *dest[2] = {dest_cb, dest_cr}; + if(transform_bypass){ + idct_add = idct_dc_add = s->dsp.add_pixels4; + }else{ + idct_add = s->dsp.h264_idct_add; + idct_dc_add = s->dsp.h264_idct_dc_add; chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]); chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]); } if(s->codec_id == CODEC_ID_H264){ - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + block_offset[i]; - idct_add(ptr, h->mb + i*16, uvlinesize); - } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + block_offset[i]; - idct_add(ptr, h->mb + i*16, uvlinesize); - } + for(i=16; i<16+8; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); + else if(h->mb[i*16]) + idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); } }else{ - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + block_offset[i]; - svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); - } - } - for(i=20; i<20+4; i++){ + for(i=16; i<16+8; i++){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + block_offset[i]; + uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); } } @@ -3887,8 +3915,13 @@ static void idr(H264Context *h){ static void flush_dpb(AVCodecContext *avctx){ H264Context *h= avctx->priv_data; int i; - for(i=0; i<16; i++) + for(i=0; i<16; i++) { + if(h->delayed_pic[i]) + h->delayed_pic[i]->reference= 0; h->delayed_pic[i]= NULL; + } + if(h->delayed_output_pic) + h->delayed_output_pic->reference= 0; h->delayed_output_pic= NULL; idr(h); if(h->s.current_picture_ptr) @@ -4263,8 +4296,8 @@ static int decode_slice_header(H264Context *h){ s->mb_width= h->sps.mb_width; s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); - h->b_stride= s->mb_width*4 + 1; - h->b8_stride= s->mb_width*2 + 1; + h->b_stride= s->mb_width*4; + h->b8_stride= s->mb_width*2; s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right ); if(h->sps.frame_mbs_only_flag) @@ -4290,14 +4323,31 @@ static int decode_slice_header(H264Context *h){ #define T(x) (x>>2) | ((x<<2) & 0xF) h->zigzag_scan[i] = T(zigzag_scan[i]); h-> field_scan[i] = T( field_scan[i]); +#undef T + } + } + if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ + memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t)); + memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); + }else{ + int i; + for(i=0; i<64; i++){ +#define T(x) (x>>3) | ((x&7)<<3) + h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]); + h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); +#undef T } } if(h->sps.transform_bypass){ //FIXME same ugly h->zigzag_scan_q0 = zigzag_scan; h->field_scan_q0 = field_scan; + h->zigzag_scan8x8_q0 = zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; }else{ h->zigzag_scan_q0 = h->zigzag_scan; h->field_scan_q0 = h->field_scan; + h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; } alloc_tables(h); @@ -4309,7 +4359,11 @@ static int decode_slice_header(H264Context *h){ s->avctx->sample_aspect_ratio.den = 1; if(h->sps.timing_info_present_flag){ - s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale}; + s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale}; + if(h->x264_build > 0 && h->x264_build < 44) + s->avctx->time_base.den *= 2; + av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, + s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); } } @@ -5070,7 +5124,7 @@ decode_intra_mb: int i8x8, i4x4, chroma_idx; int chroma_qp, dquant; GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; - const uint8_t *scan, *dc_scan; + const uint8_t *scan, *scan8x8, *dc_scan; // fill_non_zero_count_cache(h); @@ -5081,6 +5135,7 @@ decode_intra_mb: scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; dc_scan= luma_dc_zigzag_scan; } + scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; dquant= get_se_golomb(&s->gb); @@ -5122,12 +5177,12 @@ decode_intra_mb: DCTELEM *buf = &h->mb[64*i8x8]; uint8_t *nnz; for(i4x4=0; i4x4<4; i4x4++){ - if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, + if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) return -1; } nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; - nnz[0] |= nnz[1] | nnz[8] | nnz[9]; + nnz[0] += nnz[1] + nnz[8] + nnz[9]; }else{ for(i4x4=0; i4x4<4; i4x4++){ const int index= i4x4 + 4*i8x8; @@ -5221,19 +5276,11 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl return 25; /* PCM */ mb_type = 1; /* I16x16 */ - if( get_cabac( &h->cabac, &state[1] ) ) - mb_type += 12; /* cbp_luma != 0 */ - - if( get_cabac( &h->cabac, &state[2] ) ) { - if( get_cabac( &h->cabac, &state[2+intra_slice] ) ) - mb_type += 4 * 2; /* cbp_chroma == 2 */ - else - mb_type += 4 * 1; /* cbp_chroma == 1 */ - } - if( get_cabac( &h->cabac, &state[3+intra_slice] ) ) - mb_type += 2; - if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) ) - mb_type += 1; + mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */ + if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */ + mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] ); + mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] ); + mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] ); return mb_type; } @@ -5246,15 +5293,11 @@ static int decode_cabac_mb_type( H264Context *h ) { if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { /* P-type */ if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) { - if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 ) - return 0; /* P_L0_D16x16; */ - else - return 3; /* P_8x8; */ + /* P_L0_D16x16, P_8x8 */ + return 3 * get_cabac( &h->cabac, &h->cabac_state[16] ); } else { - if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 ) - return 2; /* P_L0_D8x16; */ - else - return 1; /* P_L0_D16x8; */ + /* P_L0_D8x16, P_L0_D16x8 */ + return 2 - get_cabac( &h->cabac, &h->cabac_state[17] ); } } else { return decode_cabac_intra_mb_type(h, 17, 0) + 5; @@ -5265,11 +5308,9 @@ static int decode_cabac_mb_type( H264Context *h ) { int ctx = 0; int bits; - if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ) - && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) + if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) ctx++; - if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ) - && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) + if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) ctx++; if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) @@ -5312,10 +5353,9 @@ static int decode_cabac_mb_skip( H264Context *h) { if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) ctx++; - if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE) - return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); - else /* B-frame */ - return get_cabac( &h->cabac, &h->cabac_state[24+ctx] ); + if( h->slice_type == B_TYPE ) + ctx += 13; + return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); } static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { @@ -5372,14 +5412,17 @@ static const uint8_t block_idx_xy[4][4] = { }; static int decode_cabac_mb_cbp_luma( H264Context *h) { - MpegEncContext * const s = &h->s; - int cbp = 0; + int cbp_b = -1; int i8x8; + if( h->slice_table[h->top_mb_xy] == h->slice_num ) { + cbp_b = h->top_cbp; + tprintf("cbp_b = top_cbp = %x\n", cbp_b); + } + for( i8x8 = 0; i8x8 < 4; i8x8++ ) { int cbp_a = -1; - int cbp_b = -1; int x, y; int ctx = 0; @@ -5388,17 +5431,13 @@ static int decode_cabac_mb_cbp_luma( H264Context *h) { if( x > 0 ) cbp_a = cbp; - else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) { + else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) { cbp_a = h->left_cbp; tprintf("cbp_a = left_cbp = %x\n", cbp_a); } if( y > 0 ) cbp_b = cbp; - else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) { - cbp_b = h->top_cbp; - tprintf("cbp_b = top_cbp = %x\n", cbp_b); - } /* No need to test for skip as we put 0 for skip block */ /* No need to test for IPCM as we put 1 for IPCM block */ @@ -5458,7 +5497,7 @@ static int decode_cabac_mb_dqp( H264Context *h) { else ctx = 3; val++; - if(val > 52) //prevent infinite loop + if(val > 102) //prevent infinite loop return INT_MIN; } @@ -5686,7 +5725,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n h->non_zero_count_cache[scan8[16+n]] = coeff_count; else { assert( cat == 5 ); - fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); } for( i = coeff_count - 1; i >= 0; i-- ) { @@ -5737,7 +5776,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n return 0; } -void inline compute_mb_neighboors(H264Context *h) +static void inline compute_mb_neighbors(H264Context *h) { MpegEncContext * const s = &h->s; const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; @@ -5797,7 +5836,7 @@ static int decode_mb_cabac(H264Context *h) { h->prev_mb_skipped = 0; - compute_mb_neighboors(h); + compute_mb_neighbors(h); if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) { av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" ); return -1; @@ -6129,7 +6168,7 @@ decode_intra_mb: s->current_picture.mb_type[mb_xy]= mb_type; if( cbp || IS_INTRA16x16( mb_type ) ) { - const uint8_t *scan, *dc_scan; + const uint8_t *scan, *scan8x8, *dc_scan; int dqp; if(IS_INTERLACED(mb_type)){ @@ -6139,6 +6178,7 @@ decode_intra_mb: scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; dc_scan= luma_dc_zigzag_scan; } + scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); if( dqp == INT_MIN ){ @@ -6172,7 +6212,7 @@ decode_intra_mb: if( cbp & (1<mb + 64*i8x8, 5, 4*i8x8, - zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) + scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) return -1; } else for( i4x4 = 0; i4x4 < 4; i4x4++ ) { @@ -6543,6 +6583,18 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 * frame numbers, not indices. */ static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + //for sufficiently low qp, filtering wouldn't do anything + //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp + if(!h->mb_aff_frame){ + int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset); + int qp = s->current_picture.qscale_table[mb_xy]; + if(qp <= qp_thresh + && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) + && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ + return; + } + } + if (h->mb_aff_frame // left mb is in picture && h->slice_table[mb_xy-1] != 255 @@ -6613,8 +6665,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 const int mbm_type = s->current_picture.mb_type[mbm_xy]; int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; - const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP)) - == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; + const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) + == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; // how often to recheck mv-based bS when iterating between edges const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 : (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0; @@ -6925,7 +6977,7 @@ static int decode_slice(H264Context *h){ hl_decode_mb(h); if(ret<0){ - fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); return -1; @@ -7469,6 +7521,15 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ nalsize = 0; for(i = 0; i < h->nal_length_size; i++) nalsize = (nalsize << 8) | buf[buf_index++]; + if(nalsize <= 1){ + if(nalsize == 1){ + buf_index++; + continue; + }else{ + av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); + break; + } + } } else { // start code prefix search for(; buf_index + 3 < buf_size; buf_index++){ @@ -7512,6 +7573,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); break; } + s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE); if(h->redundant_pic_count==0 && s->hurry_up < 5 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) @@ -7581,7 +7643,6 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; s->current_picture_ptr->pict_type= s->pict_type; - s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE; h->prev_frame_num_offset= h->frame_num_offset; h->prev_frame_num= h->frame_num; @@ -7737,7 +7798,9 @@ static int decode_frame(AVCodecContext *avctx, } out_of_order = !cross_idr && prev && out->poc < prev->poc; - if(prev && pics <= s->avctx->has_b_frames) + if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) + { } + else if(prev && pics <= s->avctx->has_b_frames) out = prev; else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15) || (s->low_delay &&