X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fframe.c;h=8bf253d3b1c66ae5dcc6e6bdc0c4af25bca851a7;hb=774dbb4795638f4b8ead6a77bc045584223f4d03;hp=26c564075f870faa9121ad2251ab29ff58116910;hpb=91522693403f9bc06985f8e4e9aebb6d4b43fc5a;p=x264 diff --git a/common/frame.c b/common/frame.c index 26c56407..8bf253d3 100644 --- a/common/frame.c +++ b/common/frame.c @@ -5,6 +5,7 @@ * * Authors: Laurent Aimar * Loren Merritt + * Fiona Glaser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,20 +26,19 @@ #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) -x264_frame_t *x264_frame_new( x264_t *h ) +x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) { - x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) ); + x264_frame_t *frame; int i, j; int i_mb_count = h->mb.i_mb_count; int i_stride, i_width, i_lines; int i_padv = PADV << h->param.b_interlaced; int luma_plane_size; + int chroma_plane_size; int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16; - if( !frame ) return NULL; - - memset( frame, 0, sizeof(x264_frame_t) ); + CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) ); /* allocate frame data (+64 for extra data for me) */ i_width = ALIGN( h->param.i_width, 16 ); @@ -48,51 +48,22 @@ x264_frame_t *x264_frame_new( x264_t *h ) frame->i_plane = 3; for( i = 0; i < 3; i++ ) { - frame->i_stride[i] = i_stride >> !!i; + frame->i_stride[i] = ALIGN( i_stride >> !!i, align ); frame->i_width[i] = i_width >> !!i; frame->i_lines[i] = i_lines >> !!i; } - luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv )); + luma_plane_size = (frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv)); + chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*i_padv)); for( i = 1; i < 3; i++ ) { - CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 ); + CHECKED_MALLOC( frame->buffer[i], chroma_plane_size ); frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2; } - /* all 4 luma planes allocated together, since the cacheline split code - * requires them to be in-phase wrt cacheline alignment. */ - CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size); - for( i = 0; i < 4; i++ ) - frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH; - frame->plane[0] = frame->filtered[0]; - - if( h->frames.b_have_lowres ) - { - frame->i_width_lowres = frame->i_width[0]/2; - frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align ); - frame->i_lines_lowres = frame->i_lines[0]/2; - - luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ); - - CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size ); - for( i = 0; i < 4; i++ ) - frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size; - - for( j = 0; j <= !!h->param.i_bframe; j++ ) - for( i = 0; i <= h->param.i_bframe; i++ ) - { - CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) ); - memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) ); - CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) ); - } - } - if( h->param.analyse.i_me_method >= X264_ME_ESA ) - { - CHECKED_MALLOC( frame->buffer[3], - 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) ); - frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH; - } + for( i = 0; i < h->param.i_bframe + 2; i++ ) + for( j = 0; j < h->param.i_bframe + 2; j++ ) + CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) ); frame->i_poc = -1; frame->i_type = X264_TYPE_AUTO; @@ -101,68 +72,153 @@ x264_frame_t *x264_frame_new( x264_t *h ) frame->i_frame = -1; frame->i_frame_num = -1; frame->i_lines_completed = -1; + frame->b_fdec = b_fdec; + frame->i_pic_struct = PIC_STRUCT_AUTO; + frame->i_field_cnt = -1; + frame->i_duration = + frame->i_cpb_duration = + frame->i_dpb_output_delay = + frame->i_cpb_delay = 0; + frame->i_coded_fields_lookahead = + frame->i_cpb_delay_lookahead = -1; + + frame->orig = frame; - CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t)); - CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) ); - CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) ); - CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) ); - if( h->param.i_bframe ) + /* all 4 luma planes allocated together, since the cacheline split code + * requires them to be in-phase wrt cacheline alignment. */ + if( h->param.analyse.i_subpel_refine && b_fdec ) { - CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) ); - CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) ); + CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size ); + for( i = 0; i < 4; i++ ) + frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH; + frame->plane[0] = frame->filtered[0]; } else { - frame->mv[1] = NULL; - frame->ref[1] = NULL; + CHECKED_MALLOC( frame->buffer[0], luma_plane_size ); + frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH; } - CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) ); - CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) ); - for( i = 0; i < h->param.i_bframe + 2; i++ ) - for( j = 0; j < h->param.i_bframe + 2; j++ ) - CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) ); + frame->b_duplicate = 0; - if( h->param.rc.i_aq_mode ) - CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) ); + if( b_fdec ) /* fdec frame */ + { + CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t)); + CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t)); + CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) ); + CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) ); + if( h->param.i_bframe ) + { + CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) ); + CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) ); + } + else + { + frame->mv[1] = NULL; + frame->ref[1] = NULL; + } + CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) ); + CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) ); + if( h->param.analyse.i_me_method >= X264_ME_ESA ) + { + CHECKED_MALLOC( frame->buffer[3], + frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa ); + frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH; + } + } + else /* fenc frame */ + { + if( h->frames.b_have_lowres ) + { + frame->i_width_lowres = frame->i_width[0]/2; + frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align ); + frame->i_lines_lowres = frame->i_lines[0]/2; + + luma_plane_size = frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV); + + CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size ); + for( i = 0; i < 4; i++ ) + frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size; + + for( j = 0; j <= !!h->param.i_bframe; j++ ) + for( i = 0; i <= h->param.i_bframe; i++ ) + { + CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) ); + CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) ); + } + CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+3) * sizeof(uint16_t) ); + for( j = 0; j <= h->param.i_bframe+1; j++ ) + for( i = 0; i <= h->param.i_bframe+1; i++ ) + { + CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) ); + CHECKED_MALLOC( frame->lowres_inter_types[j][i], (i_mb_count+3)/4 * sizeof(uint8_t) ); + } + frame->i_intra_cost = frame->lowres_costs[0][0]; + memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) ); + } + if( h->param.rc.i_aq_mode ) + { + CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) ); + CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) ); + if( h->frames.b_have_lowres ) + /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */ + CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) ); + } + } - x264_pthread_mutex_init( &frame->mutex, NULL ); - x264_pthread_cond_init( &frame->cv, NULL ); + if( x264_pthread_mutex_init( &frame->mutex, NULL ) ) + goto fail; + if( x264_pthread_cond_init( &frame->cv, NULL ) ) + goto fail; return frame; fail: - x264_frame_delete( frame ); + x264_free( frame ); return NULL; } void x264_frame_delete( x264_frame_t *frame ) { int i, j; - for( i = 0; i < 4; i++ ) - x264_free( frame->buffer[i] ); - for( i = 0; i < 4; i++ ) - x264_free( frame->buffer_lowres[i] ); - for( i = 0; i < X264_BFRAME_MAX+2; i++ ) - for( j = 0; j < X264_BFRAME_MAX+2; j++ ) - x264_free( frame->i_row_satds[i][j] ); - for( j = 0; j < 2; j++ ) - for( i = 0; i <= X264_BFRAME_MAX; i++ ) - { - x264_free( frame->lowres_mvs[j][i] ); - x264_free( frame->lowres_mv_costs[j][i] ); - } - x264_free( frame->f_qp_offset ); - x264_free( frame->i_intra_cost ); - x264_free( frame->i_row_bits ); - x264_free( frame->i_row_qp ); - x264_free( frame->mb_type ); - x264_free( frame->mv[0] ); - x264_free( frame->mv[1] ); - x264_free( frame->ref[0] ); - x264_free( frame->ref[1] ); - x264_pthread_mutex_destroy( &frame->mutex ); - x264_pthread_cond_destroy( &frame->cv ); + /* Duplicate frames are blank copies of real frames (including pointers), + * so freeing those pointers would cause a double free later. */ + if( !frame->b_duplicate ) + { + for( i = 0; i < 4; i++ ) + x264_free( frame->buffer[i] ); + for( i = 0; i < 4; i++ ) + x264_free( frame->buffer_lowres[i] ); + for( i = 0; i < X264_BFRAME_MAX+2; i++ ) + for( j = 0; j < X264_BFRAME_MAX+2; j++ ) + x264_free( frame->i_row_satds[i][j] ); + for( j = 0; j < 2; j++ ) + for( i = 0; i <= X264_BFRAME_MAX; i++ ) + { + x264_free( frame->lowres_mvs[j][i] ); + x264_free( frame->lowres_mv_costs[j][i] ); + } + x264_free( frame->i_propagate_cost ); + for( j = 0; j <= X264_BFRAME_MAX+1; j++ ) + for( i = 0; i <= X264_BFRAME_MAX+1; i++ ) + { + x264_free( frame->lowres_costs[j][i] ); + x264_free( frame->lowres_inter_types[j][i] ); + } + x264_free( frame->f_qp_offset ); + x264_free( frame->f_qp_offset_aq ); + x264_free( frame->i_inv_qscale_factor ); + x264_free( frame->i_row_bits ); + x264_free( frame->i_row_qp ); + x264_free( frame->mb_type ); + x264_free( frame->mb_partition ); + x264_free( frame->mv[0] ); + x264_free( frame->mv[1] ); + x264_free( frame->ref[0] ); + x264_free( frame->ref[1] ); + x264_pthread_mutex_destroy( &frame->mutex ); + x264_pthread_cond_destroy( &frame->cv ); + } x264_free( frame ); } @@ -178,7 +234,9 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src ) dst->i_type = src->i_type; dst->i_qpplus1 = src->i_qpplus1; - dst->i_pts = src->i_pts; + dst->i_pts = dst->i_reordered_pts = src->i_pts; + dst->param = src->param; + dst->i_pic_struct = src->i_pic_struct; for( i=0; i<3; i++ ) { @@ -282,7 +340,7 @@ void x264_frame_expand_border_lowres( x264_frame_t *frame ) { int i; for( i = 0; i < 4; i++ ) - plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 ); + plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres, PADH, PADV, 1, 1 ); } void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame ) @@ -293,8 +351,8 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame ) int i_subsample = i ? 1 : 0; int i_width = h->param.i_width >> i_subsample; int i_height = h->param.i_height >> i_subsample; - int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample; - int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample; + int i_padx = (h->sps->i_mb_width * 16 - h->param.i_width) >> i_subsample; + int i_pady = (h->sps->i_mb_height * 16 - h->param.i_height) >> i_subsample; if( i_padx ) { @@ -305,10 +363,9 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame ) } if( i_pady ) { - //FIXME interlace? or just let it pad using the wrong field for( y = i_height; y < i_height + i_pady; y++ ) memcpy( &frame->plane[i][y*frame->i_stride[i]], - &frame->plane[i][(i_height-1)*frame->i_stride[i]], + &frame->plane[i][(i_height-(~y&h->param.b_interlaced)-1)*frame->i_stride[i]], i_width + i_padx ); } } @@ -426,12 +483,14 @@ static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int a int delta; if( abs( p2 - p0 ) < beta ) { - pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] ); + if( tc0[i] ) + pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] ); tc++; } if( abs( q2 - q0 ) < beta ) { - pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] ); + if( tc0[i] ) + pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] ); tc++; } @@ -615,9 +674,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) int stride2y = stridey << b_interlaced; int strideuv = h->fdec->i_stride[1]; int stride2uv = strideuv << b_interlaced; + uint8_t (*nnz_backup)[16] = h->scratch_buffer; if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode ) - munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row ); + munge_cavlc_nnz( h, mb_y, nnz_backup, munge_cavlc_nnz_row ); for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced ) { @@ -682,10 +742,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) {\ /* *** Get bS for each 4px for the current edge *** */\ if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\ - *(uint32_t*)bS = 0x03030303;\ + M32( bS ) = 0x03030303;\ else\ {\ - *(uint32_t*)bS = 0x00000000;\ + M32( bS ) = 0x00000000;\ for( i = 0; i < 4; i++ )\ {\ int x = i_dir == 0 ? i_edge : i;\ @@ -701,15 +761,20 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) bS[i] = bS[i-1];\ else\ {\ - /* FIXME: A given frame may occupy more than one position in\ - * the reference list. So we should compare the frame numbers,\ - * not the indices in the ref list.\ - * No harm yet, as we don't generate that case.*/\ int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\ int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\ int i4p= mb_4x4+x+y*s4x4;\ int i4q= mbn_4x4+xn+yn*s4x4;\ - if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\ + int refs_equal;\ + /* We don't use duplicate refs in B-frames, so we can take this shortcut for now. */ \ + if( h->sh.i_type == SLICE_TYPE_B || h->mb.ref[0][i8p] < 0 || h->mb.ref[0][i8q] < 0 )\ + refs_equal = h->mb.ref[0][i8p] == h->mb.ref[0][i8q];\ + else if( !h->mb.b_interlaced )\ + refs_equal = h->fref0[h->mb.ref[0][i8p]]->i_poc == h->fref0[h->mb.ref[0][i8q]]->i_poc;\ + else\ + refs_equal = h->fref0[h->mb.ref[0][i8p]>>1]->i_poc == h->fref0[h->mb.ref[0][i8q]>>1]->i_poc\ + && (h->mb.ref[0][i8p]&1) == (h->mb.ref[0][i8q]&1);\ + if((!refs_equal ||\ abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\ abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\ (h->sh.i_type == SLICE_TYPE_B &&\ @@ -731,7 +796,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) {\ int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\ int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\ - DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\ + ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\ if( i_edge )\ i_edge+= b_8x8_transform;\ else\ @@ -751,7 +816,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) goto end##i_dir;\ }\ DEBLOCK_STRENGTH(i_dir);\ - if( *(uint32_t*)bS )\ + if( M32( bS ) )\ FILTER_DIR( , i_dir);\ end##i_dir:\ i_edge += b_8x8_transform+1;\ @@ -762,7 +827,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\ {\ DEBLOCK_STRENGTH(i_dir);\ - if( *(uint32_t*)bS )\ + if( M32( bS ) )\ FILTER_DIR( , i_dir);\ }\ } @@ -772,7 +837,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y ) } if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode ) - munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row ); + munge_cavlc_nnz( h, mb_y, nnz_backup, restore_cavlc_nnz_row ); } void x264_frame_deblock( x264_t *h ) @@ -816,6 +881,13 @@ void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ); #endif // ARCH_PPC +#ifdef HAVE_ARMV6 +void x264_deblock_v_luma_neon( uint8_t *, int, int, int, int8_t * ); +void x264_deblock_h_luma_neon( uint8_t *, int, int, int, int8_t * ); +void x264_deblock_v_chroma_neon( uint8_t *, int, int, int, int8_t * ); +void x264_deblock_h_chroma_neon( uint8_t *, int, int, int, int8_t * ); +#endif + void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) { pf->deblock_v_luma = deblock_v_luma_c; @@ -850,13 +922,23 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) } #endif -#ifdef ARCH_PPC +#ifdef HAVE_ALTIVEC if( cpu&X264_CPU_ALTIVEC ) { pf->deblock_v_luma = x264_deblock_v_luma_altivec; pf->deblock_h_luma = x264_deblock_h_luma_altivec; } -#endif // ARCH_PPC +#endif // HAVE_ALTIVEC + +#ifdef HAVE_ARMV6 + if( cpu&X264_CPU_NEON ) + { + pf->deblock_v_luma = x264_deblock_v_luma_neon; + pf->deblock_h_luma = x264_deblock_h_luma_neon; + pf->deblock_v_chroma = x264_deblock_v_chroma_neon; + pf->deblock_h_chroma = x264_deblock_h_chroma_neon; + } +#endif } @@ -921,20 +1003,49 @@ void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ) assert( frame->i_reference_count > 0 ); frame->i_reference_count--; if( frame->i_reference_count == 0 ) - x264_frame_push( h->frames.unused, frame ); - assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL ); + x264_frame_push( h->frames.unused[frame->b_fdec], frame ); } -x264_frame_t *x264_frame_pop_unused( x264_t *h ) +x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec ) { x264_frame_t *frame; - if( h->frames.unused[0] ) - frame = x264_frame_pop( h->frames.unused ); + if( h->frames.unused[b_fdec][0] ) + frame = x264_frame_pop( h->frames.unused[b_fdec] ); else - frame = x264_frame_new( h ); - assert( frame->i_reference_count == 0 ); + frame = x264_frame_new( h, b_fdec ); + if( !frame ) + return NULL; + frame->b_last_minigop_bframe = 0; frame->i_reference_count = 1; frame->b_intra_calculated = 0; + frame->b_scenecut = 1; + frame->b_keyframe = 0; + + memset( frame->weight, 0, sizeof(frame->weight) ); + memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) ); + + return frame; +} + +void x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame ) +{ + assert( frame->i_reference_count > 0 ); + frame->i_reference_count--; + if( frame->i_reference_count == 0 ) + x264_frame_push( h->frames.blank_unused, frame ); +} + +x264_frame_t *x264_frame_pop_blank_unused( x264_t *h ) +{ + x264_frame_t *frame; + if( h->frames.blank_unused[0] ) + frame = x264_frame_pop( h->frames.blank_unused ); + else + frame = x264_malloc( sizeof(x264_frame_t) ); + if( !frame ) + return NULL; + frame->b_duplicate = 1; + frame->i_reference_count = 1; return frame; } @@ -957,3 +1068,63 @@ void x264_frame_sort( x264_frame_t **list, int b_dts ) } } while( !b_ok ); } + +void x264_weight_scale_plane( x264_t *h, uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, + int i_width, int i_height, x264_weight_t *w ) +{ + int x; + /* Weight horizontal strips of height 16. This was found to be the optimal height + * in terms of the cache loads. */ + while( i_height > 0 ) + { + for( x = 0; x < i_width; x += 16 ) + w->weightfn[16>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, X264_MIN( i_height, 16 ) ); + i_height -= 16; + dst += 16 * i_dst_stride; + src += 16 * i_src_stride; + } +} + +void x264_frame_delete_list( x264_frame_t **list ) +{ + int i = 0; + if( !list ) + return; + while( list[i] ) + x264_frame_delete( list[i++] ); + x264_free( list ); +} + +int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size ) +{ + if( max_size < 0 ) + return -1; + slist->i_max_size = max_size; + slist->i_size = 0; + CHECKED_MALLOCZERO( slist->list, (max_size+1) * sizeof(x264_frame_t*) ); + if( x264_pthread_mutex_init( &slist->mutex, NULL ) || + x264_pthread_cond_init( &slist->cv_fill, NULL ) || + x264_pthread_cond_init( &slist->cv_empty, NULL ) ) + return -1; + return 0; +fail: + return -1; +} + +void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist ) +{ + x264_pthread_mutex_destroy( &slist->mutex ); + x264_pthread_cond_destroy( &slist->cv_fill ); + x264_pthread_cond_destroy( &slist->cv_empty ); + x264_frame_delete_list( slist->list ); +} + +void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame ) +{ + x264_pthread_mutex_lock( &slist->mutex ); + while( slist->i_size == slist->i_max_size ) + x264_pthread_cond_wait( &slist->cv_empty, &slist->mutex ); + slist->list[ slist->i_size++ ] = frame; + x264_pthread_mutex_unlock( &slist->mutex ); + x264_pthread_cond_broadcast( &slist->cv_fill ); +}