#endif
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
+ PREALLOC_INIT
/* allocate frame data (+64 for extra data for me) */
i_width = h->mb.i_mb_width*16;
for( int i = 0; i < h->param.i_bframe + 2; i++ )
for( int j = 0; j < h->param.i_bframe + 2; j++ )
- CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+ PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
frame->i_poc = -1;
frame->i_type = X264_TYPE_AUTO;
{
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
- CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+ PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
if( PARAM_INTERLACED )
- {
- CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
- frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
- }
+ PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
}
/* all 4 luma planes allocated together, since the cacheline split code
if( h->param.analyse.i_subpel_refine && b_fdec )
{
/* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
- CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+ PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
if( PARAM_INTERLACED )
- CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
- for( int i = 0; i < 4; i++ )
- {
- frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
- frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
- }
- frame->plane[p] = frame->filtered[p][0];
- frame->plane_fld[p] = frame->filtered_fld[p][0];
+ PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
}
else
{
- CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+ PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
if( PARAM_INTERLACED )
- CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
- frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
- frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+ PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
}
}
if( b_fdec ) /* fdec frame */
{
- CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
- CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
- CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
- M32( frame->mv16x16[0] ) = 0;
- frame->mv16x16++;
- CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+ PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
+ PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
+ PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+ PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
+ PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
if( h->param.i_bframe )
{
- CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+ PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+ PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
}
else
{
frame->mv[1] = NULL;
frame->ref[1] = NULL;
}
- CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
- CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
- CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
+ PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+ PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+ PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
if( h->param.analyse.i_me_method >= X264_ME_ESA )
- {
- CHECKED_MALLOC( frame->buffer[3],
- frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
- }
+ PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
if( PARAM_INTERLACED )
- CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
+ PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
if( h->param.analyse.b_mb_info )
- CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
+ PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
}
else /* fenc frame */
{
{
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
- CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
- for( int i = 0; i < 4; i++ )
- frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+ PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
for( int j = 0; j <= !!h->param.i_bframe; j++ )
for( int i = 0; i <= h->param.i_bframe; i++ )
{
- CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
- CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+ PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+ PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
}
- CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
+ PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
for( int j = 0; j <= h->param.i_bframe+1; j++ )
for( int i = 0; i <= h->param.i_bframe+1; i++ )
- CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
- frame->i_intra_cost = frame->lowres_costs[0][0];
- memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+ PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+
}
if( h->param.rc.i_aq_mode )
{
- CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
- CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+ PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
if( h->frames.b_have_lowres )
+ PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+ }
+ }
+
+ PREALLOC_END( frame->base );
+
+ if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
+ {
+ int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
+ frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+ if( PARAM_INTERLACED )
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+ }
+
+ for( int p = 0; p < luma_plane_count; p++ )
+ {
+ int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+ if( h->param.analyse.i_subpel_refine && b_fdec )
+ {
+ for( int i = 0; i < 4; i++ )
+ {
+ frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+ frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+ }
+ frame->plane[p] = frame->filtered[p][0];
+ frame->plane_fld[p] = frame->filtered_fld[p][0];
+ }
+ else
+ {
+ frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+ frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+ }
+ }
+
+ if( b_fdec )
+ {
+ M32( frame->mv16x16[0] ) = 0;
+ frame->mv16x16++;
+
+ if( h->param.analyse.i_me_method >= X264_ME_ESA )
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+ }
+ else
+ {
+ if( h->frames.b_have_lowres )
+ {
+ int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+ for( int i = 0; i < 4; i++ )
+ frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+
+ for( int j = 0; j <= !!h->param.i_bframe; j++ )
+ for( int i = 0; i <= h->param.i_bframe; i++ )
+ memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
+
+ frame->i_intra_cost = frame->lowres_costs[0][0];
+ memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+
+ if( h->param.rc.i_aq_mode )
/* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
- CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+ memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
}
}
* so freeing those pointers would cause a double free later. */
if( !frame->b_duplicate )
{
- for( int i = 0; i < 4; i++ )
- {
- x264_free( frame->buffer[i] );
- x264_free( frame->buffer_fld[i] );
- }
- for( int i = 0; i < 4; i++ )
- x264_free( frame->buffer_lowres[i] );
- for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
- for( int j = 0; j < X264_BFRAME_MAX+2; j++ )
- x264_free( frame->i_row_satds[i][j] );
- for( int j = 0; j < 2; j++ )
- for( int i = 0; i <= X264_BFRAME_MAX; i++ )
- {
- x264_free( frame->lowres_mvs[j][i] );
- x264_free( frame->lowres_mv_costs[j][i] );
- }
- x264_free( frame->i_propagate_cost );
- for( int j = 0; j <= X264_BFRAME_MAX+1; j++ )
- for( int i = 0; i <= X264_BFRAME_MAX+1; i++ )
- x264_free( frame->lowres_costs[j][i] );
- x264_free( frame->f_qp_offset );
- x264_free( frame->f_qp_offset_aq );
- x264_free( frame->i_inv_qscale_factor );
- x264_free( frame->i_row_bits );
- x264_free( frame->f_row_qp );
- x264_free( frame->f_row_qscale );
- x264_free( frame->field );
- x264_free( frame->effective_qp );
- x264_free( frame->mb_type );
- x264_free( frame->mb_partition );
- x264_free( frame->mv[0] );
- x264_free( frame->mv[1] );
- if( frame->mv16x16 )
- x264_free( frame->mv16x16-1 );
- x264_free( frame->ref[0] );
- x264_free( frame->ref[1] );
+ x264_free( frame->base );
+
if( frame->param && frame->param->param_free )
frame->param->param_free( frame->param );
if( frame->mb_info_free )
h->mb.b_interlaced = PARAM_INTERLACED;
- CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
- memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+ PREALLOC_INIT
+
+ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+ PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
- CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
/* all coeffs */
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+ PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
if( h->param.b_cabac )
{
- CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
if( h->param.i_bframe )
- CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
}
for( int i = 0; i < 2; i++ )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
for( int j = !i; j < i_refs; j++ )
- {
- CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
- M32( h->mb.mvr[i][j][0] ) = 0;
- h->mb.mvr[i][j]++;
- }
+ PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
}
if( h->param.analyse.i_weighted_pred )
}
for( int i = 0; i < numweightbuf; i++ )
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ }
+
+ PREALLOC_END( h->mb.base );
+
+ memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+ for( int i = 0; i < 2; i++ )
+ {
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+ for( int j = !i; j < i_refs; j++ )
+ {
+ M32( h->mb.mvr[i][j][0] ) = 0;
+ h->mb.mvr[i][j]++;
+ }
}
return 0;
}
void x264_macroblock_cache_free( x264_t *h )
{
- for( int i = 0; i < 2; i++ )
- for( int j = !i; j < X264_REF_MAX*2; j++ )
- if( h->mb.mvr[i][j] )
- x264_free( h->mb.mvr[i][j]-1 );
- for( int i = 0; i < X264_REF_MAX; i++ )
- x264_free( h->mb.p_weight_buf[i] );
-
- if( h->param.b_cabac )
- {
- x264_free( h->mb.skipbp );
- x264_free( h->mb.chroma_pred_mode );
- x264_free( h->mb.mvd[0] );
- x264_free( h->mb.mvd[1] );
- }
- x264_free( h->mb.slice_table );
- x264_free( h->mb.intra4x4_pred_mode );
- x264_free( h->mb.non_zero_count );
- x264_free( h->mb.mb_transform_size );
- x264_free( h->mb.cbp );
- x264_free( h->mb.qp );
+ x264_free( h->mb.base ); /* single free replaces the per-array frees removed above; h->mb.base is the pointer handed to PREALLOC_END() by the allocation code. NOTE(review): presumably it is the one backing block for every PREALLOC'ed h->mb array (incl. the mvr[][] pointers that were advanced by one) -- confirm against the PREALLOC macro definitions. */
}
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )