#undef PPIXEL
}
-void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
+void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y )
{
- int b_start = !mb_y;
+ int pad_top = mb_y == 0;
+ int pad_bot = mb_y == h->mb.i_mb_height - (1 << SLICE_MBAFF);
+ int b_start = mb_y == h->i_threadslice_start;
+ int b_end = mb_y == h->i_threadslice_end - (1 << SLICE_MBAFF);
if( mb_y & SLICE_MBAFF )
return;
for( int i = 0; i < frame->i_plane; i++ )
int v_shift = i && CHROMA_V_SHIFT;
int stride = frame->i_stride[i];
int width = 16*h->mb.i_mb_width;
- int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> v_shift;
+ int height = (pad_bot ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> v_shift;
int padh = PADH;
int padv = PADV >> v_shift;
// buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
if( b_end && !b_start )
height += 4 >> (v_shift + SLICE_MBAFF);
pixel *pix;
+ int starty = 16*mb_y - 4*!b_start;
if( SLICE_MBAFF )
{
// border samples for each field are extended separately
- pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift);
- plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, h_shift );
- plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, h_shift );
+ pix = frame->plane_fld[i] + (starty*stride >> v_shift);
+ plane_expand_border( pix, stride*2, width, height, padh, padv, pad_top, pad_bot, h_shift );
+ plane_expand_border( pix+stride, stride*2, width, height, padh, padv, pad_top, pad_bot, h_shift );
- height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> v_shift;
+ height = (pad_bot ? 16*(h->mb.i_mb_height - mb_y) : 32) >> v_shift;
if( b_end && !b_start )
height += 4 >> v_shift;
- pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift);
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, h_shift );
+ pix = frame->plane[i] + (starty*stride >> v_shift);
+ plane_expand_border( pix, stride, width, height, padh, padv, pad_top, pad_bot, h_shift );
}
else
{
- pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> v_shift);
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, h_shift );
+ pix = frame->plane[i] + (starty*stride >> v_shift);
+ plane_expand_border( pix, stride, width, height, padh, padv, pad_top, pad_bot, h_shift );
}
}
}
x264_pthread_mutex_unlock( &frame->mutex );
}
+void x264_threadslice_cond_broadcast( x264_t *h, int pass ) /* record `pass` as h's slice-thread pass and wake waiters on h->cv */
+{
+ x264_pthread_mutex_lock( &h->mutex );
+ h->i_threadslice_pass = pass; /* stored while holding h->mutex, the same mutex x264_threadslice_cond_wait() holds when reading it */
+ if( pass > 0 ) /* pass <= 0 only stores the value and signals nobody */
+ x264_pthread_cond_broadcast( &h->cv );
+ x264_pthread_mutex_unlock( &h->mutex );
+}
+
+void x264_threadslice_cond_wait( x264_t *h, int pass ) /* block the caller until h->i_threadslice_pass is at least `pass` */
+{
+ x264_pthread_mutex_lock( &h->mutex );
+ while( h->i_threadslice_pass < pass ) /* predicate is re-tested after every wakeup; returns at once if already satisfied */
+ x264_pthread_cond_wait( &h->cv, &h->mutex );
+ x264_pthread_mutex_unlock( &h->mutex );
+}
+
/* list operators */
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
return -10.0 * log10( inv_ssim );
}
+static int x264_threadpool_wait_all( x264_t *h ) /* wait on every thread context still flagged active; returns 0, or -1 on the first failed wait */
+{
+ for( int i = 0; i < h->param.i_threads; i++ )
+ if( h->thread[i]->b_thread_active ) /* contexts not flagged active are skipped */
+ {
+ h->thread[i]->b_thread_active = 0; /* cleared before waiting, so a later call will not wait on this context again */
+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) < 0 ) /* a negative wait result is treated as failure */
+ return -1; /* early exit: contexts after index i are neither waited on nor have their flag cleared */
+ }
+ return 0;
+}
+
static void x264_frame_dump( x264_t *h )
{
FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
if( !f )
return;
+ /* Wait for the threads to finish deblocking */
+ if( h->param.b_sliced_threads )
+ x264_threadpool_wait_all( h );
+
/* Write the frame in display order */
int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * sizeof(pixel) );
fseek( f, (uint64_t)h->fdec->i_frame * frame_size, SEEK_SET );
h->param.i_nal_hrd = X264_NAL_HRD_VBR;
}
- if( h->param.psz_dump_yuv )
- h->param.b_full_recon = 1;
-
/* ensure the booleans are 0 or 1 so they can be used in math */
#define BOOLIFY(x) h->param.x = !!h->param.x
BOOLIFY( b_cabac );
goto fail;
h->thread[0] = h;
- for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
- CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
+ for( int i = 0; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
+ {
+ if( i )
+ CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
+ if( i < h->param.i_threads )
+ {
+ if( x264_pthread_mutex_init( &h->thread[i]->mutex, NULL ) )
+ goto fail;
+ if( x264_pthread_cond_init( &h->thread[i]->cv, NULL ) )
+ goto fail;
+ }
+ }
for( int i = 0; i < h->param.i_threads; i++ )
{
****************************************************************************/
int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
{
+ /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */
+ if( h->param.b_sliced_threads )
+ if( x264_threadpool_wait_all( h ) < 0 )
+ return -1;
+
int rc_reconfig = 0;
h = h->thread[h->thread[0]->i_thread_phase];
x264_set_aspect_ratio( h, param, 0 );
h->mb.pic.i_fref[1] = h->i_ref[1];
}
-static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
+static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass )
{
/* mb_y is the mb to be encoded next, not the mb to be filtered here */
int b_hpel = h->fdec->b_kept_as_ref;
* above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */
int minpix_y = min_y*16 - 4 * !b_start;
int maxpix_y = mb_y*16 - 4 * !b_end;
- b_deblock &= b_hpel || h->param.b_full_recon;
- if( h->param.b_sliced_threads && b_start && min_y && !b_inloop )
+ b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
+ if( h->param.b_sliced_threads )
{
- b_deblock = 0; /* We already deblocked on the inloop pass. */
- b_measure_quality = 0; /* We already measured quality on the inloop pass. */
+ switch( pass )
+ {
+ /* During encode: only do deblock if asked for */
+ default:
+ case 0:
+ b_deblock &= h->param.b_full_recon;
+ b_hpel = 0;
+ break;
+ /* During post-encode pass: do deblock if not done yet, do hpel for all
+ * rows except those between slices. */
+ case 1:
+ b_deblock &= !h->param.b_full_recon;
+ b_hpel &= !(b_start && min_y > 0);
+ b_measure_quality = 0;
+ break;
+ /* Final pass: do the rows between slices in sequence. */
+ case 2:
+ b_deblock = 0;
+ b_measure_quality = 0;
+ break;
+ }
}
if( mb_y & SLICE_MBAFF )
return;
/* FIXME: Prediction requires different borders for interlaced/progressive mc,
* but the actual image data is equivalent. For now, maintain this
* consistency by copying deblocked pixels between planes. */
- if( PARAM_INTERLACED )
+ if( PARAM_INTERLACED && (!h->param.b_sliced_threads || pass == 1) )
for( int p = 0; p < h->fdec->i_plane; p++ )
for( int i = minpix_y>>(CHROMA_V_SHIFT && p); i < maxpix_y>>(CHROMA_V_SHIFT && p); i++ )
memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
h->fdec->plane[p] + i*h->fdec->i_stride[p],
h->mb.i_mb_width*16*sizeof(pixel) );
+ if( h->fdec->b_kept_as_ref && (!h->param.b_sliced_threads || pass == 1) )
+ x264_frame_expand_border( h, h->fdec, min_y );
if( b_hpel )
{
int end = mb_y == h->mb.i_mb_height;
- x264_frame_expand_border( h, h->fdec, min_y, end );
+ /* Can't do hpel until the previous slice is done encoding. */
if( h->param.analyse.i_subpel_refine )
{
x264_frame_filter( h, h->fdec, min_y, end );
}
}
- if( SLICE_MBAFF )
+ if( SLICE_MBAFF && pass == 0 )
for( int i = 0; i < 3; i++ )
{
XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
int orig_last_mb = h->sh.i_last_mb;
uint8_t *last_emu_check;
x264_bs_bak_t bs_bak[2];
- b_deblock &= b_hpel || h->param.b_full_recon;
+ b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
bs_realign( &h->out.bs );
/* Slice */
if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 );
if( !h->mb.b_reencode_mb )
- x264_fdec_filter_row( h, i_mb_y, 1 );
+ x264_fdec_filter_row( h, i_mb_y, 0 );
}
if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
+ (h->out.i_nal*NALU_OVERHEAD * 8)
- h->stat.frame.i_tex_bits
- h->stat.frame.i_mv_bits;
- x264_fdec_filter_row( h, h->i_threadslice_end, 1 );
+ x264_fdec_filter_row( h, h->i_threadslice_end, 0 );
+
+ if( h->param.b_sliced_threads )
+ {
+ /* Tell the main thread we're done. */
+ x264_threadslice_cond_broadcast( h, 1 );
+ /* Do hpel now */
+ for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ )
+ x264_fdec_filter_row( h, mb_y, 1 );
+ x264_threadslice_cond_broadcast( h, 2 );
+ /* Do the first row of hpel, now that the previous slice is done */
+ if( h->i_thread_idx > 0 )
+ {
+ x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 );
+ x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
+ }
+ }
}
return 0;
#if HAVE_VISUALIZE
if( h->param.b_visualize )
if( x264_visualize_init( h ) )
- return (void *)-1;
+ goto fail;
#endif
/* init stats */
}
h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
if( x264_stack_align( x264_slice_write, h ) )
- return (void *)-1;
+ goto fail;
h->sh.i_first_mb = h->sh.i_last_mb + 1;
// if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width )
#endif
return (void *)0;
+
+fail:
+ /* Tell the other threads we're done, so that they don't keep waiting for us */
+ if( h->param.b_sliced_threads )
+ x264_threadslice_cond_broadcast( h, 2 );
+ return (void *)-1;
}
static int x264_threaded_slices_write( x264_t *h )
x264_threads_distribute_ratecontrol( h );
- /* dispatch */
+ /* setup */
for( int i = 0; i < h->param.i_threads; i++ )
{
- x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
+ h->thread[i]->i_thread_idx = i;
h->thread[i]->b_thread_active = 1;
+ x264_threadslice_cond_broadcast( h->thread[i], 0 );
}
+ /* dispatch */
for( int i = 0; i < h->param.i_threads; i++ )
- {
- h->thread[i]->b_thread_active = 0;
- if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
- return -1;
- }
-
- /* Go back and fix up the hpel on the borders between slices. */
- for( int i = 1; i < h->param.i_threads; i++ )
- {
- x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 1, 0 );
- if( SLICE_MBAFF )
- x264_fdec_filter_row( h->thread[i], h->thread[i]->i_threadslice_start + 2, 0 );
- }
+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
+ /* wait */
+ for( int i = 0; i < h->param.i_threads; i++ )
+ x264_threadslice_cond_wait( h->thread[i], 1 );
x264_threads_merge_ratecontrol( h );
x264_cpu_mask_misalign_sse();
#endif
- // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
- if( x264_reference_update( h ) )
- return -1;
- h->fdec->i_lines_completed = -1;
-
/* no data out */
*pi_nal = 0;
*pp_nal = NULL;
/* ------------------- Get frame to be encoded ------------------------- */
/* 4: get picture to encode */
h->fenc = x264_frame_shift( h->frames.current );
+
+ /* If applicable, wait for previous frame reconstruction to finish */
+ if( h->param.b_sliced_threads )
+ if( x264_threadpool_wait_all( h ) < 0 )
+ return -1;
+
if( h->i_frame == h->i_thread_frames - 1 )
h->i_reordered_pts_delay = h->fenc->i_reordered_pts;
if( h->fenc->param )
h->fenc->param->param_free( h->fenc->param );
}
+ // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
+ if( x264_reference_update( h ) )
+ return -1;
+ h->fdec->i_lines_completed = -1;
+
if( !IS_X264_TYPE_I( h->fenc->i_type ) )
{
int valid_refs_left = 0;
{
char psz_message[80];
- if( h->b_thread_active )
+ if( !h->param.b_sliced_threads && h->b_thread_active )
{
h->b_thread_active = 0;
if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
x264_lookahead_delete( h );
+ if( h->param.b_sliced_threads )
+ x264_threadpool_wait_all( h );
if( h->param.i_threads > 1 )
x264_threadpool_delete( h->threadpool );
if( h->i_thread_frames > 1 )
x264_free( h->nal_buffer );
x264_analyse_free_costs( h );
- if( h->i_thread_frames > 1)
+ if( h->i_thread_frames > 1 )
h = h->thread[h->i_thread_phase];
/* frames */
}
x264_macroblock_thread_free( h->thread[i], 0 );
x264_free( h->thread[i]->out.p_bitstream );
- x264_free( h->thread[i]->out.nal);
+ x264_free( h->thread[i]->out.nal );
+ x264_pthread_mutex_destroy( &h->thread[i]->mutex );
+ x264_pthread_cond_destroy( &h->thread[i]->cv );
x264_free( h->thread[i] );
}
}