Tiny performance improvement with fast settings and lots of threads.
May help more on some OSs with slow thread creation, like OS X.
Unify the inconsistent abbreviations of "synchronized" (synch vs. sync) to "sync".
CONFIG := $(shell cat config.h)
-# Optional muxer module sources
+# Optional module sources
ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
SRCCLI += input/avs.c
endif
ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
SRCCLI += input/thread.c
+SRCS += common/threadpool.c
endif
ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
#include "cabac.h"
#include "quant.h"
#include "cpu.h"
+#include "threadpool.h"
/****************************************************************************
* General functions
int i_last_keyframe;
int i_slicetype_length;
x264_frame_t *last_nonb;
- x264_synch_frame_list_t ifbuf;
- x264_synch_frame_list_t next;
- x264_synch_frame_list_t ofbuf;
+ x264_pthread_t thread_handle;
+ x264_sync_frame_list_t ifbuf;
+ x264_sync_frame_list_t next;
+ x264_sync_frame_list_t ofbuf;
} x264_lookahead_t;
typedef struct x264_ratecontrol_t x264_ratecontrol_t;
x264_param_t param;
x264_t *thread[X264_THREAD_MAX+1];
- x264_pthread_t thread_handle;
int b_thread_active;
int i_thread_phase; /* which thread to use for the next frame */
int i_threadslice_start; /* first row in this thread slice */
int i_threadslice_end; /* row after the end of this thread slice */
+ x264_threadpool_t *threadpool;
/* bitstream output */
struct
x264_free( list );
}
-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
{
if( max_size < 0 )
return -1;
return -1;
}
-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
{
x264_pthread_mutex_destroy( &slist->mutex );
x264_pthread_cond_destroy( &slist->cv_fill );
x264_frame_delete_list( slist->list );
}
-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
{
x264_pthread_mutex_lock( &slist->mutex );
while( slist->i_size == slist->i_max_size )
x264_pthread_mutex_unlock( &slist->mutex );
x264_pthread_cond_broadcast( &slist->cv_fill );
}
+
+/* Remove and return the entry stored at the highest occupied index of slist.
+ * Blocks on cv_fill while the list is empty; once a slot has been vacated,
+ * everything waiting on cv_empty is woken. */
+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
+{
+    x264_pthread_mutex_lock( &slist->mutex );
+    while( slist->i_size == 0 )
+        x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
+
+    /* detach the last entry and shrink the list by one */
+    int last = slist->i_size - 1;
+    x264_frame_t *popped = slist->list[last];
+    slist->list[last] = NULL;
+    slist->i_size = last;
+
+    x264_pthread_cond_broadcast( &slist->cv_empty );
+    x264_pthread_mutex_unlock( &slist->mutex );
+    return popped;
+}
x264_pthread_mutex_t mutex;
x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
-} x264_synch_frame_list_t;
+} x264_sync_frame_list_t;
typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
void x264_frame_sort( x264_frame_t **list, int b_dts );
void x264_frame_delete_list( x264_frame_t **list );
-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
#define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
#define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
--- /dev/null
+/*****************************************************************************
+ * threadpool.c: x264 threadpool module
+ *****************************************************************************
+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#include "common.h"
+
+/* One unit of work for the pool: a function, its argument and a slot for its result. */
+typedef struct
+{
+ void *(*func)(void *); /* function a worker thread calls */
+ void *arg; /* passed to func; x264_threadpool_wait() also uses it to identify the job */
+ void *ret; /* return value of func, stored by the worker after the call */
+} x264_threadpool_job_t;
+
+/* State of one thread pool, shared between the caller and all worker threads. */
+struct x264_threadpool_t
+{
+ int exit; /* set to 1 by x264_threadpool_delete() so that the workers leave their loop */
+ int threads; /* number of worker threads; the same number of jobs is preallocated */
+ x264_pthread_t *thread_handle; /* array with one handle per worker thread */
+ void (*init_func)(void *); /* optional hook each worker calls once before taking jobs; may be NULL */
+ void *init_arg; /* argument handed to init_func */
+
+ /* The job queues need a synchronized list with blocking push/pop, so the
+ existing frame list implementation is reused: job pointers are stored
+ in it cast to frame pointers. */
+ x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
+ x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
+ x264_sync_frame_list_t done; /* list of jobs that have finished processing */
+};
+
+/* Worker thread entry point, started through x264_pthread_create().
+ * arg is the owning x264_threadpool_t.
+ *
+ * Calls the optional per-thread init hook once, then loops until pool->exit
+ * is set: wait for the run list to become non-empty, take the job at its
+ * head, execute it, store its result in the job and move the job to the
+ * done list.
+ *
+ * The function has the void *(*)(void *) shape of a thread start routine, so
+ * it is never called through an incompatible function pointer type.
+ * Always returns NULL. */
+static void *x264_threadpool_thread( void *arg )
+{
+    x264_threadpool_t *pool = arg;
+
+    if( pool->init_func )
+        pool->init_func( pool->init_arg );
+
+    while( !pool->exit )
+    {
+        x264_threadpool_job_t *job = NULL;
+
+        /* sleep until there is work to do or the pool is shutting down */
+        x264_pthread_mutex_lock( &pool->run.mutex );
+        while( !pool->exit && !pool->run.i_size )
+            x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
+        if( pool->run.i_size )
+        {
+            job = (void*)x264_frame_shift( pool->run.list );
+            pool->run.i_size--;
+        }
+        x264_pthread_mutex_unlock( &pool->run.mutex );
+
+        /* woken without a job: re-evaluate the exit flag */
+        if( !job )
+            continue;
+
+        job->ret = job->func( job->arg ); /* execute the function */
+        x264_sync_frame_list_push( &pool->done, (void*)job );
+    }
+    return NULL;
+}
+
+/* Create a thread pool.
+ *
+ * p_pool:    receives the new pool on success and NULL on failure
+ *            (left untouched when threads <= 0).
+ * threads:   requested number of worker threads; must be positive and is
+ *            capped at X264_THREAD_MAX.
+ * init_func: optional hook (may be NULL) that every worker calls once, with
+ *            init_arg, before it starts taking jobs.
+ *
+ * One job slot per worker is preallocated on the uninit list.
+ * Returns 0 on success and -1 on failure.  On failure everything that was
+ * set up so far is torn down again: workers already started are told to exit
+ * and joined, and the job slots, the lists, the handle array and the pool
+ * itself are released. */
+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
+                          void (*init_func)(void *), void *init_arg )
+{
+    if( threads <= 0 )
+        return -1;
+
+    x264_threadpool_t *pool = NULL;
+    int lists_ready = 0; /* how many of uninit/run/done are initialised, in that order */
+    int started = 0;     /* how many worker threads were created */
+
+    CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
+    *p_pool = pool;
+
+    pool->init_func = init_func;
+    pool->init_arg  = init_arg;
+    pool->threads   = X264_MIN( threads, X264_THREAD_MAX );
+
+    CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
+
+    /* initialise the lists one by one so that the failure path knows which ones exist */
+    if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) )
+        goto fail;
+    lists_ready = 1;
+    if( x264_sync_frame_list_init( &pool->run, pool->threads ) )
+        goto fail;
+    lists_ready = 2;
+    if( x264_sync_frame_list_init( &pool->done, pool->threads ) )
+        goto fail;
+    lists_ready = 3;
+
+    for( int i = 0; i < pool->threads; i++ )
+    {
+        x264_threadpool_job_t *job;
+        CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
+        x264_sync_frame_list_push( &pool->uninit, (void*)job );
+    }
+    for( ; started < pool->threads; started++ )
+        if( x264_pthread_create( pool->thread_handle+started, NULL, (void*)x264_threadpool_thread, pool ) )
+            goto fail;
+
+    return 0;
+fail:
+    if( pool )
+    {
+        if( started )
+        {
+            /* same shutdown handshake as x264_threadpool_delete(), limited to the workers that exist */
+            x264_pthread_mutex_lock( &pool->run.mutex );
+            pool->exit = 1;
+            x264_pthread_cond_broadcast( &pool->run.cv_fill );
+            x264_pthread_mutex_unlock( &pool->run.mutex );
+            for( int i = 0; i < started; i++ )
+                x264_pthread_join( pool->thread_handle[i], NULL );
+        }
+        if( lists_ready > 0 )
+        {
+            /* Nothing was ever queued, so only uninit can hold job slots.  Free them and
+             * clear the entries before the frame list code gets to see them; like
+             * x264_threadpool_list_delete(), this relies on the list being NULL-terminated. */
+            for( int i = 0; pool->uninit.list[i]; i++ )
+            {
+                x264_free( pool->uninit.list[i] );
+                pool->uninit.list[i] = NULL;
+            }
+            x264_sync_frame_list_delete( &pool->uninit );
+        }
+        if( lists_ready > 1 )
+            x264_sync_frame_list_delete( &pool->run );
+        if( lists_ready > 2 )
+            x264_sync_frame_list_delete( &pool->done );
+        if( pool->thread_handle )
+            x264_free( pool->thread_handle );
+        x264_free( pool );
+    }
+    /* never hand a dead or half-built pool back to the caller */
+    *p_pool = NULL;
+    return -1;
+}
+
+/* Queue func(arg) for execution by a worker thread.
+ * A free job slot is taken from the uninit list (x264_sync_frame_list_pop()
+ * waits while that list is empty), filled in and pushed onto the run list. */
+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg )
+{
+    x264_frame_t *slot = x264_sync_frame_list_pop( &pool->uninit );
+    x264_threadpool_job_t *job = (void*)slot;
+
+    job->arg  = arg;
+    job->func = func;
+
+    x264_sync_frame_list_push( &pool->run, slot );
+}
+
+/* Wait for a job that was submitted with the given arg to finish.
+ *
+ * Scans the done list for a job whose arg matches and sleeps on the list's
+ * cv_fill condition until one shows up.  The job slot is then returned to
+ * the uninit list for reuse.
+ *
+ * Exactly one finished job is consumed per call: the scan stops at the first
+ * match.  Continuing over the list after the removal could pick up a second
+ * job with the same arg, overwrite the first pointer and lose that slot.
+ *
+ * Returns the value the job's function returned. */
+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
+{
+    x264_threadpool_job_t *job = NULL;
+
+    x264_pthread_mutex_lock( &pool->done.mutex );
+    while( !job )
+    {
+        for( int i = 0; i < pool->done.i_size; i++ )
+        {
+            x264_threadpool_job_t *t = (void*)pool->done.list[i];
+            if( t->arg == arg )
+            {
+                /* take the job out of the list, closing the gap it leaves */
+                job = (void*)x264_frame_shift( pool->done.list+i );
+                pool->done.i_size--;
+                break;
+            }
+        }
+        if( !job )
+            x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
+    }
+    x264_pthread_mutex_unlock( &pool->done.mutex );
+
+    /* read the result before the slot is handed back for reuse */
+    void *ret = job->ret;
+    x264_sync_frame_list_push( &pool->uninit, (void*)job );
+    return ret;
+}
+
+/* Destroy a list that holds job slots rather than frames: free every entry up
+ * to the first NULL and clear it, then let x264_sync_frame_list_delete()
+ * dispose of the emptied list. */
+static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
+{
+    int idx = 0;
+    while( slist->list[idx] )
+    {
+        x264_free( slist->list[idx] );
+        slist->list[idx] = NULL;
+        idx++;
+    }
+    x264_sync_frame_list_delete( slist );
+}
+
+/* Shut the pool down and release it: raise the exit flag, wake the workers
+ * waiting on the run list, join every worker thread, then free the three job
+ * lists, the handle array and the pool itself. */
+void x264_threadpool_delete( x264_threadpool_t *pool )
+{
+    /* the flag is set under the run list lock, which the workers hold when they test it before sleeping */
+    x264_pthread_mutex_lock( &pool->run.mutex );
+    pool->exit = 1;
+    x264_pthread_cond_broadcast( &pool->run.cv_fill );
+    x264_pthread_mutex_unlock( &pool->run.mutex );
+
+    x264_pthread_t *worker = pool->thread_handle;
+    for( int left = pool->threads; left > 0; left--, worker++ )
+        x264_pthread_join( *worker, NULL );
+
+    x264_sync_frame_list_t *lists[3] = { &pool->uninit, &pool->run, &pool->done };
+    for( int k = 0; k < 3; k++ )
+        x264_threadpool_list_delete( lists[k] );
+
+    x264_free( pool->thread_handle );
+    x264_free( pool );
+}
--- /dev/null
+/*****************************************************************************
+ * threadpool.h: x264 threadpool module
+ *****************************************************************************
+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#ifndef X264_THREADPOOL_H
+#define X264_THREADPOOL_H
+
+/* Opaque pool handle; the structure is defined in threadpool.c. */
+typedef struct x264_threadpool_t x264_threadpool_t;
+
+#if HAVE_PTHREAD
+/* Create a pool with the given number of worker threads and store it in
+ * *p_pool.  init_func, if not NULL, is called once by every worker with
+ * init_arg before it starts taking jobs.
+ * Returns 0 on success, -1 on failure. */
+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
+                          void (*init_func)(void *), void *init_arg );
+/* Queue func(arg) for execution on a worker thread.  Waits for a free job
+ * slot if all of them are in use. */
+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
+/* Block until a job queued with this arg has finished and return the value
+ * its function returned. */
+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
+/* Tell the workers to exit, join them and free the pool. */
+void x264_threadpool_delete( x264_threadpool_t *pool );
+#else
+/* Stubs for builds without thread support: init always reports failure and
+ * the other calls expand to nothing (wait yields NULL).  The expansions are
+ * parenthesized where they are values so they behave as single expressions. */
+#define x264_threadpool_init(p,t,f,a) (-1)
+#define x264_threadpool_run(p,f,a)
+#define x264_threadpool_wait(p,a) NULL
+#define x264_threadpool_delete(p)
+#endif
+
+#endif
return -1;
}
+#if HAVE_PTHREAD
+/* Per-thread setup hook: it is handed to x264_threadpool_init(), which has
+ * every pool worker call it once with the encoder handle. */
+static void x264_encoder_thread_init( x264_t *h )
+{
+    /* when a threaded lookahead is configured, lower this thread's priority */
+    if( h->param.i_sync_lookahead != 0 )
+    {
+        x264_lower_thread_priority( 10 );
+    }
+
+#if HAVE_MMX
+    /* The misalign mask is per-thread state, so each thread has to set it for itself. */
+    if( (h->param.cpu & X264_CPU_SSE_MISALIGN) != 0 )
+    {
+        x264_cpu_mask_misalign_sse();
+    }
+#endif
+}
+#endif
+
/****************************************************************************
*
****************************************************************************
CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
+ if( h->param.i_threads > 1 &&
+ x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
+ goto fail;
+
h->thread[0] = h;
for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
{
int i_slice_num = 0;
int last_thread_mb = h->sh.i_last_mb;
- if( h->param.i_sync_lookahead )
- x264_lower_thread_priority( 10 );
-
-#if HAVE_MMX
- /* Misalign mask has to be set separately for each thread. */
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- x264_cpu_mask_misalign_sse();
-#endif
#if HAVE_VISUALIZE
if( h->param.b_visualize )
static int x264_threaded_slices_write( x264_t *h )
{
- void *ret = NULL;
-#if HAVE_MMX
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- x264_cpu_mask_misalign_sse();
-#endif
/* set first/last mb and sync contexts */
for( int i = 0; i < h->param.i_threads; i++ )
{
/* dispatch */
for( int i = 0; i < h->param.i_threads; i++ )
{
- if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
- return -1;
+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
h->thread[i]->b_thread_active = 1;
}
for( int i = 0; i < h->param.i_threads; i++ )
{
- x264_pthread_join( h->thread[i]->thread_handle, &ret );
h->thread[i]->b_thread_active = 0;
- if( (intptr_t)ret )
- return (intptr_t)ret;
+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
+ return -1;
}
/* Go back and fix up the hpel on the borders between slices. */
thread_current =
thread_oldest = h;
}
+#if HAVE_MMX
+ if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
+ x264_cpu_mask_misalign_sse();
+#endif
// ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
if( x264_reference_update( h ) )
h->i_threadslice_end = h->mb.i_mb_height;
if( h->i_thread_frames > 1 )
{
- if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
- return -1;
+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
h->b_thread_active = 1;
}
else if( h->param.b_sliced_threads )
if( h->b_thread_active )
{
- void *ret = NULL;
- x264_pthread_join( h->thread_handle, &ret );
h->b_thread_active = 0;
- if( (intptr_t)ret )
- return (intptr_t)ret;
+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
+ return -1;
}
if( !h->out.i_nal )
{
x264_lookahead_delete( h );
if( h->param.i_threads > 1 )
+ x264_threadpool_delete( h->threadpool );
+ if( h->i_thread_frames > 1 )
{
- // don't strictly have to wait for the other threads, but it's simpler than canceling them
- for( int i = 0; i < h->param.i_threads; i++ )
+ for( int i = 0; i < h->i_thread_frames; i++ )
if( h->thread[i]->b_thread_active )
- x264_pthread_join( h->thread[i]->thread_handle, NULL );
- if( h->i_thread_frames > 1 )
- {
- for( int i = 0; i < h->i_thread_frames; i++ )
- if( h->thread[i]->b_thread_active )
- {
- assert( h->thread[i]->fenc->i_reference_count == 1 );
- x264_frame_delete( h->thread[i]->fenc );
- }
+ {
+ assert( h->thread[i]->fenc->i_reference_count == 1 );
+ x264_frame_delete( h->thread[i]->fenc );
+ }
- x264_t *thread_prev = h->thread[h->i_thread_phase];
- x264_thread_sync_ratecontrol( h, thread_prev, h );
- x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
- h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
- }
+ x264_t *thread_prev = h->thread[h->i_thread_phase];
+ x264_thread_sync_ratecontrol( h, thread_prev, h );
+ x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
+ h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
}
h->i_frame++;
#include "common/common.h"
#include "analyse.h"
-static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
+static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
{
int i = count;
while( i-- )
look->i_slicetype_length = i_slicetype_length;
/* init frame lists */
- if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
- x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
- x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
+ if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
+ x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
+ x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
goto fail;
if( !h->param.i_sync_lookahead )
if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
goto fail;
- if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
+ if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
goto fail;
look->b_thread_active = 1;
h->lookahead->b_exit_thread = 1;
x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
- x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
+ x264_pthread_join( h->lookahead->thread_handle, NULL );
x264_macroblock_cache_free( h->thread[h->param.i_threads] );
x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
x264_free( h->thread[h->param.i_threads] );
}
- x264_synch_frame_list_delete( &h->lookahead->ifbuf );
- x264_synch_frame_list_delete( &h->lookahead->next );
+ x264_sync_frame_list_delete( &h->lookahead->ifbuf );
+ x264_sync_frame_list_delete( &h->lookahead->next );
if( h->lookahead->last_nonb )
x264_frame_push_unused( h, h->lookahead->last_nonb );
- x264_synch_frame_list_delete( &h->lookahead->ofbuf );
+ x264_sync_frame_list_delete( &h->lookahead->ofbuf );
x264_free( h->lookahead );
}
void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
{
if( h->param.i_sync_lookahead )
- x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
+ x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
else
- x264_synch_frame_list_push( &h->lookahead->next, frame );
+ x264_sync_frame_list_push( &h->lookahead->next, frame );
}
int x264_lookahead_is_empty( x264_t *h )
cli_input_t input;
hnd_t p_handle;
x264_picture_t pic;
- x264_pthread_t tid;
+ x264_threadpool_t *pool;
int next_frame;
int frame_total;
- int in_progress;
struct thread_input_arg_t *next_args;
} thread_hnd_t;
}
h->input = input;
h->p_handle = *p_handle;
- h->in_progress = 0;
h->next_frame = -1;
h->next_args = malloc( sizeof(thread_input_arg_t) );
if( !h->next_args )
thread_input.picture_alloc = h->input.picture_alloc;
thread_input.picture_clean = h->input.picture_clean;
+ if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
+ return -1;
+
*p_handle = h;
return 0;
}
if( h->next_frame >= 0 )
{
- x264_pthread_join( h->tid, NULL );
+ x264_threadpool_wait( h->pool, h->next_args );
ret |= h->next_args->status;
- h->in_progress = 0;
}
if( h->next_frame == i_frame )
h->next_frame =
h->next_args->i_frame = i_frame+1;
h->next_args->pic = &h->pic;
- if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
- return -1;
- h->in_progress = 1;
+ x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
}
else
h->next_frame = -1;
static int close_file( hnd_t handle )
{
thread_hnd_t *h = handle;
- if( h->in_progress )
- x264_pthread_join( h->tid, NULL );
+ x264_threadpool_delete( h->pool );
h->input.close_file( h->p_handle );
h->input.picture_clean( &h->pic );
free( h->next_args );