Move lookahead into a separate thread, set to higher priority than the other threads, for optimal performance.
Reduces the amount that lookahead bottlenecks encoding, greatly increasing performance with lookahead-intensive settings (e.g. b-adapt 2) on many-core CPUs.
Buffer size can be controlled with --sync-lookahead, which defaults to auto (threads+bframes buffer size).
Note that this buffer is separate from the rc-lookahead value.
Note also that this does not split lookahead itself into multiple threads yet; this may be added in the future.
Additionally, split frames into "fdec" and "fenc" frame types and keep the two separate.
This split greatly reduces memory usage, which helps compensate for the larger lookahead size.
Extremely special thanks to Michael Kazmier and Alex Giladi of Avail Media, the original authors of this patch.
common/quant.c common/vlc.c \
encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
encoder/set.c encoder/macroblock.c encoder/cabac.c \
- encoder/cavlc.c encoder/encoder.c
+ encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
SRCCLI = x264.c matroska.c muxers.c
param->cpu = x264_cpu_detect();
param->i_threads = X264_THREADS_AUTO;
param->b_deterministic = 1;
+ param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
/* Video properties */
param->i_csp = X264_CSP_I420;
else
p->i_threads = atoi(value);
}
+ OPT("sync-lookahead")
+ {
+ if( !strcmp(value, "auto") )
+ p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
+ else
+ p->i_sync_lookahead = atoi(value);
+ }
OPT2("deterministic", "n-deterministic")
p->b_deterministic = atobool(value);
OPT2("level", "level-idc")
} x264_slice_header_t;
+typedef struct x264_lookahead_t
+{
+ uint8_t b_thread_active; /* set to 1 by the lookahead thread when it starts, cleared when it finishes */
+ uint8_t b_exit_thread; /* set by the encoder side to ask the lookahead thread to drain and exit */
+ uint8_t b_analyse_keyframe; /* also run propagation analysis on I-frames (MB-tree / VBV lookahead) */
+ int i_last_idr; /* initialised to -i_keyint_max; read by the slicetype analysis for GOP-length checks */
+ int i_slicetype_length; /* the thread runs a decision only once `next` holds more frames than this */
+ x264_frame_t *last_nonb; /* most recent non-B frame; the lookahead holds its own reference on it */
+ x264_synch_frame_list_t ifbuf; /* input buffer: frames handed over by the encoder when a lookahead thread runs */
+ x264_synch_frame_list_t next; /* frames awaiting a slice type decision */
+ x264_synch_frame_list_t ofbuf; /* output buffer: decided frames waiting to be picked up by the encoder */
+} x264_lookahead_t;
+
/* From ffmpeg
*/
#define X264_SCAN8_SIZE (6*8)
/* encoder parameters */
x264_param_t param;
- x264_t *thread[X264_THREAD_MAX];
+ x264_t *thread[X264_THREAD_MAX+1];
x264_pthread_t thread_handle;
int b_thread_active;
int i_thread_phase; /* which thread to use for the next frame */
struct
{
/* Frames to be encoded (whose types have been decided) */
- x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
- /* Temporary buffer (frames types not yet decided) */
- x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
- /* Unused frames */
- x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
- /* For adaptive B decision */
- x264_frame_t *last_nonb;
+ x264_frame_t **current;
+ /* Unused frames: 0 = fenc, 1 = fdec */
+ x264_frame_t **unused[2];
/* frames used for reference + sentinels */
x264_frame_t *reference[16+2];
#if VISUALIZE
struct visualize_t *visualize;
#endif
+ x264_lookahead_t *lookahead;
};
// included at the end because it needs x264_t
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* This applies only to x86_32, since other architectures that need alignment
- * also have ABIs that ensure aligned stack. */
+ * either have ABIs that ensure aligned stack, or don't support it at all. */
#if defined(ARCH_X86) && defined(HAVE_MMX)
-int x264_stack_align( void (*func)(x264_t*), x264_t *arg );
-#define x264_stack_align(func,arg) x264_stack_align((void (*)(x264_t*))func,arg)
+int x264_stack_align( void (*func)(), ... );
+#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
-#define x264_stack_align(func,arg) func(arg)
+#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
typedef struct {
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
-x264_frame_t *x264_frame_new( x264_t *h )
+x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
{
x264_frame_t *frame;
int i, j;
CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
}
+
+ for( i = 0; i < h->param.i_bframe + 2; i++ )
+ for( j = 0; j < h->param.i_bframe + 2; j++ )
+ CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+
+ frame->i_poc = -1;
+ frame->i_type = X264_TYPE_AUTO;
+ frame->i_qpplus1 = 0;
+ frame->i_pts = -1;
+ frame->i_frame = -1;
+ frame->i_frame_num = -1;
+ frame->i_lines_completed = -1;
+ frame->b_fdec = b_fdec;
+
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
- if( h->param.analyse.i_subpel_refine )
+ if( h->param.analyse.i_subpel_refine && b_fdec )
{
CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
for( i = 0; i < 4; i++ )
frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
}
- if( h->frames.b_have_lowres )
+ if( b_fdec ) /* fdec frame */
{
- frame->i_width_lowres = frame->i_width[0]/2;
- frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
- frame->i_lines_lowres = frame->i_lines[0]/2;
-
- luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
-
- CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
- for( i = 0; i < 4; i++ )
- frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
-
- for( j = 0; j <= !!h->param.i_bframe; j++ )
- for( i = 0; i <= h->param.i_bframe; i++ )
- {
- CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
- CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
- }
- CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+3) * sizeof(uint16_t) );
- for( j = 0; j <= h->param.i_bframe+1; j++ )
- for( i = 0; i <= h->param.i_bframe+1; i++ )
- {
- CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
- CHECKED_MALLOC( frame->lowres_inter_types[j][i], (i_mb_count+3)/4 * sizeof(uint8_t) );
- }
- frame->i_intra_cost = frame->lowres_costs[0][0];
- memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+ CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
+ CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+ CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+ if( h->param.i_bframe )
+ {
+ CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+ CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+ }
+ else
+ {
+ frame->mv[1] = NULL;
+ frame->ref[1] = NULL;
+ }
+ CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+ CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
+ if( h->param.analyse.i_me_method >= X264_ME_ESA )
+ {
+ CHECKED_MALLOC( frame->buffer[3],
+ frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+ }
}
-
- if( h->param.analyse.i_me_method >= X264_ME_ESA )
+ else /* fenc frame */
{
- CHECKED_MALLOC( frame->buffer[3],
- frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
- }
-
- frame->i_poc = -1;
- frame->i_type = X264_TYPE_AUTO;
- frame->i_qpplus1 = 0;
- frame->i_pts = -1;
- frame->i_frame = -1;
- frame->i_frame_num = -1;
- frame->i_lines_completed = -1;
+ if( h->frames.b_have_lowres )
+ {
+ frame->i_width_lowres = frame->i_width[0]/2;
+ frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
+ frame->i_lines_lowres = frame->i_lines[0]/2;
- CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
- CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
- if( h->param.i_bframe )
- {
- CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
- }
- else
- {
- frame->mv[1] = NULL;
- frame->ref[1] = NULL;
- }
+ luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
- CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
- CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
- for( i = 0; i < h->param.i_bframe + 2; i++ )
- for( j = 0; j < h->param.i_bframe + 2; j++ )
- CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+ CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
+ for( i = 0; i < 4; i++ )
+ frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
- if( h->param.rc.i_aq_mode )
- {
- CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
- CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
- if( h->frames.b_have_lowres )
- /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
- CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+ for( j = 0; j <= !!h->param.i_bframe; j++ )
+ for( i = 0; i <= h->param.i_bframe; i++ )
+ {
+ CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+ CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+ }
+ CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+3) * sizeof(uint16_t) );
+ for( j = 0; j <= h->param.i_bframe+1; j++ )
+ for( i = 0; i <= h->param.i_bframe+1; i++ )
+ {
+ CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+ CHECKED_MALLOC( frame->lowres_inter_types[j][i], (i_mb_count+3)/4 * sizeof(uint8_t) );
+ }
+ frame->i_intra_cost = frame->lowres_costs[0][0];
+ memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+ }
+ if( h->param.rc.i_aq_mode )
+ {
+ CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+ if( h->frames.b_have_lowres )
+ /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
+ CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+ }
}
if( x264_pthread_mutex_init( &frame->mutex, NULL ) )
assert( frame->i_reference_count > 0 );
frame->i_reference_count--;
if( frame->i_reference_count == 0 )
- x264_frame_push( h->frames.unused, frame );
- assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
+ x264_frame_push( h->frames.unused[frame->b_fdec], frame );
}
-x264_frame_t *x264_frame_pop_unused( x264_t *h )
+x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec )
{
x264_frame_t *frame;
- if( h->frames.unused[0] )
- frame = x264_frame_pop( h->frames.unused );
+ if( h->frames.unused[b_fdec][0] )
+ frame = x264_frame_pop( h->frames.unused[b_fdec] );
else
- frame = x264_frame_new( h );
+ frame = x264_frame_new( h, b_fdec );
if( !frame )
return NULL;
+ frame->b_last_minigop_bframe = 0;
frame->i_reference_count = 1;
frame->b_intra_calculated = 0;
return frame;
}
} while( !b_ok );
}
+
+/* Delete every frame in a NULL-terminated frame array, then free the array itself.
+ * A NULL list is accepted and ignored: x264_synch_frame_list_init() can fail before
+ * (or while) allocating its array, and teardown must not dereference it in that case. */
+void x264_frame_delete_list( x264_frame_t **list )
+{
+    int i;
+    if( !list )
+        return;
+    for( i = 0; list[i]; i++ )
+        x264_frame_delete( list[i] );
+    x264_free( list );
+}
+
+int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
+{ /* Set up an empty synchronized list with capacity max_size. Returns 0 on success, -1 on failure. */
+ if( max_size < 0 )
+ return -1;
+ slist->i_max_size = max_size;
+ slist->i_size = 0;
+ CHECKED_MALLOCZERO( slist->list, (max_size+1) * sizeof(x264_frame_t*) ); /* one extra zeroed slot keeps the array NULL-terminated */
+ if( x264_pthread_mutex_init( &slist->mutex, NULL ) ||
+ x264_pthread_cond_init( &slist->cv_fill, NULL ) ||
+ x264_pthread_cond_init( &slist->cv_empty, NULL ) )
+ return -1; /* NOTE(review): slist->list stays allocated on this path */
+ return 0;
+fail: /* presumably the jump target of CHECKED_MALLOCZERO on allocation failure; macro is defined elsewhere */
+ return -1;
+}
+
+void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
+{ /* Destroy the list's mutex and condition variables, then delete all frames still stored in it. */
+ x264_pthread_mutex_destroy( &slist->mutex );
+ x264_pthread_cond_destroy( &slist->cv_fill );
+ x264_pthread_cond_destroy( &slist->cv_empty );
+ x264_frame_delete_list( slist->list ); /* deletes the frames and frees the array itself */
+}
+
+/* Append a frame to the tail of a synchronized list, blocking on cv_empty
+ * for as long as the list is at capacity, then wake anyone waiting on cv_fill. */
+void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
+{
+    x264_pthread_mutex_lock( &slist->mutex );
+    for( ;; )
+    {
+        if( slist->i_size != slist->i_max_size )
+            break;
+        x264_pthread_cond_wait( &slist->cv_empty, &slist->mutex );
+    }
+    slist->list[ slist->i_size ] = frame;
+    slist->i_size++;
+    x264_pthread_mutex_unlock( &slist->mutex );
+    /* Signalled after the unlock, as before. */
+    x264_pthread_cond_broadcast( &slist->cv_fill );
+}
+
+/* Return the number of frames currently stored, read while holding the list mutex. */
+int x264_synch_frame_list_get_size( x264_synch_frame_list_t *slist )
+{
+    x264_pthread_mutex_lock( &slist->mutex );
+    int count = slist->i_size;
+    x264_pthread_mutex_unlock( &slist->mutex );
+    return count;
+}
int i_frame; /* Presentation frame number */
int i_frame_num; /* Coded frame number */
int b_kept_as_ref;
+ uint8_t b_fdec;
+ uint8_t b_last_minigop_bframe; /* this frame is the last b in a sequence of bframes */
+ uint8_t i_bframes; /* number of bframes following this nonb in coded order */
float f_qp_avg_rc; /* QPs as decided by ratecontrol */
float f_qp_avg_aq; /* QPs as decided by AQ in addition to ratecontrol */
} x264_frame_t;
+/* synchronized frame list */
+typedef struct
+{
+ x264_frame_t **list; /* NULL-terminated array, allocated with i_max_size+1 slots */
+ int i_max_size; /* capacity; x264_synch_frame_list_push blocks while i_size == i_max_size */
+ int i_size; /* number of frames currently stored */
+ x264_pthread_mutex_t mutex; /* taken by push / get_size around accesses to i_size and list */
+ x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
+ x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
+} x264_synch_frame_list_t;
+
typedef void (*x264_deblock_inter_t)( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
typedef void (*x264_deblock_intra_t)( uint8_t *pix, int stride, int alpha, int beta );
typedef struct
x264_deblock_intra_t deblock_h_chroma_intra;
} x264_deblock_function_t;
-x264_frame_t *x264_frame_new( x264_t *h );
+x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
void x264_frame_delete( x264_frame_t *frame );
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src );
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
x264_frame_t *x264_frame_shift( x264_frame_t **list );
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
-x264_frame_t *x264_frame_pop_unused( x264_t *h );
+x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
void x264_frame_sort( x264_frame_t **list, int b_dts );
+void x264_frame_delete_list( x264_frame_t **list );
+
+int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
+void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
+void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
+int x264_synch_frame_list_get_size( x264_synch_frame_list_t *slist );
+
#define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
#define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
for( j=0; j<3; j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
- CHECKED_MALLOCZERO( h->mb.intra_border_backup[i][j], h->fdec->i_stride[j] );
+ CHECKED_MALLOCZERO( h->mb.intra_border_backup[i][j], (h->sps->i_mb_width*16+32)>>!!j );
h->mb.intra_border_backup[i][j] += 8;
}
#define x264_pthread_cond_destroy pthread_cond_destroy
#define x264_pthread_cond_broadcast pthread_cond_broadcast
#define x264_pthread_cond_wait pthread_cond_wait
+#define x264_pthread_attr_t pthread_attr_t
+#define x264_pthread_attr_init pthread_attr_init
+#define x264_pthread_attr_destroy pthread_attr_destroy
#else
#define x264_pthread_mutex_t int
#define x264_pthread_mutex_init(m,f) 0
#define x264_pthread_cond_destroy(c)
#define x264_pthread_cond_broadcast(c)
#define x264_pthread_cond_wait(c,m)
+#define x264_pthread_attr_t int
+#define x264_pthread_attr_init(a) 0
+#define x264_pthread_attr_destroy(a)
#endif
#define WORD_SIZE sizeof(void*)
}
#endif
+#if defined(SYS_LINUX) && defined(HAVE_PTHREAD)
+#include <unistd.h>
+#define x264_lower_thread_priority(p) { UNUSED int nice_ret = nice(p); }
+#else
+#define x264_lower_thread_priority(p)
+#endif
+
#endif /* X264_OSDEP_H */
cglobal x264_stack_align
push ebp
mov ebp, esp
- sub esp, 4
+ sub esp, 8
and esp, ~15
mov ecx, [ebp+8]
mov edx, [ebp+12]
mov [esp], edx
+ mov edx, [ebp+16]
+ mov [esp+4], edx
call ecx
leave
ret
void x264_slicetype_decide( x264_t *h );
int x264_lowres_context_alloc( x264_t *h );
+void x264_slicetype_analyse( x264_t *h, int keyframe );
+
+int x264_lookahead_init( x264_t *h, int i_slicetype_length );
+int x264_lookahead_is_empty( x264_t *h );
+void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame );
+void x264_lookahead_get_frames( x264_t *h );
+void x264_lookahead_delete( x264_t *h );
+
#endif
return -1;
}
- if( h->param.i_threads == 0 )
+ if( h->param.i_threads == X264_THREADS_AUTO )
h->param.i_threads = x264_cpu_num_processors() * 3/2;
h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREAD_MAX );
if( h->param.i_threads > 1 )
h->param.rc.b_mb_tree = 0;
if( h->param.rc.f_qcompress == 1 )
h->param.rc.b_mb_tree = 0;
+#ifdef HAVE_PTHREAD
+ if( h->param.i_sync_lookahead )
+ h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->param.i_threads + h->param.i_bframe, X264_LOOKAHEAD_MAX );
+ if( h->param.rc.b_stat_read || h->param.i_threads == 1 )
+ h->param.i_sync_lookahead = 0;
+#else
+ h->param.i_sync_lookahead = 0;
+#endif
h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
&& h->param.i_bframe
{
x264_t *h;
char buf[1000], *p;
- int i;
+ int i, i_slicetype_length;
CHECKED_MALLOCZERO( h, sizeof(x264_t) );
h->frames.i_delay = h->param.i_bframe;
if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
+ i_slicetype_length = h->frames.i_delay;
h->frames.i_delay += h->param.i_threads - 1;
h->frames.i_delay = X264_MIN( h->frames.i_delay, X264_LOOKAHEAD_MAX );
+ h->frames.i_delay += h->param.i_sync_lookahead;
h->frames.i_max_ref0 = h->param.i_frame_reference;
h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
h->frames.i_last_idr = - h->param.i_keyint_max;
h->frames.i_input = 0;
- h->frames.last_nonb = NULL;
+
+ CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
+ /* Allocate room for max refs plus a few extra just in case. */
+ CHECKED_MALLOCZERO( h->frames.unused[1], (h->param.i_threads + 20) * sizeof(x264_frame_t *) );
+ CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
+ + h->param.i_threads + 3) * sizeof(x264_frame_t *) );
h->i_ref0 = 0;
h->i_ref1 = 0;
h->thread[0] = h;
h->i_thread_num = 0;
- for( i = 1; i < h->param.i_threads; i++ )
+ for( i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
for( i = 0; i < h->param.i_threads; i++ )
{
if( i > 0 )
*h->thread[i] = *h;
- h->thread[i]->fdec = x264_frame_pop_unused( h );
+ h->thread[i]->fdec = x264_frame_pop_unused( h, 1 );
if( !h->thread[i]->fdec )
goto fail;
CHECKED_MALLOC( h->thread[i]->out.p_bitstream, h->out.i_bitstream );
goto fail;
}
+ if( x264_lookahead_init( h, i_slicetype_length ) )
+ goto fail;
+
if( x264_ratecontrol_new( h ) < 0 )
goto fail;
static inline int x264_reference_update( x264_t *h )
{
- int i;
-
if( h->fdec->i_frame >= 0 )
h->i_frame++;
if( h->param.i_threads > 1 )
{
x264_frame_push_unused( h, h->fdec );
- h->fdec = x264_frame_pop_unused( h );
+ h->fdec = x264_frame_pop_unused( h, 1 );
if( !h->fdec )
return -1;
}
return 0;
}
- /* move lowres copy of the image to the ref frame */
- for( i = 0; i < 4; i++)
- {
- XCHG( uint8_t*, h->fdec->lowres[i], h->fenc->lowres[i] );
- XCHG( uint8_t*, h->fdec->buffer_lowres[i], h->fenc->buffer_lowres[i] );
- }
-
- /* adaptive B decision needs a pointer, since it can't use the ref lists */
- if( h->sh.i_type != SLICE_TYPE_B )
- h->frames.last_nonb = h->fdec;
-
/* move frame in the buffer */
x264_frame_push( h->frames.reference, h->fdec );
if( h->frames.reference[h->frames.i_max_dpb] )
x264_frame_push_unused( h, x264_frame_shift( h->frames.reference ) );
- h->fdec = x264_frame_pop_unused( h );
+ h->fdec = x264_frame_pop_unused( h, 1 );
if( !h->fdec )
return -1;
return 0;
{
int i_frame_size = 0;
int i_slice_num = 0;
+ if( h->param.i_sync_lookahead )
+ x264_lower_thread_priority( 10 );
#ifdef HAVE_MMX
/* Misalign mask has to be set separately for each thread. */
if( pic_in != NULL )
{
/* 1: Copy the picture to a frame and move it to a buffer */
- x264_frame_t *fenc = x264_frame_pop_unused( h );
+ x264_frame_t *fenc = x264_frame_pop_unused( h, 0 );
if( !fenc )
return -1;
fenc->i_frame = h->frames.i_input++;
- x264_frame_push( h->frames.next, fenc );
-
if( h->frames.b_have_lowres )
x264_frame_init_lowres( h, fenc );
else if( h->param.rc.i_aq_mode )
x264_adaptive_quant_frame( h, fenc );
+ /* 2: Place the frame into the queue for its slice type decision */
+ x264_lookahead_put_frame( h, fenc );
+
if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
{
- /* Nothing yet to encode */
- /* waiting for filling bframe buffer */
+ /* Nothing yet to encode, waiting for filling of buffers */
pic_out->i_type = X264_TYPE_AUTO;
return 0;
}
}
-
- if( h->frames.current[0] == NULL )
+ else
{
- int bframes = 0;
- /* 2: Select frame types */
- if( h->frames.next[0] == NULL )
- {
- if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
- return -1;
- return 0;
- }
+ /* signal kills for lookahead thread */
+ h->lookahead->b_exit_thread = 1;
+ x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
+ }
- x264_stack_align( x264_slicetype_decide, h );
+ /* 3: The picture is analyzed in the lookahead */
+ if( !h->frames.current[0] )
+ x264_lookahead_get_frames( h );
- /* 3: move some B-frames and 1 non-B to encode queue */
- while( IS_X264_TYPE_B( h->frames.next[bframes]->i_type ) )
- bframes++;
- x264_frame_push( h->frames.current, x264_frame_shift( &h->frames.next[bframes] ) );
- /* FIXME: when max B-frames > 3, BREF may no longer be centered after GOP closing */
- if( h->param.b_bframe_pyramid && bframes > 1 )
- {
- x264_frame_t *mid = x264_frame_shift( &h->frames.next[bframes/2] );
- mid->i_type = X264_TYPE_BREF;
- x264_frame_push( h->frames.current, mid );
- bframes--;
- }
- while( bframes-- )
- x264_frame_push( h->frames.current, x264_frame_shift( h->frames.next ) );
- }
+ if( !h->frames.current[0] && x264_lookahead_is_empty( h ) )
+ return x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
/* ------------------- Get frame to be encoded ------------------------- */
/* 4: get picture to encode */
h->fenc = x264_frame_shift( h->frames.current );
- if( h->fenc == NULL )
- {
- /* Nothing yet to encode (ex: waiting for I/P with B frames) */
- /* waiting for filling bframe buffer */
- pic_out->i_type = X264_TYPE_AUTO;
- return 0;
- }
-
if( h->fenc->param )
{
x264_encoder_reconfig( h, h->fenc->param );
if( h->fenc->i_type == X264_TYPE_IDR )
{
h->frames.i_last_idr = h->fenc->i_frame;
+ h->i_frame_num = 0;
}
/* ------------------- Setup frame context ----------------------------- */
|| h->stat.i_mb_count[SLICE_TYPE_P][I_PCM]
|| h->stat.i_mb_count[SLICE_TYPE_B][I_PCM];
+ x264_lookahead_delete( h );
+
for( i=0; i<h->param.i_threads; i++ )
{
// don't strictly have to wait for the other threads, but it's simpler than canceling them
h = h->thread[ h->i_thread_phase % h->param.i_threads ];
/* frames */
- for( i = 0; h->frames.current[i]; i++ )
- {
- assert( h->frames.current[i]->i_reference_count == 1 );
- x264_frame_delete( h->frames.current[i] );
- }
- for( i = 0; h->frames.next[i]; i++ )
- {
- assert( h->frames.next[i]->i_reference_count == 1 );
- x264_frame_delete( h->frames.next[i] );
- }
- for( i = 0; h->frames.unused[i]; i++ )
- {
- assert( h->frames.unused[i]->i_reference_count == 0 );
- x264_frame_delete( h->frames.unused[i] );
- }
+ x264_frame_delete_list( h->frames.unused[0] );
+ x264_frame_delete_list( h->frames.unused[1] );
+ x264_frame_delete_list( h->frames.current );
h = h->thread[0];
h = h->thread[ h->i_thread_phase % h->param.i_threads ];
for( i=0; h->frames.current[i]; i++ )
delayed_frames++;
- for( i=0; h->frames.next[i]; i++ )
- delayed_frames++;
+ delayed_frames += x264_synch_frame_list_get_size( &h->lookahead->ifbuf );
+ delayed_frames += x264_synch_frame_list_get_size( &h->lookahead->next );
+ delayed_frames += x264_synch_frame_list_get_size( &h->lookahead->ofbuf );
return delayed_frames;
}
--- /dev/null
+/*****************************************************************************
+ * lookahead.c: Lookahead slicetype decisions for x264
+ *****************************************************************************
+ * Lookahead.c and associated modifications:
+ * Copyright (C) 2008 Avail Media
+ *
+ * Authors: Michael Kazmier <mkazmier@availmedia.com>
+ * Alex Giladi <agiladi@availmedia.com>
+ * Steven Walters <kemuri9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/* LOOKAHEAD (threaded and non-threaded mode)
+ *
+ * Lookahead types:
+ * [1] Slice type / scene cut;
+ *
+ * In non-threaded mode, we run the existing slicetype decision code as it was.
+ * In threaded mode, we run in a separate thread, that lives between the calls
+ * to x264_encoder_open() and x264_encoder_close(), and performs lookahead for
+ * the number of frames specified in rc_lookahead. Recommended setting is
+ * # of bframes + # of threads.
+ */
+#include "common/common.h"
+#include "common/cpu.h"
+#include "analyse.h"
+
+static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
+{ /* Move `count` frames from the head of src to the tail of dst. Takes no locks itself. */
+ int i = count;
+ while( i-- )
+ {
+ assert( dst->i_size != dst->i_max_size ); /* dst must still have room */
+ assert( src->i_size ); /* src must still have a frame to give */
+ dst->list[ dst->i_size++ ] = x264_frame_shift( src->list );
+ src->i_size--;
+ }
+ if( count ) /* only wake waiters when something actually moved */
+ {
+ x264_pthread_cond_broadcast( &dst->cv_fill );
+ x264_pthread_cond_broadcast( &src->cv_empty );
+ }
+}
+
+/* Make new_nonb the lookahead's reference non-B frame: release the reference
+ * held on the previous one (if any) and take a reference on the new one. */
+static void x264_lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
+{
+    x264_frame_t *prev_nonb = h->lookahead->last_nonb;
+    if( prev_nonb )
+        x264_frame_push_unused( h, prev_nonb );
+    new_nonb->i_reference_count++;
+    h->lookahead->last_nonb = new_nonb;
+}
+
+#ifdef HAVE_PTHREAD
+static void x264_lookahead_slicetype_decide( x264_t *h )
+{ /* Threaded path: decide slice types for the head of `next`, then hand one
+ * run of B-frames plus the following non-B frame over to `ofbuf`. */
+ int bframes = 0;
+ x264_stack_align( x264_slicetype_decide, h );
+
+ while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) /* count leading B-frames */
+ bframes++;
+ x264_lookahead_update_last_nonb( h, h->lookahead->next.list[bframes] ); /* first non-B after them */
+
+ x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
+ while( h->lookahead->ofbuf.i_size == h->lookahead->ofbuf.i_max_size ) /* wait until the encoder drains ofbuf */
+ x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_empty, &h->lookahead->ofbuf.mutex );
+
+ x264_pthread_mutex_lock( &h->lookahead->next.mutex );
+ x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, bframes + 1 );
+ x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
+
+ /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
+ if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
+ x264_stack_align( x264_slicetype_analyse, h, 1 ); /* note: runs while ofbuf.mutex is still held */
+
+ x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
+}
+
+/* Body of the lookahead thread.
+ * Repeatedly moves frames from ifbuf into next and, once next holds more than
+ * i_slicetype_length frames, runs a slice type decision that feeds ofbuf.
+ * When b_exit_thread is raised it drains ifbuf and next completely, then
+ * announces its termination to any consumer waiting on ofbuf. */
+static void x264_lookahead_thread( x264_t *h )
+{
+    int shift;
+#ifdef HAVE_MMX
+    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
+        x264_cpu_mask_misalign_sse();
+#endif
+    h->lookahead->b_thread_active = 1;
+    while( !h->lookahead->b_exit_thread )
+    {
+        x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
+        x264_pthread_mutex_lock( &h->lookahead->next.mutex );
+        /* Take as many input frames as next has room for. */
+        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
+        x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
+        x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
+        if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length )
+        {
+            /* Not enough frames to decide yet: sleep until more input arrives or exit is requested. */
+            while( !h->lookahead->ifbuf.i_size && !h->lookahead->b_exit_thread )
+                x264_pthread_cond_wait( &h->lookahead->ifbuf.cv_fill, &h->lookahead->ifbuf.mutex );
+            x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
+        }
+        else
+        {
+            x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
+            x264_lookahead_slicetype_decide( h );
+        }
+    }   /* end of input frames */
+    x264_pthread_mutex_lock( &h->lookahead->next.mutex );
+    x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
+    x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size );
+    x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
+    x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
+    while( h->lookahead->next.i_size )
+        x264_lookahead_slicetype_decide( h );
+    /* Clear the active flag under ofbuf.mutex and wake consumers: the encoder waits on
+     * ofbuf.cv_fill with the predicate (!ofbuf.i_size && b_thread_active), so changing
+     * the flag without the lock and a broadcast could leave it asleep forever. */
+    x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
+    h->lookahead->b_thread_active = 0;
+    x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_fill );
+    x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
+}
+#endif
+
+/* Allocate the shared lookahead state, attach it to every encoder thread context,
+ * create the three frame lists and, when i_sync_lookahead is non-zero, start the
+ * dedicated lookahead thread on the extra context h->thread[i_threads].
+ * Returns 0 on success, -1 on failure. */
+int x264_lookahead_init( x264_t *h, int i_slicetype_length )
+{
+    x264_lookahead_t *look;
+    CHECKED_MALLOCZERO( look, sizeof(x264_lookahead_t) );
+    int i;
+    for( i = 0; i < h->param.i_threads; i++ )
+        h->thread[i]->lookahead = look;
+
+    look->i_last_idr = - h->param.i_keyint_max;
+    look->b_analyse_keyframe = (h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead))
+                               && !h->param.rc.b_stat_read;
+    look->i_slicetype_length = i_slicetype_length;
+
+    /* init frame lists */
+    if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
+        x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
+        x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
+        goto fail;
+
+    /* No lookahead thread requested: slice types are decided on the fly by the encoder. */
+    if( !h->param.i_sync_lookahead )
+        return 0;
+
+    x264_t *look_h = h->thread[h->param.i_threads];
+    *look_h = *h;
+    if( x264_macroblock_cache_init( look_h ) )
+        goto fail;
+
+    UNUSED x264_pthread_attr_t attr;
+    if( x264_pthread_attr_init( &attr ) )
+        goto fail;
+#if defined(USE_REAL_PTHREAD) && !defined(SYS_LINUX)
+    int offset = sched_get_priority_max( SCHED_OTHER );
+    x264_log( h, X264_LOG_DEBUG, "setting priority of lookahead thread to %d\n", offset );
+    struct sched_param sp;
+    pthread_attr_getschedparam( &attr, &sp );
+    sp.sched_priority = offset;
+    pthread_attr_setschedparam( &attr, &sp );
+#endif
+
+    /* Mark the thread active before it is started: the encoder uses this flag to decide
+     * whether to wait for output, and must not observe 0 merely because the new thread
+     * has not been scheduled yet. */
+    look->b_thread_active = 1;
+    if( x264_pthread_create( &look_h->thread_handle, &attr, (void *)x264_lookahead_thread, look_h ) )
+    {
+        look->b_thread_active = 0;
+        /* Do not leak the attribute object on the failure path. */
+        x264_pthread_attr_destroy( &attr );
+        goto fail;
+    }
+
+    x264_pthread_attr_destroy( &attr );
+
+    return 0;
+fail:
+    x264_free( look );
+    return -1;
+}
+
+/* Stop the lookahead thread (if one is running) and release all lookahead state. */
+void x264_lookahead_delete( x264_t *h )
+{
+    if( h->param.i_sync_lookahead )
+    {
+        /* Raise the exit flag while holding ifbuf.mutex: the lookahead thread tests
+         * b_exit_thread and then sleeps on ifbuf.cv_fill under that mutex, so an
+         * unlocked set+broadcast could slip in between its test and its wait and
+         * be lost, leaving the join below blocked forever. */
+        x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
+        h->lookahead->b_exit_thread = 1;
+        x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
+        x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
+        x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
+        x264_macroblock_cache_end( h->thread[h->param.i_threads] );
+        x264_free( h->thread[h->param.i_threads] );
+    }
+    /* The lookahead only holds a counted reference on last_nonb; the frame may still be
+     * queued elsewhere. Drop the reference instead of deleting the frame outright, and do
+     * it before the lists are destroyed so the frame is still valid when it is touched. */
+    if( h->lookahead->last_nonb )
+        x264_frame_push_unused( h, h->lookahead->last_nonb );
+    x264_synch_frame_list_delete( &h->lookahead->ifbuf );
+    x264_synch_frame_list_delete( &h->lookahead->next );
+    x264_synch_frame_list_delete( &h->lookahead->ofbuf );
+    x264_free( h->lookahead );
+}
+
+/* Queue a new input frame for slice type decision: into ifbuf when a lookahead
+ * thread is running, otherwise straight into next. */
+void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
+{
+    x264_synch_frame_list_t *target = h->param.i_sync_lookahead ? &h->lookahead->ifbuf
+                                                                 : &h->lookahead->next;
+    x264_synch_frame_list_push( target, frame );
+}
+
+/* Non-zero when neither ofbuf nor next holds any frame (ifbuf is not consulted). */
+int x264_lookahead_is_empty( x264_t *h )
+{
+    if( x264_synch_frame_list_get_size( &h->lookahead->ofbuf ) )
+        return 0;
+    return !x264_synch_frame_list_get_size( &h->lookahead->next );
+}
+
+static void x264_lookahead_encoder_shift( x264_t *h )
+{ /* Move one non-B frame plus the B-frames preceding it from ofbuf into
+ * h->frames.current, non-B first. In the threaded path the caller holds ofbuf.mutex. */
+ int bframes = 0;
+ int i_frames = 0; /* advances in lockstep with bframes in the loop below */
+
+ while( h->lookahead->ofbuf.list[i_frames] )
+ {
+ while( h->lookahead->b_thread_active && !h->lookahead->ofbuf.i_size )
+ x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_fill, &h->lookahead->ofbuf.mutex );
+ if( IS_X264_TYPE_B( h->lookahead->ofbuf.list[bframes]->i_type ) )
+ bframes++;
+ else
+ break; /* found the non-B frame that closes this group */
+ i_frames++;
+ }
+ if( h->lookahead->ofbuf.list[i_frames] ) /* only act when a non-B frame was found */
+ {
+ x264_frame_push( h->frames.current, x264_frame_shift( &h->lookahead->ofbuf.list[bframes] ) ); /* the non-B goes first */
+ h->lookahead->ofbuf.i_size--;
+ if( h->param.b_bframe_pyramid && bframes > 1 )
+ {
+ x264_frame_t *mid = x264_frame_shift( &h->lookahead->ofbuf.list[bframes/2] ); /* middle B becomes a BREF */
+ h->lookahead->ofbuf.i_size--;
+ mid->i_type = X264_TYPE_BREF;
+ x264_frame_push( h->frames.current, mid );
+ bframes--;
+ }
+ while( bframes-- ) /* remaining B-frames, in list order */
+ {
+ x264_frame_push( h->frames.current, x264_frame_shift( h->lookahead->ofbuf.list ) );
+ h->lookahead->ofbuf.i_size--;
+ }
+ x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_empty ); /* ofbuf has room again */
+ }
+}
+
+void x264_lookahead_get_frames( x264_t *h )
+{ /* Refill h->frames.current with the next group of frames whose types are decided. */
+ if( h->param.i_sync_lookahead )
+ { /* We have a lookahead thread, so get frames from there */
+ x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
+ while( !h->lookahead->ofbuf.i_size && h->lookahead->b_thread_active ) /* block until output exists or the thread is no longer active */
+ x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_fill, &h->lookahead->ofbuf.mutex );
+ x264_lookahead_encoder_shift( h );
+ x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
+ }
+ else
+ { /* We are not running a lookahead thread, so perform all the slicetype decide on the fly */
+
+ if( h->frames.current[0] || !h->lookahead->next.i_size ) /* nothing to do: frames already queued, or no input */
+ return;
+
+ x264_stack_align( x264_slicetype_decide, h );
+
+ int bframes=0;
+ while( IS_X264_TYPE_B( h->lookahead->next.list[bframes]->i_type ) ) /* count leading B-frames */
+ bframes++;
+
+ x264_lookahead_update_last_nonb( h, h->lookahead->next.list[bframes] );
+ x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, bframes + 1 ); /* B-frames plus the non-B after them */
+
+ /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
+ if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
+ x264_stack_align( x264_slicetype_analyse, h, 1 );
+
+ x264_lookahead_encoder_shift( h );
+ }
+}
}
if( h->sh.i_type != SLICE_TYPE_B )
- {
- rc->bframes = 0;
- while( h->frames.current[rc->bframes] && IS_X264_TYPE_B(h->frames.current[rc->bframes]->i_type) )
- rc->bframes++;
- }
+ rc->bframes = h->fenc->i_bframes;
if( i_force_qp )
{
if( h->sh.i_type == SLICE_TYPE_B )
{
rc->bframe_bits += bits;
- if( !h->frames.current[0] || !IS_X264_TYPE_B(h->frames.current[0]->i_type) )
+ if( h->fenc->b_last_minigop_bframe )
{
update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
/* Uses strings due to the fact that the speed of the control functions is
negligable compared to the cost of running slicetype_frame_cost, and because
it makes debugging easier. */
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, char (*best_paths)[X264_LOOKAHEAD_MAX] )
{
char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
int num_paths = X264_MIN(max_bframes+1, length);
int icost = frame->i_cost_est[0][0];
int pcost = frame->i_cost_est[p1-p0][0];
float f_bias;
- int i_gop_size = frame->i_frame - h->frames.i_last_idr;
+ int i_gop_size = frame->i_frame - h->lookahead->i_last_idr;
float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
/* magic numbers pulled out of thin air */
float f_thresh_min = f_thresh_max * h->param.i_keyint_min
return res;
}
-static void x264_slicetype_analyse( x264_t *h, int keyframe )
+void x264_slicetype_analyse( x264_t *h, int keyframe )
{
x264_mb_analysis_t a;
x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
- int num_frames;
- int keyint_limit;
- int i,j;
+ int num_frames, keyint_limit, idr_frame_type, i, j;
int i_mb_count = NUM_MBS;
int cost1p0, cost2p0, cost1b1, cost2p1;
- int idr_frame_type;
+ int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
+ if( h->param.b_deterministic )
+ i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
assert( h->frames.b_have_lowres );
- if( !h->frames.last_nonb )
+ if( !h->lookahead->last_nonb )
return;
- frames[0] = h->frames.last_nonb;
- for( j = 0; h->frames.next[j] && h->frames.next[j]->i_type == X264_TYPE_AUTO; j++ )
- frames[j+1] = h->frames.next[j];
+ frames[0] = h->lookahead->last_nonb;
+ for( j = 0; j < i_max_search && h->lookahead->next.list[j]->i_type == X264_TYPE_AUTO; j++ )
+ frames[j+1] = h->lookahead->next.list[j];
if( !j )
return;
- keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
+ keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_idr - 1;
num_frames = X264_MIN( j, keyint_limit );
x264_lowres_context_init( h, &a );
- idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
+ idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
/* This is important psy-wise: if we have a non-scenecut keyframe,
* there will be significant visual artifacts if the frames just before
{
/* Perform the frametype analysis. */
for( n = 2; n < num_frames-1; n++ )
- x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
+ x264_slicetype_path( h, &a, frames, n, max_bframes, best_paths );
if( num_frames > 1 )
{
num_bframes = strspn( best_paths[num_frames-2], "B" );
int bframes;
int i;
- if( h->frames.next[0] == NULL )
+ if( !h->lookahead->next.i_size )
return;
if( h->param.rc.b_stat_read )
{
/* Use the frame types from the first pass */
- for( i = 0; h->frames.next[i] != NULL; i++ )
- h->frames.next[i]->i_type =
- x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
+ for( i = 0; i < h->lookahead->next.i_size; i++ )
+ h->lookahead->next.list[i]->i_type =
+ x264_ratecontrol_slice_type( h, h->lookahead->next.list[i]->i_frame );
}
else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
|| h->param.i_scenecut_threshold
for( bframes = 0;; bframes++ )
{
- frm = h->frames.next[bframes];
+ frm = h->lookahead->next.list[bframes];
/* Limit GOP size */
- if( frm->i_frame - h->frames.i_last_idr >= h->param.i_keyint_max )
+ if( frm->i_frame - h->lookahead->i_last_idr >= h->param.i_keyint_max )
{
if( frm->i_type == X264_TYPE_AUTO )
frm->i_type = X264_TYPE_IDR;
if( frm->i_type == X264_TYPE_IDR )
{
/* Close GOP */
+ h->lookahead->i_last_idr = frm->i_frame;
if( bframes > 0 )
{
bframes--;
- h->frames.next[bframes]->i_type = X264_TYPE_P;
- }
- else
- {
- h->i_frame_num = 0;
+ h->lookahead->next.list[bframes]->i_type = X264_TYPE_P;
}
}
- if( bframes == h->param.i_bframe
- || h->frames.next[bframes+1] == NULL )
+ if( bframes == h->param.i_bframe ||
+ !h->lookahead->next.list[bframes+1] )
{
if( IS_X264_TYPE_B( frm->i_type ) )
x264_log( h, X264_LOG_WARNING, "specified frame type is not compatible with max B-frames\n" );
else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
}
+
+ if( bframes )
+ h->lookahead->next.list[bframes-1]->b_last_minigop_bframe = 1;
+ h->lookahead->next.list[bframes]->i_bframes = bframes;
+
+ /* calculate the frame costs ahead of time for x264_rc_analyse_slice while we still have lowres */
+ if( h->param.rc.i_rc_method != X264_RC_CQP )
+ {
+ x264_mb_analysis_t a;
+ x264_frame_t *frames[X264_BFRAME_MAX+2] = { NULL, };
+ int p0=0, p1, b;
+
+ x264_lowres_context_init( h, &a );
+
+ if( IS_X264_TYPE_I( h->lookahead->next.list[bframes]->i_type ) )
+ p1 = b = 0;
+ else // P
+ p1 = b = bframes + 1;
+ frames[p0] = h->lookahead->last_nonb;
+ frames[b] = h->lookahead->next.list[bframes];
+
+ x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+ }
}
int x264_rc_analyse_slice( x264_t *h )
{
- x264_mb_analysis_t a;
- x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
+ x264_frame_t *frames[X264_BFRAME_MAX+2] = { NULL, };
int p0=0, p1, b;
int cost;
- x264_lowres_context_init( h, &a );
-
if( IS_X264_TYPE_I(h->fenc->i_type) )
- {
p1 = b = 0;
- /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
- if( h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead) )
- {
- h->frames.last_nonb = h->fenc;
- x264_slicetype_analyse( h, 1 );
- }
- }
- else if( X264_TYPE_P == h->fenc->i_type )
- {
- p1 = 0;
- while( h->frames.current[p1] && IS_X264_TYPE_B( h->frames.current[p1]->i_type ) )
- p1++;
- p1++;
- b = p1;
- }
- else //B
- {
- p1 = (h->fref1[0]->i_poc - h->fref0[0]->i_poc)/2;
- b = (h->fref1[0]->i_poc - h->fenc->i_poc)/2;
- frames[p1] = h->fref1[0];
- }
+ else // P
+ p1 = b = h->fenc->i_bframes + 1;
frames[p0] = h->fref0[0];
frames[b] = h->fenc;
- cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+ /* cost should have been already calculated by x264_slicetype_decide */
+ cost = frames[b]->i_cost_est[b-p0][p1-b];
+ assert( cost >= 0 );
if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
cost = x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
H0( " --ssim Enable SSIM computation\n" );
H0( " --threads <integer> Force a specific number of threads\n" );
H1( " --thread-input Run Avisynth in its own thread\n" );
+ H1( " --sync-lookahead <integer> Number of buffer frames for threaded lookahead\n" );
H1( " --non-deterministic Slightly improve quality of SMP, at the cost of repeatability\n" );
H1( " --asm <integer> Override CPU detection\n" );
H1( " --no-asm Disable all CPU optimizations\n" );
{ "slice-max-mbs", required_argument, NULL, 0 },
{ "slices", required_argument, NULL, 0 },
{ "thread-input", no_argument, NULL, OPT_THREAD_INPUT },
+ { "sync-lookahead", required_argument, NULL, 0 },
{ "non-deterministic", no_argument, NULL, 0 },
{ "psnr", no_argument, NULL, 0 },
{ "ssim", no_argument, NULL, 0 },
#ifdef HAVE_PTHREAD
if( b_thread_input || param->i_threads > 1
- || (param->i_threads == 0 && x264_cpu_num_processors() > 1) )
+ || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) )
{
if( open_file_thread( NULL, &opt->hin, param ) )
{
#include <stdarg.h>
-#define X264_BUILD 74
+#define X264_BUILD 75
/* x264_t:
* opaque handler for encoder */
/* Threading */
#define X264_THREADS_AUTO 0 /* Automatically select optimal number of threads */
+#define X264_SYNC_LOOKAHEAD_AUTO -1 /* Automatically select optimal lookahead thread buffer size */
/* Zones: override ratecontrol or other options for specific sections of the video.
* See x264_encoder_reconfig() for which options can be changed.
unsigned int cpu;
int i_threads; /* encode multiple frames in parallel */
int b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
+ int i_sync_lookahead; /* threaded lookahead buffer */
/* Video Properties */
int i_width;