#endif
//#define DEBUG_MB_TYPE
-//#define DEBUG_DUMP_FRAME
-//#define DEBUG_BENCHMARK
-
-#ifdef DEBUG_BENCHMARK
-static int64_t i_mtime_encode_frame = 0;
-static int64_t i_mtime_analyse = 0;
-static int64_t i_mtime_encode = 0;
-static int64_t i_mtime_write = 0;
-static int64_t i_mtime_filter = 0;
-#define TIMER_START( d ) \
- { \
- int64_t d##start = x264_mdate();
-
-#define TIMER_STOP( d ) \
- d += x264_mdate() - d##start;\
- }
-#else
-#define TIMER_START( d )
-#define TIMER_STOP( d )
-#endif
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
return (float)(-10.0 * log( f_mse ) / log( 10.0 ));
}
-#ifdef DEBUG_DUMP_FRAME
-static void x264_frame_dump( x264_t *h, x264_frame_t *fr, char *name )
+static void x264_frame_dump( x264_t *h ) /* writes the planes of h->fdec into the file named by h->param.psz_dump_yuv */
{
- FILE *f = fopen( name, "r+b" );
+ FILE *f = fopen( h->param.psz_dump_yuv, "r+b" ); /* "r+b" does not create the file; if the open fails the function returns without writing */
int i, y;
if( !f )
return;
-
/* Write the frame in display order */
- fseek( f, fr->i_frame * h->param.i_height * h->param.i_width * 3 / 2, SEEK_SET );
-
- for( i = 0; i < fr->i_plane; i++ )
- {
- for( y = 0; y < h->param.i_height / ( i == 0 ? 1 : 2 ); y++ )
- {
- fwrite( &fr->plane[i][y*fr->i_stride[i]], 1, h->param.i_width / ( i == 0 ? 1 : 2 ), f );
- }
- }
+ fseek( f, h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET ); /* seek to slot i_frame, each slot being width*height*3/2 bytes; NOTE(review): product looks like plain int arithmetic and could overflow on long dumps -- confirm field types */
+ for( i = 0; i < h->fdec->i_plane; i++ ) /* one pass per plane of the frame */
+ for( y = 0; y < h->param.i_height >> !!i; y++ ) /* >> !!i halves the row count for every plane except plane 0 */
+ fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f ); /* rows are i_stride apart in memory but only the visible width is written; fwrite result is not checked */
fclose( f );
}
-#endif
/* Fill "default" values */
}
}
+/* Grow the output bitstream buffer when fewer than 2500 bytes remain before the end of the bitstream */
+/* writer (or, with CABAC enabled, the CABAC writer): add 100000 bytes and rebase every pointer into it. */
+static void x264_bitstream_check_buffer( x264_t *h )
+{
+ if( ( h->param.b_cabac && (h->cabac.p_end - h->cabac.p < 2500) )
+ || ( h->out.bs.p_end - h->out.bs.p < 2500 ) )
+ {
+ uint8_t *bs_bak = h->out.p_bitstream; /* old base address, kept to measure how far the buffer moved */
+ intptr_t delta;
+ int i;
+
+ h->out.i_bitstream += 100000; /* fixed growth step, independent of the current size */
+ h->out.p_bitstream = x264_realloc( h->out.p_bitstream, h->out.i_bitstream ); /* NOTE(review): result is used unchecked -- confirm x264_realloc cannot return NULL */
+ delta = h->out.p_bitstream - bs_bak; /* displacement from old base to new base (zero if the buffer did not move) */
+
+ h->out.bs.p_start += delta; /* shift the bitstream writer's start and cursor by the same displacement */
+ h->out.bs.p += delta;
+ h->out.bs.p_end = h->out.p_bitstream + h->out.i_bitstream; /* end is recomputed from the new base and the enlarged size */
+
+ h->cabac.p_start += delta; /* CABAC pointers are rebased unconditionally, even when b_cabac is off */
+ h->cabac.p += delta;
+ h->cabac.p_end = h->out.p_bitstream + h->out.i_bitstream;
+
+ for( i = 0; i <= h->out.i_nal; i++ ) /* inclusive bound: entry i_nal is rebased too -- presumably the NAL currently being written; confirm */
+ h->out.nal[i].p_payload += delta;
+ }
+}
+
/****************************************************************************
*
****************************************************************************
static int x264_validate_parameters( x264_t *h )
{
+#ifdef HAVE_MMX
+ if( !(x264_cpu_detect() & X264_CPU_MMXEXT) )
+ {
+ x264_log( h, X264_LOG_ERROR, "your cpu does not support MMXEXT, but x264 was compiled with asm support\n");
+ x264_log( h, X264_LOG_ERROR, "to run x264, recompile without asm support (configure --disable-asm)\n");
+ return -1;
+ }
+#endif
if( h->param.i_width <= 0 || h->param.i_height <= 0 )
{
x264_log( h, X264_LOG_ERROR, "invalid width x height (%dx%d)\n",
h->param.analyse.b_fast_pskip = 0;
h->param.analyse.i_noise_reduction = 0;
h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
- h->param.rc.i_aq_mode = 0;
}
if( h->param.rc.i_rc_method == X264_RC_CQP )
{
float qp_b = qp_p + 6*log(h->param.rc.f_pb_factor)/log(2);
h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
+ h->param.rc.i_aq_mode = 0;
}
if( ( h->param.i_width % 16 || h->param.i_height % 16 )
|| h->param.rc.i_rc_method == X264_RC_CRF
|| h->param.b_bframe_adaptive
|| h->param.b_pre_scenecut );
+ h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
h->frames.i_last_idr = - h->param.i_keyint_max;
h->frames.i_input = 0;
p = buf + sprintf( buf, "using cpu capabilities:" );
for( i=0; x264_cpu_names[i].flags; i++ )
+ {
+ if( !strcmp(x264_cpu_names[i].name, "SSE2")
+ && param->cpu & (X264_CPU_SSE2_IS_FAST|X264_CPU_SSE2_IS_SLOW) )
+ continue;
+ if( !strcmp(x264_cpu_names[i].name, "SSE3")
+ && (param->cpu & X264_CPU_SSSE3 || !(param->cpu & X264_CPU_CACHELINE_64)) )
+ continue;
if( (param->cpu & x264_cpu_names[i].flags) == x264_cpu_names[i].flags
&& (!i || x264_cpu_names[i].flags != x264_cpu_names[i-1].flags) )
p += sprintf( p, " %s", x264_cpu_names[i].name );
+ }
if( !param->cpu )
p += sprintf( p, " none!" );
x264_log( h, X264_LOG_INFO, "%s\n", buf );
if( x264_ratecontrol_new( h ) < 0 )
return NULL;
-#ifdef DEBUG_DUMP_FRAME
+ if( h->param.psz_dump_yuv )
{
/* create or truncate the reconstructed video file */
- FILE *f = fopen( "fdec.yuv", "w" );
+ FILE *f = fopen( h->param.psz_dump_yuv, "w" );
if( f )
fclose( f );
else
return NULL;
}
}
-#endif
return h;
}
int b_deblock = !h->sh.i_disable_deblocking_filter_idc;
int b_end = mb_y == h->sps->i_mb_height;
int min_y = mb_y - (1 << h->sh.b_mbaff);
-#ifndef DEBUG_DUMP_FRAME
- b_deblock &= b_hpel;
-#endif
+ int max_y = b_end ? h->sps->i_mb_height : mb_y;
+ b_deblock &= b_hpel || h->param.psz_dump_yuv;
if( mb_y & h->sh.b_mbaff )
return;
if( min_y < 0 )
if( b_deblock )
{
- int max_y = b_end ? h->sps->i_mb_height : mb_y;
int y;
for( y = min_y; y < max_y; y += (1 << h->sh.b_mbaff) )
x264_frame_deblock_row( h, y );
{
x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
}
+
+ min_y = X264_MAX( min_y*16-8, 0 );
+ max_y = b_end ? h->param.i_height : mb_y*16-8;
+
+ if( h->param.analyse.b_psnr )
+ {
+ int i;
+ for( i=0; i<3; i++ )
+ h->stat.frame.i_ssd[i] +=
+ x264_pixel_ssd_wxh( &h->pixf,
+ h->fdec->plane[i] + (min_y>>!!i) * h->fdec->i_stride[i], h->fdec->i_stride[i],
+ h->fenc->plane[i] + (min_y>>!!i) * h->fenc->i_stride[i], h->fenc->i_stride[i],
+ h->param.i_width >> !!i, (max_y-min_y) >> !!i );
+ }
+
+ if( h->param.analyse.b_ssim )
+ {
+ x264_emms();
+ /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
+ * and overlap by 4 */
+ min_y += min_y == 0 ? 2 : -6;
+ h->stat.frame.f_ssim +=
+ x264_pixel_ssim_wxh( &h->pixf,
+ h->fdec->plane[0] + 2+min_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
+ h->fenc->plane[0] + 2+min_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
+ h->param.i_width-2, max_y-min_y );
+ }
}
static inline void x264_reference_update( x264_t *h )
static void x264_slice_write( x264_t *h )
{
int i_skip;
- int mb_xy;
+ int mb_xy, i_mb_x, i_mb_y;
int i;
/* init stats */
h->mb.i_last_qp = h->sh.i_qp;
h->mb.i_last_dqp = 0;
- for( mb_xy = h->sh.i_first_mb, i_skip = 0; mb_xy < h->sh.i_last_mb; )
+ i_mb_y = h->sh.i_first_mb / h->sps->i_mb_width;
+ i_mb_x = h->sh.i_first_mb % h->sps->i_mb_width;
+ i_skip = 0;
+
+ while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) < h->sh.i_last_mb )
{
- const int i_mb_y = mb_xy / h->sps->i_mb_width;
- const int i_mb_x = mb_xy % h->sps->i_mb_width;
int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
if( i_mb_x == 0 )
* Slice I: choose I_4x4 or I_16x16 mode
* Slice P: choose between using P mode or intra (4x4 or 16x16)
* */
- TIMER_START( i_mtime_analyse );
x264_macroblock_analyse( h );
- TIMER_STOP( i_mtime_analyse );
/* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
- TIMER_START( i_mtime_encode );
x264_macroblock_encode( h );
- TIMER_STOP( i_mtime_encode );
- TIMER_START( i_mtime_write );
+ x264_bitstream_check_buffer( h );
+
if( h->param.b_cabac )
{
if( mb_xy > h->sh.i_first_mb && !(h->sh.b_mbaff && (i_mb_y&1)) )
x264_macroblock_write_cavlc( h, &h->out.bs );
}
}
- TIMER_STOP( i_mtime_write );
#if VISUALIZE
if( h->param.b_visualize )
if( h->sh.b_mbaff )
{
- if( (i_mb_y&1) && i_mb_x == h->sps->i_mb_width - 1 )
- mb_xy++;
- else if( i_mb_y&1 )
- mb_xy += 1 - h->sps->i_mb_width;
- else
- mb_xy += h->sps->i_mb_width;
+ i_mb_x += i_mb_y & 1;
+ i_mb_y ^= i_mb_x < h->sps->i_mb_width;
}
else
- mb_xy++;
+ i_mb_x++;
+ if(i_mb_x == h->sps->i_mb_width)
+ {
+ i_mb_y++;
+ i_mb_x = 0;
+ }
}
if( h->param.b_cabac )
x264_nal_end( h );
+ x264_fdec_filter_row( h, h->sps->i_mb_height );
+
/* Compute misc bits */
h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
+ NALU_OVERHEAD * 8
x264_stack_align( x264_slice_write, h );
i_frame_size = h->out.nal[h->out.i_nal-1].i_payload;
- x264_fdec_filter_row( h, h->sps->i_mb_height );
#if VISUALIZE
if( h->param.b_visualize )
*pp_nal = NULL;
/* ------------------- Setup new frame from picture -------------------- */
- TIMER_START( i_mtime_encode_frame );
if( pic_in != NULL )
{
/* 1: Copy the picture to a frame and move it to a buffer */
while( bframes-- )
x264_frame_push( h->frames.current, x264_frame_shift( h->frames.next ) );
}
- TIMER_STOP( i_mtime_encode_frame );
/* ------------------- Get frame to be encoded ------------------------- */
/* 4: get picture to encode */
/* ------------------- Setup frame context ----------------------------- */
/* 5: Init data dependent of frame type */
- TIMER_START( i_mtime_encode_frame );
if( h->fenc->i_type == X264_TYPE_IDR )
{
/* reset ref pictures */
x264_slices_write( h );
/* restore CPU state (before using float again) */
- x264_cpu_restore( h->param.cpu );
+ x264_emms();
if( h->sh.i_type == SLICE_TYPE_P && !h->param.rc.b_stat_read
&& h->param.i_scenecut_threshold >= 0
/* ---------------------- Update encoder state ------------------------- */
/* update rc */
- x264_cpu_restore( h->param.cpu );
+ x264_emms();
x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
/* restore CPU state (before using float again) */
- x264_cpu_restore( h->param.cpu );
-
- x264_noise_reduction_update( h );
+ x264_emms();
- TIMER_STOP( i_mtime_encode_frame );
+ x264_noise_reduction_update( thread_current );
/* ---------------------- Compute/Print statistics --------------------- */
x264_thread_sync_stat( h, h->thread[0] );
psz_message[0] = '\0';
if( h->param.analyse.b_psnr )
{
- int64_t sqe[3];
-
- for( i=0; i<3; i++ )
- {
- sqe[i] = x264_pixel_ssd_wxh( &h->pixf,
- h->fdec->plane[i], h->fdec->i_stride[i],
- h->fenc->plane[i], h->fenc->i_stride[i],
- h->param.i_width >> !!i, h->param.i_height >> !!i );
- }
- x264_cpu_restore( h->param.cpu );
+ int64_t sqe[3] = {
+ h->stat.frame.i_ssd[0],
+ h->stat.frame.i_ssd[1],
+ h->stat.frame.i_ssd[2],
+ };
h->stat.i_sqe_global[h->sh.i_type] += sqe[0] + sqe[1] + sqe[2];
h->stat.f_psnr_average[h->sh.i_type] += x264_psnr( sqe[0] + sqe[1] + sqe[2], 3 * h->param.i_width * h->param.i_height / 2 );
if( h->param.analyse.b_ssim )
{
- // offset by 2 pixels to avoid alignment of ssim blocks with dct blocks
- float ssim_y = x264_pixel_ssim_wxh( &h->pixf,
- h->fdec->plane[0] + 2+2*h->fdec->i_stride[0], h->fdec->i_stride[0],
- h->fenc->plane[0] + 2+2*h->fenc->i_stride[0], h->fenc->i_stride[0],
- h->param.i_width-2, h->param.i_height-2 );
+ double ssim_y = h->stat.frame.f_ssim
+ / (((h->param.i_width-6)>>2) * ((h->param.i_height-6)>>2));
h->stat.f_ssim_mean_y[h->sh.i_type] += ssim_y;
snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message),
" SSIM Y:%.5f", ssim_y );
}
#endif
-#ifdef DEBUG_DUMP_FRAME
- /* Dump reconstructed frame */
- x264_frame_dump( h, h->fdec, "fdec.yuv" );
-#endif
+ if( h->param.psz_dump_yuv )
+ x264_frame_dump( h );
}
/****************************************************************************
****************************************************************************/
void x264_encoder_close ( x264_t *h )
{
-#ifdef DEBUG_BENCHMARK
- int64_t i_mtime_total = i_mtime_analyse + i_mtime_encode + i_mtime_write + i_mtime_filter + 1;
-#endif
int64_t i_yuv_size = 3 * h->param.i_width * h->param.i_height / 2;
int i;
for( i=0; i<h->param.i_threads; i++ )
{
- // don't strictly have to wait for the other threads, but it's simpler than cancelling them
+ // don't strictly have to wait for the other threads, but it's simpler than canceling them
if( h->thread[i]->b_thread_active )
x264_pthread_join( h->thread[i]->thread_handle, NULL );
}
-#ifdef DEBUG_BENCHMARK
- x264_log( h, X264_LOG_INFO,
- "analyse=%d(%lldms) encode=%d(%lldms) write=%d(%lldms) filter=%d(%lldms)\n",
- (int)(100*i_mtime_analyse/i_mtime_total), i_mtime_analyse/1000,
- (int)(100*i_mtime_encode/i_mtime_total), i_mtime_encode/1000,
- (int)(100*i_mtime_write/i_mtime_total), i_mtime_write/1000,
- (int)(100*i_mtime_filter/i_mtime_total), i_mtime_filter/1000 );
-#endif
-
/* Slices used and PSNR */
for( i=0; i<5; i++ )
{