X-Git-Url: https://git.sesse.net/?p=nageru;a=blobdiff_plain;f=x264_speed_control.cpp;h=ca9f52a6003def7f29209aad08a51629b7bb8727;hp=3ed3ece8e5b95ed0a69d8788511d489eb39217eb;hb=703e00da89118df9be0354dda621bed023e6030e;hpb=16c0e5da7fa7b4eeea79470c24697a1ba193f071 diff --git a/x264_speed_control.cpp b/x264_speed_control.cpp index 3ed3ece..ca9f52a 100644 --- a/x264_speed_control.cpp +++ b/x264_speed_control.cpp @@ -1,18 +1,29 @@ #include "x264_speed_control.h" -#include "flags.h" - -#include - +#include +#include +#include +#include #include +#include +#include +#include +#include + +#include "flags.h" +#include "metrics.h" using namespace std; +using namespace std::chrono; + +#define SC_PRESETS 25 X264SpeedControl::X264SpeedControl(x264_t *x264, float f_speed, int i_buffer_size, float f_buffer_init) - : x264(x264), f_speed(f_speed) + : dyn(load_x264_for_bit_depth(global_flags.x264_bit_depth)), + x264(x264), f_speed(f_speed) { x264_param_t param; - x264_encoder_parameters(x264, ¶m); + dyn.x264_encoder_parameters(x264, ¶m); float fps = (float)param.i_fps_num / param.i_fps_den; uspf = 1e6 / fps; @@ -20,12 +31,23 @@ X264SpeedControl::X264SpeedControl(x264_t *x264, float f_speed, int i_buffer_siz buffer_fill = buffer_size * f_buffer_init; buffer_fill = max(buffer_fill, uspf); buffer_fill = min(buffer_fill, buffer_size); - timestamp = mdate(); + timestamp = steady_clock::now(); preset = -1; cplx_num = 3e3; //FIXME estimate initial complexity cplx_den = .1; stat.min_buffer = buffer_size; stat.max_buffer = 0; + stat.avg_preset = 0.0; + stat.den = 0; + + metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6; + metric_x264_speedcontrol_buffer_size_seconds = buffer_size * 1e-6; + metric_x264_speedcontrol_preset_used_frames.init_uniform(SC_PRESETS); + global_metrics.add("x264_speedcontrol_preset_used_frames", &metric_x264_speedcontrol_preset_used_frames); + global_metrics.add("x264_speedcontrol_buffer_available_seconds", &metric_x264_speedcontrol_buffer_available_seconds, Metrics::TYPE_GAUGE); + global_metrics.add("x264_speedcontrol_buffer_size_seconds", &metric_x264_speedcontrol_buffer_size_seconds, Metrics::TYPE_GAUGE); + global_metrics.add("x264_speedcontrol_idle_frames", &metric_x264_speedcontrol_idle_frames); + global_metrics.add("x264_speedcontrol_late_frames", &metric_x264_speedcontrol_late_frames); } X264SpeedControl::~X264SpeedControl() @@ -35,6 +57,9 @@ X264SpeedControl::~X264SpeedControl() (float)stat.min_buffer / buffer_size, (float)stat.max_buffer / buffer_size ); // x264_log( x264, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", cplx_num / cplx_den ); + if (dyn.handle) { + dlclose(dyn.handle); + } } typedef struct @@ -56,95 +81,92 @@ typedef struct // all presets are benchmarked with --weightp 1 --mbtree --rc-lookahead 20 // on top of the given settings (equivalent settings to the "faster" preset). // Timings and SSIM measurements were done on a quadcore Haswell i5 3.2 GHz -// on the first 1000 frames of "Tears of Steel" in 1080p. +// on the first 1000 frames of "Elephants Dream" in 1080p. +// See experiments/measure-x264.pl for a way to reproduce. // // Note that the two first and the two last are also used for extrapolation // should the desired time be outside the range. Thus, it is disadvantageous if // they are chosen so that the timings are too close to each other. -#define SC_PRESETS 26 static const sc_preset_t presets[SC_PRESETS] = { #define I4 X264_ANALYSE_I4x4 #define I8 X264_ANALYSE_I8x8 #define P4 X264_ANALYSE_PSUB8x8 #define P8 X264_ANALYSE_PSUB16x16 #define B8 X264_ANALYSE_BSUB16x16 - // Preset 0: 14.179db, --preset superfast --b-adapt 0 --bframes 0 + // Preset 0: 16.583db, --preset superfast --b-adapt 0 --bframes 0 { .time= 1.000, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=0, .bframes=0, .direct=0, .merange=16 }, - // Preset 1: 14.459db, --preset superfast - { .time= 1.283, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 1: 17.386db, --preset superfast + { .time= 1.288, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 2: 14.761db, --preset superfast --subme 2 - { .time= 1.603, .subme=2, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 2: 17.919db, --preset superfast --subme 2 + { .time= 2.231, .subme=2, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 3: 15.543db, --preset veryfast - { .time= 1.843, .subme=2, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 3: 18.051db, --preset veryfast + { .time= 2.403, .subme=2, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 4: 15.716db, --preset veryfast --subme 3 - { .time= 2.452, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 4: 18.422db, --preset veryfast --subme 3 + { .time= 2.636, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 5: 15.786db, --preset veryfast --subme 3 --ref 2 - { .time= 2.733, .subme=3, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 5: 18.514db, --preset veryfast --subme 3 --ref 2 + { .time= 2.844, .subme=3, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 6: 15.813db, --preset veryfast --subme 4 --ref 2 - { .time= 3.085, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 6: 18.564db, --preset veryfast --subme 4 --ref 2 + { .time= 3.366, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 7: 15.849db, --preset faster - { .time= 3.101, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 7: 18.411db, --preset faster + { .time= 3.450, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 8: 15.857db, --preset faster --mixed-refs - { .time= 3.284, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 8: 18.429db, --preset faster --mixed-refs + { .time= 3.701, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 9: 15.869db, --preset faster --mixed-refs --subme 5 - { .time= 3.587, .subme=5, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 9: 18.454db, --preset faster --mixed-refs --subme 5 + { .time= 4.297, .subme=5, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 10: 16.051db, --preset fast - { .time= 3.947, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 10: 18.528db, --preset fast + { .time= 5.181, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 11: 16.356db, --preset fast --subme 7 - { .time= 4.041, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 11: 18.762db, --preset fast --subme 7 + { .time= 5.357, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 12: 16.418db, --preset fast --subme 7 --ref 3 - { .time= 4.406, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 12: 18.819db, --preset medium + { .time= 6.040, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 13: 16.460db, --preset medium - { .time= 4.707, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 13: 18.889db, --preset medium --subme 8 + { .time= 7.408, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 14: 16.517db, --preset medium --subme 8 - { .time= 5.133, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 14: 19.127db, --preset medium --subme 8 --trellis 2 + { .time=10.124, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, - // Preset 15: 16.523db, --preset medium --subme 8 --me umh - { .time= 6.050, .subme=8, .me=X264_ME_UMH, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 }, + // Preset 15: 19.118db, --preset medium --subme 8 --trellis 2 --direct auto + { .time=10.144, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=3, .merange=16 }, - // Preset 16: 16.543db, --preset medium --subme 8 --me umh --direct auto --b-adapt 2 - { .time= 6.849, .subme=8, .me=X264_ME_UMH, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 16: 19.172db, --preset slow + { .time=11.142, .subme=8, .me=X264_ME_HEX, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=3, .merange=16 }, - // Preset 17: 16.613db, --preset slow - { .time= 8.042, .subme=8, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 17: 19.309db, --preset slow --b-adapt 2 --subme 9 + { .time=11.168, .subme=9, .me=X264_ME_HEX, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 18: 16.641db, --preset slow --subme 9 - { .time= 8.972, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 18: 19.316db, --preset slow --b-adapt 2 --subme 9 --me umh + { .time=12.942, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 19: 16.895db, --preset slow --subme 9 --trellis 2 - { .time=10.073, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 19: 19.342db, --preset slow --b-adapt 2 --subme 9 --me umh --ref 6 + { .time=14.302, .subme=9, .me=X264_ME_UMH, .refs=6, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 20: 16.918db, --preset slow --subme 9 --trellis 2 --ref 6 - { .time=11.147, .subme=9, .me=X264_ME_UMH, .refs=6, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 20: 19.365db, --preset slow --b-adapt 2 --subme 9 --me umh --ref 7 + { .time=15.554, .subme=9, .me=X264_ME_UMH, .refs=7, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 21: 16.934db, --preset slow --subme 9 --trellis 2 --ref 7 - { .time=12.267, .subme=9, .me=X264_ME_UMH, .refs=7, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 21: 19.396db, --preset slower + { .time=17.551, .subme=9, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 22: 16.948db, --preset slower - { .time=13.829, .subme=9, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 22: 19.491db, --preset slower --subme 10 + { .time=21.321, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 }, - // Preset 23: 17.058db, --preset slower --subme 10 - { .time=14.831, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 }, + // Preset 23: 19.764db, --preset slower --subme 10 --bframes 8 + { .time=23.200, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=16 }, - // Preset 24: 17.268db, --preset slower --subme 10 --bframes 8 - { .time=18.705, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=16 }, - - // Preset 25: 17.297db, --preset veryslow - { .time=31.419, .subme=10, .me=X264_ME_UMH, .refs=16, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=24 }, + // Preset 24: 19.807db, --preset veryslow + { .time=36.922, .subme=10, .me=X264_ME_UMH, .refs=16, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=24 }, #undef I4 #undef I8 #undef P4 @@ -161,45 +183,54 @@ void X264SpeedControl::before_frame(float new_buffer_fill, int new_buffer_size, set_buffer_size(new_buffer_size); } buffer_fill = buffer_size * new_buffer_fill; + metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6; - int64_t t, delta_t; + steady_clock::time_point t; // update buffer state after encoding and outputting the previous frame(s) if (first) { - t = timestamp = mdate(); + t = timestamp = steady_clock::now(); first = false; } else { - t = mdate(); + t = steady_clock::now(); } - delta_t = t - timestamp; + auto delta_t = t - timestamp; timestamp = t; // update the time predictor - int cpu_time = cpu_time_last_frame; - cplx_num *= cplx_decay; - cplx_den *= cplx_decay; - cplx_num += cpu_time / presets[preset].time; - ++cplx_den; - - stat.avg_preset += preset; - ++stat.den; + if (preset >= 0) { + int cpu_time = duration_cast(cpu_time_last_frame).count(); + cplx_num *= cplx_decay; + cplx_den *= cplx_decay; + cplx_num += cpu_time / presets[preset].time; + ++cplx_den; + + stat.avg_preset += preset; + ++stat.den; + } stat.min_buffer = min(buffer_fill, stat.min_buffer); stat.max_buffer = max(buffer_fill, stat.max_buffer); if (buffer_fill >= buffer_size) { // oops, cpu was idle // not really an error, but we'll warn for debugging purposes - static int64_t idle_t = 0, print_interval = 0; + static int64_t idle_t = 0; + static steady_clock::time_point print_interval; + static bool first = false; idle_t += buffer_fill - buffer_size; - if (t - print_interval > 1e6) { + if (first || duration(t - print_interval).count() > 0.1) { //fprintf(stderr, "speedcontrol idle (%.6f sec)\n", idle_t/1e6); print_interval = t; idle_t = 0; + first = false; } buffer_fill = buffer_size; + metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6; + ++metric_x264_speedcontrol_idle_frames; } else if (buffer_fill <= 0) { // oops, we're late // fprintf(stderr, "speedcontrol underflow (%.6f sec)\n", buffer_fill/1e6); + ++metric_x264_speedcontrol_late_frames; } { @@ -243,8 +274,8 @@ void X264SpeedControl::before_frame(float new_buffer_fill, int new_buffer_size, if (global_flags.x264_speedcontrol_verbose) { static float cpu, wall, tgt, den; const float decay = 1-1/100.; - cpu = cpu*decay + cpu_time_last_frame; - wall = wall*decay + delta_t; + cpu = cpu*decay + duration_cast(cpu_time_last_frame).count(); + wall = wall*decay + duration_cast(delta_t).count(); tgt = tgt*decay + target; den = den*decay + 1; fprintf(stderr, "speed: %.2f+%.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r", @@ -257,7 +288,7 @@ void X264SpeedControl::before_frame(float new_buffer_fill, int new_buffer_size, void X264SpeedControl::after_frame() { - cpu_time_last_frame = mdate() - timestamp; + cpu_time_last_frame = steady_clock::now() - timestamp; } void X264SpeedControl::set_buffer_size(int new_buffer_size) @@ -266,6 +297,7 @@ void X264SpeedControl::set_buffer_size(int new_buffer_size) buffer_size = new_buffer_size * uspf; cplx_decay = 1 - 1./new_buffer_size; compensation_period = buffer_size/4; + metric_x264_speedcontrol_buffer_size_seconds = buffer_size * 1e-6; } int X264SpeedControl::dither_preset(float f) @@ -289,7 +321,7 @@ void X264SpeedControl::apply_preset(int new_preset) const sc_preset_t *s = &presets[new_preset]; x264_param_t p; - x264_encoder_parameters(x264, &p); + dyn.x264_encoder_parameters(x264, &p); p.i_frame_reference = s->refs; p.i_bframe_adaptive = s->badapt; @@ -301,13 +333,11 @@ void X264SpeedControl::apply_preset(int new_preset) p.analyse.b_mixed_references = s->mix; p.analyse.i_direct_mv_pred = s->direct; p.analyse.i_me_range = s->merange; - x264_encoder_reconfig(x264, &p); + if (override_func) { + override_func(&p); + } + dyn.x264_encoder_reconfig(x264, &p); preset = new_preset; -} -int64_t X264SpeedControl::mdate() -{ - timespec now; - clock_gettime(CLOCK_MONOTONIC, &now); - return now.tv_sec * 1000000 + now.tv_nsec / 1000; + metric_x264_speedcontrol_preset_used_frames.count_event(new_preset); }