#include "x264_speed_control.h"
-#include "flags.h"
-
-#include <time.h>
-
+#include <dlfcn.h>
+#include <math.h>
+#include <stdio.h>
+#include <x264.h>
#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <ratio>
+#include <type_traits>
+
+#include "flags.h"
+#include "metrics.h"
using namespace std;
+using namespace std::chrono;
+
// Number of entries in the presets[] table (used as its array size).
+#define SC_PRESETS 23
// Sets up speed control for the encoder handle `x264`.
//
// Reads the encoder's current parameters to obtain the frame rate and derives
// uspf (microseconds per frame, 1e6 / fps) from it. The initial buffer fill is
// buffer_size * f_buffer_init, raised to at least uspf and then capped at
// buffer_size. Also seeds the complexity estimate, resets the statistics and
// registers the speedcontrol metrics with global_metrics.
//
// f_speed is stored unchanged in the member of the same name.
//
// NOTE(review): i_buffer_size is not referenced in the lines shown here, while
// buffer_size is read before any assignment shown here -- confirm that the
// buffer size is established (e.g. through set_buffer_size()) before
// buffer_fill is computed.
X264SpeedControl::X264SpeedControl(x264_t *x264, float f_speed, int i_buffer_size, float f_buffer_init)
- : x264(x264), f_speed(f_speed)
+ : dyn(load_x264_for_bit_depth(global_flags.x264_bit_depth)),
+ x264(x264), f_speed(f_speed)
{
x264_param_t param;
- x264_encoder_parameters(x264, &param);
+ dyn.x264_encoder_parameters(x264, &param);
float fps = (float)param.i_fps_num / param.i_fps_den;
uspf = 1e6 / fps;
buffer_fill = buffer_size * f_buffer_init;
buffer_fill = max<int64_t>(buffer_fill, uspf);
buffer_fill = min(buffer_fill, buffer_size);
- timestamp = mdate();
+ timestamp = steady_clock::now();
preset = -1;
cplx_num = 3e3; //FIXME estimate initial complexity
cplx_den = .1;
stat.min_buffer = buffer_size;
stat.max_buffer = 0;
+ stat.avg_preset = 0.0;
+ stat.den = 0;
+
+ metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6;
+ metric_x264_speedcontrol_buffer_size_seconds = buffer_size * 1e-6;
+ metric_x264_speedcontrol_preset_used_frames.init_uniform(SC_PRESETS);
+ global_metrics.add("x264_speedcontrol_preset_used_frames", &metric_x264_speedcontrol_preset_used_frames);
+ global_metrics.add("x264_speedcontrol_buffer_available_seconds", &metric_x264_speedcontrol_buffer_available_seconds, Metrics::TYPE_GAUGE);
+ global_metrics.add("x264_speedcontrol_buffer_size_seconds", &metric_x264_speedcontrol_buffer_size_seconds, Metrics::TYPE_GAUGE);
+ global_metrics.add("x264_speedcontrol_idle_frames", &metric_x264_speedcontrol_idle_frames);
+ global_metrics.add("x264_speedcontrol_late_frames", &metric_x264_speedcontrol_late_frames);
}
X264SpeedControl::~X264SpeedControl()
(float)stat.min_buffer / buffer_size,
(float)stat.max_buffer / buffer_size );
// x264_log( x264, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", cplx_num / cplx_den );
+ if (dyn.handle) {
+ dlclose(dyn.handle);
+ }
}
typedef struct
int mix;
int trellis;
int partitions;
- int badapt;
- int bframes;
int direct;
int merange;
} sc_preset_t;
// The actual presets, including the equivalent commandline options. Note that
// all presets are benchmarked with --weightp 1 --mbtree --rc-lookahead 20
-// on top of the given settings (equivalent settings to the "faster" preset).
-// Timings and SSIM measurements were done on a quadcore Haswell i5 3.2 GHz
-// on the first 1000 frames of "Tears of Steel" in 1080p.
+// --b-adapt 1 --bframes 3 on top of the given settings (equivalent settings to
+// the "faster" preset). Timings and SSIM measurements were done on four cores
+// of a 6-core Coffee Lake i5 2.8 GHz on the first 1000 frames of “Elephants
+// Dream” in 1080p. See experiments/measure-x264.pl for a way to reproduce.
//
// Note that the two first and the two last are also used for extrapolation
// should the desired time be outside the range. Thus, it is disadvantageous if
// they are chosen so that the timings are too close to each other.
-#define SC_PRESETS 26
static const sc_preset_t presets[SC_PRESETS] = {
#define I4 X264_ANALYSE_I4x4
#define I8 X264_ANALYSE_I8x8
#define P4 X264_ANALYSE_PSUB8x8
#define P8 X264_ANALYSE_PSUB16x16
#define B8 X264_ANALYSE_BSUB16x16
- // Preset 0: 14.179db, --preset superfast --b-adapt 0 --bframes 0
- { .time= 1.000, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=0, .bframes=0, .direct=0, .merange=16 },
- // Preset 1: 14.459db, --preset superfast
- { .time= 1.283, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 0: 17.386db, --preset superfast
+ { .time= 1.000, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .direct=1, .merange=16 },
- // Preset 2: 14.761db, --preset superfast --subme 2
- { .time= 1.603, .subme=2, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 1: 17.919db, --preset superfast --subme 2
+ { .time= 1.707, .subme=2, .me=X264_ME_DIA, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4, .direct=1, .merange=16 },
- // Preset 3: 15.543db, --preset veryfast
- { .time= 1.843, .subme=2, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 2: 18.051db, --preset veryfast
+ { .time= 1.832, .subme=2, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 4: 15.716db, --preset veryfast --subme 3
- { .time= 2.452, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 3: 18.422db, --preset veryfast --subme 3
+ { .time= 1.853, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 5: 15.786db, --preset veryfast --subme 3 --ref 2
- { .time= 2.733, .subme=3, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 4: 18.514db, --preset veryfast --subme 3 --ref 2
+ { .time= 1.925, .subme=3, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 6: 15.813db, --preset veryfast --subme 4 --ref 2
- { .time= 3.085, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 5: 18.564db, --preset veryfast --subme 4 --ref 2
+ { .time= 2.111, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=0, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 7: 15.849db, --preset faster
- { .time= 3.101, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 6: 18.411db, --preset faster
+ { .time= 2.240, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=0, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 8: 15.857db, --preset faster --mixed-refs
- { .time= 3.284, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 7: 18.429db, --preset faster --mixed-refs
+ { .time= 2.414, .subme=4, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 9: 15.869db, --preset faster --mixed-refs --subme 5
- { .time= 3.587, .subme=5, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 8: 18.454db, --preset faster --mixed-refs --subme 5
+ { .time= 2.888, .subme=5, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 10: 16.051db, --preset fast
- { .time= 3.947, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 9: 18.528db, --preset fast
+ { .time= 3.570, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 11: 16.356db, --preset fast --subme 7
- { .time= 4.041, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 10: 18.762db, --preset fast --subme 7
+ { .time= 3.698, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 12: 16.418db, --preset fast --subme 7 --ref 3
- { .time= 4.406, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 11: 18.819db, --preset medium
+ { .time= 4.174, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 13: 16.460db, --preset medium
- { .time= 4.707, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 12: 18.889db, --preset medium --subme 8
+ { .time= 5.155, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 14: 16.517db, --preset medium --subme 8
- { .time= 5.133, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 13: 19.127db, --preset medium --subme 8 --trellis 2
+ { .time= 7.237, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=1, .merange=16 },
- // Preset 15: 16.523db, --preset medium --subme 8 --me umh
- { .time= 6.050, .subme=8, .me=X264_ME_UMH, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=1, .bframes=3, .direct=1, .merange=16 },
+ // Preset 14: 19.118db, --preset medium --subme 8 --trellis 2 --direct auto
+ { .time= 7.240, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 16: 16.543db, --preset medium --subme 8 --me umh --direct auto --b-adapt 2
- { .time= 6.849, .subme=8, .me=X264_ME_UMH, .refs=3, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 15: 19.172db, --preset slow
+ { .time= 7.910, .subme=8, .me=X264_ME_HEX, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 17: 16.613db, --preset slow
- { .time= 8.042, .subme=8, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 16: 19.208db, --preset slow --subme 9
+ { .time= 8.091, .subme=9, .me=X264_ME_HEX, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 18: 16.641db, --preset slow --subme 9
- { .time= 8.972, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=1, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 17: 19.216db, --preset slow --subme 9 --me umh
+ { .time= 9.539, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 19: 16.895db, --preset slow --subme 9 --trellis 2
- { .time=10.073, .subme=9, .me=X264_ME_UMH, .refs=5, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 18: 19.253db, --preset slow --subme 9 --me umh --ref 6
+ { .time=10.521, .subme=9, .me=X264_ME_UMH, .refs=6, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 20: 16.918db, --preset slow --subme 9 --trellis 2 --ref 6
- { .time=11.147, .subme=9, .me=X264_ME_UMH, .refs=6, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 19: 19.275db, --preset slow --subme 9 --me umh --ref 7
+ { .time=11.461, .subme=9, .me=X264_ME_UMH, .refs=7, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .direct=3, .merange=16 },
- // Preset 21: 16.934db, --preset slow --subme 9 --trellis 2 --ref 7
- { .time=12.267, .subme=9, .me=X264_ME_UMH, .refs=7, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 20: 19.314db, --preset slower
+ { .time=13.145, .subme=9, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .direct=3, .merange=16 },
- // Preset 22: 16.948db, --preset slower
- { .time=13.829, .subme=9, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 21: 19.407db, --preset slower --subme 10
+ { .time=16.386, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .direct=3, .merange=16 },
- // Preset 23: 17.058db, --preset slower --subme 10
- { .time=14.831, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=3, .direct=3, .merange=16 },
+ // Preset 22: 19.483db, --preset veryslow
+ { .time=26.861, .subme=10, .me=X264_ME_UMH, .refs=16, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .direct=3, .merange=24 },
- // Preset 24: 17.268db, --preset slower --subme 10 --bframes 8
- { .time=18.705, .subme=10, .me=X264_ME_UMH, .refs=8, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=16 },
-
- // Preset 25: 17.297db, --preset veryslow
- { .time=31.419, .subme=10, .me=X264_ME_UMH, .refs=16, .mix=1, .trellis=2, .partitions=I8|I4|P8|B8|P4, .badapt=2, .bframes=8, .direct=3, .merange=24 },
#undef I4
#undef I8
#undef P4
set_buffer_size(new_buffer_size);
}
buffer_fill = buffer_size * new_buffer_fill;
+ metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6;
- int64_t t, delta_t;
+ steady_clock::time_point t;
// update buffer state after encoding and outputting the previous frame(s)
if (first) {
- t = timestamp = mdate();
+ t = timestamp = steady_clock::now();
first = false;
} else {
- t = mdate();
+ t = steady_clock::now();
}
- delta_t = t - timestamp;
+ auto delta_t = t - timestamp;
timestamp = t;
// update the time predictor
- int cpu_time = cpu_time_last_frame;
- cplx_num *= cplx_decay;
- cplx_den *= cplx_decay;
- cplx_num += cpu_time / presets[preset].time;
- ++cplx_den;
-
- stat.avg_preset += preset;
- ++stat.den;
+ if (preset >= 0) {
+ int cpu_time = duration_cast<microseconds>(cpu_time_last_frame).count();
+ cplx_num *= cplx_decay;
+ cplx_den *= cplx_decay;
+ cplx_num += cpu_time / presets[preset].time;
+ ++cplx_den;
+
+ stat.avg_preset += preset;
+ ++stat.den;
+ }
stat.min_buffer = min(buffer_fill, stat.min_buffer);
stat.max_buffer = max(buffer_fill, stat.max_buffer);
if (buffer_fill >= buffer_size) { // oops, cpu was idle
// not really an error, but we'll warn for debugging purposes
- static int64_t idle_t = 0, print_interval = 0;
+ static int64_t idle_t = 0;
+ static steady_clock::time_point print_interval;
+ static bool first = false;
idle_t += buffer_fill - buffer_size;
- if (t - print_interval > 1e6) {
+ if (first || duration<double>(t - print_interval).count() > 0.1) {
//fprintf(stderr, "speedcontrol idle (%.6f sec)\n", idle_t/1e6);
print_interval = t;
idle_t = 0;
+ first = false;
}
buffer_fill = buffer_size;
+ metric_x264_speedcontrol_buffer_available_seconds = buffer_fill * 1e-6;
+ ++metric_x264_speedcontrol_idle_frames;
} else if (buffer_fill <= 0) { // oops, we're late
// fprintf(stderr, "speedcontrol underflow (%.6f sec)\n", buffer_fill/1e6);
+ ++metric_x264_speedcontrol_late_frames;
}
{
if (global_flags.x264_speedcontrol_verbose) {
static float cpu, wall, tgt, den;
const float decay = 1-1/100.;
- cpu = cpu*decay + cpu_time_last_frame;
- wall = wall*decay + delta_t;
+ cpu = cpu*decay + duration_cast<microseconds>(cpu_time_last_frame).count();
+ wall = wall*decay + duration_cast<microseconds>(delta_t).count();
tgt = tgt*decay + target;
den = den*decay + 1;
fprintf(stderr, "speed: %.2f+%.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r",
// Stores the time elapsed since `timestamp` in cpu_time_last_frame; that value
// is later read as the last frame's CPU time when the time predictor is
// updated.
void X264SpeedControl::after_frame()
{
- cpu_time_last_frame = mdate() - timestamp;
+ cpu_time_last_frame = steady_clock::now() - timestamp;
}
void X264SpeedControl::set_buffer_size(int new_buffer_size)
buffer_size = new_buffer_size * uspf;
cplx_decay = 1 - 1./new_buffer_size;
compensation_period = buffer_size/4;
+ metric_x264_speedcontrol_buffer_size_seconds = buffer_size * 1e-6;
}
int X264SpeedControl::dither_preset(float f)
const sc_preset_t *s = &presets[new_preset];
x264_param_t p;
- x264_encoder_parameters(x264, &p);
+ dyn.x264_encoder_parameters(x264, &p);
p.i_frame_reference = s->refs;
- p.i_bframe_adaptive = s->badapt;
- p.i_bframe = s->bframes;
p.analyse.inter = s->partitions;
p.analyse.i_subpel_refine = s->subme;
p.analyse.i_me_method = s->me;
p.analyse.b_mixed_references = s->mix;
p.analyse.i_direct_mv_pred = s->direct;
p.analyse.i_me_range = s->merange;
- x264_encoder_reconfig(x264, &p);
+ if (override_func) {
+ override_func(&p);
+ }
+ dyn.x264_encoder_reconfig(x264, &p);
preset = new_preset;
-}
-int64_t X264SpeedControl::mdate()
-{
- timespec now;
- clock_gettime(CLOCK_MONOTONIC, &now);
- return now.tv_sec * 1000000 + now.tv_nsec / 1000;
+ metric_x264_speedcontrol_preset_used_frames.count_event(new_preset);
}