git.sesse.net Git - nageru/commitdiff
Merge branch 'mjpeg'
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 4 Dec 2018 17:39:41 +0000 (18:39 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Tue, 4 Dec 2018 17:43:07 +0000 (18:43 +0100)
253 files changed:
.gitignore
.gitmodules
NEWS
README
bmusb [deleted submodule]
futatabi/add_base_flow.frag [new file with mode: 0644]
futatabi/blend.frag [new file with mode: 0644]
futatabi/chroma_subsample.frag [new file with mode: 0644]
futatabi/chroma_subsample.vert [new file with mode: 0644]
futatabi/chroma_subsampler.cpp [new file with mode: 0644]
futatabi/chroma_subsampler.h [new file with mode: 0644]
futatabi/clip_list.cpp [new file with mode: 0644]
futatabi/clip_list.h [new file with mode: 0644]
futatabi/db.cpp [new file with mode: 0644]
futatabi/db.h [new file with mode: 0644]
futatabi/defs.h [new file with mode: 0644]
futatabi/densify.frag [new file with mode: 0644]
futatabi/densify.vert [new file with mode: 0644]
futatabi/derivatives.frag [new file with mode: 0644]
futatabi/diffusivity.frag [new file with mode: 0644]
futatabi/embedded_files.h [new file with mode: 0644]
futatabi/equations.frag [new file with mode: 0644]
futatabi/equations.vert [new file with mode: 0644]
futatabi/eval.cpp [new file with mode: 0644]
futatabi/flags.cpp [new file with mode: 0644]
futatabi/flags.h [new file with mode: 0644]
futatabi/flow.cpp [new file with mode: 0644]
futatabi/flow.h [new file with mode: 0644]
futatabi/flow_main.cpp [new file with mode: 0644]
futatabi/frame.proto [new file with mode: 0644]
futatabi/frame_on_disk.cpp [new file with mode: 0644]
futatabi/frame_on_disk.h [new file with mode: 0644]
futatabi/gpu_timers.cpp [new file with mode: 0644]
futatabi/gpu_timers.h [new file with mode: 0644]
futatabi/gray.frag [new file with mode: 0644]
futatabi/hole_blend.frag [new file with mode: 0644]
futatabi/hole_fill.frag [new file with mode: 0644]
futatabi/hole_fill.vert [new file with mode: 0644]
futatabi/jpeg_destroyer.h [new file with mode: 0644]
futatabi/jpeg_frame.h [new file with mode: 0644]
futatabi/jpeg_frame_view.cpp [new file with mode: 0644]
futatabi/jpeg_frame_view.h [new file with mode: 0644]
futatabi/main.cpp [new file with mode: 0644]
futatabi/mainwindow.cpp [new file with mode: 0644]
futatabi/mainwindow.h [new file with mode: 0644]
futatabi/mainwindow.ui [new file with mode: 0644]
futatabi/make-example-video.sh [new file with mode: 0644]
futatabi/meson.build [new file with mode: 0644]
futatabi/motion_search.frag [new file with mode: 0644]
futatabi/motion_search.vert [new file with mode: 0644]
futatabi/player.cpp [new file with mode: 0644]
futatabi/player.h [new file with mode: 0644]
futatabi/prewarp.frag [new file with mode: 0644]
futatabi/queue_spot_holder.h [new file with mode: 0644]
futatabi/resize_flow.frag [new file with mode: 0644]
futatabi/sobel.frag [new file with mode: 0644]
futatabi/sor.frag [new file with mode: 0644]
futatabi/sor.vert [new file with mode: 0644]
futatabi/splat.frag [new file with mode: 0644]
futatabi/splat.vert [new file with mode: 0644]
futatabi/state.proto [new file with mode: 0644]
futatabi/util.cpp [new file with mode: 0644]
futatabi/util.h [new file with mode: 0644]
futatabi/vaapi_jpeg_decoder.cpp [new file with mode: 0644]
futatabi/vaapi_jpeg_decoder.h [new file with mode: 0644]
futatabi/variational_refinement.txt [new file with mode: 0644]
futatabi/video_stream.cpp [new file with mode: 0644]
futatabi/video_stream.h [new file with mode: 0644]
futatabi/vis.cpp [new file with mode: 0644]
futatabi/vs.vert [new file with mode: 0644]
futatabi/ycbcr_converter.cpp [new file with mode: 0644]
futatabi/ycbcr_converter.h [new file with mode: 0644]
meson.build
nageru/Nageru-Grafana.json [moved from Nageru-Grafana.json with 100% similarity]
nageru/aboutdialog.cpp [moved from aboutdialog.cpp with 100% similarity]
nageru/aboutdialog.h [moved from aboutdialog.h with 100% similarity]
nageru/aboutdialog.ui [moved from aboutdialog.ui with 99% similarity]
nageru/akai_midimix.midimapping [moved from akai_midimix.midimapping with 100% similarity]
nageru/alsa_input.cpp [moved from alsa_input.cpp with 99% similarity]
nageru/alsa_input.h [moved from alsa_input.h with 100% similarity]
nageru/alsa_output.cpp [moved from alsa_output.cpp with 100% similarity]
nageru/alsa_output.h [moved from alsa_output.h with 100% similarity]
nageru/alsa_pool.cpp [moved from alsa_pool.cpp with 100% similarity]
nageru/alsa_pool.h [moved from alsa_pool.h with 100% similarity]
nageru/analyzer.cpp [moved from analyzer.cpp with 99% similarity]
nageru/analyzer.h [moved from analyzer.h with 100% similarity]
nageru/analyzer.ui [moved from analyzer.ui with 100% similarity]
nageru/audio_encoder.cpp [moved from audio_encoder.cpp with 99% similarity]
nageru/audio_encoder.h [moved from audio_encoder.h with 97% similarity]
nageru/audio_expanded_view.ui [moved from audio_expanded_view.ui with 100% similarity]
nageru/audio_miniview.ui [moved from audio_miniview.ui with 100% similarity]
nageru/audio_mixer.cpp [moved from audio_mixer.cpp with 99% similarity]
nageru/audio_mixer.h [moved from audio_mixer.h with 99% similarity]
nageru/basic_stats.cpp [moved from basic_stats.cpp with 99% similarity]
nageru/basic_stats.h [moved from basic_stats.h with 100% similarity]
nageru/benchmark_audio_mixer.cpp [moved from benchmark_audio_mixer.cpp with 99% similarity]
nageru/bg.jpeg [moved from bg.jpeg with 100% similarity]
nageru/bmusb [new submodule]
nageru/cbcr_subsample.frag [new file with mode: 0644]
nageru/cbcr_subsample.vert [new file with mode: 0644]
nageru/cef_capture.cpp [moved from cef_capture.cpp with 100% similarity]
nageru/cef_capture.h [moved from cef_capture.h with 100% similarity]
nageru/chroma_subsampler.cpp [moved from chroma_subsampler.cpp with 72% similarity]
nageru/chroma_subsampler.h [moved from chroma_subsampler.h with 100% similarity]
nageru/clickable_label.h [moved from clickable_label.h with 100% similarity]
nageru/compression_reduction_meter.cpp [moved from compression_reduction_meter.cpp with 100% similarity]
nageru/compression_reduction_meter.h [moved from compression_reduction_meter.h with 100% similarity]
nageru/context.h [moved from context.h with 100% similarity]
nageru/context_menus.cpp [moved from context_menus.cpp with 100% similarity]
nageru/context_menus.h [moved from context_menus.h with 100% similarity]
nageru/correlation_measurer.cpp [moved from correlation_measurer.cpp with 100% similarity]
nageru/correlation_measurer.h [moved from correlation_measurer.h with 100% similarity]
nageru/correlation_meter.cpp [moved from correlation_meter.cpp with 100% similarity]
nageru/correlation_meter.h [moved from correlation_meter.h with 100% similarity]
nageru/decibel.h [moved from db.h with 74% similarity]
nageru/decklink/DeckLinkAPI.h [moved from decklink/DeckLinkAPI.h with 100% similarity]
nageru/decklink/DeckLinkAPIConfiguration.h [moved from decklink/DeckLinkAPIConfiguration.h with 100% similarity]
nageru/decklink/DeckLinkAPIDeckControl.h [moved from decklink/DeckLinkAPIDeckControl.h with 100% similarity]
nageru/decklink/DeckLinkAPIDiscovery.h [moved from decklink/DeckLinkAPIDiscovery.h with 100% similarity]
nageru/decklink/DeckLinkAPIDispatch.cpp [moved from decklink/DeckLinkAPIDispatch.cpp with 100% similarity]
nageru/decklink/DeckLinkAPIModes.h [moved from decklink/DeckLinkAPIModes.h with 100% similarity]
nageru/decklink/DeckLinkAPITypes.h [moved from decklink/DeckLinkAPITypes.h with 100% similarity]
nageru/decklink/LinuxCOM.h [moved from decklink/LinuxCOM.h with 100% similarity]
nageru/decklink_capture.cpp [moved from decklink_capture.cpp with 99% similarity]
nageru/decklink_capture.h [moved from decklink_capture.h with 100% similarity]
nageru/decklink_output.cpp [moved from decklink_output.cpp with 99% similarity]
nageru/decklink_output.h [moved from decklink_output.h with 98% similarity]
nageru/decklink_util.cpp [moved from decklink_util.cpp with 100% similarity]
nageru/decklink_util.h [moved from decklink_util.h with 100% similarity]
nageru/defs.h [moved from defs.h with 53% similarity]
nageru/display.ui [moved from display.ui with 100% similarity]
nageru/ebu_r128_proc.cc [moved from ebu_r128_proc.cc with 100% similarity]
nageru/ebu_r128_proc.h [moved from ebu_r128_proc.h with 100% similarity]
nageru/ellipsis_label.h [moved from ellipsis_label.h with 100% similarity]
nageru/embedded_files.h [new file with mode: 0644]
nageru/experiments/measure-x264.pl [moved from experiments/measure-x264.pl with 100% similarity]
nageru/experiments/presets.txt [moved from experiments/presets.txt with 100% similarity]
nageru/experiments/queue_drop_policy.cpp [moved from experiments/queue_drop_policy.cpp with 100% similarity]
nageru/ffmpeg_capture.cpp [moved from ffmpeg_capture.cpp with 99% similarity]
nageru/ffmpeg_capture.h [moved from ffmpeg_capture.h with 99% similarity]
nageru/ffmpeg_util.cpp [moved from ffmpeg_util.cpp with 100% similarity]
nageru/ffmpeg_util.h [moved from ffmpeg_util.h with 100% similarity]
nageru/filter.cpp [moved from filter.cpp with 100% similarity]
nageru/filter.h [moved from filter.h with 100% similarity]
nageru/flags.cpp [moved from flags.cpp with 100% similarity]
nageru/flags.h [moved from flags.h with 100% similarity]
nageru/glwidget.cpp [moved from glwidget.cpp with 99% similarity]
nageru/glwidget.h [moved from glwidget.h with 100% similarity]
nageru/image_input.cpp [moved from image_input.cpp with 99% similarity]
nageru/image_input.h [moved from image_input.h with 100% similarity]
nageru/input_mapping.cpp [moved from input_mapping.cpp with 100% similarity]
nageru/input_mapping.h [moved from input_mapping.h with 100% similarity]
nageru/input_mapping.ui [moved from input_mapping.ui with 100% similarity]
nageru/input_mapping_dialog.cpp [moved from input_mapping_dialog.cpp with 99% similarity]
nageru/input_mapping_dialog.h [moved from input_mapping_dialog.h with 100% similarity]
nageru/input_state.h [moved from input_state.h with 100% similarity]
nageru/json.proto [moved from json.proto with 100% similarity]
nageru/kaeru.cpp [moved from kaeru.cpp with 97% similarity]
nageru/lrameter.cpp [moved from lrameter.cpp with 100% similarity]
nageru/lrameter.h [moved from lrameter.h with 100% similarity]
nageru/main.cpp [moved from main.cpp with 98% similarity]
nageru/mainwindow.cpp [moved from mainwindow.cpp with 99% similarity]
nageru/mainwindow.h [moved from mainwindow.h with 100% similarity]
nageru/mainwindow.ui [moved from mainwindow.ui with 100% similarity]
nageru/meson.build [new file with mode: 0644]
nageru/midi_mapper.cpp [moved from midi_mapper.cpp with 100% similarity]
nageru/midi_mapper.h [moved from midi_mapper.h with 100% similarity]
nageru/midi_mapping.proto [moved from midi_mapping.proto with 100% similarity]
nageru/midi_mapping.ui [moved from midi_mapping.ui with 100% similarity]
nageru/midi_mapping_dialog.cpp [moved from midi_mapping_dialog.cpp with 99% similarity]
nageru/midi_mapping_dialog.h [moved from midi_mapping_dialog.h with 100% similarity]
nageru/mixer.cpp [moved from mixer.cpp with 99% similarity]
nageru/mixer.h [moved from mixer.h with 99% similarity]
nageru/mjpeg_encoder.cpp [moved from mjpeg_encoder.cpp with 99% similarity]
nageru/mjpeg_encoder.h [moved from mjpeg_encoder.h with 99% similarity]
nageru/nageru_cef_app.cpp [moved from nageru_cef_app.cpp with 100% similarity]
nageru/nageru_cef_app.h [moved from nageru_cef_app.h with 100% similarity]
nageru/nonlinear_fader.cpp [moved from nonlinear_fader.cpp with 100% similarity]
nageru/nonlinear_fader.h [moved from nonlinear_fader.h with 100% similarity]
nageru/patches/zita-resampler-sse.diff [moved from patches/zita-resampler-sse.diff with 100% similarity]
nageru/pbo_frame_allocator.cpp [moved from pbo_frame_allocator.cpp with 100% similarity]
nageru/pbo_frame_allocator.h [moved from pbo_frame_allocator.h with 100% similarity]
nageru/piecewise_interpolator.cpp [moved from piecewise_interpolator.cpp with 100% similarity]
nageru/piecewise_interpolator.h [moved from piecewise_interpolator.h with 100% similarity]
nageru/print_latency.cpp [moved from print_latency.cpp with 99% similarity]
nageru/print_latency.h [moved from print_latency.h with 97% similarity]
nageru/quicksync_encoder.cpp [moved from quicksync_encoder.cpp with 99% similarity]
nageru/quicksync_encoder.h [moved from quicksync_encoder.h with 98% similarity]
nageru/quicksync_encoder_impl.h [moved from quicksync_encoder_impl.h with 99% similarity]
nageru/quittable_sleeper.h [moved from quittable_sleeper.h with 100% similarity]
nageru/ref.raw [moved from ref.raw with 100% similarity]
nageru/ref_counted_frame.cpp [moved from ref_counted_frame.cpp with 100% similarity]
nageru/ref_counted_frame.h [moved from ref_counted_frame.h with 100% similarity]
nageru/resampling_queue.cpp [moved from resampling_queue.cpp with 100% similarity]
nageru/resampling_queue.h [moved from resampling_queue.h with 100% similarity]
nageru/scripts/compile_cef_dll_wrapper.sh [moved from scripts/compile_cef_dll_wrapper.sh with 100% similarity]
nageru/scripts/setup_nageru_symlink.sh [moved from scripts/setup_nageru_symlink.sh with 100% similarity]
nageru/simple.lua [moved from simple.lua with 100% similarity]
nageru/state.proto [moved from state.proto with 100% similarity]
nageru/stereocompressor.cpp [moved from stereocompressor.cpp with 100% similarity]
nageru/stereocompressor.h [moved from stereocompressor.h with 100% similarity]
nageru/theme.cpp [moved from theme.cpp with 100% similarity]
nageru/theme.h [moved from theme.h with 100% similarity]
nageru/theme.lua [moved from theme.lua with 100% similarity]
nageru/timecode.frag [new file with mode: 0644]
nageru/timecode.vert [new file with mode: 0644]
nageru/timecode_10bit.frag [new file with mode: 0644]
nageru/timecode_renderer.cpp [moved from timecode_renderer.cpp with 80% similarity]
nageru/timecode_renderer.h [moved from timecode_renderer.h with 100% similarity]
nageru/tweaked_inputs.cpp [moved from tweaked_inputs.cpp with 100% similarity]
nageru/tweaked_inputs.h [moved from tweaked_inputs.h with 100% similarity]
nageru/uyvy_subsample.frag [new file with mode: 0644]
nageru/uyvy_subsample.vert [new file with mode: 0644]
nageru/v210_converter.cpp [moved from v210_converter.cpp with 100% similarity]
nageru/v210_converter.h [moved from v210_converter.h with 100% similarity]
nageru/v210_subsample.comp [new file with mode: 0644]
nageru/video_encoder.cpp [moved from video_encoder.cpp with 97% similarity]
nageru/video_encoder.h [moved from video_encoder.h with 98% similarity]
nageru/vu_common.cpp [moved from vu_common.cpp with 100% similarity]
nageru/vu_common.h [moved from vu_common.h with 100% similarity]
nageru/vumeter.cpp [moved from vumeter.cpp with 100% similarity]
nageru/vumeter.h [moved from vumeter.h with 100% similarity]
nageru/x264_dynamic.cpp [moved from x264_dynamic.cpp with 100% similarity]
nageru/x264_dynamic.h [moved from x264_dynamic.h with 100% similarity]
nageru/x264_encoder.cpp [moved from x264_encoder.cpp with 99% similarity]
nageru/x264_encoder.h [moved from x264_encoder.h with 99% similarity]
nageru/x264_speed_control.cpp [moved from x264_speed_control.cpp with 99% similarity]
nageru/x264_speed_control.h [moved from x264_speed_control.h with 99% similarity]
nageru/ycbcr_interpretation.h [moved from ycbcr_interpretation.h with 100% similarity]
shared/bin2h.cpp [new file with mode: 0644]
shared/context.cpp [moved from context.cpp with 70% similarity]
shared/context.h [new file with mode: 0644]
shared/disk_space_estimator.cpp [moved from disk_space_estimator.cpp with 65% similarity]
shared/disk_space_estimator.h [moved from disk_space_estimator.h with 61% similarity]
shared/ffmpeg_raii.cpp [moved from ffmpeg_raii.cpp with 100% similarity]
shared/ffmpeg_raii.h [moved from ffmpeg_raii.h with 100% similarity]
shared/httpd.cpp [moved from httpd.cpp with 95% similarity]
shared/httpd.h [moved from httpd.h with 95% similarity]
shared/memcpy_interleaved.cpp [moved from memcpy_interleaved.cpp with 95% similarity]
shared/memcpy_interleaved.h [moved from memcpy_interleaved.h with 100% similarity]
shared/meson.build [new file with mode: 0644]
shared/metacube2.cpp [moved from metacube2.cpp with 100% similarity]
shared/metacube2.h [moved from metacube2.h with 100% similarity]
shared/metrics.cpp [moved from metrics.cpp with 99% similarity]
shared/metrics.h [moved from metrics.h with 100% similarity]
shared/mux.cpp [moved from mux.cpp with 88% similarity]
shared/mux.h [moved from mux.h with 87% similarity]
shared/post_to_main_thread.h [moved from post_to_main_thread.h with 100% similarity]
shared/read_file.cpp [new file with mode: 0644]
shared/read_file.h [new file with mode: 0644]
shared/ref_counted_gl_sync.h [moved from ref_counted_gl_sync.h with 95% similarity]
shared/shared_defs.h [new file with mode: 0644]
shared/timebase.h [moved from timebase.h with 93% similarity]

diff --git a/.gitignore b/.gitignore
index 2416a678e7d3f25ea206b6982136c2f49dcbb1f1..c0b5588694a5fad6b6192b6aa67a6ed2226b7d32 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 obj/
+.ycm_extra_conf.py
diff --git a/.gitmodules b/.gitmodules
index 5a478772ee8b3a953e5e3fd22796a53993d0d019..3e64f4b0fdc3262eddf6a36cd8f5a0411db31744 100644 (file)
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "bmusb"]
-       path = bmusb
+       path = nageru/bmusb
        url = http://git.sesse.net/bmusb
diff --git a/NEWS b/NEWS
index 714e80c555c5da1bdf86b9e5f3a849270bfa84a9..309c9d8dc3b8540e97a6d80c49481a33489e3607 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,15 @@
+Nageru 1.7.5, November 11th, 2018
+
+  - Fix a bug where --record-x264-video would not work when VA-API was
+    not present, making the option rather useless (broken in 1.7.2).
+    Bug reported by Peter De Schrijver.
+
+  - The build system has been switched to Meson; see the README for new
+    build instructions.
+
+  - Various smaller fixes.
+
+
 Nageru 1.7.4, August 31st, 2018
 
   - Rework the x264 speedcontrol presets, again. (They earlier assumed
diff --git a/README b/README
index e02ea23d39cd9668be1e04689894aaf58b866f4a..5c899114b3d95ba2b8d4db37af2e151bba68d167 100644 (file)
--- a/README
+++ b/README
@@ -117,7 +117,7 @@ Nageru uses Meson to build. For a default build, type
 
   meson obj && cd obj && ninja
 
-To start it, just hook up your equipment, and then type “./nageru”.
+To start it, just hook up your equipment, and then type “cd nageru && ./nageru”.
 
 It is strongly recommended to have the rights to run at real-time priority;
 it will make the USB3 threads do so, which will make them a lot more stable.
diff --git a/bmusb b/bmusb
deleted file mode 160000 (submodule)
index e9aa80b..0000000
--- a/bmusb
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit e9aa80b8e6a09cf2c8b3458a1c395b6f1b6a58cc
diff --git a/futatabi/add_base_flow.frag b/futatabi/add_base_flow.frag
new file mode 100644 (file)
index 0000000..ac56101
--- /dev/null
@@ -0,0 +1,11 @@
+#version 450 core
+
+in vec3 tc;
+out vec2 diff_flow;
+
+uniform sampler2DArray diff_flow_tex;
+
+void main()
+{
+       diff_flow = texture(diff_flow_tex, tc).xy;
+}
diff --git a/futatabi/blend.frag b/futatabi/blend.frag
new file mode 100644 (file)
index 0000000..eb3fc80
--- /dev/null
@@ -0,0 +1,49 @@
+#version 450 core
+
+in vec3 tc;
+
+#ifdef SPLIT_YCBCR_OUTPUT
+out float Y;
+out vec2 CbCr;
+#else
+out vec4 rgba;
+#endif
+
+uniform sampler2DArray image_tex;
+uniform sampler2D flow_tex;
+uniform float alpha;
+
+void main()
+{
+       vec2 flow = texture(flow_tex, tc.xy).xy;
+       vec4 I_0 = texture(image_tex, vec3(tc.xy - alpha * flow, 0));
+       vec4 I_1 = texture(image_tex, vec3(tc.xy + (1.0f - alpha) * flow, 1));
+
+       // Occlusion reasoning:
+
+       vec2 size = textureSize(image_tex, 0).xy;
+
+       // Follow the flow back to the initial point (where we sample I_0 from), then forward again.
+       // See how well we match the point we started at, which is our flow consistency.
+       float d0 = alpha * length(size * (texture(flow_tex, vec2(tc.xy - alpha * flow)).xy - flow));
+
+       // Same for d1.
+       float d1 = (1.0f - alpha) * length(size * (texture(flow_tex, vec2(tc.xy + (1.0f - alpha) * flow)).xy - flow));
+
+       vec4 result;
+       if (max(d0, d1) < 3.0f) {  // Arbitrary constant, not all that tuned. The UW paper says 1.0 is fine for ground truth.
+               // Both are visible, so blend.
+               result = I_0 + alpha * (I_1 - I_0);
+       } else if (d0 < d1) {
+               result = I_0;
+       } else {
+               result = I_1;
+       }
+
+#ifdef SPLIT_YCBCR_OUTPUT
+       Y = result.r;
+       CbCr = result.gb;
+#else
+       rgba = result;
+#endif
+}
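
The occlusion test in blend.frag above is a plain forward-backward consistency
check on the interpolated flow field. A minimal CPU-side sketch of the same
test (sample_flow() is a hypothetical bilinear lookup; the names here are
illustrative, not from this commit):

        // Forward-backward flow consistency, as in blend.frag but on the CPU.
        #include <algorithm>
        #include <cmath>

        struct Vec2 { float x, y; };

        Vec2 sample_flow(Vec2 pos);  // Assumed: bilinear sample of the flow field.

        // True if <pos> counts as visible in both input frames: re-sampling the
        // flow at the two points we warp to should give (nearly) the same vector.
        bool both_visible(Vec2 pos, float alpha, float width, float height)
        {
                Vec2 flow = sample_flow(pos);
                Vec2 p0 { pos.x - alpha * flow.x, pos.y - alpha * flow.y };
                Vec2 p1 { pos.x + (1.0f - alpha) * flow.x, pos.y + (1.0f - alpha) * flow.y };
                Vec2 f0 = sample_flow(p0), f1 = sample_flow(p1);
                float d0 = alpha * std::hypot(width * (f0.x - flow.x), height * (f0.y - flow.y));
                float d1 = (1.0f - alpha) * std::hypot(width * (f1.x - flow.x), height * (f1.y - flow.y));
                return std::max(d0, d1) < 3.0f;  // Same loosely tuned threshold as the shader.
        }

When the test fails, the shader falls back to whichever frame has the smaller
inconsistency (d0 or d1) instead of blending.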
diff --git a/futatabi/chroma_subsample.frag b/futatabi/chroma_subsample.frag
new file mode 100644 (file)
index 0000000..9a4155f
--- /dev/null
@@ -0,0 +1,10 @@
+#version 450 core
+in vec2 tc0, tc1;
+uniform sampler2D cbcr_tex;
+out float Cb, Cr;
+void main() {
+       vec2 result = 0.5 * (texture(cbcr_tex, tc0).rg + texture(cbcr_tex, tc1).rg);
+       Cb = result.r;
+       Cr = result.g;
+}
+
diff --git a/futatabi/chroma_subsample.vert b/futatabi/chroma_subsample.vert
new file mode 100644 (file)
index 0000000..81e1004
--- /dev/null
@@ -0,0 +1,21 @@
+#version 450 core
+
+layout(location=0) in vec2 position;
+out vec2 tc0, tc1;
+uniform vec2 chroma_offset_0;
+uniform vec2 chroma_offset_1;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       vec2 flipped_tc = position;
+       tc0 = flipped_tc + chroma_offset_0;
+       tc1 = flipped_tc + chroma_offset_1;
+}
+
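
The hand-inlined transform in this vertex shader is exactly the
glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) matrix quoted in the comment:
x' = 2x - 1, y' = 2y - 1, and z' = -1 for z = 0. A trivial standalone check
(not from the source) that the unit square lands on clip space:

        // Sanity check: (0,0) -> (-1,-1) and (1,1) -> (1,1), matching glOrtho.
        #include <cassert>

        int main()
        {
                const float corners[4][2] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } };
                for (const auto &c : corners) {
                        float xp = 2.0f * c[0] - 1.0f, yp = 2.0f * c[1] - 1.0f;
                        assert(xp >= -1.0f && xp <= 1.0f);
                        assert(yp >= -1.0f && yp <= 1.0f);
                }
        }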
diff --git a/futatabi/chroma_subsampler.cpp b/futatabi/chroma_subsampler.cpp
new file mode 100644 (file)
index 0000000..d064bc7
--- /dev/null
@@ -0,0 +1,122 @@
+#include "chroma_subsampler.h"
+
+#include <movit/util.h>
+#include <string>
+
+#include "embedded_files.h"
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+string read_file(const string &filename, const unsigned char *start = nullptr, const size_t size = 0);
+GLuint compile_shader(const string &shader_src, GLenum type);
+GLuint link_program(GLuint vs_obj, GLuint fs_obj);
+void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler);
+
+extern GLuint linear_sampler;
+
+ChromaSubsampler::ChromaSubsampler()
+{
+       // Set up stuff for 4:2:2 conversion.
+       //
+       // Note: Due to the horizontally co-sited chroma/luma samples in H.264
+       // (chroma position is left for horizontal),
+       // we need to be a bit careful in our subsampling. A diagram will make
+       // this clearer, showing some luma and chroma samples:
+       //
+       //     a   b   c   d
+       //   +---+---+---+---+
+       //   |   |   |   |   |
+       //   | Y | Y | Y | Y |
+       //   |   |   |   |   |
+       //   +---+---+---+---+
+       //
+       // +-------+-------+
+       // |       |       |
+       // |   C   |   C   |
+       // |       |       |
+       // +-------+-------+
+       //
+       // Clearly, the rightmost chroma sample here needs to be equivalent to
+       // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
+       // of course, but as long as the upsampling is not going to be equally
+       // sophisticated, it's probably not worth it.) If we sample once with
+       // no mipmapping, we get just c, ie., no actual filtering in the
+       // horizontal direction. (For the vertical direction, we can just
+       // sample in the middle to get the right filtering.) One could imagine
+       // we could use mipmapping (assuming we can create mipmaps cheaply),
+       // but then, what we'd get is this:
+       //
+       //    (a+b)/2 (c+d)/2
+       //   +-------+-------+
+       //   |       |       |
+       //   |   Y   |   Y   |
+       //   |       |       |
+       //   +-------+-------+
+       //
+       // +-------+-------+
+       // |       |       |
+       // |   C   |   C   |
+       // |       |       |
+       // +-------+-------+
+       //
+       // which ends up sampling equally from a and b, which clearly isn't right. Instead,
+       // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
+       // source pixels.
+       //
+       // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
+       // Taking the average of these gives us (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
+       // exactly what we want.
+       //
+       // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
+
+       cbcr_vs_obj = compile_shader(read_file("chroma_subsample.vert", _binary_chroma_subsample_vert_data, _binary_chroma_subsample_vert_size), GL_VERTEX_SHADER);
+       cbcr_fs_obj = compile_shader(read_file("chroma_subsample.frag", _binary_chroma_subsample_frag_data, _binary_chroma_subsample_frag_size), GL_FRAGMENT_SHADER);
+       cbcr_program = link_program(cbcr_vs_obj, cbcr_fs_obj);
+
+       // Set up the VAO containing all the required position data.
+       glCreateVertexArrays(1, &vao);
+       glBindVertexArray(vao);
+
+       float vertices[] = {
+               0.0f, 2.0f,
+               0.0f, 0.0f,
+               2.0f, 0.0f
+       };
+       glCreateBuffers(1, &vbo);
+       glNamedBufferData(vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+       glBindBuffer(GL_ARRAY_BUFFER, vbo);
+
+       GLint position_attrib = 0;  // Hard-coded in every vertex shader.
+       glEnableVertexArrayAttrib(vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+       uniform_cbcr_tex = glGetUniformLocation(cbcr_program, "cbcr_tex");
+       uniform_chroma_offset_0 = glGetUniformLocation(cbcr_program, "chroma_offset_0");
+       uniform_chroma_offset_1 = glGetUniformLocation(cbcr_program, "chroma_offset_1");
+}
+
+ChromaSubsampler::~ChromaSubsampler()
+{
+       glDeleteProgram(cbcr_program);
+       check_error();
+       glDeleteBuffers(1, &vbo);
+       check_error();
+       glDeleteVertexArrays(1, &vao);
+       check_error();
+}
+
+void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint cb_tex, GLuint cr_tex)
+{
+       glUseProgram(cbcr_program);
+       bind_sampler(cbcr_program, uniform_cbcr_tex, 0, cbcr_tex, linear_sampler);
+       glProgramUniform2f(cbcr_program, uniform_chroma_offset_0, -1.0f / width, 0.0f);
+       glProgramUniform2f(cbcr_program, uniform_chroma_offset_1, -0.0f / width, 0.0f);
+
+       glViewport(0, 0, width / 2, height);
+       fbos.render_to(cb_tex, cr_tex);
+
+       glBindVertexArray(vao);
+       glDrawArrays(GL_TRIANGLES, 0, 3);
+}
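
The long comment in the ChromaSubsampler constructor derives a [1/4, 1/2, 1/4]
horizontal filter from two bilinear samples taken halfway between source
chroma pixels. A tiny numeric sketch (arbitrary illustrative values) of that
arithmetic:

        // Averaging two in-between bilinear samples yields b/4 + c/2 + d/4.
        #include <cstdio>

        int main()
        {
                float b = 10.0f, c = 20.0f, d = 40.0f;  // Arbitrary chroma values.
                float sample_bc = 0.5f * (b + c);       // Bilinear tap between b and c.
                float sample_cd = 0.5f * (c + d);       // Bilinear tap between c and d.
                float averaged = 0.5f * (sample_bc + sample_cd);
                printf("%.2f vs. %.2f\n", averaged, b / 4.0f + c / 2.0f + d / 4.0f);  // 22.50 both.
        }

This is why subsample_chroma() issues two chroma offsets per output pixel
instead of relying on mipmapping, which would average a and b instead.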
diff --git a/futatabi/chroma_subsampler.h b/futatabi/chroma_subsampler.h
new file mode 100644 (file)
index 0000000..ec43fe0
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef _CHROMA_SUBSAMPLER_H
+#define _CHROMA_SUBSAMPLER_H 1
+
+#include "flow.h"
+
+#include <epoxy/gl.h>
+
+class ChromaSubsampler {
+public:
+       ChromaSubsampler();
+       ~ChromaSubsampler();
+
+       // Subsamples chroma (packed Cb and Cr) 2x1 to yield chroma suitable for
+       // planar 4:2:2. Chroma positioning is left (H.264 convention).
+       // width and height are the dimensions (in pixels) of the input texture.
+       void subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint cb_tex, GLuint cr_tex);
+
+private:
+       PersistentFBOSet<2> fbos;
+
+       GLuint vao;
+       GLuint vbo;  // Holds position data.
+
+       GLuint cbcr_vs_obj, cbcr_fs_obj, cbcr_program;
+       GLuint uniform_cbcr_tex;
+       GLuint uniform_chroma_offset_0, uniform_chroma_offset_1;
+};
+
+#endif  // !defined(_CHROMA_SUBSAMPLER_H)
diff --git a/futatabi/clip_list.cpp b/futatabi/clip_list.cpp
new file mode 100644 (file)
index 0000000..9ccbb71
--- /dev/null
@@ -0,0 +1,513 @@
+#include "clip_list.h"
+
+#include "mainwindow.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+
+#include <math.h>
+#include <locale>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+string pts_to_string(int64_t pts)
+{
+       int64_t t = lrint((pts / double(TIMEBASE)) * 1e3);  // In milliseconds.
+       int ms = t % 1000;
+       t /= 1000;
+       int sec = t % 60;
+       t /= 60;
+       int min = t % 60;
+       t /= 60;
+       int hour = t;
+
+       char buf[256];
+       snprintf(buf, sizeof(buf), "%d:%02d:%02d.%03d", hour, min, sec, ms);
+       return buf;
+}
+
+string duration_to_string(int64_t pts_diff)
+{
+       int64_t t = lrint((pts_diff / double(TIMEBASE)) * 1e3);  // In milliseconds.
+       int ms = t % 1000;
+       t /= 1000;
+       int sec = t % 60;
+       t /= 60;
+       int min = t;
+
+       char buf[256];
+       snprintf(buf, sizeof(buf), "%d:%02d.%03d", min, sec, ms);
+       return buf;
+}
+
+int ClipList::rowCount(const QModelIndex &parent) const
+{
+       if (parent.isValid())
+               return 0;
+       return clips.size();
+}
+
+int PlayList::rowCount(const QModelIndex &parent) const
+{
+       if (parent.isValid())
+               return 0;
+       return clips.size();
+}
+
+int ClipList::columnCount(const QModelIndex &parent) const
+{
+       if (parent.isValid())
+               return 0;
+       return int(Column::NUM_COLUMNS);
+}
+
+int PlayList::columnCount(const QModelIndex &parent) const
+{
+       if (parent.isValid())
+               return 0;
+       return int(Column::NUM_COLUMNS);
+}
+
+QVariant ClipList::data(const QModelIndex &parent, int role) const
+{
+       if (!parent.isValid())
+               return QVariant();
+       const int row = parent.row(), column = parent.column();
+       if (size_t(row) >= clips.size())
+               return QVariant();
+
+       if (role == Qt::TextAlignmentRole) {
+               switch (Column(column)) {
+               case Column::IN:
+               case Column::OUT:
+               case Column::DURATION:
+                       return Qt::AlignRight + Qt::AlignVCenter;
+               default:
+                       return Qt::AlignLeft + Qt::AlignVCenter;
+               }
+       }
+
+       if (role != Qt::DisplayRole && role != Qt::EditRole)
+               return QVariant();
+
+       switch (Column(column)) {
+       case Column::IN:
+               return QString::fromStdString(pts_to_string(clips[row].pts_in));
+       case Column::OUT:
+               if (clips[row].pts_out >= 0) {
+                       return QString::fromStdString(pts_to_string(clips[row].pts_out));
+               } else {
+                       return QVariant();
+               }
+       case Column::DURATION:
+               if (clips[row].pts_out >= 0) {
+                       return QString::fromStdString(duration_to_string(clips[row].pts_out - clips[row].pts_in));
+               } else {
+                       return QVariant();
+               }
+       case Column::CAMERA_1:
+       case Column::CAMERA_2:
+       case Column::CAMERA_3:
+       case Column::CAMERA_4: {
+               unsigned stream_idx = column - int(Column::CAMERA_1);
+               return QString::fromStdString(clips[row].descriptions[stream_idx]);
+       }
+       default:
+               return "";
+       }
+}
+
+QVariant PlayList::data(const QModelIndex &parent, int role) const
+{
+       if (!parent.isValid())
+               return QVariant();
+       const int row = parent.row(), column = parent.column();
+       if (size_t(row) >= clips.size())
+               return QVariant();
+
+       if (role == Qt::TextAlignmentRole) {
+               switch (Column(column)) {
+               case Column::PLAYING:
+                       return Qt::AlignCenter;
+               case Column::IN:
+               case Column::OUT:
+               case Column::DURATION:
+               case Column::FADE_TIME:
+                       return Qt::AlignRight + Qt::AlignVCenter;
+               case Column::CAMERA:
+                       return Qt::AlignCenter;
+               default:
+                       return Qt::AlignLeft + Qt::AlignVCenter;
+               }
+       }
+       if (role == Qt::BackgroundRole) {
+               if (Column(column) == Column::PLAYING) {
+                       auto it = current_progress.find(row);
+                       if (it != current_progress.end()) {
+                               double play_progress = it->second;
+
+                               // This only really works well for the first column, for whatever odd Qt reason.
+                               QLinearGradient grad(QPointF(0, 0), QPointF(1, 0));
+                               grad.setCoordinateMode(grad.QGradient::ObjectBoundingMode);
+                               grad.setColorAt(0.0f, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.2f));
+                               grad.setColorAt(play_progress, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.2f));
+                               if (play_progress + 0.01f <= 1.0f) {
+                                       grad.setColorAt(play_progress + 0.01f, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.0f));
+                               }
+                               return QBrush(grad);
+                       } else {
+                               return QVariant();
+                       }
+               } else {
+                       return QVariant();
+               }
+       }
+
+       if (role != Qt::DisplayRole && role != Qt::EditRole)
+               return QVariant();
+
+       switch (Column(column)) {
+       case Column::PLAYING:
+               return current_progress.count(row) ? "→" : "";
+       case Column::IN:
+               return QString::fromStdString(pts_to_string(clips[row].pts_in));
+       case Column::OUT:
+               if (clips[row].pts_out >= 0) {
+                       return QString::fromStdString(pts_to_string(clips[row].pts_out));
+               } else {
+                       return QVariant();
+               }
+       case Column::DURATION:
+               if (clips[row].pts_out >= 0) {
+                       return QString::fromStdString(duration_to_string(clips[row].pts_out - clips[row].pts_in));
+               } else {
+                       return QVariant();
+               }
+       case Column::CAMERA:
+               return qlonglong(clips[row].stream_idx + 1);
+       case Column::DESCRIPTION:
+               return QString::fromStdString(clips[row].descriptions[clips[row].stream_idx]);
+       case Column::FADE_TIME: {
+               stringstream ss;
+               ss.imbue(locale("C"));
+               ss.precision(3);
+               ss << fixed << clips[row].fade_time_seconds;
+               return QString::fromStdString(ss.str());
+       }
+       default:
+               return "";
+       }
+}
+
+QVariant ClipList::headerData(int section, Qt::Orientation orientation, int role) const
+{
+       if (role != Qt::DisplayRole)
+               return QVariant();
+       if (orientation != Qt::Horizontal)
+               return QVariant();
+
+       switch (Column(section)) {
+       case Column::IN:
+               return "In";
+       case Column::OUT:
+               return "Out";
+       case Column::DURATION:
+               return "Duration";
+       case Column::CAMERA_1:
+               return "Camera 1";
+       case Column::CAMERA_2:
+               return "Camera 2";
+       case Column::CAMERA_3:
+               return "Camera 3";
+       case Column::CAMERA_4:
+               return "Camera 4";
+       default:
+               return "";
+       }
+}
+
+QVariant PlayList::headerData(int section, Qt::Orientation orientation, int role) const
+{
+       if (role != Qt::DisplayRole)
+               return QVariant();
+       if (orientation != Qt::Horizontal)
+               return QVariant();
+
+       switch (Column(section)) {
+       case Column::PLAYING:
+               return "";
+       case Column::IN:
+               return "In";
+       case Column::OUT:
+               return "Out";
+       case Column::DURATION:
+               return "Duration";
+       case Column::CAMERA:
+               return "Camera";
+       case Column::DESCRIPTION:
+               return "Description";
+       case Column::FADE_TIME:
+               return "Fade time";
+       default:
+               return "";
+       }
+}
+
+Qt::ItemFlags ClipList::flags(const QModelIndex &index) const
+{
+       if (!index.isValid())
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+       const int row = index.row(), column = index.column();
+       if (size_t(row) >= clips.size())
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+
+       switch (Column(column)) {
+       case Column::CAMERA_1:
+       case Column::CAMERA_2:
+       case Column::CAMERA_3:
+       case Column::CAMERA_4:
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable | Qt::ItemIsEditable | Qt::ItemIsDragEnabled;
+       default:
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+       }
+}
+
+Qt::ItemFlags PlayList::flags(const QModelIndex &index) const
+{
+       if (!index.isValid())
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+       const int row = index.row(), column = index.column();
+       if (size_t(row) >= clips.size())
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+
+       switch (Column(column)) {
+       case Column::DESCRIPTION:
+       case Column::CAMERA:
+       case Column::FADE_TIME:
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable | Qt::ItemIsEditable;
+       default:
+               return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+       }
+}
+
+bool ClipList::setData(const QModelIndex &index, const QVariant &value, int role)
+{
+       if (!index.isValid() || role != Qt::EditRole) {
+               return false;
+       }
+
+       const int row = index.row(), column = index.column();
+       if (size_t(row) >= clips.size())
+               return false;
+
+       switch (Column(column)) {
+       case Column::CAMERA_1:
+       case Column::CAMERA_2:
+       case Column::CAMERA_3:
+       case Column::CAMERA_4: {
+               unsigned stream_idx = column - int(Column::CAMERA_1);
+               clips[row].descriptions[stream_idx] = value.toString().toStdString();
+               emit_data_changed(row);
+               return true;
+       }
+       default:
+               return false;
+       }
+}
+
+bool PlayList::setData(const QModelIndex &index, const QVariant &value, int role)
+{
+       if (!index.isValid() || role != Qt::EditRole) {
+               return false;
+       }
+
+       const int row = index.row(), column = index.column();
+       if (size_t(row) >= clips.size())
+               return false;
+
+       switch (Column(column)) {
+       case Column::DESCRIPTION:
+               clips[row].descriptions[clips[row].stream_idx] = value.toString().toStdString();
+               emit_data_changed(row);
+               return true;
+       case Column::CAMERA: {
+               bool ok;
+               int camera_idx = value.toInt(&ok);
+               if (!ok || camera_idx < 1 || camera_idx > NUM_CAMERAS) {
+                       return false;
+               }
+               clips[row].stream_idx = camera_idx - 1;
+               emit_data_changed(row);
+               return true;
+       }
+       case Column::FADE_TIME: {
+               bool ok;
+               double val = value.toDouble(&ok);
+               if (!ok || !(val >= 0.0)) {
+                       return false;
+               }
+               clips[row].fade_time_seconds = val;
+               emit_data_changed(row);
+               return true;
+       }
+       default:
+               return false;
+       }
+}
+
+void ClipList::add_clip(const Clip &clip)
+{
+       beginInsertRows(QModelIndex(), clips.size(), clips.size());
+       clips.push_back(clip);
+       endInsertRows();
+       emit any_content_changed();
+}
+
+void PlayList::add_clip(const Clip &clip)
+{
+       beginInsertRows(QModelIndex(), clips.size(), clips.size());
+       clips.push_back(clip);
+       endInsertRows();
+       emit any_content_changed();
+}
+
+void PlayList::duplicate_clips(size_t first, size_t last)
+{
+       beginInsertRows(QModelIndex(), first, last);
+       // Take a copy first; std::vector::insert with iterators into the same
+       // vector is undefined behavior.
+       vector<Clip> duplicates(clips.begin() + first, clips.begin() + last + 1);
+       clips.insert(clips.begin() + first, duplicates.begin(), duplicates.end());
+       endInsertRows();
+       emit any_content_changed();
+}
+
+void PlayList::erase_clips(size_t first, size_t last)
+{
+       beginRemoveRows(QModelIndex(), first, last);
+       clips.erase(clips.begin() + first, clips.begin() + last + 1);
+       endRemoveRows();
+       emit any_content_changed();
+}
+
+void PlayList::move_clips(size_t first, size_t last, int delta)
+{
+       if (delta == -1) {
+               beginMoveRows(QModelIndex(), first, last, QModelIndex(), first - 1);
+               rotate(clips.begin() + first - 1, clips.begin() + first, clips.begin() + last + 1);
+       } else {
+               beginMoveRows(QModelIndex(), first, last, QModelIndex(), first + (last - first + 1) + 1);
+               first = clips.size() - first - 1;
+               last = clips.size() - last - 1;
+               rotate(clips.rbegin() + last - 1, clips.rbegin() + last, clips.rbegin() + first + 1);
+       }
+       endMoveRows();
+       emit any_content_changed();
+}
+
+void ClipList::emit_data_changed(size_t row)
+{
+       emit dataChanged(index(row, 0), index(row, int(Column::NUM_COLUMNS)));
+       emit any_content_changed();
+}
+
+void PlayList::emit_data_changed(size_t row)
+{
+       emit dataChanged(index(row, 0), index(row, int(Column::NUM_COLUMNS)));
+       emit any_content_changed();
+}
+
+void PlayList::set_currently_playing(int index, double progress)
+{
+       int old_index = currently_playing_index;
+       int column = int(Column::PLAYING);
+       if (index != old_index) {
+               currently_playing_index = index;
+               play_progress = progress;
+               if (old_index != -1) {
+                       emit dataChanged(this->index(old_index, column), this->index(old_index, column));
+               }
+               if (index != -1) {
+                       emit dataChanged(this->index(index, column), this->index(index, column));
+               }
+       } else if (index != -1 && fabs(progress - play_progress) > 1e-3) {
+               play_progress = progress;
+               emit dataChanged(this->index(index, column), this->index(index, column));
+       }
+}
+
+void PlayList::set_progress(const map<size_t, double> &progress)
+{
+       const int column = int(Column::PLAYING);
+       map<size_t, double> old_progress = move(this->current_progress);
+       this->current_progress = progress;
+
+       for (auto it : old_progress) {
+               size_t index = it.first;
+               if (current_progress.count(index) == 0) {
+                       emit dataChanged(this->index(index, column), this->index(index, column));
+               }
+       }
+       for (auto it : current_progress) {
+               size_t index = it.first;
+               emit dataChanged(this->index(index, column), this->index(index, column));
+       }
+}
+
+namespace {
+
+Clip deserialize_clip(const ClipProto &clip_proto)
+{
+       Clip clip;
+       clip.pts_in = clip_proto.pts_in();
+       clip.pts_out = clip_proto.pts_out();
+       for (int camera_idx = 0; camera_idx < min(clip_proto.description_size(), NUM_CAMERAS); ++camera_idx) {
+               clip.descriptions[camera_idx] = clip_proto.description(camera_idx);
+       }
+       clip.stream_idx = clip_proto.stream_idx();
+       clip.fade_time_seconds = clip_proto.fade_time_seconds();
+       return clip;
+}
+
+void serialize_clip(const Clip &clip, ClipProto *clip_proto)
+{
+       clip_proto->set_pts_in(clip.pts_in);
+       clip_proto->set_pts_out(clip.pts_out);
+       for (int camera_idx = 0; camera_idx < NUM_CAMERAS; ++camera_idx) {
+               *clip_proto->add_description() = clip.descriptions[camera_idx];
+       }
+       clip_proto->set_stream_idx(clip.stream_idx);
+       clip_proto->set_fade_time_seconds(clip.fade_time_seconds);
+}
+
+}  // namespace
+
+ClipList::ClipList(const ClipListProto &serialized)
+{
+       for (const ClipProto &clip_proto : serialized.clip()) {
+               clips.push_back(deserialize_clip(clip_proto));
+       }
+}
+
+ClipListProto ClipList::serialize() const
+{
+       ClipListProto ret;
+       for (const Clip &clip : clips) {
+               serialize_clip(clip, ret.add_clip());
+       }
+       return ret;
+}
+
+PlayList::PlayList(const ClipListProto &serialized)
+{
+       for (const ClipProto &clip_proto : serialized.clip()) {
+               clips.push_back(deserialize_clip(clip_proto));
+       }
+}
+
+ClipListProto PlayList::serialize() const
+{
+       ClipListProto ret;
+       for (const Clip &clip : clips) {
+               serialize_clip(clip, ret.add_clip());
+       }
+       return ret;
+}
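
PlayList::move_clips above leans on std::rotate to shift a contiguous block of
rows by one position. A standalone demo (plain ints instead of Clips) of the
delta == -1 (move up) case:

        // Rotating [first-1, last] one step left moves the block [first, last]
        // up by one row, exactly as in PlayList::move_clips.
        #include <algorithm>
        #include <cstdio>
        #include <vector>

        int main()
        {
                std::vector<int> rows = { 0, 1, 2, 3, 4 };
                size_t first = 2, last = 3;  // Move rows 2..3 up by one.
                std::rotate(rows.begin() + first - 1, rows.begin() + first,
                            rows.begin() + last + 1);
                for (int r : rows) {
                        printf("%d ", r);  // Prints: 0 2 3 1 4
                }
                printf("\n");
        }

The move-down case is the mirror image, done with reverse iterators after
flipping the indices.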
diff --git a/futatabi/clip_list.h b/futatabi/clip_list.h
new file mode 100644 (file)
index 0000000..8dead83
--- /dev/null
@@ -0,0 +1,152 @@
+#ifndef _CLIP_LIST_H
+#define _CLIP_LIST_H 1
+
+#include "defs.h"
+#include "state.pb.h"
+
+#include <QAbstractTableModel>
+#include <stdint.h>
+#include <map>
+#include <string>
+#include <vector>
+
+struct Clip {
+       int64_t pts_in = -1, pts_out = -1;  // pts_in is inclusive, pts_out is exclusive.
+       std::string descriptions[NUM_CAMERAS];
+       unsigned stream_idx = 0;  // For the playlist only.
+       double fade_time_seconds = 0.5;  // For the playlist only.
+};
+
+class DataChangedReceiver {
+public:
+       virtual ~DataChangedReceiver() {}
+       virtual void emit_data_changed(size_t row) = 0;
+};
+
+// Like a smart pointer to a Clip, but emits dataChanged when it goes out of scope.
+struct ClipProxy {
+public:
+       ClipProxy(Clip &clip, DataChangedReceiver *clip_list, size_t row)
+               : clip(clip), clip_list(clip_list), row(row) {}
+       ~ClipProxy()
+       {
+               if (clip_list != nullptr) {
+                       clip_list->emit_data_changed(row);
+               }
+       }
+       Clip *operator->() { return &clip; }
+       Clip &operator*() { return clip; }
+
+private:
+       Clip &clip;
+       DataChangedReceiver *clip_list;
+       size_t row;
+};
+
+class ClipList : public QAbstractTableModel, public DataChangedReceiver {
+       Q_OBJECT
+
+public:
+       explicit ClipList(const ClipListProto &serialized);
+
+       enum class Column {
+               IN,
+               OUT,
+               DURATION,
+               CAMERA_1,
+               CAMERA_2,
+               CAMERA_3,
+               CAMERA_4,
+               NUM_COLUMNS
+       };
+
+       int rowCount(const QModelIndex &parent) const override;
+       int columnCount(const QModelIndex &parent) const override;
+       QVariant data(const QModelIndex &parent, int role) const override;
+       QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
+       Qt::ItemFlags flags(const QModelIndex &index) const override;
+       bool setData(const QModelIndex &index, const QVariant &value, int role = Qt::EditRole) override;
+
+       void add_clip(const Clip &clip);
+       size_t size() const { return clips.size(); }
+       bool empty() const { return clips.empty(); }
+
+       ClipProxy mutable_clip(size_t index) { return ClipProxy(clips[index], this, index); }
+       const Clip *clip(size_t index) const { return &clips[index]; }
+
+       ClipProxy mutable_back() { return mutable_clip(size() - 1); }
+       const Clip *back() const { return clip(size() - 1); }
+
+       ClipListProto serialize() const;
+
+       void emit_data_changed(size_t row) override;
+
+signals:
+       void any_content_changed();
+
+private:
+       std::vector<Clip> clips;
+};
+
+class PlayList : public QAbstractTableModel, public DataChangedReceiver {
+       Q_OBJECT
+
+public:
+       explicit PlayList(const ClipListProto &serialized);
+
+       enum class Column {
+               PLAYING,
+               IN,
+               OUT,
+               DURATION,
+               CAMERA,
+               DESCRIPTION,
+               FADE_TIME,
+               NUM_COLUMNS
+       };
+
+       int rowCount(const QModelIndex &parent) const override;
+       int columnCount(const QModelIndex &parent) const override;
+       QVariant data(const QModelIndex &parent, int role) const override;
+       QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
+       Qt::ItemFlags flags(const QModelIndex &index) const override;
+       bool setData(const QModelIndex &index, const QVariant &value, int role = Qt::EditRole) override;
+
+       void add_clip(const Clip &clip);
+
+       // <last> is inclusive in all of these.
+       void duplicate_clips(size_t first, size_t last);
+       void erase_clips(size_t first, size_t last);
+       // <delta> is -1 to move upwards, +1 to move downwards.
+       void move_clips(size_t first, size_t last, int delta);
+
+       size_t size() const { return clips.size(); }
+       bool empty() const { return clips.empty(); }
+
+       ClipProxy mutable_clip(size_t index) { return ClipProxy(clips[index], this, index); }
+       const Clip *clip(size_t index) const { return &clips[index]; }
+
+       ClipProxy mutable_back() { return mutable_clip(size() - 1); }
+       const Clip *back() const { return clip(size() - 1); }
+
+       // TODO: Move these out of PlayList.
+       void set_currently_playing(int index, double progress);  // -1 = none.
+       int get_currently_playing() const { return currently_playing_index; }
+
+       void set_progress(const std::map<size_t, double> &progress);
+
+       ClipListProto serialize() const;
+
+       void emit_data_changed(size_t row) override;
+
+signals:
+       void any_content_changed();
+
+private:
+       std::vector<Clip> clips;
+       int currently_playing_index = -1;
+       double play_progress = 0.0;
+       std::map<size_t, double> current_progress;
+};
+
+#endif  // !defined (_CLIP_LIST_H)
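
ClipProxy, declared above, is a small RAII device: writes go through
operator->, and the destructor emits dataChanged for that row so the Qt views
repaint. A hypothetical call site (not from this commit) showing the pattern:

        // The proxy forwards writes to the underlying Clip; when it goes out
        // of scope, its destructor calls emit_data_changed(row) on the model.
        void set_out_point(ClipList *clip_list, size_t row, int64_t pts)
        {
                ClipProxy clip = clip_list->mutable_clip(row);
                clip->pts_out = pts;  // Mutate through operator->.
        }  // ClipProxy destructor runs here, notifying the model.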
diff --git a/futatabi/db.cpp b/futatabi/db.cpp
new file mode 100644 (file)
index 0000000..39fd557
--- /dev/null
@@ -0,0 +1,324 @@
+#include "db.h"
+
+#include "frame.pb.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+using namespace std;
+
+DB::DB(const string &filename)
+{
+       int ret = sqlite3_open(filename.c_str(), &db);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "%s: %s\n", filename.c_str(), sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_exec(db, R"(
+               CREATE TABLE IF NOT EXISTS state (state BLOB);
+       )", nullptr, nullptr, nullptr);  // Ignore errors.
+
+       sqlite3_exec(db, R"(
+               DROP TABLE file;
+       )", nullptr, nullptr, nullptr);  // Ignore errors.
+
+       sqlite3_exec(db, R"(
+               DROP TABLE frame;
+       )", nullptr, nullptr, nullptr);  // Ignore errors.
+
+       sqlite3_exec(db, R"(
+               CREATE TABLE IF NOT EXISTS filev2 (
+                       file INTEGER NOT NULL PRIMARY KEY,
+                       filename VARCHAR NOT NULL UNIQUE,
+                       size BIGINT NOT NULL,
+                       frames BLOB NOT NULL
+               );
+       )", nullptr, nullptr, nullptr);  // Ignore errors.
+
+       sqlite3_exec(db, "PRAGMA journal_mode=WAL", nullptr, nullptr, nullptr);  // Ignore errors.
+       sqlite3_exec(db, "PRAGMA synchronous=NORMAL", nullptr, nullptr, nullptr);  // Ignore errors.
+}
+
+StateProto DB::get_state()
+{
+       StateProto state;
+
+       sqlite3_stmt *stmt;
+       int ret = sqlite3_prepare_v2(db, "SELECT state FROM state", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "SELECT prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_step(stmt);
+       if (ret == SQLITE_ROW) {
+               bool ok = state.ParseFromArray(sqlite3_column_blob(stmt, 0), sqlite3_column_bytes(stmt, 0));
+               if (!ok) {
+                       fprintf(stderr, "State in database is corrupted!\n");
+                       exit(1);
+               }
+       } else if (ret != SQLITE_DONE) {
+               fprintf(stderr, "SELECT step: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "SELECT finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       return state;
+}
+
+void DB::store_state(const StateProto &state)
+{
+       string serialized;
+       state.SerializeToString(&serialized);
+
+       int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_exec(db, "DELETE FROM state", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "DELETE: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_stmt *stmt;
+       ret = sqlite3_prepare_v2(db, "INSERT INTO state VALUES (?)", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_bind_blob(stmt, 1, serialized.data(), serialized.size(), SQLITE_STATIC);
+
+       ret = sqlite3_step(stmt);
+       if (ret == SQLITE_ROW) {
+               fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+}
+
+vector<DB::FrameOnDiskAndStreamIdx> DB::load_frame_file(const string &filename, size_t size, unsigned filename_idx)
+{
+       FileContentsProto file_contents;
+
+       sqlite3_stmt *stmt;
+       int ret = sqlite3_prepare_v2(db, "SELECT frames FROM filev2 WHERE filename=? AND size=?", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "SELECT prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+       sqlite3_bind_int64(stmt, 2, size);
+
+       ret = sqlite3_step(stmt);
+       if (ret == SQLITE_ROW) {
+               bool ok = file_contents.ParseFromArray(sqlite3_column_blob(stmt, 0), sqlite3_column_bytes(stmt, 0));
+               if (!ok) {
+                       fprintf(stderr, "Frame list in database is corrupted!\n");
+                       exit(1);
+               }
+       } else if (ret != SQLITE_DONE) {
+               fprintf(stderr, "SELECT step: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "SELECT finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       vector<FrameOnDiskAndStreamIdx> frames;
+       for (const StreamContentsProto &stream : file_contents.stream()) {
+               FrameOnDiskAndStreamIdx frame;
+               frame.stream_idx = stream.stream_idx();
+               for (int i = 0; i < stream.pts_size(); ++i) {
+                       frame.frame.filename_idx = filename_idx;
+                       frame.frame.pts = stream.pts(i);
+                       frame.frame.offset = stream.offset(i);
+                       frame.frame.size = stream.file_size(i);
+                       frames.push_back(frame);
+               }
+       }
+
+       return frames;
+}
+
+void DB::store_frame_file(const string &filename, size_t size, const vector<FrameOnDiskAndStreamIdx> &frames)
+{
+       int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       // Delete any existing instances with this filename.
+       sqlite3_stmt *stmt;
+
+       ret = sqlite3_prepare_v2(db, "DELETE FROM filev2 WHERE filename=?", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "DELETE prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+
+       ret = sqlite3_step(stmt);
+       if (ret != SQLITE_DONE) {
+               fprintf(stderr, "DELETE step: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "DELETE finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       // Create the protobuf blob for the new row.
+       FileContentsProto file_contents;
+       unordered_set<unsigned> seen_stream_idx;  // Usually only one.
+       for (const FrameOnDiskAndStreamIdx &frame : frames) {
+               seen_stream_idx.insert(frame.stream_idx);
+       }
+       for (unsigned stream_idx : seen_stream_idx) {
+               StreamContentsProto *stream = file_contents.add_stream();
+               stream->set_stream_idx(stream_idx);
+               stream->mutable_pts()->Reserve(frames.size());
+               stream->mutable_offset()->Reserve(frames.size());
+               stream->mutable_file_size()->Reserve(frames.size());
+               for (const FrameOnDiskAndStreamIdx &frame : frames) {
+                       if (frame.stream_idx != stream_idx) {
+                               continue;
+                       }
+                       stream->add_pts(frame.frame.pts);
+                       stream->add_offset(frame.frame.offset);
+                       stream->add_file_size(frame.frame.size);
+               }
+       }
+       string serialized;
+       file_contents.SerializeToString(&serialized);
+
+       // Insert the new row.
+       ret = sqlite3_prepare_v2(db, "INSERT INTO filev2 (filename, size, frames) VALUES (?, ?, ?)", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+       sqlite3_bind_int64(stmt, 2, size);
+       sqlite3_bind_blob(stmt, 3, serialized.data(), serialized.size(), SQLITE_STATIC);
+
+       ret = sqlite3_step(stmt);
+       if (ret != SQLITE_DONE) {
+               fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       // Commit.
+       ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+}
+
+void DB::clean_unused_frame_files(const vector<string> &used_filenames)
+{
+       int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_exec(db, R"(
+               CREATE TEMPORARY TABLE used_filenames ( filename VARCHAR NOT NULL PRIMARY KEY )
+       )", nullptr, nullptr, nullptr);
+
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "CREATE TEMPORARY TABLE: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       // Insert the new rows.
+       sqlite3_stmt *stmt;
+       ret = sqlite3_prepare_v2(db, "INSERT INTO used_filenames (filename) VALUES (?)", -1, &stmt, 0);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       for (const string &filename : used_filenames) {
+               sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+
+               ret = sqlite3_step(stmt);
+               if (ret != SQLITE_DONE) {
+                       fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+                       exit(1);
+               }
+
+               ret = sqlite3_reset(stmt);
+               if (ret != SQLITE_OK) {
+                       fprintf(stderr, "INSERT reset: %s\n", sqlite3_errmsg(db));
+                       exit(1);
+               }
+       }
+
+       ret = sqlite3_finalize(stmt);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_exec(db, R"(
+               DELETE FROM filev2 WHERE filename NOT IN ( SELECT filename FROM used_filenames )
+       )", nullptr, nullptr, nullptr);
+
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "DELETE: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       ret = sqlite3_exec(db, R"(
+               DROP TABLE used_filenames
+       )", nullptr, nullptr, nullptr);
+
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "DROP TABLE: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+
+       // Commit.
+       ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+       if (ret != SQLITE_OK) {
+               fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+               exit(1);
+       }
+}
diff --git a/futatabi/db.h b/futatabi/db.h
new file mode 100644 (file)
index 0000000..f8032c0
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef DB_H
+#define DB_H 1
+
+#include "state.pb.h"
+
+#include <sqlite3.h>
+#include <string>
+#include <vector>
+
+#include "frame_on_disk.h"
+
+class DB {
+public:
+       explicit DB(const std::string &filename);
+       DB(const DB &) = delete;
+
+       StateProto get_state();
+       void store_state(const StateProto &state);
+
+       struct FrameOnDiskAndStreamIdx {
+               FrameOnDisk frame;
+               unsigned stream_idx;
+       };
+       std::vector<FrameOnDiskAndStreamIdx> load_frame_file(const std::string &filename, size_t size, unsigned filename_idx);  // Empty = none found, or there were no frames.
+       void store_frame_file(const std::string &filename, size_t size, const std::vector<FrameOnDiskAndStreamIdx> &frames);
+       void clean_unused_frame_files(const std::vector<std::string> &used_filenames);
+
+private:
+       StateProto state;
+       sqlite3 *db;
+};
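+
+// Typical usage, as a sketch ("futatabi.db" is just an example path):
+//
+//   DB db("futatabi.db");
+//   StateProto state = db.get_state();
+//   // ... mutate state ...
+//   db.store_state(state);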
+
+#endif  // !defined(DB_H)
diff --git a/futatabi/defs.h b/futatabi/defs.h
new file mode 100644 (file)
index 0000000..e0d190b
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _DEFS_H
+#define _DEFS_H 1
+
+#define MAX_STREAMS 16
+#define CACHE_SIZE_MB 2048
+#define NUM_CAMERAS 4
+#define MUX_BUFFER_SIZE 10485760
+
+#define DEFAULT_HTTPD_PORT 9095
+
+#endif  // !defined(_DEFS_H)
diff --git a/futatabi/densify.frag b/futatabi/densify.frag
new file mode 100644 (file)
index 0000000..3bca126
--- /dev/null
@@ -0,0 +1,24 @@
+#version 450 core
+
+in vec2 image_pos;
+flat in int image0_layer, image1_layer;
+flat in vec2 flow_du;
+flat in float mean_diff;
+out vec3 flow_contribution;
+
+uniform sampler2DArray image_tex;
+
+void main()
+{
+       // Equation (3) from the paper. We're using additive blending, so the
+       // sum will happen automatically for us, and normalization happens on
+       // next read.
+       //
+       // Note that equation (2) says 1 for the minimum error, but the
+       // reference code uses 2.0. It also says L2 norm, but the reference
+       // code really takes the absolute value even for the L2 error norm
+       // (and uses a square-root formula for the L1 norm).
+       float diff = texture(image_tex, vec3(image_pos, image0_layer)).x - texture(image_tex, vec3(image_pos + flow_du, image1_layer)).x;
+       diff -= mean_diff;
+       float weight = 1.0 / max(abs(diff), 2.0 / 255.0);
+       flow_contribution = vec3(flow_du.x * weight, flow_du.y * weight, weight);
+}
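+
+// Downstream, the accumulated (sum(w*du), sum(w*dv), sum(w)) is normalized
+// back into a flow vector. A minimal sketch of that read, with dense_flow_tex
+// standing in for whatever texture this shader rendered to:
+//
+//   vec3 s = texture(dense_flow_tex, tc).xyz;
+//   vec2 flow = s.xy / s.z;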
diff --git a/futatabi/densify.vert b/futatabi/densify.vert
new file mode 100644 (file)
index 0000000..181c7f3
--- /dev/null
@@ -0,0 +1,55 @@
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec2 image_pos;
+flat out vec2 flow_du;
+flat out float mean_diff;
+flat out int image0_layer, image1_layer;
+
+uniform vec2 patch_size;  // In 0..1 coordinates.
+uniform sampler2DArray flow_tex;
+
+void main()
+{
+       int num_patches = textureSize(flow_tex, 0).x * textureSize(flow_tex, 0).y;
+       int patch_layer = gl_InstanceID / num_patches;
+       int patch_x = gl_InstanceID % textureSize(flow_tex, 0).x;
+       int patch_y = (gl_InstanceID % num_patches) / textureSize(flow_tex, 0).x;
+
+       // Convert the patch index to being the full 0..1 range, to match where
+       // the motion search puts the patches. We don't bother with the locking
+       // to texel centers, though.
+       vec2 patch_center = ivec2(patch_x, patch_y) / (textureSize(flow_tex, 0).xy - 1.0);
+
+       // Increase the patch size a bit; since patch spacing is not necessarily
+       // an integer number of pixels, and we don't use conservative rasterization,
+       // we could be missing the outer edges of the patch. And it seemingly helps
+       // a little bit in general to have some more candidates as well -- although
+       // this is measured without variational refinement, so it might be moot
+       // with it.
+       //
+       // This maps [0.0,1.0] to [-0.25,1.25], i.e., it extends the patch by 25% in
+       // all directions.
+       vec2 grown_pos = (position * 1.5) - 0.25;
+
+       image_pos = patch_center + patch_size * (grown_pos - 0.5f);
+
+       // Find the flow value for this patch, and send it on to the fragment shader.
+       vec3 flow_du_and_mean_diff = texelFetch(flow_tex, ivec3(patch_x, patch_y, patch_layer), 0).xyz;
+       flow_du = flow_du_and_mean_diff.xy;
+       mean_diff = flow_du_and_mean_diff.z;
+
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * image_pos.x - 1.0, 2.0 * image_pos.y - 1.0, -1.0, 1.0);
+       gl_Layer = patch_layer;
+
+       // Forward flow (0) goes from 0 to 1. Backward flow (1) goes from 1 to 0.
+       image0_layer = patch_layer;
+       image1_layer = 1 - patch_layer;
+}
diff --git a/futatabi/derivatives.frag b/futatabi/derivatives.frag
new file mode 100644 (file)
index 0000000..0e2fd68
--- /dev/null
@@ -0,0 +1,32 @@
+#version 450 core
+
+in vec3 tc;
+out vec2 derivatives;
+out float beta_0;
+
+uniform sampler2DArray tex;
+
+void main()
+{
+       float x_m2 = textureOffset(tex, tc, ivec2(-2,  0)).x;
+       float x_m1 = textureOffset(tex, tc, ivec2(-1,  0)).x;
+       float x_p1 = textureOffset(tex, tc, ivec2( 1,  0)).x;
+       float x_p2 = textureOffset(tex, tc, ivec2( 2,  0)).x;
+
+       float y_m2 = textureOffset(tex, tc, ivec2( 0, -2)).x;
+       float y_m1 = textureOffset(tex, tc, ivec2( 0, -1)).x;
+       float y_p1 = textureOffset(tex, tc, ivec2( 0,  1)).x;
+       float y_p2 = textureOffset(tex, tc, ivec2( 0,  2)).x;
+
+       derivatives.x = (x_p1 - x_m1) * (2.0/3.0) + (x_m2 - x_p2) * (1.0/12.0);
+       derivatives.y = (y_p1 - y_m1) * (2.0/3.0) + (y_m2 - y_p2) * (1.0/12.0);
+
+       // The nudge term in the square root in the DeepFlow paper is ζ² = 0.1² = 0.01.
+       // But this is assuming a 0..255 level. Given the nonlinearities in the expression
+       // where β_0 appears, there's no 100% equivalent way to adjust this
+       // constant that I can see, but taking it to (0.1/255)² ~= 1.53e-7 ~=
+       // 1e-7 ought to be good enough. I guess the basic idea is that it
+       // will only matter for near-zero derivatives anyway. I am a tiny
+       // bit worried about fp16 precision when storing these numbers, but OK.
+       beta_0 = 1.0 / (derivatives.x * derivatives.x + derivatives.y * derivatives.y + 1e-7);
+}
diff --git a/futatabi/diffusivity.frag b/futatabi/diffusivity.frag
new file mode 100644 (file)
index 0000000..345c3eb
--- /dev/null
@@ -0,0 +1,39 @@
+#version 450 core
+
+in vec3 tc;
+out float g;
+const float eps_sq = 0.001 * 0.001;
+
+uniform sampler2DArray flow_tex, diff_flow_tex;
+
+// Relative weighting of smoothness term.
+uniform float alpha;
+
+uniform bool zero_diff_flow;
+
+// This must be a macro, since the offset needs to be a constant expression.
+#define get_flow(x_offs, y_offs) \
+       (textureOffset(flow_tex, tc, ivec2((x_offs), (y_offs))).xy + \
+       textureOffset(diff_flow_tex, tc, ivec2((x_offs), (y_offs))).xy)
+
+#define get_flow_no_diff(x_offs, y_offs) \
+       textureOffset(flow_tex, tc, ivec2((x_offs), (y_offs))).xy
+
+float diffusivity(float u_x, float u_y, float v_x, float v_y)
+{
+       return alpha * inversesqrt(u_x * u_x + u_y * u_y + v_x * v_x + v_y * v_y + eps_sq);
+}
+
+void main()
+{
+       // Find diffusivity (g) for this pixel, using central differences.
+       if (zero_diff_flow) {
+               vec2 uv_x = get_flow_no_diff(1, 0) - get_flow_no_diff(-1,  0);
+               vec2 uv_y = get_flow_no_diff(0, 1) - get_flow_no_diff( 0, -1);
+               g = diffusivity(uv_x.x, uv_y.x, uv_x.y, uv_y.y);
+       } else {
+               vec2 uv_x = get_flow(1, 0) - get_flow(-1,  0);
+               vec2 uv_y = get_flow(0, 1) - get_flow( 0, -1);
+               g = diffusivity(uv_x.x, uv_y.x, uv_x.y, uv_y.y);
+       }
+}
diff --git a/futatabi/embedded_files.h b/futatabi/embedded_files.h
new file mode 100644 (file)
index 0000000..83cf0fc
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef _EMBEDDED_FILES_H
+#define _EMBEDDED_FILES_H 1
+
+// Files that are embedded into the binary as part of the build process.
+// They are used as a backup if the files are not available on disk
+// (which is typically the case if the program is installed, as opposed to
+// being run during development).
+
+#include <stddef.h>
+
+extern const unsigned char *_binary_add_base_flow_frag_data;
+extern const size_t _binary_add_base_flow_frag_size;
+extern const unsigned char *_binary_blend_frag_data;
+extern const size_t _binary_blend_frag_size;
+extern const unsigned char *_binary_chroma_subsample_frag_data;
+extern const size_t _binary_chroma_subsample_frag_size;
+extern const unsigned char *_binary_chroma_subsample_vert_data;
+extern const size_t _binary_chroma_subsample_vert_size;
+extern const unsigned char *_binary_densify_frag_data;
+extern const size_t _binary_densify_frag_size;
+extern const unsigned char *_binary_densify_vert_data;
+extern const size_t _binary_densify_vert_size;
+extern const unsigned char *_binary_derivatives_frag_data;
+extern const size_t _binary_derivatives_frag_size;
+extern const unsigned char *_binary_diffusivity_frag_data;
+extern const size_t _binary_diffusivity_frag_size;
+extern const unsigned char *_binary_equations_frag_data;
+extern const size_t _binary_equations_frag_size;
+extern const unsigned char *_binary_equations_vert_data;
+extern const size_t _binary_equations_vert_size;
+extern const unsigned char *_binary_gray_frag_data;
+extern const size_t _binary_gray_frag_size;
+extern const unsigned char *_binary_hole_blend_frag_data;
+extern const size_t _binary_hole_blend_frag_size;
+extern const unsigned char *_binary_hole_fill_frag_data;
+extern const size_t _binary_hole_fill_frag_size;
+extern const unsigned char *_binary_hole_fill_vert_data;
+extern const size_t _binary_hole_fill_vert_size;
+extern const unsigned char *_binary_motion_search_frag_data;
+extern const size_t _binary_motion_search_frag_size;
+extern const unsigned char *_binary_motion_search_vert_data;
+extern const size_t _binary_motion_search_vert_size;
+extern const unsigned char *_binary_prewarp_frag_data;
+extern const size_t _binary_prewarp_frag_size;
+extern const unsigned char *_binary_resize_flow_frag_data;
+extern const size_t _binary_resize_flow_frag_size;
+extern const unsigned char *_binary_sobel_frag_data;
+extern const size_t _binary_sobel_frag_size;
+extern const unsigned char *_binary_sor_frag_data;
+extern const size_t _binary_sor_frag_size;
+extern const unsigned char *_binary_sor_vert_data;
+extern const size_t _binary_sor_vert_size;
+extern const unsigned char *_binary_splat_frag_data;
+extern const size_t _binary_splat_frag_size;
+extern const unsigned char *_binary_splat_vert_data;
+extern const size_t _binary_splat_vert_size;
+extern const unsigned char *_binary_vs_vert_data;
+extern const size_t _binary_vs_vert_size;
+
+#endif  // !defined(_EMBEDDED_FILES_H)
diff --git a/futatabi/equations.frag b/futatabi/equations.frag
new file mode 100644 (file)
index 0000000..04e5370
--- /dev/null
@@ -0,0 +1,187 @@
+#version 450 core
+
+in vec3 tc0, tc_left0, tc_down0;
+in vec3 tc1, tc_left1, tc_down1;
+in float line_offset;
+out uvec4 equation_red, equation_black;
+
+uniform sampler2DArray I_x_y_tex, I_t_tex;
+uniform sampler2DArray diff_flow_tex, base_flow_tex;
+uniform sampler2DArray beta_0_tex;
+uniform sampler2DArray diffusivity_tex;
+
+// Relative weighting of intensity term.
+uniform float delta;
+
+// Relative weighting of gradient term.
+uniform float gamma;
+
+uniform bool zero_diff_flow;
+
+// Similar to packHalf2x16, but the two values share exponent, and are stored
+// as 12-bit fixed point numbers multiplied by that exponent (the leading one
+// can't be implicit in this kind of format). This allows us to store a much
+// greater range of numbers (an 8-bit exponent, i.e., the full fp32 range), and also gives us an
+// extra mantissa bit. (Well, ostensibly two, but because the numbers have to
+// be stored denormalized, we only really gain one.)
+//
+// The price we pay is that if the numbers are of very different magnitudes,
+// the smaller number gets less precision.
+uint pack_floats_shared(float a, float b)
+{
+       float greatest = max(abs(a), abs(b));
+
+       // Find the exponent, increase it by one, and negate it.
+       // E.g., if the nonbiased exponent is 3, the number is between
+       // 2^3 and 2^4, so our normalization factor to get within -1..1
+       // is going to be 2^-4.
+       //
+       // exponent -= 127;
+       // exponent = -(exponent + 1);
+       // exponent += 127;
+       //
+       // is the same as
+       //
+       // exponent = 252 - exponent;
+       uint e = floatBitsToUint(greatest) & 0x7f800000u;
+       float normalizer = uintBitsToFloat((252 << 23) - e);
+
+       // The exponent is the same range as fp32, so just copy it
+       // verbatim, shifted up to where the sign bit used to be.
+       e <<= 1;
+
+       // Quantize to 12 bits.
+       uint qa = uint(int(round(a * (normalizer * 2047.0))));
+       uint qb = uint(int(round(b * (normalizer * 2047.0))));
+
+       return (qa & 0xfffu) | ((qb & 0xfffu) << 12) | e;
+}
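+
+// The matching decode on the reading side would look roughly like this --
+// a sketch derived from the encoding above, not necessarily the exact code
+// used elsewhere in this commit:
+//
+//   vec2 unpack_floats_shared(uint c)
+//   {
+//           // Undo the e <<= 1 above to recover the normalizer's exponent
+//           // field, and rebuild the normalizer itself.
+//           uint e = (c & 0xff000000u) >> 1;
+//           float normalizer = uintBitsToFloat((252u << 23) - e);
+//
+//           // Sign-extend the two 12-bit fixed-point values, then scale back.
+//           float qa = float(int(c << 20) >> 20);
+//           float qb = float(int(c << 8) >> 20);
+//           return vec2(qa, qb) / (normalizer * 2047.0);
+//   }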
+
+float zero_if_outside_border(vec4 val)
+{
+       if (val.w < 1.0f) {
+               // We hit the border (or more like half-way to it), so zero smoothness.
+               return 0.0f;
+       } else {
+               return val.x;
+       }
+}
+
+uvec4 compute_equation(vec3 tc, vec3 tc_left, vec3 tc_down)
+{
+       // Read the flow (on top of the u0/v0 flow).
+       float du, dv;
+       if (zero_diff_flow) {
+               du = dv = 0.0f;
+       } else {
+               vec2 diff_flow = texture(diff_flow_tex, tc).xy;
+               du = diff_flow.x;
+               dv = diff_flow.y;
+       }
+
+       // Read the first derivatives.
+       vec2 I_x_y = texture(I_x_y_tex, tc).xy;
+       float I_x = I_x_y.x;
+       float I_y = I_x_y.y;
+       float I_t = texture(I_t_tex, tc).x;
+
+       // E_I term. Note that we don't square β_0, in line with DeepFlow;
+       // it's probably an error (see variational_refinement.txt),
+       // but squaring it seems to give worse results.
+       float beta_0 = texture(beta_0_tex, tc).x;
+       float k1 = delta * beta_0 * inversesqrt(beta_0 * (I_x * du + I_y * dv + I_t) * (I_x * du + I_y * dv + I_t) + 1e-6);
+       float A11 = k1 * I_x * I_x;
+       float A12 = k1 * I_x * I_y;
+       float A22 = k1 * I_y * I_y;
+       float b1 = -k1 * I_t * I_x;
+       float b2 = -k1 * I_t * I_y;
+
+       // Compute the second derivatives. First I_xx and I_xy.
+       vec2 I_x_y_m2 = textureOffset(I_x_y_tex, tc, ivec2(-2,  0)).xy;
+       vec2 I_x_y_m1 = textureOffset(I_x_y_tex, tc, ivec2(-1,  0)).xy;
+       vec2 I_x_y_p1 = textureOffset(I_x_y_tex, tc, ivec2( 1,  0)).xy;
+       vec2 I_x_y_p2 = textureOffset(I_x_y_tex, tc, ivec2( 2,  0)).xy;
+       vec2 I_xx_yx = (I_x_y_p1 - I_x_y_m1) * (2.0/3.0) + (I_x_y_m2 - I_x_y_p2) * (1.0/12.0);
+       float I_xx = I_xx_yx.x;
+       float I_xy = I_xx_yx.y;
+
+       // And now I_yy; I_yx = I_xy, bar rounding differences, so we don't
+       // bother computing it. We still have to sample the x component,
+       // though, but we can throw it away immediately.
+       float I_y_m2 = textureOffset(I_x_y_tex, tc, ivec2(0, -2)).y;
+       float I_y_m1 = textureOffset(I_x_y_tex, tc, ivec2(0, -1)).y;
+       float I_y_p1 = textureOffset(I_x_y_tex, tc, ivec2(0,  1)).y;
+       float I_y_p2 = textureOffset(I_x_y_tex, tc, ivec2(0,  2)).y;
+       float I_yy = (I_y_p1 - I_y_m1) * (2.0/3.0) + (I_y_m2 - I_y_p2) * (1.0/12.0);
+
+       // Finally I_xt and I_yt. (We compute these as I_tx and I_ty.)
+       vec2 I_t_m2 = textureOffset(I_t_tex, tc, ivec2(-2,  0)).xy;
+       vec2 I_t_m1 = textureOffset(I_t_tex, tc, ivec2(-1,  0)).xy;
+       vec2 I_t_p1 = textureOffset(I_t_tex, tc, ivec2( 1,  0)).xy;
+       vec2 I_t_p2 = textureOffset(I_t_tex, tc, ivec2( 2,  0)).xy;
+       vec2 I_tx_ty = (I_t_p1 - I_t_m1) * (2.0/3.0) + (I_t_m2 - I_t_p2) * (1.0/12.0);
+       float I_xt = I_tx_ty.x;
+       float I_yt = I_tx_ty.y;
+
+       // E_G term. Same normalization as beta_0 (see derivatives.frag).
+       float beta_x = 1.0 / (I_xx * I_xx + I_xy * I_xy + 1e-7);
+       float beta_y = 1.0 / (I_xy * I_xy + I_yy * I_yy + 1e-7);
+       float k2 = gamma * inversesqrt(
+               beta_x * (I_xx * du + I_xy * dv + I_xt) * (I_xx * du + I_xy * dv + I_xt) +
+               beta_y * (I_xy * du + I_yy * dv + I_yt) * (I_xy * du + I_yy * dv + I_yt) +
+               1e-6);
+       float k_x = k2 * beta_x;
+       float k_y = k2 * beta_y;
+       A11 += k_x * I_xx * I_xx + k_y * I_xy * I_xy;
+       A12 += k_x * I_xx * I_xy + k_y * I_xy * I_yy;
+       A22 += k_x * I_xy * I_xy + k_y * I_yy * I_yy;
+       b1 -= k_x * I_xx * I_xt + k_y * I_xy * I_yt;
+       b2 -= k_x * I_xy * I_xt + k_y * I_yy * I_yt;
+
+       // E_S term, sans the part on the right-hand side that deals with
+       // the neighboring pixels. The alpha weight is already multiplied
+       // in by diffusivity.frag.
+       //
+       // Note that we sample in-between two texels, which gives us the 0.5 *
+       // (x[-1] + x[0]) part for free. If one of the texels is a border
+       // texel, it will have zero alpha, and zero_if_outside_border() will
+       // set smoothness to zero.
+       float smooth_l = zero_if_outside_border(texture(diffusivity_tex, tc_left));
+       float smooth_r = zero_if_outside_border(textureOffset(diffusivity_tex, tc_left, ivec2(1, 0)));
+       float smooth_d = zero_if_outside_border(texture(diffusivity_tex, tc_down));
+       float smooth_u = zero_if_outside_border(textureOffset(diffusivity_tex, tc_down, ivec2(0, 1)));
+       A11 += smooth_l + smooth_r + smooth_d + smooth_u;
+       A22 += smooth_l + smooth_r + smooth_d + smooth_u;
+
+       // Laplacian of (u0, v0).
+       vec2 laplacian =
+               smooth_l * textureOffset(base_flow_tex, tc, ivec2(-1,  0)).xy +
+               smooth_r * textureOffset(base_flow_tex, tc, ivec2( 1,  0)).xy +
+               smooth_d * textureOffset(base_flow_tex, tc, ivec2( 0, -1)).xy +
+               smooth_u * textureOffset(base_flow_tex, tc, ivec2( 0,  1)).xy -
+               (smooth_l + smooth_r + smooth_d + smooth_u) * texture(base_flow_tex, tc).xy;
+       b1 += laplacian.x;
+       b2 += laplacian.y;
+
+       // Encode the equation down into four uint32s.
+       uvec4 ret;
+       ret.x = floatBitsToUint(1.0 / A11);
+       ret.y = floatBitsToUint(A12);
+       ret.z = floatBitsToUint(1.0 / A22);
+       ret.w = pack_floats_shared(b1, b2);
+       return ret;
+}
+
+void main()
+{
+       uvec4 eq0 = compute_equation(tc0, tc_left0, tc_down0);
+       uvec4 eq1 = compute_equation(tc1, tc_left1, tc_down1);
+
+       if ((int(round(line_offset)) & 1) == 1) {
+               // Odd line, so the right value is red.
+               equation_red = eq1;
+               equation_black = eq0;
+       } else {
+               equation_red = eq0;
+               equation_black = eq1;
+       }
+}
diff --git a/futatabi/equations.vert b/futatabi/equations.vert
new file mode 100644 (file)
index 0000000..731e159
--- /dev/null
@@ -0,0 +1,38 @@
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 tc0, tc_left0, tc_down0;
+out vec3 tc1, tc_left1, tc_down1;
+out float line_offset;
+
+uniform sampler2DArray diffusivity_tex;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       gl_Layer = gl_InstanceID;
+
+       const vec2 half_texel = 0.5f / textureSize(diffusivity_tex, 0).xy;
+
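+       // Note that the equation output buffer is only half the width of
+       // diffusivity_tex (see the glViewport() call in SetupEquations::exec),
+       // since each output texel holds one red and one black equation; tc0
+       // and tc1 below are the two full-resolution columns it covers.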
+       vec2 tc = position;
+       vec2 tc_left = vec2(tc.x - half_texel.x, tc.y);
+       vec2 tc_down = vec2(tc.x, tc.y - half_texel.y);
+
+       // Adjust for different texel centers.
+       tc0 = vec3(tc.x - half_texel.x, tc.y, gl_InstanceID);
+       tc_left0 = vec3(tc_left.x - half_texel.x, tc_left.y, gl_InstanceID);
+       tc_down0 = vec3(tc_down.x - half_texel.x, tc_down.y, gl_InstanceID);
+
+       tc1 = vec3(tc.x + half_texel.x, tc.y, gl_InstanceID);
+       tc_left1 = vec3(tc_left.x + half_texel.x, tc_left.y, gl_InstanceID);
+       tc_down1 = vec3(tc_down.x + half_texel.x, tc_down.y, gl_InstanceID);
+
+       line_offset = position.y * textureSize(diffusivity_tex, 0).y - 0.5f;
+}
diff --git a/futatabi/eval.cpp b/futatabi/eval.cpp
new file mode 100644 (file)
index 0000000..85783bb
--- /dev/null
@@ -0,0 +1,41 @@
+// Evaluate a .flo file against ground truth,
+// outputting the average end-point error.
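+//
+// Example invocation, with pairs of computed/ground-truth files (the binary
+// name depends on what the build calls this target):
+//
+//   ./eval flow0.flo gt0.flo flow1.flo gt1.flo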
+
+#include "util.h"
+
+#include <assert.h>
+#include <math.h>
+#include <memory>
+#include <stdio.h>
+
+using namespace std;
+
+double eval_flow(const char *filename1, const char *filename2);
+
+int main(int argc, char **argv)
+{
+       double sum_epe = 0.0;
+       int num_flows = 0;
+       for (int i = 1; i + 1 < argc; i += 2) {
+               sum_epe += eval_flow(argv[i], argv[i + 1]);
+               ++num_flows;
+       }
+       printf("Average EPE: %.2f pixels\n", sum_epe / num_flows);
+}
+
+double eval_flow(const char *filename1, const char *filename2)
+{
+       Flow flow = read_flow(filename1);
+       Flow gt = read_flow(filename2);
+       assert(flow.width == gt.width && flow.height == gt.height);
+
+       double sum = 0.0;
+       for (unsigned y = 0; y < unsigned(flow.height); ++y) {
+               for (unsigned x = 0; x < unsigned(flow.width); ++x) {
+                       float du = flow.flow[y * flow.width + x].du;
+                       float dv = flow.flow[y * flow.width + x].dv;
+                       float gt_du = gt.flow[y * flow.width + x].du;
+                       float gt_dv = gt.flow[y * flow.width + x].dv;
+                       sum += hypot(du - gt_du, dv - gt_dv);
+               }
+       }
+       return sum / (flow.width * flow.height);
+}
diff --git a/futatabi/flags.cpp b/futatabi/flags.cpp
new file mode 100644 (file)
index 0000000..4c75370
--- /dev/null
@@ -0,0 +1,82 @@
+#include "flags.h"
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <utility>
+
+using namespace std;
+
+Flags global_flags;
+
+// Long options that have no corresponding short option.
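+// The values start at 1000 so that they can never collide with a short
+// option, which getopt_long() returns as a plain character value.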
+enum LongOption {
+       OPTION_HELP = 1000,
+       OPTION_SLOW_DOWN_INPUT = 1001,
+       OPTION_HTTP_PORT = 1002
+};
+
+void usage()
+{
+       fprintf(stderr, "Usage: futatabi [OPTION]... SOURCE_URL\n");
+       fprintf(stderr, "\n");
+       fprintf(stderr, "      --help                      print usage information\n");
+       fprintf(stderr, "      --slow-down-input           slow down input to realtime (default on if no\n");
+       fprintf(stderr, "                                    source URL given)\n");
+       fprintf(stderr, "  -q, --interpolation-quality N   1 = fastest\n");
+       fprintf(stderr, "                                  2 = default (realtime 720p on fast embedded GPUs)\n");
+       fprintf(stderr, "                                  3 = good (realtime 720p on GTX 970 or so)\n");
+       fprintf(stderr, "                                  4 = best (not realtime on any current GPU)\n");
+       fprintf(stderr, "  -d, --working-directory DIR     where to store frames and database\n");
+       fprintf(stderr, "      --http-port PORT            which port to listen on for output\n");
+}
+
+void parse_flags(int argc, char * const argv[])
+{
+       static const option long_options[] = {
+               { "help", no_argument, 0, OPTION_HELP },
+               { "slow-down-input", no_argument, 0, OPTION_SLOW_DOWN_INPUT },
+               { "interpolation-quality", required_argument, 0, 'q' },
+               { "working-directory", required_argument, 0, 'd' },
+               { "http-port", required_argument, 0, OPTION_HTTP_PORT },
+               { 0, 0, 0, 0 }
+       };
+       for ( ;; ) {
+               int option_index = 0;
+               int c = getopt_long(argc, argv, "q:d:", long_options, &option_index);
+
+               if (c == -1) {
+                       break;
+               }
+               switch (c) {
+               case OPTION_SLOW_DOWN_INPUT:
+                       global_flags.slow_down_input = true;
+                       break;
+               case 'q':
+                       global_flags.interpolation_quality = atoi(optarg);
+                       break;
+               case 'd':
+                       global_flags.working_directory = optarg;
+                       break;
+               case OPTION_HTTP_PORT:
+                       global_flags.http_port = atoi(optarg);
+                       break;
+               case OPTION_HELP:
+                       usage();
+                       exit(0);
+               default:
+                       fprintf(stderr, "Unknown option '%s'\n", argv[optind - 1]);
+                       fprintf(stderr, "\n");
+                       usage();
+                       exit(1);
+               }
+       }
+
+       if (global_flags.interpolation_quality < 1 || global_flags.interpolation_quality > 4) {
+               fprintf(stderr, "Interpolation quality must be 1, 2, 3 or 4.\n");
+               usage();
+               exit(1);
+       }
+}
diff --git a/futatabi/flags.h b/futatabi/flags.h
new file mode 100644 (file)
index 0000000..5e9d34b
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _FLAGS_H
+#define _FLAGS_H
+
+#include <stdint.h>
+#include <string>
+
+#include "defs.h"
+
+struct Flags {
+       std::string stream_source;
+       std::string working_directory = ".";
+       bool slow_down_input = false;
+       int interpolation_quality = 2;
+       uint16_t http_port = DEFAULT_HTTPD_PORT;
+};
+extern Flags global_flags;
+
+void usage();
+void parse_flags(int argc, char * const argv[]);
+
+#endif  // !defined(_FLAGS_H)
diff --git a/futatabi/flow.cpp b/futatabi/flow.cpp
new file mode 100644 (file)
index 0000000..a10d83b
--- /dev/null
@@ -0,0 +1,1095 @@
+#define NO_SDL_GLEXT 1
+
+#include "flow.h"
+
+#include "embedded_files.h"
+#include "gpu_timers.h"
+#include "util.h"
+#include "shared/read_file.h"
+
+#include <algorithm>
+#include <assert.h>
+#include <deque>
+#include <dlfcn.h>
+#include <epoxy/gl.h>
+#include <map>
+#include <math.h>
+#include <memory>
+#include <stack>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <vector>
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+// Weighting constants for the different parts of the variational refinement.
+// These don't correspond 1:1 to the values given in the DIS paper,
+// since we have different normalizations and ranges in some cases.
+// These are found through a simple grid search on some MPI-Sintel data,
+// although the error (EPE) seems to be fairly insensitive to the precise values.
+// Only the relative values matter, so we fix alpha (the smoothness constant)
+// at unity and tweak the others.
+//
+// TODO: Maybe this should not be global.
+float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
+
+// Some global OpenGL objects.
+// TODO: These should really be part of DISComputeFlow.
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
+GLuint vertex_vbo;
+
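+// Computes the number of levels in a full mipmap-style pyramid for the
+// given size, e.g. find_num_levels(1280, 720) == 11.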
+int find_num_levels(int width, int height)
+{
+       int levels = 1;
+       for (int w = width, h = height; w > 1 || h > 1; ) {
+               w >>= 1;
+               h >>= 1;
+               ++levels;
+       }
+       return levels;
+}
+
+GLuint compile_shader(const string &shader_src, GLenum type)
+{
+       GLuint obj = glCreateShader(type);
+       const GLchar *source[] = { shader_src.data() };
+       const GLint length[] = { (GLint)shader_src.size() };
+       glShaderSource(obj, 1, source, length);
+       glCompileShader(obj);
+
+       GLchar info_log[4096];
+       GLsizei log_length = sizeof(info_log) - 1;
+       glGetShaderInfoLog(obj, log_length, &log_length, info_log);
+       info_log[log_length] = 0;
+       if (strlen(info_log) > 0) {
+               fprintf(stderr, "Shader compile log: %s\n", info_log);
+       }
+
+       GLint status;
+       glGetShaderiv(obj, GL_COMPILE_STATUS, &status);
+       if (status == GL_FALSE) {
+               // Add some line numbers to easier identify compile errors.
+               string src_with_lines = "/*   1 */ ";
+               size_t lineno = 1;
+               for (char ch : shader_src) {
+                       src_with_lines.push_back(ch);
+                       if (ch == '\n') {
+                               char buf[32];
+                               snprintf(buf, sizeof(buf), "/* %3zu */ ", ++lineno);
+                               src_with_lines += buf;
+                       }
+               }
+
+               fprintf(stderr, "Failed to compile shader:\n%s\n", src_with_lines.c_str());
+               exit(1);
+       }
+
+       return obj;
+}
+
+GLuint link_program(GLuint vs_obj, GLuint fs_obj)
+{
+       GLuint program = glCreateProgram();
+       glAttachShader(program, vs_obj);
+       glAttachShader(program, fs_obj);
+       glLinkProgram(program);
+       GLint success;
+       glGetProgramiv(program, GL_LINK_STATUS, &success);
+       if (success == GL_FALSE) {
+               GLchar error_log[1024] = {0};
+               glGetProgramInfoLog(program, 1024, nullptr, error_log);
+               fprintf(stderr, "Error linking program: %s\n", error_log);
+               exit(1);
+       }
+       return program;
+}
+
+void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler)
+{
+       if (location == -1) {
+               return;
+       }
+
+       glBindTextureUnit(texture_unit, tex);
+       glBindSampler(texture_unit, sampler);
+       glProgramUniform1i(program, location, texture_unit);
+}
+
+template<size_t num_elements>
+void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements> &textures)
+{
+       auto it = fbos.find(textures);
+       if (it != fbos.end()) {
+               glBindFramebuffer(GL_FRAMEBUFFER, it->second);
+               return;
+       }
+
+       GLuint fbo;
+       glCreateFramebuffers(1, &fbo);
+       GLenum bufs[num_elements];
+       for (size_t i = 0; i < num_elements; ++i) {
+               glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+               bufs[i] = GL_COLOR_ATTACHMENT0 + i;
+       }
+       glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
+
+       fbos[textures] = fbo;
+       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+}
+
+template<size_t num_elements>
+void PersistentFBOSetWithDepth<num_elements>::render_to(GLuint depth_rb, const array<GLuint, num_elements> &textures)
+{
+       auto key = make_pair(depth_rb, textures);
+
+       auto it = fbos.find(key);
+       if (it != fbos.end()) {
+               glBindFramebuffer(GL_FRAMEBUFFER, it->second);
+               return;
+       }
+
+       GLuint fbo;
+       glCreateFramebuffers(1, &fbo);
+       GLenum bufs[num_elements];
+       glNamedFramebufferRenderbuffer(fbo, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rb);
+       for (size_t i = 0; i < num_elements; ++i) {
+               glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+               bufs[i] = GL_COLOR_ATTACHMENT0 + i;
+       }
+       glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
+
+       fbos[key] = fbo;
+       glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+}
+
+GrayscaleConversion::GrayscaleConversion()
+{
+       gray_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       gray_fs_obj = compile_shader(read_file("gray.frag", _binary_gray_frag_data, _binary_gray_frag_size), GL_FRAGMENT_SHADER);
+       gray_program = link_program(gray_vs_obj, gray_fs_obj);
+
+       // Set up the VAO containing all the required position/texcoord data.
+       glCreateVertexArrays(1, &gray_vao);
+       glBindVertexArray(gray_vao);
+
+       GLint position_attrib = glGetAttribLocation(gray_program, "position");
+       glEnableVertexArrayAttrib(gray_vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+       uniform_tex = glGetUniformLocation(gray_program, "tex");
+}
+
+void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, int num_layers)
+{
+       glUseProgram(gray_program);
+       bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);
+
+       glViewport(0, 0, width, height);
+       fbos.render_to(gray_tex);
+       glBindVertexArray(gray_vao);
+       glDisable(GL_BLEND);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+Sobel::Sobel()
+{
+       sobel_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       sobel_fs_obj = compile_shader(read_file("sobel.frag", _binary_sobel_frag_data, _binary_sobel_frag_size), GL_FRAGMENT_SHADER);
+       sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
+
+       uniform_tex = glGetUniformLocation(sobel_program, "tex");
+}
+
+void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers)
+{
+       glUseProgram(sobel_program);
+       bind_sampler(sobel_program, uniform_tex, 0, tex_view, nearest_sampler);
+
+       glViewport(0, 0, level_width, level_height);
+       fbos.render_to(grad_tex);
+       glDisable(GL_BLEND);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+MotionSearch::MotionSearch(const OperatingPoint &op)
+       : op(op)
+{
+       motion_vs_obj = compile_shader(read_file("motion_search.vert", _binary_motion_search_vert_data, _binary_motion_search_vert_size), GL_VERTEX_SHADER);
+       motion_fs_obj = compile_shader(read_file("motion_search.frag", _binary_motion_search_frag_data, _binary_motion_search_frag_size), GL_FRAGMENT_SHADER);
+       motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
+
+       uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
+       uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
+       uniform_out_flow_size = glGetUniformLocation(motion_search_program, "out_flow_size");
+       uniform_image_tex = glGetUniformLocation(motion_search_program, "image_tex");
+       uniform_grad_tex = glGetUniformLocation(motion_search_program, "grad_tex");
+       uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex");
+       uniform_patch_size = glGetUniformLocation(motion_search_program, "patch_size");
+       uniform_num_iterations = glGetUniformLocation(motion_search_program, "num_iterations");
+}
+
+void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers)
+{
+       glUseProgram(motion_search_program);
+
+       bind_sampler(motion_search_program, uniform_image_tex, 0, tex_view, linear_sampler);
+       bind_sampler(motion_search_program, uniform_grad_tex, 1, grad_tex, nearest_sampler);
+       bind_sampler(motion_search_program, uniform_flow_tex, 2, flow_tex, linear_sampler);
+
+       glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
+       glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
+       glProgramUniform2f(motion_search_program, uniform_out_flow_size, width_patches, height_patches);
+       glProgramUniform1ui(motion_search_program, uniform_patch_size, op.patch_size_pixels);
+       glProgramUniform1ui(motion_search_program, uniform_num_iterations, op.search_iterations);
+
+       glViewport(0, 0, width_patches, height_patches);
+       fbos.render_to(flow_out_tex);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+Densify::Densify(const OperatingPoint &op)
+       : op(op)
+{
+       densify_vs_obj = compile_shader(read_file("densify.vert", _binary_densify_vert_data, _binary_densify_vert_size), GL_VERTEX_SHADER);
+       densify_fs_obj = compile_shader(read_file("densify.frag", _binary_densify_frag_data, _binary_densify_frag_size), GL_FRAGMENT_SHADER);
+       densify_program = link_program(densify_vs_obj, densify_fs_obj);
+
+       uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
+       uniform_image_tex = glGetUniformLocation(densify_program, "image_tex");
+       uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
+}
+
+void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers)
+{
+       glUseProgram(densify_program);
+
+       bind_sampler(densify_program, uniform_image_tex, 0, tex_view, linear_sampler);
+       bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
+
+       glProgramUniform2f(densify_program, uniform_patch_size,
+               float(op.patch_size_pixels) / level_width,
+               float(op.patch_size_pixels) / level_height);
+
+       glViewport(0, 0, level_width, level_height);
+       glEnable(GL_BLEND);
+       glBlendFunc(GL_ONE, GL_ONE);
+       fbos.render_to(dense_flow_tex);
+       glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
+       glClear(GL_COLOR_BUFFER_BIT);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches * num_layers);
+}
+
+Prewarp::Prewarp()
+{
+       prewarp_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       prewarp_fs_obj = compile_shader(read_file("prewarp.frag", _binary_prewarp_frag_data, _binary_prewarp_frag_size), GL_FRAGMENT_SHADER);
+       prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj);
+
+       uniform_image_tex = glGetUniformLocation(prewarp_program, "image_tex");
+       uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
+}
+
+void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height, int num_layers)
+{
+       glUseProgram(prewarp_program);
+
+       bind_sampler(prewarp_program, uniform_image_tex, 0, tex_view, linear_sampler);
+       bind_sampler(prewarp_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
+
+       glViewport(0, 0, level_width, level_height);
+       glDisable(GL_BLEND);
+       fbos.render_to(I_tex, I_t_tex, normalized_flow_tex);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+Derivatives::Derivatives()
+{
+       derivatives_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       derivatives_fs_obj = compile_shader(read_file("derivatives.frag", _binary_derivatives_frag_data, _binary_derivatives_frag_size), GL_FRAGMENT_SHADER);
+       derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj);
+
+       uniform_tex = glGetUniformLocation(derivatives_program, "tex");
+}
+
+void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers)
+{
+       glUseProgram(derivatives_program);
+
+       bind_sampler(derivatives_program, uniform_tex, 0, input_tex, nearest_sampler);
+
+       glViewport(0, 0, level_width, level_height);
+       glDisable(GL_BLEND);
+       fbos.render_to(I_x_y_tex, beta_0_tex);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+ComputeDiffusivity::ComputeDiffusivity()
+{
+       diffusivity_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag", _binary_diffusivity_frag_data, _binary_diffusivity_frag_size), GL_FRAGMENT_SHADER);
+       diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj);
+
+       uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex");
+       uniform_diff_flow_tex = glGetUniformLocation(diffusivity_program, "diff_flow_tex");
+       uniform_alpha = glGetUniformLocation(diffusivity_program, "alpha");
+       uniform_zero_diff_flow = glGetUniformLocation(diffusivity_program, "zero_diff_flow");
+}
+
+void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
+{
+       glUseProgram(diffusivity_program);
+
+       bind_sampler(diffusivity_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+       bind_sampler(diffusivity_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler);
+       glProgramUniform1f(diffusivity_program, uniform_alpha, vr_alpha);
+       glProgramUniform1i(diffusivity_program, uniform_zero_diff_flow, zero_diff_flow);
+
+       glViewport(0, 0, level_width, level_height);
+
+       glDisable(GL_BLEND);
+       fbos.render_to(diffusivity_tex);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+SetupEquations::SetupEquations()
+{
+       equations_vs_obj = compile_shader(read_file("equations.vert", _binary_equations_vert_data, _binary_equations_vert_size), GL_VERTEX_SHADER);
+       equations_fs_obj = compile_shader(read_file("equations.frag", _binary_equations_frag_data, _binary_equations_frag_size), GL_FRAGMENT_SHADER);
+       equations_program = link_program(equations_vs_obj, equations_fs_obj);
+
+       uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex");
+       uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex");
+       uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex");
+       uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex");
+       uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex");
+       uniform_diffusivity_tex = glGetUniformLocation(equations_program, "diffusivity_tex");
+       uniform_gamma = glGetUniformLocation(equations_program, "gamma");
+       uniform_delta = glGetUniformLocation(equations_program, "delta");
+       uniform_zero_diff_flow = glGetUniformLocation(equations_program, "zero_diff_flow");
+}
+
+void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
+{
+       glUseProgram(equations_program);
+
+       bind_sampler(equations_program, uniform_I_x_y_tex, 0, I_x_y_tex, nearest_sampler);
+       bind_sampler(equations_program, uniform_I_t_tex, 1, I_t_tex, nearest_sampler);
+       bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
+       bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
+       bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
+       bind_sampler(equations_program, uniform_diffusivity_tex, 5, diffusivity_tex, zero_border_sampler);
+       glProgramUniform1f(equations_program, uniform_delta, vr_delta);
+       glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
+       glProgramUniform1i(equations_program, uniform_zero_diff_flow, zero_diff_flow);
+
+       glViewport(0, 0, (level_width + 1) / 2, level_height);
+       glDisable(GL_BLEND);
+       fbos.render_to(equation_red_tex, equation_black_tex);
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+SOR::SOR()
+{
+       sor_vs_obj = compile_shader(read_file("sor.vert", _binary_sor_vert_data, _binary_sor_vert_size), GL_VERTEX_SHADER);
+       sor_fs_obj = compile_shader(read_file("sor.frag", _binary_sor_frag_data, _binary_sor_frag_size), GL_FRAGMENT_SHADER);
+       sor_program = link_program(sor_vs_obj, sor_fs_obj);
+
+       uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
+       uniform_equation_red_tex = glGetUniformLocation(sor_program, "equation_red_tex");
+       uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex");
+       uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex");
+       uniform_phase = glGetUniformLocation(sor_program, "phase");
+       uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases");
+}
+
+void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer)
+{
+       glUseProgram(sor_program);
+
+       bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+       bind_sampler(sor_program, uniform_diffusivity_tex, 1, diffusivity_tex, zero_border_sampler);
+       bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler);
+       bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler);
+
+       if (!zero_diff_flow) {
+               glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
+       }
+
+       // NOTE: We bind to the texture we are rendering from, but we never write any value
+       // that we read in the same shader pass (we call discard for red values when we compute
+       // black, and vice versa), and we have barriers between the passes, so we're fine
+       // as per the spec.
+       glViewport(0, 0, level_width, level_height);
+       glDisable(GL_BLEND);
+       fbos.render_to(diff_flow_tex);
+
+       for (int i = 0; i < num_iterations; ++i) {
+               {
+                       ScopedTimer timer("Red pass", sor_timer);
+                       if (zero_diff_flow && i == 0) {
+                               glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0);
+                       }
+                       glProgramUniform1i(sor_program, uniform_phase, 0);
+                       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+                       glTextureBarrier();
+               }
+               {
+                       ScopedTimer timer("Black pass", sor_timer);
+                       if (zero_diff_flow && i == 0) {
+                               glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1);
+                       }
+                       glProgramUniform1i(sor_program, uniform_phase, 1);
+                       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+                       if (zero_diff_flow && i == 0) {
+                               glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
+                       }
+                       if (i != num_iterations - 1) {
+                               glTextureBarrier();
+                       }
+               }
+       }
+}
+
+AddBaseFlow::AddBaseFlow()
+{
+       add_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag", _binary_add_base_flow_frag_data, _binary_add_base_flow_frag_size), GL_FRAGMENT_SHADER);
+       add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
+
+       uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
+}
+
+void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers)
+{
+       glUseProgram(add_flow_program);
+
+       bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+
+       glViewport(0, 0, level_width, level_height);
+       glEnable(GL_BLEND);
+       glBlendFunc(GL_ONE, GL_ONE);
+       fbos.render_to(base_flow_tex);
+
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+ResizeFlow::ResizeFlow()
+{
+       resize_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag", _binary_resize_flow_frag_data, _binary_resize_flow_frag_size), GL_FRAGMENT_SHADER);
+       resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
+
+       uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
+       uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
+}
+
+void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers)
+{
+       glUseProgram(resize_flow_program);
+
+       bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+
+       glProgramUniform2f(resize_flow_program, uniform_scale_factor, float(output_width) / input_width, float(output_height) / input_height);
+
+       glViewport(0, 0, output_width, output_height);
+       glDisable(GL_BLEND);
+       fbos.render_to(out_tex);
+
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
+       : width(width), height(height), op(op), motion_search(op), densify(op)
+{
+       // Make some samplers.
+       glCreateSamplers(1, &nearest_sampler);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+       glCreateSamplers(1, &linear_sampler);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+       // The smoothness is sampled so that once we get to a smoothness involving
+       // a value outside the border, the diffusivity between the two becomes zero.
+       // Similarly, gradients are zero outside the border, since the edge is taken
+       // to be constant.
+       glCreateSamplers(1, &zero_border_sampler);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+       glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+       float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };  // Note that zero alpha means we can also see whether we sampled outside the border or not.
+       glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+
+       // Initial flow is zero, 1x1.
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &initial_flow_tex);
+       glTextureStorage3D(initial_flow_tex, 1, GL_RG16F, 1, 1, 1);
+       glClearTexImage(initial_flow_tex, 0, GL_RG, GL_FLOAT, nullptr);
+
+       // Set up the vertex data that will be shared between all passes.
+       float vertices[] = {
+               0.0f, 1.0f,
+               0.0f, 0.0f,
+               1.0f, 1.0f,
+               1.0f, 0.0f,
+       };
+       glCreateBuffers(1, &vertex_vbo);
+       glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+       glCreateVertexArrays(1, &vao);
+       glBindVertexArray(vao);
+       glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+       GLint position_attrib = 0;  // Hard-coded in every vertex shader.
+       glEnableVertexArrayAttrib(vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy)
+{
+       int num_layers = (flow_direction == FORWARD_AND_BACKWARD) ? 2 : 1;
+       int prev_level_width = 1, prev_level_height = 1;
+       GLuint prev_level_flow_tex = initial_flow_tex;
+
+       GPUTimers timers;
+
+       glBindVertexArray(vao);
+       glDisable(GL_DITHER);
+
+       ScopedTimer total_timer("Compute flow", &timers);
+       for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
+               char timer_name[256];
+               snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
+               ScopedTimer level_timer(timer_name, &total_timer);
+
+               int level_width = width >> level;
+               int level_height = height >> level;
+               float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
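+               // (With operating point 3, patch size 12 and overlap 0.75, this is
+               // 12 * (1 - 0.75) = 3 pixels between patch centers.)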
+
+               // Make sure we have patches at least every Nth pixel, e.g. for width=9
+               // and patch_spacing=3 (the default), we put out patch centers in
+               // x=0, x=3, x=6, x=9, which is four patches. The fragment shader will
+               // lock all the centers to integer coordinates if needed.
+               int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
+               int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
+
+               // Make sure we always read from the correct level; the chosen
+               // mipmapping could otherwise be rather unpredictable, especially
+               // during motion search.
+               GLuint tex_view;
+               glGenTextures(1, &tex_view);
+               glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, tex, GL_R8, level, 1, 0, 2);
+
+               // Create a new texture to hold the gradients.
+               GLuint grad_tex = pool.get_texture(GL_R32UI, level_width, level_height, num_layers);
+
+               // Find the derivative.
+               {
+                       ScopedTimer timer("Sobel", &level_timer);
+                       sobel.exec(tex_view, grad_tex, level_width, level_height, num_layers);
+               }
+
+               // Motion search to find the initial flow. We use the flow from the previous
+               // level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
+
+               // Create an output flow texture.
+               GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches, num_layers);
+
+               // And draw.
+               {
+                       ScopedTimer timer("Motion search", &level_timer);
+                       motion_search.exec(tex_view, grad_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches, num_layers);
+               }
+               pool.release_texture(grad_tex);
+
+               // Densification.
+
+               // Set up an output texture (cleared in Densify).
+               GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height, num_layers);
+
+               // And draw.
+               {
+                       ScopedTimer timer("Densification", &level_timer);
+                       densify.exec(tex_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches, num_layers);
+               }
+               pool.release_texture(flow_out_tex);
+
+               // Everything below here in the loop belongs to variational refinement.
+               ScopedTimer varref_timer("Variational refinement", &level_timer);
+
+               // Prewarping; create I and I_t, and a normalized base flow (so we don't
+               // have to normalize it over and over again, which also saves some bandwidth).
+               //
+               // During the entire rest of the variational refinement, flow will be measured
+               // in pixels, not 0..1 normalized OpenGL texture coordinates.
+               // This is because variational refinement depends so heavily on derivatives,
+               // which are measured in intensity levels per pixel.
+               GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+               GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+               GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+               {
+                       ScopedTimer timer("Prewarping", &varref_timer);
+                       prewarp.exec(tex_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height, num_layers);
+               }
+               pool.release_texture(dense_flow_tex);
+               glDeleteTextures(1, &tex_view);
+
+               // TODO: If we don't have variational refinement, we don't need I and I_t,
+               // so computing them is a waste.
+               if (op.variational_refinement) {
+                       // Calculate I_x and I_y. We're only calculating first derivatives;
+                       // the others will be taken on-the-fly in order to sample from fewer
+                       // textures overall, since sampling from the L1 cache is cheap.
+                       // (TODO: Verify that this is indeed faster than making separate
+                       // double-derivative textures.)
+                       GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+                       GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+                       {
+                               ScopedTimer timer("First derivatives", &varref_timer);
+                               derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height, num_layers);
+                       }
+                       pool.release_texture(I_tex);
+
+                       // We need somewhere to store du and dv (the flow increment, relative
+                       // to the non-refined base flow u0 and v0). It's initially garbage,
+                       // but not read until we've written something sane to it.
+                       GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+
+                       // And for diffusivity.
+                       GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+
+                       // And finally for the equation set. See SetupEquations for
+                       // the storage format.
+                       GLuint equation_red_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
+                       GLuint equation_black_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
+
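+                       // Coarser levels get more outer iterations (level + 1); they are
+                       // also much cheaper, since the textures are smaller.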
+                       for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
+                               // Calculate the diffusivity term for each pixel.
+                               {
+                                       ScopedTimer timer("Compute diffusivity", &varref_timer);
+                                       compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0, num_layers);
+                               }
+
+                               // Set up the 2x2 equation system for each pixel.
+                               {
+                                       ScopedTimer timer("Set up equations", &varref_timer);
+                                       setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0, num_layers);
+                               }
+
+                               // Run a few SOR iterations. Note that these are to/from the same texture.
+                               {
+                                       ScopedTimer timer("SOR", &varref_timer);
+                                       sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, num_layers, &timer);
+                               }
+                       }
+
+                       pool.release_texture(I_t_tex);
+                       pool.release_texture(I_x_y_tex);
+                       pool.release_texture(beta_0_tex);
+                       pool.release_texture(diffusivity_tex);
+                       pool.release_texture(equation_red_tex);
+                       pool.release_texture(equation_black_tex);
+
+                       // Add the differential flow found by the variational refinement to the base flow,
+                       // giving the final flow estimate for this level.
+                       // The output is in base_flow_tex; we don't need to make a new texture.
+                       {
+                               ScopedTimer timer("Add differential flow", &varref_timer);
+                               add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
+                       }
+                       pool.release_texture(diff_flow_tex);
+               }
+
+               if (prev_level_flow_tex != initial_flow_tex) {
+                       pool.release_texture(prev_level_flow_tex);
+               }
+               prev_level_flow_tex = base_flow_tex;
+               prev_level_width = level_width;
+               prev_level_height = level_height;
+       }
+       total_timer.end();
+
+       if (!in_warmup) {
+               timers.print();
+       }
+
+       // Scale up the flow to the final size (if needed).
+       if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
+               return prev_level_flow_tex;
+       } else {
+               GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
+               resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height, num_layers);
+               pool.release_texture(prev_level_flow_tex);
+               return final_tex;
+       }
+}
+
+Splat::Splat(const OperatingPoint &op)
+       : op(op)
+{
+       splat_vs_obj = compile_shader(read_file("splat.vert", _binary_splat_vert_data, _binary_splat_vert_size), GL_VERTEX_SHADER);
+       splat_fs_obj = compile_shader(read_file("splat.frag", _binary_splat_frag_data, _binary_splat_frag_size), GL_FRAGMENT_SHADER);
+       splat_program = link_program(splat_vs_obj, splat_fs_obj);
+
+       uniform_splat_size = glGetUniformLocation(splat_program, "splat_size");
+       uniform_alpha = glGetUniformLocation(splat_program, "alpha");
+       uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex");
+       uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex");
+       uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size");
+}
+
+void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
+{
+       glUseProgram(splat_program);
+
+       bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler);
+       bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
+
+       glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
+       glProgramUniform1f(splat_program, uniform_alpha, alpha);
+       glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
+
+       glViewport(0, 0, width, height);
+       glDisable(GL_BLEND);
+       glEnable(GL_DEPTH_TEST);
+       glDepthMask(GL_TRUE);
+       glDepthFunc(GL_LESS);  // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.)
+
+       fbos.render_to(depth_rb, flow_tex);
+
+       // Evidently NVIDIA doesn't use fast clears for glClearTexImage, so clear now that
+       // we've got it bound.
+       glClearColor(1000.0f, 1000.0f, 0.0f, 1.0f);  // Invalid flow.
+       glClearDepth(1.0f);  // Effectively infinity.
+       glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+       glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height * 2);
+
+       glDisable(GL_DEPTH_TEST);
+}
+
+HoleFill::HoleFill()
+{
+       fill_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);
+       fill_fs_obj = compile_shader(read_file("hole_fill.frag", _binary_hole_fill_frag_data, _binary_hole_fill_frag_size), GL_FRAGMENT_SHADER);
+       fill_program = link_program(fill_vs_obj, fill_fs_obj);
+
+       uniform_tex = glGetUniformLocation(fill_program, "tex");
+       uniform_z = glGetUniformLocation(fill_program, "z");
+       uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset");
+}
+
+void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
+{
+       glUseProgram(fill_program);
+
+       bind_sampler(fill_program, uniform_tex, 0, flow_tex, nearest_sampler);
+
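+       // Each directional pass below gets a Z value 1/1024 lower than the previous
+       // one, so that it can overwrite the previous pass's fills, but never pixels
+       // that were valid to begin with.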
+       glProgramUniform1f(fill_program, uniform_z, 1.0f - 1.0f / 1024.0f);
+
+       glViewport(0, 0, width, height);
+       glDisable(GL_BLEND);
+       glEnable(GL_DEPTH_TEST);
+       glDepthFunc(GL_LESS);  // Only update the values > 0.999f (ie., only invalid pixels).
+
+       fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
+
+       // Fill holes from the left, by shifting 1, 2, 4, 8, etc. pixels to the right.
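+       // (Doubling the shift each time means we only need about log2(width) passes.)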
+       for (int offs = 1; offs < width; offs *= 2) {
+               glProgramUniform2f(fill_program, uniform_sample_offset, -offs / float(width), 0.0f);
+               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+               glTextureBarrier();
+       }
+       glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[0], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+       // Similarly from the right; adjust Z a bit down, so that we re-fill the
+       // pixels that were overwritten by the previous pass.
+       glProgramUniform1f(fill_program, uniform_z, 1.0f - 2.0f / 1024.0f);
+       for (int offs = 1; offs < width; offs *= 2) {
+               glProgramUniform2f(fill_program, uniform_sample_offset, offs / float(width), 0.0f);
+               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+               glTextureBarrier();
+       }
+       glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[1], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+       // Up.
+       glProgramUniform1f(fill_program, uniform_z, 1.0f - 3.0f / 1024.0f);
+       for (int offs = 1; offs < height; offs *= 2) {
+               glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, -offs / float(height));
+               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+               glTextureBarrier();
+       }
+       glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[2], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+       // Down.
+       glProgramUniform1f(fill_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+       for (int offs = 1; offs < height; offs *= 2) {
+               glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, offs / float(height));
+               glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+               glTextureBarrier();
+       }
+
+       glDisable(GL_DEPTH_TEST);
+}
+
+HoleBlend::HoleBlend()
+{
+       blend_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);  // Reuse the vertex shader from the fill.
+       blend_fs_obj = compile_shader(read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size), GL_FRAGMENT_SHADER);
+       blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+       uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
+       uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
+       uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
+       uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
+       uniform_z = glGetUniformLocation(blend_program, "z");
+       uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
+}
+
+void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
+{
+       glUseProgram(blend_program);
+
+       bind_sampler(blend_program, uniform_left_tex, 0, temp_tex[0], nearest_sampler);
+       bind_sampler(blend_program, uniform_right_tex, 1, temp_tex[1], nearest_sampler);
+       bind_sampler(blend_program, uniform_up_tex, 2, temp_tex[2], nearest_sampler);
+       bind_sampler(blend_program, uniform_down_tex, 3, flow_tex, nearest_sampler);
+
+       glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+       glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
+
+       glViewport(0, 0, width, height);
+       glDisable(GL_BLEND);
+       glEnable(GL_DEPTH_TEST);
+       glDepthFunc(GL_LEQUAL);  // Skip over all of the pixels that were never holes to begin with.
+
+       fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
+
+       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+       glDisable(GL_DEPTH_TEST);
+}
+
+Blend::Blend(bool split_ycbcr_output)
+       : split_ycbcr_output(split_ycbcr_output)
+{
+       string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size);
+       if (split_ycbcr_output) {
+               // Insert after the first #version line.
+               size_t offset = frag_shader.find('\n');
+               assert(offset != string::npos);
+               frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1);
+       }
+
+       blend_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+       blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
+       blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+       uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
+       uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
+       uniform_alpha = glGetUniformLocation(blend_program, "alpha");
+       uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
+}
+
+void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
+{
+       glUseProgram(blend_program);
+       bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
+       bind_sampler(blend_program, uniform_flow_tex, 1, flow_tex, linear_sampler);  // May be upsampled.
+       glProgramUniform1f(blend_program, uniform_alpha, alpha);
+
+       glViewport(0, 0, level_width, level_height);
+       if (split_ycbcr_output) {
+               fbos_split.render_to(output_tex, output2_tex);
+       } else {
+               fbos.render_to(output_tex);
+       }
+       glDisable(GL_BLEND);  // A bit ironic, perhaps.
+       glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output)
+       : flow_level(op.finest_level),
+         split_ycbcr_output(split_ycbcr_output),
+         splat(op),
+         blend(split_ycbcr_output)
+{
+       // Set up the vertex data that will be shared between all passes.
+       float vertices[] = {
+               0.0f, 1.0f,
+               0.0f, 0.0f,
+               1.0f, 1.0f,
+               1.0f, 0.0f,
+       };
+       glCreateBuffers(1, &vertex_vbo);
+       glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+       glCreateVertexArrays(1, &vao);
+       glBindVertexArray(vao);
+       glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+       GLint position_attrib = 0;  // Hard-coded in every vertex shader.
+       glEnableVertexArrayAttrib(vao, position_attrib);
+       glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
+{
+       GPUTimers timers;
+
+       ScopedTimer total_timer("Interpolate", &timers);
+
+       glBindVertexArray(vao);
+       glDisable(GL_DITHER);
+
+       // Pick out the right level to test splatting results on.
+       GLuint tex_view;
+       glGenTextures(1, &tex_view);
+       glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
+
+       int flow_width = width >> flow_level;
+       int flow_height = height >> flow_level;
+
+       GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
+       GLuint depth_rb = pool.get_renderbuffer(GL_DEPTH_COMPONENT16, flow_width, flow_height);  // Used for ranking flows.
+
+       {
+               ScopedTimer timer("Splat", &total_timer);
+               splat.exec(tex_view, bidirectional_flow_tex, flow_tex, depth_rb, flow_width, flow_height, alpha);
+       }
+       glDeleteTextures(1, &tex_view);
+
+       GLuint temp_tex[3];
+       temp_tex[0] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+       temp_tex[1] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+       temp_tex[2] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+
+       {
+               ScopedTimer timer("Fill holes", &total_timer);
+               hole_fill.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+               hole_blend.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+       }
+
+       pool.release_texture(temp_tex[0]);
+       pool.release_texture(temp_tex[1]);
+       pool.release_texture(temp_tex[2]);
+       pool.release_renderbuffer(depth_rb);
+
+       GLuint output_tex, output2_tex = 0;
+       if (split_ycbcr_output) {
+               output_tex = pool.get_texture(GL_R8, width, height);
+               output2_tex = pool.get_texture(GL_RG8, width, height);
+               {
+                       ScopedTimer timer("Blend", &total_timer);
+                       blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
+               }
+       } else {
+               output_tex = pool.get_texture(GL_RGBA8, width, height);
+               {
+                       ScopedTimer timer("Blend", &total_timer);
+                       blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha);
+               }
+       }
+       pool.release_texture(flow_tex);
+       total_timer.end();
+       if (!in_warmup) {
+               timers.print();
+       }
+
+       return make_pair(output_tex, output2_tex);
+}
+
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
+{
+       {
+               lock_guard<mutex> lock(mu);
+               for (Texture &tex : textures) {
+                       if (!tex.in_use && !tex.is_renderbuffer && tex.format == format &&
+                           tex.width == width && tex.height == height && tex.num_layers == num_layers) {
+                               tex.in_use = true;
+                               return tex.tex_num;
+                       }
+               }
+       }
+
+       Texture tex;
+       if (num_layers == 0) {
+               glCreateTextures(GL_TEXTURE_2D, 1, &tex.tex_num);
+               glTextureStorage2D(tex.tex_num, 1, format, width, height);
+       } else {
+               glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex.tex_num);
+               glTextureStorage3D(tex.tex_num, 1, format, width, height, num_layers);
+       }
+       tex.format = format;
+       tex.width = width;
+       tex.height = height;
+       tex.num_layers = num_layers;
+       tex.in_use = true;
+       tex.is_renderbuffer = false;
+       {
+               lock_guard<mutex> lock(mu);
+               textures.push_back(tex);
+       }
+       return tex.tex_num;
+}
+
+GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height)
+{
+       {
+               lock_guard<mutex> lock(mu);
+               for (Texture &tex : textures) {
+                       if (!tex.in_use && tex.is_renderbuffer && tex.format == format &&
+                           tex.width == width && tex.height == height) {
+                               tex.in_use = true;
+                               return tex.tex_num;
+                       }
+               }
+       }
+
+       Texture tex;
+       glCreateRenderbuffers(1, &tex.tex_num);
+       glNamedRenderbufferStorage(tex.tex_num, format, width, height);
+
+       tex.format = format;
+       tex.width = width;
+       tex.height = height;
+       tex.in_use = true;
+       tex.is_renderbuffer = true;
+       {
+               lock_guard<mutex> lock(mu);
+               textures.push_back(tex);
+       }
+       return tex.tex_num;
+}
+
+void TexturePool::release_texture(GLuint tex_num)
+{
+       lock_guard<mutex> lock(mu);
+       for (Texture &tex : textures) {
+               if (!tex.is_renderbuffer && tex.tex_num == tex_num) {
+                       assert(tex.in_use);
+                       tex.in_use = false;
+                       return;
+               }
+       }
+       assert(false);
+}
+
+void TexturePool::release_renderbuffer(GLuint tex_num)
+{
+       lock_guard<mutex> lock(mu);
+       for (Texture &tex : textures) {
+               if (tex.is_renderbuffer && tex.tex_num == tex_num) {
+                       assert(tex.in_use);
+                       tex.in_use = false;
+                       return;
+               }
+       }
+       //assert(false);
+}
diff --git a/futatabi/flow.h b/futatabi/flow.h
new file mode 100644 (file)
index 0000000..08b2590
--- /dev/null
@@ -0,0 +1,568 @@
+#ifndef _FLOW_H
+#define _FLOW_H 1
+
+// Code for computing optical flow between two images, and using it to interpolate
+// in-between frames. The main user interface is the DISComputeFlow and Interpolate
+// classes (also GrayscaleConversion can be useful).
+
+#include <array>
+#include <epoxy/gl.h>
+#include <map>
+#include <mutex>
+#include <stdint.h>
+#include <utility>
+#include <vector>
+
+class ScopedTimer;
+
+// Predefined operating points from the paper.
+struct OperatingPoint {
+       unsigned coarsest_level;  // TODO: Adjust dynamically based on the resolution?
+       unsigned finest_level;
+       unsigned search_iterations;  // Halved from the paper.
+       unsigned patch_size_pixels;
+       float patch_overlap_ratio;
+       bool variational_refinement;
+
+       // Not part of the original paper; used for interpolation.
+       // NOTE: Values much larger than 1.0 seem to trigger Haswell's “PMA stall”;
+       // the problem is not present on Broadwell and higher (there's a mitigation
+       // in the hardware, but Mesa doesn't enable it at the time of writing).
+       // Since we have hole filling, the holes from 1.0 are not critical,
+       // but larger values seem to do better than hole filling for large
+       // motion, blurs etc. since we have more candidates.
+       float splat_size;
+};
+
+// Operating point 1 (600 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point1 = {
+       5,      // Coarsest level.
+       3,      // Finest level.
+       8,      // Search iterations.
+       8,      // Patch size (pixels).
+       0.30f,  // Overlap ratio.
+       false,  // Variational refinement.
+       1.0f    // Splat size (pixels).
+};
+
+// Operating point 2 (300 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point2 = {
+       5,      // Coarsest level.
+       3,      // Finest level.
+       6,      // Search iterations.
+       8,      // Patch size (pixels).
+       0.40f,  // Overlap ratio.
+       true,   // Variational refinement.
+       1.0f    // Splat size (pixels).
+};
+
+// Operating point 3 (10 Hz on CPU, excluding preprocessing).
+// This is the only one that has been thoroughly tested.
+static constexpr OperatingPoint operating_point3 = {
+       5,      // Coarsest level.
+       1,      // Finest level.
+       8,      // Search iterations.
+       12,     // Patch size (pixels).
+       0.75f,  // Overlap ratio.
+       true,   // Variational refinement.
+       4.0f    // Splat size (pixels).
+};
+
+// Operating point 4 (0.5 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point4 = {
+       5,      // Coarsest level.
+       0,      // Finest level.
+       128,    // Search iterations.
+       12,     // Patch size (pixels).
+       0.75f,  // Overlap ratio.
+       true,   // Variational refinement.
+       8.0f    // Splat size (pixels).
+};
+
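+// Returns the number of levels in a full mip chain for the given size,
+// i.e., floor(log2(max(width, height))) + 1.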
+int find_num_levels(int width, int height);
+
+// A class that caches FBOs that render to a given set of textures.
+// It never frees anything, so it is only suitable for rendering to
+// the same (small) set of textures over and over again.
+template<size_t num_elements>
+class PersistentFBOSet {
+public:
+       void render_to(const std::array<GLuint, num_elements> &textures);
+
+       // Convenience wrappers.
+       void render_to(GLuint texture0) {
+               render_to({{texture0}});
+       }
+
+       void render_to(GLuint texture0, GLuint texture1) {
+               render_to({{texture0, texture1}});
+       }
+
+       void render_to(GLuint texture0, GLuint texture1, GLuint texture2) {
+               render_to({{texture0, texture1, texture2}});
+       }
+
+       void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
+               render_to({{texture0, texture1, texture2, texture3}});
+       }
+
+private:
+       // TODO: Delete these on destruction.
+       std::map<std::array<GLuint, num_elements>, GLuint> fbos;
+};
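+
+// A sketch of the caching idea behind render_to() (assuming DSA-style GL;
+// details may differ from the actual implementation in flow.cpp):
+//
+//   GLuint &fbo = fbos[textures];  // Zero-initialized on first lookup.
+//   if (fbo == 0) {
+//           glCreateFramebuffers(1, &fbo);
+//           for (size_t i = 0; i < num_elements; ++i)
+//                   glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+//   }
+//   glBindFramebuffer(GL_FRAMEBUFFER, fbo);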
+
+// Same, but with a depth texture.
+template<size_t num_elements>
+class PersistentFBOSetWithDepth {
+public:
+       void render_to(GLuint depth_rb, const std::array<GLuint, num_elements> &textures);
+
+       // Convenience wrappers.
+       void render_to(GLuint depth_rb, GLuint texture0) {
+               render_to(depth_rb, {{texture0}});
+       }
+
+       void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1) {
+               render_to(depth_rb, {{texture0, texture1}});
+       }
+
+       void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2) {
+               render_to(depth_rb, {{texture0, texture1, texture2}});
+       }
+
+       void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
+               render_to(depth_rb, {{texture0, texture1, texture2, texture3}});
+       }
+
+private:
+       // TODO: Delete these on destruction.
+       std::map<std::pair<GLuint, std::array<GLuint, num_elements>>, GLuint> fbos;
+};
+
+// Convert RGB to grayscale, using Rec. 709 coefficients.
+class GrayscaleConversion {
+public:
+       GrayscaleConversion();
+       void exec(GLint tex, GLint gray_tex, int width, int height, int num_layers);
+
+private:
+       PersistentFBOSet<1> fbos;
+       GLuint gray_vs_obj;
+       GLuint gray_fs_obj;
+       GLuint gray_program;
+       GLuint gray_vao;
+
+       GLuint uniform_tex;
+};
+
+// Compute gradients in every point, used for the motion search.
+// The DIS paper doesn't actually mention how these are computed,
+// but seemingly, a 3x3 Sobel operator is used here (at least in
+// later versions of the code), while a [1 -8 0 8 -1] kernel is
+// used for all the derivatives in the variational refinement part
+// (which borrows code from DeepFlow). This is inconsistent,
+// but I guess we're better off staying with the original
+// decisions until we actually know that different ones would be better.
+class Sobel {
+public:
+       Sobel();
+       void exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers);
+
+private:
+       PersistentFBOSet<1> fbos;
+       GLuint sobel_vs_obj;
+       GLuint sobel_fs_obj;
+       GLuint sobel_program;
+
+       GLuint uniform_tex;
+};
+
+// Motion search to find the initial flow. See motion_search.frag for documentation.
+class MotionSearch {
+public:
+       MotionSearch(const OperatingPoint &op);
+       void exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers);
+
+private:
+       const OperatingPoint op;
+       PersistentFBOSet<1> fbos;
+
+       GLuint motion_vs_obj;
+       GLuint motion_fs_obj;
+       GLuint motion_search_program;
+
+       GLuint uniform_inv_image_size, uniform_inv_prev_level_size, uniform_out_flow_size;
+       GLuint uniform_image_tex, uniform_grad_tex, uniform_flow_tex;
+       GLuint uniform_patch_size, uniform_num_iterations;
+};
+
+// Do “densification”, ie., upsampling of the flow patches to the flow field
+// (the same size as the image at this level). We draw one quad per patch
+// over its entire covered area (using instancing in the vertex shader),
+// and then weight the contributions in the pixel shader by post-warp difference.
+// This is equation (3) in the paper.
+//
+// We accumulate the flow vectors in the R/G channels (for u/v) and the total
+// weight in the B channel. Dividing R and G by B gives the normalized values.
+class Densify {
+public:
+       Densify(const OperatingPoint &op);
+       void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers);
+
+private:
+       OperatingPoint op;
+       PersistentFBOSet<1> fbos;
+
+       GLuint densify_vs_obj;
+       GLuint densify_fs_obj;
+       GLuint densify_program;
+
+       GLuint uniform_patch_size;
+       GLuint uniform_image_tex, uniform_flow_tex;
+};
+
+// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of
+// I_0 and I_w. The prewarping is what enables us to solve the variational
+// flow for du,dv instead of u,v.
+//
+// Also calculates the normalized flow, ie. divides by z (this is needed because
+// Densify works by additive blending) and multiplies by the image size.
+//
+// See variational_refinement.txt for more information.
+class Prewarp {
+public:
+       Prewarp();
+       void exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height, int num_layers);
+
+private:
+       PersistentFBOSet<3> fbos;
+
+       GLuint prewarp_vs_obj;
+       GLuint prewarp_fs_obj;
+       GLuint prewarp_program;
+
+       GLuint uniform_image_tex, uniform_flow_tex;
+};
+
+// From I, calculate the partial derivatives I_x and I_y. We use a four-tap
+// central difference filter, since apparently that's tradition (I haven't
+// measured quality versus the more usual 0.5 (I[x+1] - I[x-1])).
+// The coefficients come from
+//
+//   https://en.wikipedia.org/wiki/Finite_difference_coefficient
+//
+// Also computes β_0, since it depends only on I_x and I_y.
+class Derivatives {
+public:
+       Derivatives();
+       void exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers);
+
+private:
+       PersistentFBOSet<2> fbos;
+
+       GLuint derivatives_vs_obj;
+       GLuint derivatives_fs_obj;
+       GLuint derivatives_program;
+
+       GLuint uniform_tex;
+};
+
+// Calculate the diffusivity for each pixel, g(x,y). Smoothness (s) will
+// be calculated in the shaders on-the-fly by sampling in-between two
+// neighboring g(x,y) pixels, plus a border tweak to make sure we get
+// zero smoothness at the border.
+//
+// See variational_refinement.txt for more information.
+class ComputeDiffusivity {
+public:
+       ComputeDiffusivity();
+       void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers);
+
+private:
+       PersistentFBOSet<1> fbos;
+
+       GLuint diffusivity_vs_obj;
+       GLuint diffusivity_fs_obj;
+       GLuint diffusivity_program;
+
+       GLuint uniform_flow_tex, uniform_diff_flow_tex;
+       GLuint uniform_alpha, uniform_zero_diff_flow;
+};
+
+// Set up the equations set (two equations in two unknowns, per pixel).
+// We store five floats; the three non-redundant elements of the 2x2 matrix (A)
+// as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit
+// floats. (Actually, we store the inverse of the diagonal elements, because
+// we only ever need to divide by them.) This fits into four u32 values;
+// R, G, B for the matrix (the last element is symmetric) and A for the two b values.
+// All the values of the energy term (E_I, E_G, E_S), except the smoothness
+// terms that depend on other pixels, are calculated in one pass.
+//
+// The equation set is split in two; one contains only the pixels needed for
+// the red pass, and one only for the black pass (see sor.frag). This reduces
+// the amount of data the SOR shader has to pull in, at the cost of some
+// complexity when the equation texture ends up at half the width and we need
+// to adjust texture coordinates.  The contraction is done along the horizontal
+// axis, so that on even rows (0, 2, 4, ...), the “red” texture will contain
+// pixels 0, 2, 4, 6, etc., and on odd rows 1, 3, 5, etc.
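+// (Equivalently: “red” texel (x, y) corresponds to source pixel (2x + (y & 1), y).)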
+//
+// See variational_refinement.txt for more information about the actual
+// equations in use.
+class SetupEquations {
+public:
+       SetupEquations();
+       void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers);
+
+private:
+       PersistentFBOSet<2> fbos;
+
+       GLuint equations_vs_obj;
+       GLuint equations_fs_obj;
+       GLuint equations_program;
+
+       GLuint uniform_I_x_y_tex, uniform_I_t_tex;
+       GLuint uniform_diff_flow_tex, uniform_base_flow_tex;
+       GLuint uniform_beta_0_tex;
+       GLuint uniform_diffusivity_tex;
+       GLuint uniform_gamma, uniform_delta, uniform_zero_diff_flow;
+};
+
+// Actually solve the equation sets made by SetupEquations, by means of
+// successive over-relaxation (SOR).
+//
+// See variational_refinement.txt for more information.
+class SOR {
+public:
+       SOR();
+       void exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer);
+
+private:
+       PersistentFBOSet<1> fbos;
+
+       GLuint sor_vs_obj;
+       GLuint sor_fs_obj;
+       GLuint sor_program;
+
+       GLuint uniform_diff_flow_tex;
+       GLuint uniform_equation_red_tex, uniform_equation_black_tex;
+       GLuint uniform_diffusivity_tex;
+       GLuint uniform_phase, uniform_num_nonzero_phases;
+};
+
+// Simply add the differential flow found by the variational refinement to the base flow.
+// The output is in base_flow_tex; we don't need to make a new texture.
+class AddBaseFlow {
+public:
+       AddBaseFlow();
+       void exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers);
+
+private:
+       PersistentFBOSet<1> fbos;
+
+       GLuint add_flow_vs_obj;
+       GLuint add_flow_fs_obj;
+       GLuint add_flow_program;
+
+       GLuint uniform_diff_flow_tex;
+};
+
+// Take a copy of the flow, bilinearly interpolated and scaled up.
+class ResizeFlow {
+public:
+       ResizeFlow();
+       void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers);
+
+private:
+       PersistentFBOSet<1> fbos;
+
+       GLuint resize_flow_vs_obj;
+       GLuint resize_flow_fs_obj;
+       GLuint resize_flow_program;
+
+       GLuint uniform_flow_tex;
+       GLuint uniform_scale_factor;
+};
+
+// All operations, except construction and destruction, are thread-safe.
+class TexturePool {
+public:
+       GLuint get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers = 0);
+       void release_texture(GLuint tex_num);
+       GLuint get_renderbuffer(GLenum format, GLuint width, GLuint height);
+       void release_renderbuffer(GLuint tex_num);
+
+private:
+       struct Texture {
+               GLuint tex_num;
+               GLenum format;
+               GLuint width, height, num_layers;
+               bool in_use = false;
+               bool is_renderbuffer = false;
+       };
+       std::mutex mu;
+       std::vector<Texture> textures;  // Under mu.
+};
+
+class DISComputeFlow {
+public:
+       DISComputeFlow(int width, int height, const OperatingPoint &op);
+
+       enum FlowDirection {
+               FORWARD,
+               FORWARD_AND_BACKWARD
+       };
+       enum ResizeStrategy {
+               DO_NOT_RESIZE_FLOW,
+               RESIZE_FLOW_TO_FULL_SIZE
+       };
+
+       // The texture must have two layers (first and second frame).
+       // Returns a texture that must be released with release_texture()
+       // after use.
+       GLuint exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy);
+
+       void release_texture(GLuint tex)
+       {
+               pool.release_texture(tex);
+       }
+
+private:
+       int width, height;
+       GLuint initial_flow_tex;
+       GLuint vertex_vbo, vao;
+       TexturePool pool;
+       const OperatingPoint op;
+
+       // The various passes.
+       Sobel sobel;
+       MotionSearch motion_search;
+       Densify densify;
+       Prewarp prewarp;
+       Derivatives derivatives;
+       ComputeDiffusivity compute_diffusivity;
+       SetupEquations setup_equations;
+       SOR sor;
+       AddBaseFlow add_base_flow;
+       ResizeFlow resize_flow;
+};
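+
+// A minimal usage sketch (mirroring compute_flow_only() in flow_main.cpp;
+// gray_tex is assumed to be a two-layer GL_R8 texture with mipmaps):
+//
+//   DISComputeFlow compute_flow(width, height, operating_point3);
+//   GLuint flow_tex = compute_flow.exec(
+//           gray_tex, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+//   ... read back or use flow_tex ...
+//   compute_flow.release_texture(flow_tex);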
+
+// Forward-warp the flow half-way (or rather, by alpha). A non-zero “splatting”
+// radius fills most of the holes.
+class Splat {
+public:
+       Splat(const OperatingPoint &op);
+
+       // alpha is the time of the interpolated frame (0..1).
+       void exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
+
+private:
+       const OperatingPoint op;
+       PersistentFBOSetWithDepth<1> fbos;
+
+       GLuint splat_vs_obj;
+       GLuint splat_fs_obj;
+       GLuint splat_program;
+
+       GLuint uniform_splat_size, uniform_alpha;
+       GLuint uniform_gray_tex, uniform_flow_tex;
+       GLuint uniform_inv_flow_size;
+};
+
+// Doing good and fast hole-filling on a GPU is nontrivial. We choose an option
+// that's fairly simple (given that most holes are really small) and also hopefully
+// cheap should the holes not be so small. Conceptually, we look for the first
+// non-hole to the left of us (ie., shoot a ray until we hit something), then
+// the first non-hole to the right of us, then up and down, and then average them
+// all together. It's going to create “stars” if the holes are big, but OK, that's
+// a tradeoff.
+//
+// Our implementation here is efficient assuming that the hierarchical Z-buffer is
+// on even for shaders that do discard (this typically kills early Z, but hopefully
+// not hierarchical Z); we set up Z so that only holes are written to, which means
+// that as soon as a hole is filled, the rasterizer should just skip it. Most of the
+// fullscreen quads should just be discarded outright, really.
+class HoleFill {
+public:
+       HoleFill();
+
+       // Output will be in flow_tex and temp_tex[0, 1, 2], representing the fills
+       // from below, the left, the right and above, respectively. Use HoleBlend
+       // to merge them into one.
+       void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height);
+
+private:
+       PersistentFBOSetWithDepth<1> fbos;
+
+       GLuint fill_vs_obj;
+       GLuint fill_fs_obj;
+       GLuint fill_program;
+
+       GLuint uniform_tex;
+       GLuint uniform_z, uniform_sample_offset;
+};
+
+// Blend the four directions from HoleFill into one pixel, so that single-pixel
+// holes become the average of their four neighbors.
+class HoleBlend {
+public:
+       HoleBlend();
+
+       void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height);
+
+private:
+       PersistentFBOSetWithDepth<1> fbos;
+
+       GLuint blend_vs_obj;
+       GLuint blend_fs_obj;
+       GLuint blend_program;
+
+       GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex;
+       GLuint uniform_z, uniform_sample_offset;
+};
+
+class Blend {
+public:
+       Blend(bool split_ycbcr_output);
+
+       // output2_tex is only used if split_ycbcr_output was true.
+       void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int width, int height, float alpha);
+
+private:
+       bool split_ycbcr_output;
+       PersistentFBOSet<1> fbos;
+       PersistentFBOSet<2> fbos_split;
+       GLuint blend_vs_obj;
+       GLuint blend_fs_obj;
+       GLuint blend_program;
+
+       GLuint uniform_image_tex, uniform_flow_tex;
+       GLuint uniform_alpha, uniform_flow_consistency_tolerance;
+};
+
+class Interpolate {
+public:
+       Interpolate(const OperatingPoint &op, bool split_ycbcr_output);
+
+       // Returns a texture (or two, if split_ycbcr_output is true) that must
+       // be released with release_texture() after use. image_tex must be a
+       // two-layer RGBA8 texture with mipmaps (unless flow_level == 0).
+       std::pair<GLuint, GLuint> exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
+
+       void release_texture(GLuint tex)
+       {
+               pool.release_texture(tex);
+       }
+
+private:
+       int flow_level;
+       GLuint vertex_vbo, vao;
+       TexturePool pool;
+       const bool split_ycbcr_output;
+
+       Splat splat;
+       HoleFill hole_fill;
+       HoleBlend hole_blend;
+       Blend blend;
+};
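+
+// A minimal usage sketch (assuming image_tex is a two-layer RGBA8 texture with
+// mipmaps, gray_tex its grayscale version, and bidirectional_flow_tex the
+// result of DISComputeFlow::exec() with FORWARD_AND_BACKWARD):
+//
+//   Interpolate interpolate(operating_point3, /*split_ycbcr_output=*/false);
+//   GLuint interp_tex = interpolate.exec(image_tex, gray_tex, bidirectional_flow_tex,
+//                                        width, height, /*alpha=*/0.5f).first;
+//   ... use interp_tex ...
+//   interpolate.release_texture(interp_tex);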
+
+#endif  // !defined(_FLOW_H)
diff --git a/futatabi/flow_main.cpp b/futatabi/flow_main.cpp
new file mode 100644 (file)
index 0000000..dc82d22
--- /dev/null
@@ -0,0 +1,495 @@
+#define NO_SDL_GLEXT 1
+
+#include "flow.h"
+#include "gpu_timers.h"
+#include "util.h"
+
+#include <SDL2/SDL.h>
+#include <SDL2/SDL_error.h>
+#include <SDL2/SDL_events.h>
+#include <SDL2/SDL_image.h>
+#include <SDL2/SDL_keyboard.h>
+#include <SDL2/SDL_mouse.h>
+#include <SDL2/SDL_video.h>
+#include <algorithm>
+#include <assert.h>
+#include <deque>
+#include <epoxy/gl.h>
+#include <getopt.h>
+#include <map>
+#include <memory>
+#include <stack>
+#include <stdio.h>
+#include <unistd.h>
+#include <vector>
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+SDL_Window *window;
+
+bool enable_warmup = false;
+bool enable_variational_refinement = true;  // Just for debugging.
+bool enable_interpolation = false;
+
+extern float vr_alpha, vr_delta, vr_gamma;
+
+// Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
+struct ReadInProgress {
+       GLuint pbo;
+       string filename0, filename1;
+       string flow_filename, ppm_filename;  // Either may be empty for no write.
+};
+stack<GLuint> spare_pbos;
+deque<ReadInProgress> reads_in_progress;
+
+enum MipmapPolicy {
+       WITHOUT_MIPMAPS,
+       WITH_MIPMAPS
+};
+
+GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_ret, MipmapPolicy mipmaps)
+{
+       SDL_Surface *surf = IMG_Load(filename);
+       if (surf == nullptr) {
+               fprintf(stderr, "IMG_Load(%s): %s\n", filename, IMG_GetError());
+               exit(1);
+       }
+
+       // For whatever reason, SDL doesn't support converting to YUV or grayscale
+       // surfaces, so we'll do it ourselves.
+       SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
+       if (rgb_surf == nullptr) {
+               fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
+               exit(1);
+       }
+
+       SDL_FreeSurface(surf);
+
+       unsigned width = rgb_surf->w, height = rgb_surf->h;
+       const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
+       unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
+
+       // Convert to bottom-left origin by flipping the rows (the data stays RGBA).
+       for (unsigned y = 0; y < height; ++y) {
+               unsigned y2 = height - 1 - y;
+               memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
+       }
+       SDL_FreeSurface(rgb_surf);
+
+       int num_levels = (mipmaps == WITH_MIPMAPS) ? find_num_levels(width, height) : 1;
+
+       GLuint tex;
+       glCreateTextures(GL_TEXTURE_2D, 1, &tex);
+       glTextureStorage2D(tex, num_levels, GL_RGBA8, width, height);
+       glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
+
+       if (mipmaps == WITH_MIPMAPS) {
+               glGenerateTextureMipmap(tex);
+       }
+
+       *width_ret = width;
+       *height_ret = height;
+
+       return tex;
+}
+
+// OpenGL uses a bottom-left coordinate system; .flo files use a top-left one.
+void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
+{
+       for (unsigned i = 0; i < width * height; ++i) {
+               dense_flow[i * 2 + 1] = -dense_flow[i * 2 + 1];
+       }
+}
+
+// Not relevant for RGB.
+void flip_coordinate_system(uint8_t *dense_flow, unsigned width, unsigned height)
+{
+}
+
+void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+       FILE *flowfp = fopen(filename, "wb");
+       fprintf(flowfp, "FEIH");
+       fwrite(&width, 4, 1, flowfp);
+       fwrite(&height, 4, 1, flowfp);
+       for (unsigned y = 0; y < height; ++y) {
+               int yy = height - y - 1;
+               fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
+       }
+       fclose(flowfp);
+}
+
+// Not relevant for RGB.
+void write_flow(const char *filename, const uint8_t *dense_flow, unsigned width, unsigned height)
+{
+       assert(false);
+}
+
+void write_ppm(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+       FILE *fp = fopen(filename, "wb");
+       fprintf(fp, "P6\n%d %d\n255\n", width, height);
+       for (unsigned y = 0; y < unsigned(height); ++y) {
+               int yy = height - y - 1;
+               for (unsigned x = 0; x < unsigned(width); ++x) {
+                       float du = dense_flow[(yy * width + x) * 2 + 0];
+                       float dv = dense_flow[(yy * width + x) * 2 + 1];
+
+                       uint8_t r, g, b;
+                       flow2rgb(du, dv, &r, &g, &b);
+                       putc(r, fp);
+                       putc(g, fp);
+                       putc(b, fp);
+               }
+       }
+       fclose(fp);
+}
+
+void write_ppm(const char *filename, const uint8_t *rgba, unsigned width, unsigned height)
+{
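+       // One byte of slack at the end, since the memcpy below copies four bytes
+       // (RGBA) into each three-byte RGB slot; the last pixel overruns by one byte.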
+       unique_ptr<uint8_t[]> rgb_line(new uint8_t[width * 3 + 1]);
+
+       FILE *fp = fopen(filename, "wb");
+       fprintf(fp, "P6\n%d %d\n255\n", width, height);
+       for (unsigned y = 0; y < height; ++y) {
+               unsigned y2 = height - 1 - y;
+               for (size_t x = 0; x < width; ++x) {
+                       memcpy(&rgb_line[x * 3], &rgba[(y2 * width + x) * 4], 4);
+               }
+               fwrite(rgb_line.get(), width * 3, 1, fp);
+       }
+       fclose(fp);
+}
+
+struct FlowType {
+       using type = float;
+       static constexpr GLenum gl_format = GL_RG;
+       static constexpr GLenum gl_type = GL_FLOAT;
+       static constexpr int num_channels = 2;
+};
+
+struct RGBAType {
+       using type = uint8_t;
+       static constexpr GLenum gl_format = GL_RGBA;
+       static constexpr GLenum gl_type = GL_UNSIGNED_BYTE;
+       static constexpr int num_channels = 4;
+};
+
+template <class Type>
+void finish_one_read(GLuint width, GLuint height)
+{
+       using T = typename Type::type;
+       constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
+
+       assert(!reads_in_progress.empty());
+       ReadInProgress read = reads_in_progress.front();
+       reads_in_progress.pop_front();
+
+       unique_ptr<T[]> flow(new typename Type::type[width * height * Type::num_channels]);
+       void *buf = glMapNamedBufferRange(read.pbo, 0, width * height * bytes_per_pixel, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
+       memcpy(flow.get(), buf, width * height * bytes_per_pixel);  // TODO: Unneeded for RGBAType, since flip_coordinate_system() does nothing.
+       glUnmapNamedBuffer(read.pbo);
+       spare_pbos.push(read.pbo);
+
+       flip_coordinate_system(flow.get(), width, height);
+       if (!read.flow_filename.empty()) {
+               write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+               fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+       }
+       if (!read.ppm_filename.empty()) {
+               write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+       }
+}
+
+template <class Type>
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+       using T = typename Type::type;
+       constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
+
+       if (spare_pbos.empty()) {
+               finish_one_read<Type>(width, height);
+       }
+       assert(!spare_pbos.empty());
+       reads_in_progress.emplace_back(ReadInProgress{ spare_pbos.top(), filename0, filename1, flow_filename, ppm_filename });
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, spare_pbos.top());
+       spare_pbos.pop();
+       glGetTextureImage(tex, 0, Type::gl_format, Type::gl_type, width * height * bytes_per_pixel, nullptr);
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
+void compute_flow_only(int argc, char **argv, int optind)
+{
+       const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+       const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+       const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
+
+       // Load pictures.
+       unsigned width1, height1, width2, height2;
+       GLuint tex0 = load_texture(filename0, &width1, &height1, WITHOUT_MIPMAPS);
+       GLuint tex1 = load_texture(filename1, &width2, &height2, WITHOUT_MIPMAPS);
+
+       if (width1 != width2 || height1 != height2) {
+               fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
+                       width1, height1, width2, height2);
+               exit(1);
+       }
+
+       // Move them into an array texture, since that's how the rest of the code
+       // would like them.
+       GLuint image_tex;
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
+       glTextureStorage3D(image_tex, 1, GL_RGBA8, width1, height1, 2);
+       glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+       glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+       glDeleteTextures(1, &tex0);
+       glDeleteTextures(1, &tex1);
+
+       // Set up some PBOs to do asynchronous readback.
+       GLuint pbos[5];
+       glCreateBuffers(5, pbos);
+       for (int i = 0; i < 5; ++i) {
+               glNamedBufferData(pbos[i], width1 * height1 * 2 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+               spare_pbos.push(pbos[i]);
+       }
+
+       int levels = find_num_levels(width1, height1);
+
+       GLuint tex_gray;
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
+       glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
+
+       OperatingPoint op = operating_point3;
+       if (!enable_variational_refinement) {
+               op.variational_refinement = false;
+       }
+
+       DISComputeFlow compute_flow(width1, height1, op);  // Must be initialized before gray.
+       GrayscaleConversion gray;
+       gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+       glGenerateTextureMipmap(tex_gray);
+
+       if (enable_warmup) {
+               in_warmup = true;
+               for (int i = 0; i < 10; ++i) {
+                       GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+                       compute_flow.release_texture(final_tex);
+               }
+               in_warmup = false;
+       }
+
+       GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+       //GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+
+       schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
+       compute_flow.release_texture(final_tex);
+
+       // See if there are more flows on the command line (i.e., more than three arguments),
+       // and if so, process them.
+       int num_flows = (argc - optind) / 3;
+       for (int i = 1; i < num_flows; ++i) {
+               const char *filename0 = argv[optind + i * 3 + 0];
+               const char *filename1 = argv[optind + i * 3 + 1];
+               const char *flow_filename = argv[optind + i * 3 + 2];
+               GLuint width, height;
+               GLuint tex0 = load_texture(filename0, &width, &height, WITHOUT_MIPMAPS);
+               if (width != width1 || height != height1) {
+                       fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
+                               filename0, width, height, width1, height1);
+                       exit(1);
+               }
+               glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+               glDeleteTextures(1, &tex0);
+
+               GLuint tex1 = load_texture(filename1, &width, &height, WITHOUT_MIPMAPS);
+               if (width != width1 || height != height1) {
+                       fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
+                               filename1, width, height, width1, height1);
+                       exit(1);
+               }
+               glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+               glDeleteTextures(1, &tex1);
+
+               gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+               glGenerateTextureMipmap(tex_gray);
+
+               GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+
+               schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "");
+               compute_flow.release_texture(final_tex);
+       }
+       glDeleteTextures(1, &tex_gray);
+
+       while (!reads_in_progress.empty()) {
+               finish_one_read<FlowType>(width1, height1);
+       }
+}
+
+// Interpolate images based on
+//
+//   Herbst, Seitz, Baker: “Occlusion Reasoning for Temporal Interpolation
+//   Using Optical Flow”
+//
+// or at least a reasonable subset thereof. Unfinished.
+void interpolate_image(int argc, char **argv, int optind)
+{
+       const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+       const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+       //const char *out_filename = argc >= (optind + 3) ? argv[optind + 2] : "interpolated.png";
+
+       // Load pictures.
+       unsigned width1, height1, width2, height2;
+       GLuint tex0 = load_texture(filename0, &width1, &height1, WITH_MIPMAPS);
+       GLuint tex1 = load_texture(filename1, &width2, &height2, WITH_MIPMAPS);
+
+       if (width1 != width2 || height1 != height2) {
+               fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
+                       width1, height1, width2, height2);
+               exit(1);
+       }
+
+       // Move them into an array texture, since that's how the rest of the code
+       // would like them.
+       int levels = find_num_levels(width1, height1);
+       GLuint image_tex;
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
+       glTextureStorage3D(image_tex, levels, GL_RGBA8, width1, height1, 2);
+       glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+       glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+       glDeleteTextures(1, &tex0);
+       glDeleteTextures(1, &tex1);
+       glGenerateTextureMipmap(image_tex);
+
+       // Set up some PBOs to do asynchronous readback.
+       GLuint pbos[5];
+       glCreateBuffers(5, pbos);
+       for (int i = 0; i < 5; ++i) {
+               glNamedBufferData(pbos[i], width1 * height1 * 4 * sizeof(uint8_t), nullptr, GL_STREAM_READ);
+               spare_pbos.push(pbos[i]);
+       }
+
+       OperatingPoint op = operating_point3;
+       if (!enable_variational_refinement) {
+               op.variational_refinement = false;
+       }
+       DISComputeFlow compute_flow(width1, height1, op);
+       GrayscaleConversion gray;
+       Interpolate interpolate(op, /*split_ycbcr_output=*/false);
+
+       GLuint tex_gray;
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
+       glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
+       gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+       glGenerateTextureMipmap(tex_gray);
+
+       if (enable_warmup) {
+               in_warmup = true;
+               for (int i = 0; i < 10; ++i) {
+                       GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+                       GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f).first;
+                       compute_flow.release_texture(bidirectional_flow_tex);
+                       interpolate.release_texture(interpolated_tex);
+               }
+               in_warmup = false;
+       }
+
+       GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+
+       for (int frameno = 1; frameno < 60; ++frameno) {
+               char ppm_filename[256];
+               snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
+
+               float alpha = frameno / 60.0f;
+               GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha).first;
+
+               schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
+               interpolate.release_texture(interpolated_tex);
+       }
+
+       while (!reads_in_progress.empty()) {
+               finish_one_read<RGBAType>(width1, height1);
+       }
+}
+
+int main(int argc, char **argv)
+{
+       static const option long_options[] = {
+               { "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
+               { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
+               { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
+               { "disable-timing", no_argument, 0, 1000 },
+               { "detailed-timing", no_argument, 0, 1003 },
+               { "disable-variational-refinement", no_argument, 0, 1001 },
+               { "interpolate", no_argument, 0, 1002 },
+               { "warmup", no_argument, 0, 1004 }
+       };
+
+       enable_timing = true;
+
+       for ( ;; ) {
+               int option_index = 0;
+               int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
+
+               if (c == -1) {
+                       break;
+               }
+               switch (c) {
+               case 's':
+                       vr_alpha = atof(optarg);
+                       break;
+               case 'i':
+                       vr_delta = atof(optarg);
+                       break;
+               case 'g':
+                       vr_gamma = atof(optarg);
+                       break;
+               case 1000:
+                       enable_timing = false;
+                       break;
+               case 1001:
+                       enable_variational_refinement = false;
+                       break;
+               case 1002:
+                       enable_interpolation = true;
+                       break;
+               case 1003:
+                       detailed_timing = true;
+                       break;
+               case 1004:
+                       enable_warmup = true;
+                       break;
+               default:
+                       fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
+                       exit(1);
+               }
+       }
+
+       if (SDL_Init(SDL_INIT_EVERYTHING) < 0) {
+               fprintf(stderr, "SDL_Init failed: %s\n", SDL_GetError());
+               exit(1);
+       }
+       SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
+       SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
+       SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 0);
+       SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
+
+       SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+       SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
+       SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
+       // SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
+       window = SDL_CreateWindow("OpenGL window",
+               SDL_WINDOWPOS_UNDEFINED,
+               SDL_WINDOWPOS_UNDEFINED,
+               64, 64,
+               SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
+       SDL_GLContext context = SDL_GL_CreateContext(window);
+       assert(context != nullptr);
+
+       if (enable_interpolation) {
+               interpolate_image(argc, argv, optind);
+       } else {
+               compute_flow_only(argc, argv, optind);
+       }
+}
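
The schedule_read()/finish_one_read() pair above implements a small asynchronous readback queue: glGetTextureImage() into a bound GL_PIXEL_PACK_BUFFER returns immediately, and the CPU blocks (in glMapNamedBufferRange()) only once the pool of spare PBOs runs dry, or when draining the queue at the end. A minimal standalone sketch of the same pattern, assuming an OpenGL 4.5 context with epoxy; the names schedule_readback and finish_oldest are illustrative, not from the patch:

    #include <epoxy/gl.h>
    #include <cstring>
    #include <deque>
    #include <stack>
    #include <vector>

    struct Readback { GLuint pbo; size_t bytes; };
    static std::stack<GLuint> spare_pool;    // PBOs not currently in flight.
    static std::deque<Readback> in_flight;   // Oldest transfer first.

    static std::vector<char> finish_oldest()
    {
            Readback r = in_flight.front();
            in_flight.pop_front();
            std::vector<char> data(r.bytes);
            // Blocks only if this (oldest) transfer has not completed yet.
            void *buf = glMapNamedBufferRange(r.pbo, 0, r.bytes, GL_MAP_READ_BIT);
            memcpy(data.data(), buf, r.bytes);
            glUnmapNamedBuffer(r.pbo);
            spare_pool.push(r.pbo);
            return data;
    }

    static void schedule_readback(GLuint tex, GLuint width, GLuint height)
    {
            const size_t bytes = size_t(width) * height * 2 * sizeof(float);  // E.g. RG32F flow.
            if (spare_pool.empty()) {
                    finish_oldest();  // Recycle the oldest PBO, waiting if needed.
            }
            GLuint pbo = spare_pool.top();
            spare_pool.pop();
            glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
            glGetTextureImage(tex, 0, GL_RG, GL_FLOAT, GLsizei(bytes), nullptr);  // Asynchronous.
            glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
            in_flight.push_back(Readback{ pbo, bytes });
    }

With a pool of five PBOs, as allocated above, up to five transfers can overlap with rendering before the CPU ever stalls.
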
diff --git a/futatabi/frame.proto b/futatabi/frame.proto
new file mode 100644 (file)
index 0000000..c8807fd
--- /dev/null
@@ -0,0 +1,25 @@
+syntax = "proto3";
+
+// Used as header before each frame in a .frames file:
+//
+//  1. "Ftbifrm0" (8 bytes, ASCII -- note that no byte repeats)
+//  2. Length of upcoming FrameHeaderProto (uint32, binary, big endian)
+//  3. The FrameHeaderProto itself
+//  4. The actual frame
+
+message FrameHeaderProto {
+       int32 stream_idx = 1;
+       int64 pts = 2;
+       int64 file_size = 3;  // Size of the compressed frame, in bytes. TODO: rename to size.
+}
+
+message StreamContentsProto {
+       int32 stream_idx = 1;
+       repeated int64 pts = 2 [packed=true];
+       repeated int64 file_size = 3 [packed=true];
+       repeated int64 offset = 4 [packed=true];
+}
+
+message FileContentsProto {
+       repeated StreamContentsProto stream = 1;  // Typically only one.
+}
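
The container layout described in the comment maps directly to a few buffered writes. A hypothetical helper (not part of the patch) that appends one record, using the frame.pb.h generated from this proto:

    #include <arpa/inet.h>  // htonl(), for the big-endian length field.
    #include <stdint.h>
    #include <stdio.h>
    #include <string>

    #include "frame.pb.h"

    bool append_frame(FILE *fp, int stream_idx, int64_t pts, const std::string &jpeg)
    {
            FrameHeaderProto hdr;
            hdr.set_stream_idx(stream_idx);
            hdr.set_pts(pts);
            hdr.set_file_size(jpeg.size());

            std::string serialized;
            if (!hdr.SerializeToString(&serialized)) return false;
            const uint32_t len = htonl(serialized.size());

            return fwrite("Ftbifrm0", 8, 1, fp) == 1 &&                         // 1. Magic.
                   fwrite(&len, sizeof(len), 1, fp) == 1 &&                     // 2. Header length.
                   fwrite(serialized.data(), serialized.size(), 1, fp) == 1 &&  // 3. Header.
                   fwrite(jpeg.data(), jpeg.size(), 1, fp) == 1;                // 4. Frame data.
    }

write_frame() in futatabi/main.cpp below does essentially this, plus recording the payload offset for the SQLite index.
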
diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp
new file mode 100644 (file)
index 0000000..b496b3d
--- /dev/null
@@ -0,0 +1,53 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "frame_on_disk.h"
+
+using namespace std;
+
+FrameReader::~FrameReader()
+{
+       if (fd != -1) {
+               close(fd);
+       }
+}
+
+string FrameReader::read_frame(FrameOnDisk frame)
+{
+       if (int(frame.filename_idx) != last_filename_idx) {
+               if (fd != -1) {
+                       close(fd);  // Ignore errors.
+               }
+
+               string filename;
+               {
+                       lock_guard<mutex> lock(frame_mu);
+                       filename = frame_filenames[frame.filename_idx];
+               }
+
+               fd = open(filename.c_str(), O_RDONLY);
+               if (fd == -1) {
+                       perror(filename.c_str());
+                       exit(1);
+               }
+
+               // We want readahead. (Ignore errors.)
+               posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+
+               last_filename_idx = frame.filename_idx;
+       }
+
+       string str;
+       str.resize(frame.size);
+       off_t offset = 0;
+       while (offset < frame.size) {
+               int ret = pread(fd, &str[offset], frame.size - offset, frame.offset + offset);
+               if (ret <= 0) {
+                       perror("pread");
+                       exit(1);
+               }
+
+               offset += ret;
+       }
+       return str;
+}
diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h
new file mode 100644 (file)
index 0000000..1843857
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef _FRAME_ON_DISK_H
+#define _FRAME_ON_DISK_H 1
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include <stdint.h>
+
+#include "defs.h"
+
+extern std::mutex frame_mu;
+struct FrameOnDisk {
+       int64_t pts = -1;  // -1 means empty.
+       off_t offset;
+       unsigned filename_idx;
+       uint32_t size;  // Not using size_t saves a few bytes per frame, and we can have very many frames.
+};
+extern std::vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
+extern std::vector<std::string> frame_filenames;  // Under frame_mu.
+
+// A helper class to read frames from disk. It caches the file descriptor
+// so that the kernel has a better chance of doing readahead when it sees
+// the sequential reads. (For this reason, each display has a private
+// FrameReader. Thus, we can easily keep multiple open file descriptors around
+// for a single .frames file.)
+class FrameReader {
+public:
+       ~FrameReader();
+       std::string read_frame(FrameOnDisk frame);
+
+private:
+       int fd = -1;
+       int last_filename_idx = -1;
+};
+
+#endif  // !defined(_FRAME_ON_DISK_H)
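
Typical use is to copy a FrameOnDisk entry out while holding frame_mu, then read with the lock released; read_frame() only takes the lock briefly to resolve the filename. A minimal sketch (illustrative, not from the patch; assumes the stream has at least one frame):

    #include "frame_on_disk.h"

    std::string read_newest_frame(unsigned stream_idx, FrameReader *reader)
    {
            FrameOnDisk spec;
            {
                    std::lock_guard<std::mutex> lock(frame_mu);
                    spec = frames[stream_idx].back();
            }
            // Cached fd + pread(); safe to call without frame_mu held.
            return reader->read_frame(spec);
    }
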
diff --git a/futatabi/gpu_timers.cpp b/futatabi/gpu_timers.cpp
new file mode 100644 (file)
index 0000000..ad747ae
--- /dev/null
@@ -0,0 +1,72 @@
+#include "gpu_timers.h"
+
+#include <epoxy/gl.h>
+#include <stdio.h>
+
+using namespace std;
+
+bool enable_timing = false;
+bool detailed_timing = false;
+bool in_warmup = false;
+
+pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
+{
+       if (!enable_timing) {
+               return make_pair(0, 0);
+       }
+
+       GLuint queries[2];
+       glGenQueries(2, queries);
+       glQueryCounter(queries[0], GL_TIMESTAMP);
+
+       Timer timer;
+       timer.name = name;
+       timer.level = level;
+       timer.query.first = queries[0];
+       timer.query.second = queries[1];
+       timers.push_back(timer);
+       return timer.query;
+}
+
+GLint64 find_elapsed(pair<GLuint, GLuint> queries)
+{
+       // NOTE: This makes the CPU wait for the GPU.
+       GLuint64 time_start, time_end;
+       glGetQueryObjectui64v(queries.first, GL_QUERY_RESULT, &time_start);
+       glGetQueryObjectui64v(queries.second, GL_QUERY_RESULT, &time_end);
+       return time_end - time_start;
+}
+
+void GPUTimers::print()
+{
+       for (size_t i = 0; i < timers.size(); ++i) {
+               if (timers[i].level >= 4 && !detailed_timing) {
+                       // In practice, only affects the SOR sub-timers.
+                       continue;
+               }
+
+               GLint64 time_elapsed = find_elapsed(timers[i].query);
+               for (int j = 0; j < timers[i].level * 2; ++j) {
+                       fprintf(stderr, " ");
+               }
+
+               if (detailed_timing) {
+                       // Look for any immediate subtimers, and see if they sum to the large one.
+                       size_t num_subtimers = 0;
+                       GLint64 sum_subtimers = 0;
+                       for (size_t j = i + 1; j < timers.size() && timers[j].level > timers[i].level; ++j) {
+                               if (timers[j].level != timers[i].level + 1)
+                                       continue;
+                               ++num_subtimers;
+                               sum_subtimers += find_elapsed(timers[j].query);
+                       }
+
+                       if (num_subtimers > 0 && (time_elapsed - sum_subtimers) / 1e6 >= 0.01) {
+                               fprintf(stderr, "%-30s %4.3f ms [%4.3f ms unaccounted for]\n", timers[i].name.c_str(), time_elapsed / 1e6, (time_elapsed - sum_subtimers) / 1e6);
+                       } else {
+                               fprintf(stderr, "%-30s %4.3f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
+                       }
+               } else {
+                       fprintf(stderr, "%-30s %4.1f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
+               }
+       }
+}
diff --git a/futatabi/gpu_timers.h b/futatabi/gpu_timers.h
new file mode 100644 (file)
index 0000000..a8c626e
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef _GPU_TIMERS_H
+#define _GPU_TIMERS_H 1
+
+#include <epoxy/gl.h>
+#include <string>
+#include <utility>
+#include <vector>
+
+extern bool enable_timing;
+extern bool detailed_timing;
+extern bool in_warmup;
+
+class GPUTimers {
+public:
+       void print();
+       std::pair<GLuint, GLuint> begin_timer(const std::string &name, int level);
+
+private:
+       struct Timer {
+               std::string name;
+               int level;
+               std::pair<GLuint, GLuint> query;
+       };
+       std::vector<Timer> timers;
+};
+
+// A simple RAII class for timing until the end of the scope.
+class ScopedTimer {
+public:
+       ScopedTimer(const std::string &name, GPUTimers *timers)
+               : timers(timers), level(0)
+       {
+               query = timers->begin_timer(name, level);
+       }
+
+       ScopedTimer(const std::string &name, ScopedTimer *parent_timer)
+               : timers(parent_timer->timers),
+                 level(parent_timer->level + 1)
+       {
+               query = timers->begin_timer(name, level);
+       }
+
+       ~ScopedTimer()
+       {
+               end();
+       }
+
+       void end()
+       {
+               if (enable_timing && !ended) {
+                       glQueryCounter(query.second, GL_TIMESTAMP);
+                       ended = true;
+               }
+       }
+
+private:
+       GPUTimers *timers;
+       int level;
+       std::pair<GLuint, GLuint> query;
+       bool ended = false;
+};
+
+#endif  // !defined(_GPU_TIMERS_H)
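
A sketch of how the two classes nest (the pass names are illustrative): the outermost ScopedTimer takes the GPUTimers collection, children take their parent so the report indentation follows the call structure, and print() at the end blocks on the query results:

    #include "gpu_timers.h"

    void run_passes()
    {
            GPUTimers timers;
            {
                    ScopedTimer total_timer("Total", &timers);
                    {
                            ScopedTimer t("Motion search", &total_timer);
                            // ... issue the draw calls for this pass ...
                    }
                    {
                            ScopedTimer t("Densification", &total_timer);
                            // ... and for this one ...
                    }
            }  // Each destructor stamps its end GL_TIMESTAMP query here.
            timers.print();  // Waits for the GPU, then prints the nested report.
    }

If enable_timing is false, begin_timer() hands out dummy queries and both classes become no-ops.
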
diff --git a/futatabi/gray.frag b/futatabi/gray.frag
new file mode 100644 (file)
index 0000000..57a6891
--- /dev/null
@@ -0,0 +1,13 @@
+#version 450 core
+
+in vec3 tc;
+out vec4 gray;
+
+uniform sampler2DArray tex;
+
+void main()
+{
+       vec4 color = texture(tex, tc);
+       gray.rgb = vec3(dot(color.rgb, vec3(0.2126f, 0.7152f, 0.0722f)));  // Rec. 709.
+       gray.a = color.a;
+}
diff --git a/futatabi/hole_blend.frag b/futatabi/hole_blend.frag
new file mode 100644 (file)
index 0000000..d7b217f
--- /dev/null
@@ -0,0 +1,48 @@
+#version 450 core
+
+in vec2 tc;
+out vec2 out_flow;
+
+uniform sampler2D left_tex, right_tex, up_tex, down_tex;
+
+void main()
+{
+       // Some of these may contain “junk”, in the sense that they were
+       // not written in the given pass, if they came from an edge.
+       // Most of the time, this is benign, since it means we'll get
+       // the previous value (left/right/up) again. However, if it were
+       // bogus on the very first pass, we need to exclude it.
+       // Thus the test for 100.0f (invalid flows are initialized to 1000,
+       // all valid ones are less than 1).
+       vec2 left = texture(left_tex, tc).xy;
+       vec2 right = texture(right_tex, tc).xy;
+       vec2 up = texture(up_tex, tc).xy;
+       vec2 down = texture(down_tex, tc).xy;
+
+       vec2 sum = vec2(0.0f);
+       float num = 0.0f;
+       if (left.x < 100.0f) {
+               sum = left;
+               num = 1.0f;
+       }
+       if (right.x < 100.0f) {
+               sum += right;
+               num += 1.0f;
+       }
+       if (up.x < 100.0f) {
+               sum += up;
+               num += 1.0f;
+       }
+       if (down.x < 100.0f) {
+               sum += down;
+               num += 1.0f;
+       }
+
+       // If _all_ of them were 0, this would mean the entire row _and_ column
+       // would be devoid of flow. If so, the zero flow is fine for our purposes.
+       if (num == 0.0f) {
+               out_flow = vec2(0.0f);
+       } else {
+               out_flow = sum / num;
+       }
+}
diff --git a/futatabi/hole_fill.frag b/futatabi/hole_fill.frag
new file mode 100644 (file)
index 0000000..bec50d8
--- /dev/null
@@ -0,0 +1,16 @@
+#version 450 core
+
+in vec2 tc;
+out vec2 out_flow;
+
+uniform sampler2D tex;
+
+void main()
+{
+       vec2 flow = texture(tex, tc).xy;
+       if (flow.x > 100.0f) {
+               // Don't copy unset flows around.
+               discard;
+       }
+       out_flow = flow;
+}
diff --git a/futatabi/hole_fill.vert b/futatabi/hole_fill.vert
new file mode 100644 (file)
index 0000000..97098b6
--- /dev/null
@@ -0,0 +1,24 @@
+#version 450 core
+
+layout(location=0) in vec2 position;
+out vec2 tc;
+
+uniform float z;
+uniform vec2 sample_offset;
+
+void main()
+{
+       // Moving the position is equivalent to moving the texture coordinate,
+       // but cheaper -- as it means some of the fullscreen quad can be clipped away.
+       vec2 adjusted_pos = position - sample_offset;
+
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * adjusted_pos.x - 1.0, 2.0 * adjusted_pos.y - 1.0, 2.0f * (z - 0.5f), 1.0);
+
+       tc = position;
+}
diff --git a/futatabi/jpeg_destroyer.h b/futatabi/jpeg_destroyer.h
new file mode 100644 (file)
index 0000000..5fc5c95
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _JPEG_DESTROYER_H
+#define _JPEG_DESTROYER_H 1
+
+#include <jpeglib.h>
+
+class JPEGDestroyer {
+public:
+       JPEGDestroyer(jpeg_decompress_struct *dinfo)
+               : dinfo(dinfo) {}
+
+       ~JPEGDestroyer() {
+               jpeg_destroy_decompress(dinfo);
+       }
+
+private:
+       jpeg_decompress_struct *dinfo;
+};
+
+#endif  // !defined(_JPEG_DESTROYER_H)
diff --git a/futatabi/jpeg_frame.h b/futatabi/jpeg_frame.h
new file mode 100644 (file)
index 0000000..eb73e13
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _JPEG_FRAME_H
+#define _JPEG_FRAME_H 1
+
+#include <memory>
+#include <stdint.h>
+
+struct Frame {
+       bool is_semiplanar = false;
+       std::unique_ptr<uint8_t[]> y;
+       std::unique_ptr<uint8_t[]> cb, cr; // For planar.
+       std::unique_ptr<uint8_t[]> cbcr;  // For semiplanar.
+       unsigned width, height;
+       unsigned chroma_subsampling_x, chroma_subsampling_y;
+       unsigned pitch_y, pitch_chroma;
+};
+
+#endif   // !defined(_JPEG_FRAME_H)
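
A hypothetical allocation helper showing how the fields relate for a 4:2:0 frame. The tightly packed pitches are an assumption for illustration; the software decoder in jpeg_frame_view.cpp below rounds its pitches up to DCT block multiples instead:

    #include <stdint.h>
    #include <memory>

    #include "jpeg_frame.h"

    std::shared_ptr<Frame> alloc_420_frame(unsigned width, unsigned height, bool semiplanar)
    {
            auto frame = std::make_shared<Frame>();
            frame->is_semiplanar = semiplanar;
            frame->width = width;
            frame->height = height;
            frame->chroma_subsampling_x = frame->chroma_subsampling_y = 2;
            frame->pitch_y = width;
            frame->y.reset(new uint8_t[width * height]);
            if (semiplanar) {
                    frame->pitch_chroma = width;  // Interleaved Cb/Cr: two bytes per chroma pixel.
                    frame->cbcr.reset(new uint8_t[width * (height / 2)]);
            } else {
                    frame->pitch_chroma = width / 2;
                    frame->cb.reset(new uint8_t[(width / 2) * (height / 2)]);
                    frame->cr.reset(new uint8_t[(width / 2) * (height / 2)]);
            }
            return frame;
    }
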
diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp
new file mode 100644 (file)
index 0000000..9dc2ec2
--- /dev/null
@@ -0,0 +1,455 @@
+#include "jpeg_frame_view.h"
+
+#include "defs.h"
+#include "jpeg_destroyer.h"
+#include "shared/post_to_main_thread.h"
+#include "video_stream.h"
+#include "ycbcr_converter.h"
+
+#include <QGuiApplication>
+#include <QMouseEvent>
+#include <QPainter>
+#include <QScreen>
+#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <jpeglib.h>
+#include <movit/init.h>
+#include <movit/resource_pool.h>
+#include <movit/util.h>
+#include <mutex>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <thread>
+#include <unistd.h>
+#include <utility>
+
+// Must come after the Qt stuff.
+#include "vaapi_jpeg_decoder.h"
+
+using namespace movit;
+using namespace std;
+
+namespace {
+
+// Just an arbitrary order for std::map.
+struct FrameOnDiskLexicalOrder
+{
+       bool operator() (const FrameOnDisk &a, const FrameOnDisk &b) const
+       {
+               if (a.pts != b.pts)
+                       return a.pts < b.pts;
+               if (a.offset != b.offset)
+                       return a.offset < b.offset;
+               if (a.filename_idx != b.filename_idx)
+                       return a.filename_idx < b.filename_idx;
+               assert(a.size == b.size);
+               return false;
+       }
+};
+
+inline size_t frame_size(const Frame &frame)
+{
+       size_t y_size = frame.width * frame.height;
+       size_t cbcr_size = y_size / frame.chroma_subsampling_x / frame.chroma_subsampling_y;
+       return y_size + cbcr_size * 2;
+}
+
+struct LRUFrame {
+       shared_ptr<Frame> frame;
+       size_t last_used;
+};
+
+struct PendingDecode {
+       JPEGFrameView *destination;
+
+       // For actual decodes (only if frame below is nullptr).
+       FrameOnDisk primary, secondary;
+       float fade_alpha;  // Irrelevant if secondary.pts == -1 (no secondary frame).
+
+       // Already-decoded frames are also sent through PendingDecode,
+       // so that they get drawn in the right order. If frame is nullptr,
+       // it's a real decode.
+       shared_ptr<Frame> frame;
+};
+
+}  // namespace
+
+thread JPEGFrameView::jpeg_decoder_thread;
+mutex cache_mu;
+map<FrameOnDisk, LRUFrame, FrameOnDiskLexicalOrder> cache;  // Under cache_mu.
+size_t cache_bytes_used = 0;  // Under cache_mu.
+condition_variable any_pending_decodes;
+deque<PendingDecode> pending_decodes;  // Under cache_mu.
+atomic<size_t> event_counter{0};
+extern QGLWidget *global_share_widget;
+extern atomic<bool> should_quit;
+
+shared_ptr<Frame> decode_jpeg(const string &jpeg)
+{
+       shared_ptr<Frame> frame;
+       if (vaapi_jpeg_decoding_usable) {
+               frame = decode_jpeg_vaapi(jpeg);
+               if (frame != nullptr) {
+                       return frame;
+               }
+               fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n");
+       }
+
+       frame.reset(new Frame);
+
+       jpeg_decompress_struct dinfo;
+       jpeg_error_mgr jerr;
+       dinfo.err = jpeg_std_error(&jerr);
+       jpeg_create_decompress(&dinfo);
+       JPEGDestroyer destroy_dinfo(&dinfo);
+
+       jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
+       jpeg_read_header(&dinfo, true);
+
+       if (dinfo.num_components != 3) {
+               fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+                       dinfo.num_components,
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               exit(1);
+       }
+       if (dinfo.comp_info[0].h_samp_factor != dinfo.max_h_samp_factor ||
+           dinfo.comp_info[0].v_samp_factor != dinfo.max_v_samp_factor ||  // Y' must not be subsampled.
+           dinfo.comp_info[1].h_samp_factor != dinfo.comp_info[2].h_samp_factor ||
+           dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[2].v_samp_factor ||  // Cb and Cr must be identically subsampled.
+           (dinfo.max_h_samp_factor % dinfo.comp_info[1].h_samp_factor) != 0 ||
+           (dinfo.max_v_samp_factor % dinfo.comp_info[1].v_samp_factor) != 0) {  // No 2:3 subsampling or other weirdness.
+               fprintf(stderr, "Unsupported subsampling scheme. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               exit(1);
+       }
+       dinfo.raw_data_out = true;
+
+       jpeg_start_decompress(&dinfo);
+
+       frame->width = dinfo.output_width;
+       frame->height = dinfo.output_height;
+       frame->chroma_subsampling_x = dinfo.max_h_samp_factor / dinfo.comp_info[1].h_samp_factor;
+       frame->chroma_subsampling_y = dinfo.max_v_samp_factor / dinfo.comp_info[1].v_samp_factor;
+
+       unsigned h_mcu_size = DCTSIZE * dinfo.max_h_samp_factor;
+       unsigned v_mcu_size = DCTSIZE * dinfo.max_v_samp_factor;
+       unsigned mcu_width_blocks = (dinfo.output_width + h_mcu_size - 1) / h_mcu_size;
+       unsigned mcu_height_blocks = (dinfo.output_height + v_mcu_size - 1) / v_mcu_size;
+
+       unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor;
+       unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor;
+       unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor;
+       unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor;
+
+       // TODO: Decode into a PBO.
+       frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]);
+       frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
+       frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
+       frame->pitch_y = luma_width_blocks * DCTSIZE;
+       frame->pitch_chroma = chroma_width_blocks * DCTSIZE;
+
+       JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       for (unsigned y = 0; y < mcu_height_blocks; ++y) {
+               // NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
+               for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
+                       yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y;
+                       cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+                       crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+               }
+
+               jpeg_read_raw_data(&dinfo, data, v_mcu_size);
+       }
+
+       (void)jpeg_finish_decompress(&dinfo);
+
+       return frame;
+}
+
+void prune_cache()
+{
+       // Assumes cache_mu is held.
+       int64_t bytes_still_to_remove = cache_bytes_used - (size_t(CACHE_SIZE_MB) * 1024 * 1024) * 9 / 10;
+       if (bytes_still_to_remove <= 0) return;
+
+       vector<pair<size_t, size_t>> lru_timestamps_and_size;
+       for (const auto &key_and_value : cache) {
+               lru_timestamps_and_size.emplace_back(
+                       key_and_value.second.last_used,
+                       frame_size(*key_and_value.second.frame));
+       }
+       sort(lru_timestamps_and_size.begin(), lru_timestamps_and_size.end());
+
+       // Remove the oldest ones until we are below 90% of the cache budget.
+       size_t lru_cutoff_point = 0;
+       for (const pair<size_t, size_t> &it : lru_timestamps_and_size) {
+               lru_cutoff_point = it.first;
+               bytes_still_to_remove -= it.second;
+               if (bytes_still_to_remove <= 0) break;
+       }
+
+       for (auto it = cache.begin(); it != cache.end(); ) {
+               if (it->second.last_used <= lru_cutoff_point) {
+                       cache_bytes_used -= frame_size(*it->second.frame);
+                       it = cache.erase(it);
+               } else {
+                       ++it;
+               }
+       }
+}
+
+shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavior cache_miss_behavior, FrameReader *frame_reader, bool *did_decode)
+{
+       *did_decode = false;
+       {
+               unique_lock<mutex> lock(cache_mu);
+               auto it = cache.find(frame_spec);
+               if (it != cache.end()) {
+                       it->second.last_used = event_counter++;
+                       return it->second.frame;
+               }
+       }
+
+       if (cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE) {
+               return nullptr;
+       }
+
+       *did_decode = true;
+       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec));
+
+       unique_lock<mutex> lock(cache_mu);
+       cache_bytes_used += frame_size(*frame);
+       cache[frame_spec] = LRUFrame{ frame, event_counter++ };
+
+       if (cache_bytes_used > size_t(CACHE_SIZE_MB) * 1024 * 1024) {
+               prune_cache();
+       }
+       return frame;
+}
+
+void JPEGFrameView::jpeg_decoder_thread_func()
+{
+       size_t num_decoded = 0, num_dropped = 0;
+
+       pthread_setname_np(pthread_self(), "JPEGDecoder");
+       while (!should_quit.load()) {
+               PendingDecode decode;
+               CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE;
+               {
+                       unique_lock<mutex> lock(cache_mu);  // TODO: Perhaps under another lock?
+                       any_pending_decodes.wait(lock, [] {
+                               return !pending_decodes.empty() || should_quit.load();
+                       });
+                       if (should_quit.load())
+                               break;
+                       decode = pending_decodes.front();
+                       pending_decodes.pop_front();
+
+                       size_t num_pending = 0;
+                       for (const PendingDecode &other_decode : pending_decodes) {
+                               if (other_decode.destination == decode.destination) {
+                                       ++num_pending;
+                               }
+                       }
+                       if (num_pending > 3) {
+                               cache_miss_behavior = RETURN_NULLPTR_IF_NOT_IN_CACHE;
+                       }
+               }
+
+               if (decode.frame != nullptr) {
+                       // Already decoded, so just show it.
+                       decode.destination->setDecodedFrame(decode.frame, nullptr, 1.0f);
+                       continue;
+               }
+
+               shared_ptr<Frame> primary_frame, secondary_frame;
+               bool drop = false;
+               for (int subframe_idx = 0; subframe_idx < 2; ++subframe_idx) {
+                       const FrameOnDisk &frame_spec = (subframe_idx == 0 ? decode.primary : decode.secondary);
+                       if (frame_spec.pts == -1) {
+                               // No secondary frame.
+                               continue;
+                       }
+
+                       bool did_decode;
+                       shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &decode.destination->frame_reader, &did_decode);
+
+                       if (frame == nullptr) {
+                               assert(cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE);
+                               drop = true;
+                               break;
+                       }
+
+                       if (did_decode) {
+                               ++num_decoded;
+                               if (num_decoded % 1000 == 0) {
+                                       fprintf(stderr, "Decoded %zu images, dropped %zu (%.2f%% dropped)\n",
+                                               num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped));
+                               }
+                       }
+                       if (subframe_idx == 0) {
+                               primary_frame = std::move(frame);
+                       } else {
+                               secondary_frame = std::move(frame);
+                       }
+               }
+               if (drop) {
+                       ++num_dropped;
+                       continue;
+               }
+
+               // TODO: Could we get jitter between non-interpolated and interpolated frames here?
+               decode.destination->setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
+       }
+}
+
+void JPEGFrameView::shutdown()
+{
+       any_pending_decodes.notify_all();
+       jpeg_decoder_thread.join();
+}
+
+JPEGFrameView::JPEGFrameView(QWidget *parent)
+       : QGLWidget(parent, global_share_widget)
+{
+}
+
+void JPEGFrameView::setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk secondary_frame, float fade_alpha)
+{
+       current_stream_idx = stream_idx;  // TODO: Does this interact with fades?
+
+       unique_lock<mutex> lock(cache_mu);
+       PendingDecode decode;
+       decode.primary = frame;
+       decode.secondary = secondary_frame;
+       decode.fade_alpha = fade_alpha;
+       decode.destination = this;
+       pending_decodes.push_back(decode);
+       any_pending_decodes.notify_all();
+}
+
+void JPEGFrameView::setFrame(shared_ptr<Frame> frame)
+{
+       unique_lock<mutex> lock(cache_mu);
+       PendingDecode decode;
+       decode.frame = std::move(frame);
+       decode.destination = this;
+       pending_decodes.push_back(decode);
+       any_pending_decodes.notify_all();
+}
+
+ResourcePool *resource_pool = nullptr;
+
+void JPEGFrameView::initializeGL()
+{
+       glDisable(GL_BLEND);
+       glDisable(GL_DEPTH_TEST);
+       check_error();
+
+       static once_flag once;
+       call_once(once, [] {
+               resource_pool = new ResourcePool;
+               jpeg_decoder_thread = std::thread(jpeg_decoder_thread_func);
+       });
+
+       ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_RGBA, resource_pool));
+
+       ImageFormat inout_format;
+       inout_format.color_space = COLORSPACE_sRGB;
+       inout_format.gamma_curve = GAMMA_sRGB;
+
+       overlay_chain.reset(new EffectChain(overlay_base_width, overlay_base_height, resource_pool));
+       overlay_input = (movit::FlatInput *)overlay_chain->add_input(new FlatInput(inout_format, FORMAT_GRAYSCALE, GL_UNSIGNED_BYTE, overlay_base_width, overlay_base_height));
+
+       overlay_chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+       overlay_chain->finalize();
+}
+
+void JPEGFrameView::resizeGL(int width, int height)
+{
+       check_error();
+       glViewport(0, 0, width, height);
+       check_error();
+
+       // Save these, as width() and height() will lie with DPI scaling.
+       gl_width = width;
+       gl_height = height;
+}
+
+void JPEGFrameView::paintGL()
+{
+       glViewport(0, 0, gl_width, gl_height);
+       if (current_frame == nullptr) {
+               glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+               glClear(GL_COLOR_BUFFER_BIT);
+               return;
+       }
+
+       check_error();
+       current_chain->render_to_screen();
+
+       if (overlay_image != nullptr) {
+               if (overlay_input_needs_refresh) {
+                       overlay_input->set_width(overlay_width);
+                       overlay_input->set_height(overlay_height);
+                       overlay_input->set_pixel_data(overlay_image->bits());
+                       overlay_input_needs_refresh = false;
+               }
+               glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height);
+               overlay_chain->render_to_screen();
+       }
+}
+
+void JPEGFrameView::setDecodedFrame(shared_ptr<Frame> frame, shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+       post_to_main_thread([this, frame, secondary_frame, fade_alpha] {
+               current_frame = frame;
+               current_secondary_frame = secondary_frame;
+
+               if (secondary_frame != nullptr) {
+                       current_chain = ycbcr_converter->prepare_chain_for_fade(frame, secondary_frame, fade_alpha);
+               } else {
+                       current_chain = ycbcr_converter->prepare_chain_for_conversion(frame);
+               }
+               update();
+       });
+}
+
+void JPEGFrameView::mousePressEvent(QMouseEvent *event)
+{
+       if (event->type() == QEvent::MouseButtonPress && event->button() == Qt::LeftButton) {
+               emit clicked();
+       }
+}
+
+void JPEGFrameView::set_overlay(const string &text)
+{
+       if (text.empty()) {
+               overlay_image.reset();
+               return;
+       }
+
+       float dpr = QGuiApplication::primaryScreen()->devicePixelRatio();
+       overlay_width = lrint(overlay_base_width * dpr);
+       overlay_height = lrint(overlay_base_height * dpr);
+
+       overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8));
+       overlay_image->setDevicePixelRatio(dpr);
+       overlay_image->fill(0);
+       QPainter painter(overlay_image.get());
+
+       painter.setPen(Qt::white);
+       QFont font = painter.font();
+       font.setPointSize(12);
+       painter.setFont(font);
+
+       painter.drawText(QRectF(0, 0, overlay_base_width, overlay_base_height), Qt::AlignCenter, QString::fromStdString(text));
+
+       // Don't refresh immediately; we might not have an OpenGL context here.
+       overlay_input_needs_refresh = true;
+}
diff --git a/futatabi/jpeg_frame_view.h b/futatabi/jpeg_frame_view.h
new file mode 100644 (file)
index 0000000..3ecfa0d
--- /dev/null
@@ -0,0 +1,78 @@
+#ifndef _JPEG_FRAME_VIEW_H
+#define _JPEG_FRAME_VIEW_H 1
+
+#include "frame_on_disk.h"
+#include "jpeg_frame.h"
+#include "ycbcr_converter.h"
+
+#include <QGLWidget>
+#include <epoxy/gl.h>
+#include <memory>
+#include <movit/effect_chain.h>
+#include <movit/flat_input.h>
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+#include <stdint.h>
+#include <thread>
+
+enum CacheMissBehavior {
+       DECODE_IF_NOT_IN_CACHE,
+       RETURN_NULLPTR_IF_NOT_IN_CACHE
+};
+
+std::shared_ptr<Frame> decode_jpeg(const std::string &jpeg);
+std::shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk id, CacheMissBehavior cache_miss_behavior, FrameReader *frame_reader, bool *did_decode);
+
+class JPEGFrameView : public QGLWidget {
+       Q_OBJECT
+
+public:
+       JPEGFrameView(QWidget *parent);
+
+       void setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk secondary_frame = {}, float fade_alpha = 0.0f);
+       void setFrame(std::shared_ptr<Frame> frame);
+
+       void mousePressEvent(QMouseEvent *event) override;
+
+       unsigned get_stream_idx() const { return current_stream_idx; }
+
+       void setDecodedFrame(std::shared_ptr<Frame> frame, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+       void set_overlay(const std::string &text);  // Blank for none.
+
+       static void shutdown();
+
+signals:
+       void clicked();
+
+protected:
+       void initializeGL() override;
+       void resizeGL(int width, int height) override;
+       void paintGL() override;
+
+private:
+       static void jpeg_decoder_thread_func();
+
+       FrameReader frame_reader;
+
+       // The stream index of the latest frame we displayed.
+       unsigned current_stream_idx = 0;
+
+       std::unique_ptr<YCbCrConverter> ycbcr_converter;
+       movit::EffectChain *current_chain = nullptr;  // Owned by ycbcr_converter.
+
+       std::shared_ptr<Frame> current_frame;  // So that we hold on to the pixels.
+       std::shared_ptr<Frame> current_secondary_frame;  // Same.
+
+       static constexpr int overlay_base_width = 16, overlay_base_height = 16;
+       int overlay_width = overlay_base_width, overlay_height = overlay_base_height;
+       std::unique_ptr<QImage> overlay_image;  // If nullptr, no overlay.
+       std::unique_ptr<movit::EffectChain> overlay_chain;  // Just to get the overlay on screen in the easiest way possible.
+       movit::FlatInput *overlay_input;
+       bool overlay_input_needs_refresh = false;
+
+       int gl_width, gl_height;
+
+       static std::thread jpeg_decoder_thread;
+};
+
+#endif  // !defined(_JPEG_FRAME_VIEW_H)
diff --git a/futatabi/main.cpp b/futatabi/main.cpp
new file mode 100644 (file)
index 0000000..f6d2ab1
--- /dev/null
@@ -0,0 +1,502 @@
+#include <assert.h>
+#include <arpa/inet.h>
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <dirent.h>
+#include <errno.h>
+#include <functional>
+#include <getopt.h>
+#include <memory>
+#include <mutex>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <thread>
+#include <vector>
+
+extern "C" {
+#include <libavformat/avformat.h>
+}
+
+#include "clip_list.h"
+#include "shared/context.h"
+#include "defs.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
+#include "flags.h"
+#include "frame_on_disk.h"
+#include "frame.pb.h"
+#include "shared/httpd.h"
+#include "mainwindow.h"
+#include "player.h"
+#include "shared/post_to_main_thread.h"
+#include "shared/ref_counted_gl_sync.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+#include "vaapi_jpeg_decoder.h"
+
+#include <QApplication>
+#include <QGLFormat>
+#include <QSurfaceFormat>
+#include <QProgressDialog>
+#include <movit/init.h>
+#include <movit/util.h>
+
+using namespace std;
+using namespace std::chrono;
+
+constexpr char frame_magic[] = "Ftbifrm0";
+constexpr size_t frame_magic_len = 8;
+
+mutex RefCountedGLsync::fence_lock;
+atomic<bool> should_quit{false};
+
+int64_t start_pts = -1;
+
+// TODO: Replace by some sort of GUI control, I guess.
+int64_t current_pts = 0;
+
+struct FrameFile {
+       FILE *fp = nullptr;
+       unsigned filename_idx;
+       size_t frames_written_so_far = 0;
+};
+std::map<int, FrameFile> open_frame_files;
+
+mutex frame_mu;
+vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
+vector<string> frame_filenames;  // Under frame_mu.
+
+namespace {
+
+FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t size, DB *db)
+{
+       if (open_frame_files.count(stream_idx) == 0) {
+               char filename[256];
+               snprintf(filename, sizeof(filename), "%s/frames/cam%d-pts%09ld.frames",
+                       global_flags.working_directory.c_str(), stream_idx, pts);
+               FILE *fp = fopen(filename, "wb");
+               if (fp == nullptr) {
+                       perror(filename);
+                       exit(1);
+               }
+
+               lock_guard<mutex> lock(frame_mu);
+               unsigned filename_idx = frame_filenames.size();
+               frame_filenames.push_back(filename);
+               open_frame_files[stream_idx] = FrameFile{ fp, filename_idx, 0 };
+       }
+
+       FrameFile &file = open_frame_files[stream_idx];
+       unsigned filename_idx = file.filename_idx;
+       string filename;
+       {
+               lock_guard<mutex> lock(frame_mu);
+               filename = frame_filenames[filename_idx];
+       }
+
+       FrameHeaderProto hdr;
+       hdr.set_stream_idx(stream_idx);
+       hdr.set_pts(pts);
+       hdr.set_file_size(size);
+
+       string serialized;
+       if (!hdr.SerializeToString(&serialized)) {
+               fprintf(stderr, "Frame header serialization failed.\n");
+               exit(1);
+       }
+       uint32_t len = htonl(serialized.size());
+
+       if (fwrite(frame_magic, frame_magic_len, 1, file.fp) != 1) {
+               perror("fwrite");
+               exit(1);
+       }
+       if (fwrite(&len, sizeof(len), 1, file.fp) != 1) {
+               perror("fwrite");
+               exit(1);
+       }
+       if (fwrite(serialized.data(), serialized.size(), 1, file.fp) != 1) {
+               perror("fwrite");
+               exit(1);
+       }
+       off_t offset = ftell(file.fp);
+       if (fwrite(data, size, 1, file.fp) != 1) {
+               perror("fwrite");
+               exit(1);
+       }
+       fflush(file.fp);  // No fsync(), though. We can accept losing a few frames.
+       global_disk_space_estimator->report_write(filename, frame_magic_len + sizeof(len) + serialized.size() + size, pts);
+
+       FrameOnDisk frame;
+       frame.pts = pts;
+       frame.filename_idx = filename_idx;
+       frame.offset = offset;
+       frame.size = size;
+
+       {
+               lock_guard<mutex> lock(frame_mu);
+               assert(stream_idx < MAX_STREAMS);
+               frames[stream_idx].push_back(frame);
+       }
+
+       if (++file.frames_written_so_far >= 1000) {
+               size_t size = ftell(file.fp);
+
+               // Start a new file next time.
+               if (fclose(file.fp) != 0) {
+                       perror("fclose");
+                       exit(1);
+               }
+               open_frame_files.erase(stream_idx);
+
+               // Write information about all frames in the finished file to SQLite.
+               // (If we crash before getting to do this, we'll be scanning through
+               // the file on next startup, and adding it to the database then.)
+               // NOTE: Since we don't fsync(), we could in theory get broken data
+               // that nevertheless has the right size, but that seems unlikely.
+               vector<DB::FrameOnDiskAndStreamIdx> frames_this_file;
+               {
+                       lock_guard<mutex> lock(frame_mu);
+                       for (size_t stream_idx = 0; stream_idx < MAX_STREAMS; ++stream_idx) {
+                               for (const FrameOnDisk &frame : frames[stream_idx]) {
+                                       if (frame.filename_idx == filename_idx) {
+                                               frames_this_file.emplace_back(DB::FrameOnDiskAndStreamIdx{ frame, unsigned(stream_idx) });
+                                       }
+                               }
+                       }
+               }
+
+               const char *basename = filename.c_str();
+               while (strchr(basename, '/') != nullptr) {
+                       basename = strchr(basename, '/') + 1;
+               }
+               db->store_frame_file(basename, size, frames_this_file);
+       }
+
+       return frame;
+}
+
+} // namespace
+
+HTTPD *global_httpd;
+
+void load_existing_frames();
+int record_thread_func();
+
+int main(int argc, char **argv)
+{
+       parse_flags(argc, argv);
+       if (optind == argc) {
+               global_flags.stream_source = "multiangle.mp4";
+               global_flags.slow_down_input = true;
+       } else if (optind + 1 == argc) {
+               global_flags.stream_source = argv[optind];
+       } else {
+               usage();
+               exit(1);
+       }
+
+       string frame_dir = global_flags.working_directory + "/frames";
+
+       struct stat st;
+       if (stat(frame_dir.c_str(), &st) == -1) {
+               fprintf(stderr, "%s does not exist, creating it.\n", frame_dir.c_str());
+               if (mkdir(frame_dir.c_str(), 0777) == -1) {
+                       perror(global_flags.working_directory.c_str());
+                       exit(1);
+               }
+       }
+
+       avformat_network_init();
+       global_httpd = new HTTPD;
+
+       QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts, true);
+
+       QSurfaceFormat fmt;
+       fmt.setDepthBufferSize(0);
+       fmt.setStencilBufferSize(0);
+       fmt.setProfile(QSurfaceFormat::CoreProfile);
+       fmt.setMajorVersion(4);
+       fmt.setMinorVersion(5);
+
+       // Turn off vsync; with it on, Qt generally gives us at most
+       // (display frequency) / (number of active QGLWidgets) frames per second.
+       fmt.setSwapInterval(0);
+
+       QSurfaceFormat::setDefaultFormat(fmt);
+
+       QGLFormat::setDefaultFormat(QGLFormat::fromSurfaceFormat(fmt));
+
+       QApplication app(argc, argv);
+       global_share_widget = new QGLWidget();
+       if (!global_share_widget->isValid()) {
+               fprintf(stderr, "Failed to initialize OpenGL. Futatabi needs at least OpenGL 4.5 to function properly.\n");
+               exit(1);
+       }
+
+       // Initialize Movit.
+       {
+               QSurface *surface = create_surface();
+               QOpenGLContext *context = create_context(surface);
+               make_current(context, surface);
+               CHECK(movit::init_movit(MOVIT_SHADER_DIR, movit::MOVIT_DEBUG_OFF));
+               delete_context(context);
+               // TODO: Delete the surface, too.
+       }
+
+       load_existing_frames();
+
+       MainWindow main_window;
+       main_window.show();
+
+       global_httpd->add_endpoint("/queue_status", bind(&MainWindow::get_queue_status, &main_window), HTTPD::NO_CORS_POLICY);
+       global_httpd->start(global_flags.http_port);
+
+       init_jpeg_vaapi();
+
+       thread record_thread(record_thread_func);
+
+       int ret = app.exec();
+
+       should_quit = true;
+       record_thread.join();
+       JPEGFrameView::shutdown();
+
+       return ret;
+}
+
+void load_frame_file(const char *filename, const string &basename, unsigned filename_idx, DB *db)
+{
+       struct stat st;
+       if (stat(filename, &st) == -1) {
+               perror(filename);
+               exit(1);
+       }
+
+       vector<DB::FrameOnDiskAndStreamIdx> all_frames = db->load_frame_file(basename, st.st_size, filename_idx);
+       if (!all_frames.empty()) {
+               // We already had this cached in the database, so no need to look in the file.
+               for (const DB::FrameOnDiskAndStreamIdx &frame : all_frames) {
+                       if (frame.stream_idx < MAX_STREAMS) {  // stream_idx is unsigned; no lower bound to check.
+                               frames[frame.stream_idx].push_back(frame.frame);
+                               start_pts = max(start_pts, frame.frame.pts);
+                       }
+               }
+               return;
+       }
+
+       FILE *fp = fopen(filename, "rb");
+       if (fp == nullptr) {
+               perror(filename);
+               exit(1);
+       }
+
+       size_t magic_offset = 0;
+       size_t skipped_bytes = 0;
+       while (!feof(fp) && !ferror(fp)) {
+               int ch = getc(fp);
+               if (ch == -1) {
+                       break;
+               }
+               if (ch != frame_magic[magic_offset++]) {
+                       skipped_bytes += magic_offset;
+                       // The mismatched byte could itself start a new magic
+                       // (no other restart is possible; no byte repeats within the magic).
+                       magic_offset = (ch == frame_magic[0]) ? 1 : 0;
+                       skipped_bytes -= magic_offset;
+                       continue;
+               }
+               if (magic_offset < frame_magic_len) {
+                       // Still reading the magic (hopefully).
+                       continue;
+               }
+
+               // OK, found the magic. Try to parse the frame header.
+               magic_offset = 0;
+
+               if (skipped_bytes > 0) {
+                       fprintf(stderr, "WARNING: %s: Skipped %zu garbage bytes in the middle.\n",
+                               filename, skipped_bytes);
+                       skipped_bytes = 0;
+               }
+
+               uint32_t len;
+               if (fread(&len, sizeof(len), 1, fp) != 1) {
+                       fprintf(stderr, "WARNING: %s: Short read when getting length.\n", filename);
+                       break;
+               }
+
+               string serialized;
+               serialized.resize(ntohl(len));
+               if (fread(&serialized[0], serialized.size(), 1, fp) != 1) {
+                       fprintf(stderr, "WARNING: %s: Short read when reading frame header (%zu bytes).\n", filename, serialized.size());
+                       break;
+               }
+
+               FrameHeaderProto hdr;
+               if (!hdr.ParseFromString(serialized)) {
+                       fprintf(stderr, "WARNING: %s: Corrupted frame header.\n", filename);
+                       continue;
+               }
+
+               FrameOnDisk frame;
+               frame.pts = hdr.pts();
+               frame.offset = ftell(fp);
+               frame.filename_idx = filename_idx;
+               frame.size = hdr.file_size();
+
+               if (fseek(fp, frame.offset + frame.size, SEEK_SET) == -1) {
+                       fprintf(stderr, "WARNING: %s: Could not seek past frame (probably truncated).\n", filename);
+                       continue;
+               }
+
+               if (hdr.stream_idx() >= 0 && hdr.stream_idx() < MAX_STREAMS) {
+                       frames[hdr.stream_idx()].push_back(frame);
+                       start_pts = max(start_pts, hdr.pts());
+               }
+               all_frames.emplace_back(DB::FrameOnDiskAndStreamIdx{ frame, unsigned(hdr.stream_idx()) });
+       }
+
+       if (skipped_bytes > 0) {
+               fprintf(stderr, "WARNING: %s: Skipped %zu garbage bytes at the end.\n",
+                       filename, skipped_bytes);
+       }
+
+       size_t size = ftell(fp);
+       fclose(fp);
+
+       db->store_frame_file(basename, size, all_frames);
+}
+
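+// Scan the frame directory on startup, rebuilding the in-memory frame index
+// (with the database acting as a cache; see load_frame_file() above).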
+void load_existing_frames()
+{
+       QProgressDialog progress("Scanning frame directory...", "Abort", 0, 1);
+       progress.setWindowTitle("Futatabi");
+       progress.setWindowModality(Qt::WindowModal);
+       progress.setMinimumDuration(1000);
+       progress.setMaximum(1);
+       progress.setValue(0);
+
+       string frame_dir = global_flags.working_directory + "/frames";
+       DIR *dir = opendir(frame_dir.c_str());
+       if (dir == nullptr) {
+               perror("frames/");
+               start_pts = 0;
+               return;
+       }
+
+       vector<string> frame_basenames;
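+       // Collect every regular file (or symlink) in the frame directory.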
+       for ( ;; ) {
+               errno = 0;
+               dirent *de = readdir(dir);
+               if (de == nullptr) {
+                       if (errno != 0) {
+                               perror("readdir");
+                               exit(1);
+                       }
+                       break;
+               }
+
+               if (de->d_type == DT_REG || de->d_type == DT_LNK) {
+                       string filename = frame_dir + "/" + de->d_name;
+                       frame_filenames.push_back(filename);
+                       frame_basenames.push_back(de->d_name);
+               }
+
+               if (progress.wasCanceled()) {
+                       exit(1);
+               }
+       }
+       closedir(dir);
+
+       progress.setMaximum(frame_filenames.size() + 2);
+       progress.setValue(1);
+
+       progress.setLabelText("Opening database...");
+       DB db(global_flags.working_directory + "/futatabi.db");
+
+       progress.setLabelText("Reading frame files...");
+       progress.setValue(2);
+
+       for (size_t i = 0; i < frame_filenames.size(); ++i) {
+               load_frame_file(frame_filenames[i].c_str(), frame_basenames[i], i, &db);
+               progress.setValue(i + 3);
+               if (progress.wasCanceled()) {
+                       exit(1);
+               }
+       }
+
+       if (start_pts == -1) {
+               start_pts = 0;
+       } else {
+               // Add a gap of one second between the old frames and the new ones.
+               start_pts += TIMEBASE;
+       }
+
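+       // Frame files can be loaded in any order, so make sure each stream is sorted by pts.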
+       for (int stream_idx = 0; stream_idx < MAX_STREAMS; ++stream_idx) {
+               sort(frames[stream_idx].begin(), frames[stream_idx].end(),
+                       [](const auto &a, const auto &b) { return a.pts < b.pts; });
+       }
+
+       db.clean_unused_frame_files(frame_basenames);
+}
+
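+// Reads the incoming stream, writes every frame to disk (via write_frame())
+// and updates the input displays accordingly.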
+int record_thread_func()
+{
+       auto format_ctx = avformat_open_input_unique(global_flags.stream_source.c_str(), nullptr, nullptr);
+       if (format_ctx == nullptr) {
+               fprintf(stderr, "%s: Error opening file\n", global_flags.stream_source.c_str());
+               return 1;
+       }
+
+       int64_t last_pts = -1;
+       int64_t pts_offset;
+       DB db(global_flags.working_directory + "/futatabi.db");
+
+       while (!should_quit.load()) {
+               AVPacket pkt;
+               unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
+                       &pkt, av_packet_unref);
+               av_init_packet(&pkt);
+               pkt.data = nullptr;
+               pkt.size = 0;
+
+               // TODO: Make it possible to abort av_read_frame() (use an interrupt callback);
+               // right now, should_quit will be ignored if it's hung on I/O.
+               if (av_read_frame(format_ctx.get(), &pkt) != 0) {
+                       break;
+               }
+
+               // Convert pts to our own timebase.
+               AVRational stream_timebase = format_ctx->streams[pkt.stream_index]->time_base;
+               int64_t pts = av_rescale_q(pkt.pts, stream_timebase, AVRational{ 1, TIMEBASE });
+
+               // Offset the pts so that the recording continues at start_pts (the first packet defines the offset).
+               if (last_pts == -1) {
+                       pts_offset = start_pts - pts;
+               }
+               pts = std::max(pts + pts_offset, start_pts);
+
+               //fprintf(stderr, "Got a frame from camera %d, pts = %ld, size = %d\n",
+               //      pkt.stream_index, pts, pkt.size);
+               FrameOnDisk frame = write_frame(pkt.stream_index, pts, pkt.data, pkt.size, &db);
+
+               post_to_main_thread([pkt, frame] {
+                       if (pkt.stream_index == 0) {
+                               global_mainwindow->ui->input1_display->setFrame(pkt.stream_index, frame);
+                       } else if (pkt.stream_index == 1) {
+                               global_mainwindow->ui->input2_display->setFrame(pkt.stream_index, frame);
+                       } else if (pkt.stream_index == 2) {
+                               global_mainwindow->ui->input3_display->setFrame(pkt.stream_index, frame);
+                       } else if (pkt.stream_index == 3) {
+                               global_mainwindow->ui->input4_display->setFrame(pkt.stream_index, frame);
+                       }
+               });
+
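+               // If requested, pace the packets at roughly their original rate
+               // instead of reading them in as fast as possible.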
+               if (last_pts != -1 && global_flags.slow_down_input) {
+                       this_thread::sleep_for(microseconds((pts - last_pts) * 1000000 / TIMEBASE));
+               }
+               last_pts = pts;
+               current_pts = pts;
+       }
+
+       return 0;
+}
diff --git a/futatabi/mainwindow.cpp b/futatabi/mainwindow.cpp
new file mode 100644 (file)
index 0000000..2a05a24
--- /dev/null
@@ -0,0 +1,773 @@
+#include "mainwindow.h"
+
+#include "clip_list.h"
+#include "shared/disk_space_estimator.h"
+#include "flags.h"
+#include "frame_on_disk.h"
+#include "player.h"
+#include "shared/post_to_main_thread.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+
+#include <QMouseEvent>
+#include <QShortcut>
+#include <QTimer>
+#include <QWheelEvent>
+#include <future>
+#include <sqlite3.h>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace std::placeholders;
+
+MainWindow *global_mainwindow = nullptr;
+static ClipList *cliplist_clips;
+static PlayList *playlist_clips;
+
+extern int64_t current_pts;
+
+MainWindow::MainWindow()
+       : ui(new Ui::MainWindow),
+         db(global_flags.working_directory + "/futatabi.db")
+{
+       global_mainwindow = this;
+       ui->setupUi(this);
+
+       // The menus.
+       connect(ui->exit_action, &QAction::triggered, this, &MainWindow::exit_triggered);
+
+       global_disk_space_estimator = new DiskSpaceEstimator(bind(&MainWindow::report_disk_space, this, _1, _2));
+       disk_free_label = new QLabel(this);
+       disk_free_label->setStyleSheet("QLabel {padding-right: 5px;}");
+       ui->menuBar->setCornerWidget(disk_free_label);
+
+       StateProto state = db.get_state();
+
+       cliplist_clips = new ClipList(state.clip_list());
+       ui->clip_list->setModel(cliplist_clips);
+       connect(cliplist_clips, &ClipList::any_content_changed, this, &MainWindow::content_changed);
+
+       playlist_clips = new PlayList(state.play_list());
+       ui->playlist->setModel(playlist_clips);
+       connect(playlist_clips, &PlayList::any_content_changed, this, &MainWindow::content_changed);
+
+       // For un-highlighting when we lose focus.
+       ui->clip_list->installEventFilter(this);
+
+       // For scrubbing in the pts columns.
+       ui->clip_list->viewport()->installEventFilter(this);
+       ui->playlist->viewport()->installEventFilter(this);
+
+       QShortcut *cue_in = new QShortcut(QKeySequence(Qt::Key_A), this);
+       connect(cue_in, &QShortcut::activated, ui->cue_in_btn, &QPushButton::click);
+       connect(ui->cue_in_btn, &QPushButton::clicked, this, &MainWindow::cue_in_clicked);
+
+       QShortcut *cue_out = new QShortcut(QKeySequence(Qt::Key_S), this);
+       connect(cue_out, &QShortcut::activated, ui->cue_out_btn, &QPushButton::click);
+       connect(ui->cue_out_btn, &QPushButton::clicked, this, &MainWindow::cue_out_clicked);
+
+       QShortcut *queue = new QShortcut(QKeySequence(Qt::Key_Q), this);
+       connect(queue, &QShortcut::activated, ui->queue_btn, &QPushButton::click);
+       connect(ui->queue_btn, &QPushButton::clicked, this, &MainWindow::queue_clicked);
+
+       QShortcut *preview = new QShortcut(QKeySequence(Qt::Key_W), this);
+       connect(preview, &QShortcut::activated, ui->preview_btn, &QPushButton::click);
+       connect(ui->preview_btn, &QPushButton::clicked, this, &MainWindow::preview_clicked);
+
+       QShortcut *play = new QShortcut(QKeySequence(Qt::Key_Space), this);
+       connect(play, &QShortcut::activated, ui->play_btn, &QPushButton::click);
+       connect(ui->play_btn, &QPushButton::clicked, this, &MainWindow::play_clicked);
+
+       QShortcut *preview_1 = new QShortcut(QKeySequence(Qt::Key_1), this);
+       connect(preview_1, &QShortcut::activated, ui->preview_1_btn, &QPushButton::click);
+       connect(ui->input1_display, &JPEGFrameView::clicked, ui->preview_1_btn, &QPushButton::click);
+       connect(ui->preview_1_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(0); });
+       ui->input1_display->set_overlay("1");
+
+       QShortcut *preview_2 = new QShortcut(QKeySequence(Qt::Key_2), this);
+       connect(preview_2, &QShortcut::activated, ui->preview_2_btn, &QPushButton::click);
+       connect(ui->input2_display, &JPEGFrameView::clicked, ui->preview_2_btn, &QPushButton::click);
+       connect(ui->preview_2_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(1); });
+       ui->input2_display->set_overlay("2");
+
+       QShortcut *preview_3 = new QShortcut(QKeySequence(Qt::Key_3), this);
+       connect(preview_3, &QShortcut::activated, ui->preview_3_btn, &QPushButton::click);
+       connect(ui->input3_display, &JPEGFrameView::clicked, ui->preview_3_btn, &QPushButton::click);
+       connect(ui->preview_3_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(2); });
+       ui->input3_display->set_overlay("3");
+
+       QShortcut *preview_4 = new QShortcut(QKeySequence(Qt::Key_4), this);
+       connect(preview_4, &QShortcut::activated, ui->preview_4_btn, &QPushButton::click);
+       connect(ui->input4_display, &JPEGFrameView::clicked, ui->preview_4_btn, &QPushButton::click);
+       connect(ui->preview_4_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(3); });
+       ui->input4_display->set_overlay("4");
+
+       connect(ui->playlist_duplicate_btn, &QPushButton::clicked, this, &MainWindow::playlist_duplicate);
+
+       connect(ui->playlist_remove_btn, &QPushButton::clicked, this, &MainWindow::playlist_remove);
+       QShortcut *delete_key = new QShortcut(QKeySequence(Qt::Key_Delete), ui->playlist);
+       connect(delete_key, &QShortcut::activated, [this] {
+               if (ui->playlist->hasFocus()) {
+                       playlist_remove();
+               }
+       });
+
+       // TODO: support drag-and-drop.
+       connect(ui->playlist_move_up_btn, &QPushButton::clicked, [this]{ playlist_move(-1); });
+       connect(ui->playlist_move_down_btn, &QPushButton::clicked, [this]{ playlist_move(1); });
+
+       connect(ui->playlist->selectionModel(), &QItemSelectionModel::selectionChanged,
+               this, &MainWindow::playlist_selection_changed);
+       playlist_selection_changed();  // First time set-up.
+
+       preview_player = new Player(ui->preview_display, /*also_output_to_stream=*/false);
+       live_player = new Player(ui->live_display, /*also_output_to_stream=*/true);
+       live_player->set_done_callback([this]{
+               post_to_main_thread([this]{
+                       live_player_clip_done();
+               });
+       });
+       live_player->set_next_clip_callback(bind(&MainWindow::live_player_get_next_clip, this));
+       live_player->set_progress_callback([this](const map<size_t, double> &progress) {
+               post_to_main_thread([this, progress] {
+                       live_player_clip_progress(progress);
+               });
+       });
+       set_output_status("paused");
+
+       defer_timeout = new QTimer(this);
+       defer_timeout->setSingleShot(true);
+       connect(defer_timeout, &QTimer::timeout, this, &MainWindow::defer_timer_expired);
+
+       connect(ui->clip_list->selectionModel(), &QItemSelectionModel::currentChanged,
+               this, &MainWindow::clip_list_selection_changed);
+}
+
+void MainWindow::cue_in_clicked()
+{
+       if (!cliplist_clips->empty() && cliplist_clips->back()->pts_out < 0) {
+               cliplist_clips->mutable_back()->pts_in = current_pts;
+               return;
+       }
+       Clip clip;
+       clip.pts_in = current_pts;
+       cliplist_clips->add_clip(clip);
+       playlist_selection_changed();
+}
+
+void MainWindow::cue_out_clicked()
+{
+       if (!cliplist_clips->empty()) {
+               cliplist_clips->mutable_back()->pts_out = current_pts;
+               // TODO: select the row in the clip list?
+       }
+}
+
+void MainWindow::queue_clicked()
+{
+       if (cliplist_clips->empty()) {
+               return;
+       }
+
+       QItemSelectionModel *selected = ui->clip_list->selectionModel();
+       if (!selected->hasSelection()) {
+               Clip clip = *cliplist_clips->back();
+               clip.stream_idx = 0;
+               if (clip.pts_out != -1) {
+                       playlist_clips->add_clip(clip);
+                       playlist_selection_changed();
+               }
+               return;
+       }
+
+       QModelIndex index = selected->currentIndex();
+       Clip clip = *cliplist_clips->clip(index.row());
+       if (index.column() >= int(ClipList::Column::CAMERA_1) &&
+           index.column() <= int(ClipList::Column::CAMERA_4)) {
+               clip.stream_idx = index.column() - int(ClipList::Column::CAMERA_1);
+       } else {
+               clip.stream_idx = ui->preview_display->get_stream_idx();
+       }
+
+       if (clip.pts_out != -1) {
+               playlist_clips->add_clip(clip);
+               playlist_selection_changed();
+       }
+}
+
+void MainWindow::preview_clicked()
+{
+       if (ui->playlist->hasFocus()) {
+               // Allow the playlist as preview iff it has focus and something is selected.
+               QItemSelectionModel *selected = ui->playlist->selectionModel();
+               if (selected->hasSelection()) {
+                       QModelIndex index = selected->currentIndex();
+                       const Clip &clip = *playlist_clips->clip(index.row());
+                       preview_player->play_clip(clip, index.row(), clip.stream_idx);
+                       return;
+               }
+       }
+
+       if (cliplist_clips->empty())
+               return;
+
+       QItemSelectionModel *selected = ui->clip_list->selectionModel();
+       if (!selected->hasSelection()) {
+               preview_player->play_clip(*cliplist_clips->back(), cliplist_clips->size() - 1, 0);
+               return;
+       }
+
+       QModelIndex index = selected->currentIndex();
+       unsigned stream_idx;
+       if (index.column() >= int(ClipList::Column::CAMERA_1) &&
+           index.column() <= int(ClipList::Column::CAMERA_4)) {
+               stream_idx = index.column() - int(ClipList::Column::CAMERA_1);
+       } else {
+               stream_idx = ui->preview_display->get_stream_idx();
+       }
+       preview_player->play_clip(*cliplist_clips->clip(index.row()), index.row(), stream_idx);
+}
+
+void MainWindow::preview_angle_clicked(unsigned stream_idx)
+{
+       preview_player->override_angle(stream_idx);
+
+       // Change the selection if we were previewing a clip from the clip list.
+       // (The only other thing we could be showing is a pts scrub, and if so,
+       // that would be selected.)
+       QItemSelectionModel *selected = ui->clip_list->selectionModel();
+       if (selected->hasSelection()) {
+               QModelIndex cell = selected->selectedIndexes()[0];
+               int column = int(ClipList::Column::CAMERA_1) + stream_idx;
+               selected->setCurrentIndex(cell.sibling(cell.row(), column), QItemSelectionModel::ClearAndSelect);
+       }
+}
+
+void MainWindow::playlist_duplicate()
+{
+       QItemSelectionModel *selected = ui->playlist->selectionModel();
+       if (!selected->hasSelection()) {
+               // Should have been grayed out, but OK.
+               return;
+       }
+       QModelIndexList rows = selected->selectedRows();
+       int first = rows.front().row(), last = rows.back().row();
+       playlist_clips->duplicate_clips(first, last);
+       playlist_selection_changed();
+}
+
+void MainWindow::playlist_remove()
+{
+       QItemSelectionModel *selected = ui->playlist->selectionModel();
+       if (!selected->hasSelection()) {
+               // Should have been grayed out, but OK.
+               return;
+       }
+       QModelIndexList rows = selected->selectedRows();
+       int first = rows.front().row(), last = rows.back().row();
+       playlist_clips->erase_clips(first, last);
+
+       // TODO: select the next one in the list?
+
+       playlist_selection_changed();
+}
+
+void MainWindow::playlist_move(int delta)
+{
+       QItemSelectionModel *selected = ui->playlist->selectionModel();
+       if (!selected->hasSelection()) {
+               // Should have been grayed out, but OK.
+               return;
+       }
+
+       QModelIndexList rows = selected->selectedRows();
+       int first = rows.front().row(), last = rows.back().row();
+       if ((delta == -1 && first == 0) ||
+           (delta == 1 && size_t(last) == playlist_clips->size() - 1)) {
+               // Should have been grayed out, but OK.
+               return;
+       }
+
+       playlist_clips->move_clips(first, last, delta);
+       playlist_selection_changed();
+}
+
+void MainWindow::defer_timer_expired()
+{
+       state_changed(deferred_state);
+}
+
+void MainWindow::content_changed()
+{
+       if (defer_timeout->isActive() &&
+           (!currently_deferring_model_changes || deferred_change_id != current_change_id)) {
+               // There's some deferred event waiting, but this event is unrelated.
+               // So it's time to short-circuit that timer and do the work it wanted to do.
+               defer_timeout->stop();
+               state_changed(deferred_state);
+       }
+       StateProto state;
+       *state.mutable_clip_list() = cliplist_clips->serialize();
+       *state.mutable_play_list() = playlist_clips->serialize();
+       if (currently_deferring_model_changes) {
+               deferred_change_id = current_change_id;
+               deferred_state = std::move(state);
+               defer_timeout->start(200);
+               return;
+       }
+       state_changed(state);
+}
+
+void MainWindow::state_changed(const StateProto &state)
+{
+       db.store_state(state);
+}
+
+void MainWindow::play_clicked()
+{
+       if (playlist_clips->empty())
+               return;
+
+       QItemSelectionModel *selected = ui->playlist->selectionModel();
+       int row;
+       if (!selected->hasSelection()) {
+               row = 0;
+       } else {
+               row = selected->selectedRows(0)[0].row();
+       }
+
+       const Clip &clip = *playlist_clips->clip(row);
+       live_player->play_clip(clip, row, clip.stream_idx);
+       playlist_clips->set_progress({{ row, 0.0f }});
+       playlist_clips->set_currently_playing(row, 0.0f);
+       playlist_selection_changed();
+}
+
+void MainWindow::live_player_clip_done()
+{
+       int row = playlist_clips->get_currently_playing();
+       if (row == -1 || row == int(playlist_clips->size()) - 1) {
+               set_output_status("paused");
+               playlist_clips->set_progress({});
+               playlist_clips->set_currently_playing(-1, 0.0f);
+       } else {
+               playlist_clips->set_progress({{ row + 1, 0.0f }});
+               playlist_clips->set_currently_playing(row + 1, 0.0f);
+       }
+}
+
+pair<Clip, size_t> MainWindow::live_player_get_next_clip()
+{
+       // playlist_clips can only be accessed on the main thread.
+       // Hopefully, we won't have to wait too long for this to come back.
+       promise<pair<Clip, size_t>> clip_promise;
+       future<pair<Clip, size_t>> clip = clip_promise.get_future();
+       post_to_main_thread([this, &clip_promise] {
+               int row = playlist_clips->get_currently_playing();
+               if (row != -1 && row < int(playlist_clips->size()) - 1) {
+                       clip_promise.set_value(make_pair(*playlist_clips->clip(row + 1), row + 1));
+               } else {
+                       clip_promise.set_value(make_pair(Clip(), 0));
+               }
+       });
+       return clip.get();
+}
+
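+// Format a duration (in seconds) as M:SS.mmm.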
+static string format_duration(double t)
+{
+       int t_ms = lrint(t * 1e3);
+
+       int ms = t_ms % 1000;
+       t_ms /= 1000;
+       int s = t_ms % 60;
+       t_ms /= 60;
+       int m = t_ms;
+
+       char buf[256];
+       snprintf(buf, sizeof(buf), "%d:%02d.%03d", m, s, ms);
+       return buf;
+}
+
+void MainWindow::live_player_clip_progress(const map<size_t, double> &progress)
+{
+       playlist_clips->set_progress(progress);
+
+       // Look at the last clip and then start counting from there.
+       assert(!progress.empty());
+       auto last_it = progress.end();
+       --last_it;
+       double remaining = 0.0;
+       double last_fade_time_seconds = 0.0;
+       for (size_t row = last_it->first; row < playlist_clips->size(); ++row) {
+               const Clip clip = *playlist_clips->clip(row);
+               double clip_length = double(clip.pts_out - clip.pts_in) / TIMEBASE / 0.5;  // FIXME: stop hardcoding speed.
+               if (row == last_it->first) {
+                       // A clip we're playing: Subtract the part we've already played.
+                       remaining = clip_length * (1.0 - last_it->second);
+               } else {
+                       // A clip we haven't played yet: Subtract the part that's overlapping
+                       // with a previous clip (due to fade).
+                       remaining += max(clip_length - last_fade_time_seconds, 0.0);
+               }
+               last_fade_time_seconds = min(clip_length, clip.fade_time_seconds);
+       }
+       set_output_status(format_duration(remaining) + " left");
+}
+
+void MainWindow::resizeEvent(QResizeEvent *event)
+{
+       QMainWindow::resizeEvent(event);
+
+       // Ask for a relayout, but only after the event loop is done doing relayout
+       // on everything else.
+       QMetaObject::invokeMethod(this, "relayout", Qt::QueuedConnection);
+}
+
+void MainWindow::relayout()
+{
+       ui->live_display->setMinimumWidth(ui->live_display->height() * 16 / 9);
+       ui->preview_display->setMinimumWidth(ui->preview_display->height() * 16 / 9);
+}
+
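+// Clamp a new pts_in so that it is nonnegative and does not pass pts_out
+// (or the current end of the stream, if the clip has no out point yet).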
+void set_pts_in(int64_t pts, int64_t current_pts, ClipProxy &clip)
+{
+       pts = std::max<int64_t>(pts, 0);
+       if (clip->pts_out == -1) {
+               pts = std::min(pts, current_pts);
+       } else {
+               pts = std::min(pts, clip->pts_out);
+       }
+       clip->pts_in = pts;
+}
+
+bool MainWindow::eventFilter(QObject *watched, QEvent *event)
+{
+       constexpr int dead_zone_pixels = 3;  // So that simple clicks do not get misinterpreted as scrubs.
+       constexpr int scrub_sensitivity = 100;  // pts units per pixel.
+       constexpr int wheel_sensitivity = 100;  // pts units per degree.
+       constexpr int camera_degrees_per_pixel = 15;  // One click of most mice.
+
+       unsigned stream_idx = ui->preview_display->get_stream_idx();
+
+       if (watched == ui->clip_list) {
+               if (event->type() == QEvent::FocusOut) {
+                       highlight_camera_input(-1);
+               }
+               return false;
+       }
+
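+       // Anything that is not a wheel event ends any ongoing camera-scroll streak.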
+       if (event->type() != QEvent::Wheel) {
+               last_mousewheel_camera_row = -1;
+       }
+
+       if (event->type() == QEvent::MouseButtonPress) {
+               QMouseEvent *mouse = (QMouseEvent *)event;
+
+               QTableView *destination;
+               ScrubType type;
+
+               if (watched == ui->clip_list->viewport()) {
+                       destination = ui->clip_list;
+                       type = SCRUBBING_CLIP_LIST;
+               } else if (watched == ui->playlist->viewport()) {
+                       destination = ui->playlist;
+                       type = SCRUBBING_PLAYLIST;
+               } else {
+                       return false;
+               }
+               int column = destination->columnAt(mouse->x());
+               int row = destination->rowAt(mouse->y());
+               if (column == -1 || row == -1)
+                       return false;
+
+               if (type == SCRUBBING_CLIP_LIST) {
+                       if (ClipList::Column(column) == ClipList::Column::IN) {
+                               scrub_pts_origin = cliplist_clips->clip(row)->pts_in;
+                               preview_single_frame(scrub_pts_origin, stream_idx, FIRST_AT_OR_AFTER);
+                       } else if (ClipList::Column(column) == ClipList::Column::OUT) {
+                               scrub_pts_origin = cliplist_clips->clip(row)->pts_out;
+                               preview_single_frame(scrub_pts_origin, stream_idx, LAST_BEFORE);
+                       } else {
+                               return false;
+                       }
+               } else {
+                       if (PlayList::Column(column) == PlayList::Column::IN) {
+                               scrub_pts_origin = playlist_clips->clip(row)->pts_in;
+                               preview_single_frame(scrub_pts_origin, stream_idx, FIRST_AT_OR_AFTER);
+                       } else if (PlayList::Column(column) == PlayList::Column::OUT) {
+                               scrub_pts_origin = playlist_clips->clip(row)->pts_out;
+                               preview_single_frame(scrub_pts_origin, stream_idx, LAST_BEFORE);
+                       } else {
+                               return false;
+                       }
+               }
+
+               scrubbing = true;
+               scrub_row = row;
+               scrub_column = column;
+               scrub_x_origin = mouse->x();
+               scrub_type = type;
+       } else if (event->type() == QEvent::MouseMove) {
+               if (scrubbing) {
+                       QMouseEvent *mouse = (QMouseEvent *)event;
+                       int offset = mouse->x() - scrub_x_origin;
+                       int adjusted_offset;
+                       if (offset >= dead_zone_pixels) {
+                               adjusted_offset = offset - dead_zone_pixels;
+                       } else if (offset < -dead_zone_pixels) {
+                               adjusted_offset = offset + dead_zone_pixels;
+                       } else {
+                               adjusted_offset = 0;
+                       }
+
+                       int64_t pts = scrub_pts_origin + adjusted_offset * scrub_sensitivity;
+                       currently_deferring_model_changes = true;
+                       if (scrub_type == SCRUBBING_CLIP_LIST) {
+                               ClipProxy clip = cliplist_clips->mutable_clip(scrub_row);
+                               if (scrub_column == int(ClipList::Column::IN)) {
+                                       current_change_id = "cliplist:in:" + to_string(scrub_row);
+                                       set_pts_in(pts, current_pts, clip);
+                                       preview_single_frame(pts, stream_idx, FIRST_AT_OR_AFTER);
+                               } else {
+                                       current_change_id = "cliplist:out:" + to_string(scrub_row);
+                                       pts = std::max(pts, clip->pts_in);
+                                       pts = std::min(pts, current_pts);
+                                       clip->pts_out = pts;
+                                       preview_single_frame(pts, stream_idx, LAST_BEFORE);
+                               }
+                       } else {
+                               ClipProxy clip = playlist_clips->mutable_clip(scrub_row);
+                               if (scrub_column == int(PlayList::Column::IN)) {
+                                       current_change_id = "playlist:in:" + to_string(scrub_row);
+                                       set_pts_in(pts, current_pts, clip);
+                                       preview_single_frame(pts, clip->stream_idx, FIRST_AT_OR_AFTER);
+                               } else {
+                                       current_change_id = "playlist:out:" + to_string(scrub_row);
+                                       pts = std::max(pts, clip->pts_in);
+                                       pts = std::min(pts, current_pts);
+                                       clip->pts_out = pts;
+                                       preview_single_frame(pts, clip->stream_idx, LAST_BEFORE);
+                               }
+                       }
+                       currently_deferring_model_changes = false;
+
+                       return true;  // Don't use this mouse movement for selecting things.
+               }
+       } else if (event->type() == QEvent::Wheel) {
+               QWheelEvent *wheel = (QWheelEvent *)event;
+
+               QTableView *destination;
+               int in_column, out_column, camera_column;
+               if (watched == ui->clip_list->viewport()) {
+                       destination = ui->clip_list;
+                       in_column = int(ClipList::Column::IN);
+                       out_column = int(ClipList::Column::OUT);
+                       camera_column = -1;
+                       last_mousewheel_camera_row = -1;
+               } else if (watched == ui->playlist->viewport()) {
+                       destination = ui->playlist;
+                       in_column = int(PlayList::Column::IN);
+                       out_column = int(PlayList::Column::OUT);
+                       camera_column = int(PlayList::Column::CAMERA);
+               } else {
+                       last_mousewheel_camera_row = -1;
+                       return false;
+               }
+               int column = destination->columnAt(wheel->x());
+               int row = destination->rowAt(wheel->y());
+               if (column == -1 || row == -1)
+                       return false;
+
+               // Only adjust pts with the wheel if the given row is selected.
+               if (!destination->hasFocus() ||
+                   row != destination->selectionModel()->currentIndex().row()) {
+                       return false;
+               }
+
+               currently_deferring_model_changes = true;
+               {
+                       current_change_id = (watched == ui->clip_list->viewport()) ? "cliplist:" : "playlist:";
+                       ClipProxy clip = (watched == ui->clip_list->viewport()) ?
+                               cliplist_clips->mutable_clip(row) : playlist_clips->mutable_clip(row);
+                       if (watched == ui->playlist->viewport()) {
+                               stream_idx = clip->stream_idx;
+                       }
+
+                       if (column != camera_column) {
+                               last_mousewheel_camera_row = -1;
+                       }
+                       if (column == in_column) {
+                               current_change_id += "in:" + to_string(row);
+                               int64_t pts = clip->pts_in + wheel->angleDelta().y() * wheel_sensitivity;
+                               set_pts_in(pts, current_pts, clip);
+                               preview_single_frame(pts, stream_idx, FIRST_AT_OR_AFTER);
+                       } else if (column == out_column) {
+                               current_change_id += "out:" + to_string(row);
+                               int64_t pts = clip->pts_out + wheel->angleDelta().y() * wheel_sensitivity;
+                               pts = std::max(pts, clip->pts_in);
+                               pts = std::min(pts, current_pts);
+                               clip->pts_out = pts;
+                               preview_single_frame(pts, stream_idx, LAST_BEFORE);
+                       } else if (column == camera_column) {
+                               current_change_id += "camera:" + to_string(row);
+                               int angle_degrees = wheel->angleDelta().y();
+                               if (last_mousewheel_camera_row == row) {
+                                       angle_degrees += leftover_angle_degrees;
+                               }
+
+                               int stream_idx = clip->stream_idx + angle_degrees / camera_degrees_per_pixel;
+                               stream_idx = std::max(stream_idx, 0);
+                               stream_idx = std::min(stream_idx, NUM_CAMERAS - 1);
+                               clip->stream_idx = stream_idx;
+
+                               last_mousewheel_camera_row = row;
+                               leftover_angle_degrees = angle_degrees % camera_degrees_per_pixel;
+
+                               // Don't update the live view; that's rarely what the operator wants.
+                       }
+               }
+               currently_deferring_model_changes = false;
+               return true;  // Don't scroll.
+       } else if (event->type() == QEvent::MouseButtonRelease) {
+               scrubbing = false;
+       }
+       return false;
+}
+
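+// Preview a single frame on the given stream, rounding the given pts
+// to a frame that actually exists on disk.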
+void MainWindow::preview_single_frame(int64_t pts, unsigned stream_idx, MainWindow::Rounding rounding)
+{
+       if (rounding == LAST_BEFORE) {
+               lock_guard<mutex> lock(frame_mu);
+               if (frames[stream_idx].empty())
+                       return;
+               auto it = lower_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts,
+                       [](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+               if (it != frames[stream_idx].end()) {
+                       pts = it->pts;
+               }
+       } else {
+               assert(rounding == FIRST_AT_OR_AFTER);
+               lock_guard<mutex> lock(frame_mu);
+               if (frames[stream_idx].empty())
+                       return;
+               auto it = upper_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts - 1,
+                       [](int64_t pts, const FrameOnDisk &frame) { return pts < frame.pts; });
+               if (it != frames[stream_idx].end()) {
+                       pts = it->pts;
+               }
+       }
+
+       Clip fake_clip;
+       fake_clip.pts_in = pts;
+       fake_clip.pts_out = pts + 1;
+       preview_player->play_clip(fake_clip, 0, stream_idx);
+}
+
+void MainWindow::playlist_selection_changed()
+{
+       QItemSelectionModel *selected = ui->playlist->selectionModel();
+       bool any_selected = selected->hasSelection();
+       ui->playlist_duplicate_btn->setEnabled(any_selected);
+       ui->playlist_remove_btn->setEnabled(any_selected);
+       ui->playlist_move_up_btn->setEnabled(
+               any_selected && selected->selectedRows().front().row() > 0);
+       ui->playlist_move_down_btn->setEnabled(
+               any_selected && selected->selectedRows().back().row() < int(playlist_clips->size()) - 1);
+       ui->play_btn->setEnabled(!playlist_clips->empty());
+
+       if (!any_selected) {
+               set_output_status("paused");
+       } else {
+               double remaining = 0.0;
+               for (int row = selected->selectedRows().front().row(); row < int(playlist_clips->size()); ++row) {
+                       const Clip clip = *playlist_clips->clip(row);
+                       remaining += double(clip.pts_out - clip.pts_in) / TIMEBASE / 0.5;  // FIXME: stop hardcoding speed.
+               }
+               set_output_status(format_duration(remaining) + " ready");
+       }
+}
+
+void MainWindow::clip_list_selection_changed(const QModelIndex &current, const QModelIndex &)
+{
+       int camera_selected = -1;
+       if (current.column() >= int(ClipList::Column::CAMERA_1) &&
+           current.column() <= int(ClipList::Column::CAMERA_4)) {
+               camera_selected = current.column() - int(ClipList::Column::CAMERA_1);
+       }
+       highlight_camera_input(camera_selected);
+}
+
+void MainWindow::report_disk_space(off_t free_bytes, double estimated_seconds_left)
+{
+       char time_str[256];
+       if (estimated_seconds_left < 60.0) {
+               strcpy(time_str, "<font color=\"red\">Less than a minute</font>");
+       } else if (estimated_seconds_left < 1800.0) {  // Less than half an hour: Xm Ys (red).
+               int s = lrintf(estimated_seconds_left);
+               int m = s / 60;
+               s %= 60;
+               snprintf(time_str, sizeof(time_str), "<font color=\"red\">%dm %ds</font>", m, s);
+       } else if (estimated_seconds_left < 3600.0) {  // Less than an hour: Xm.
+               int m = lrintf(estimated_seconds_left / 60.0);
+               snprintf(time_str, sizeof(time_str), "%dm", m);
+       } else if (estimated_seconds_left < 36000.0) {  // Less than ten hours: Xh Ym.
+               int m = lrintf(estimated_seconds_left / 60.0);
+               int h = m / 60;
+               m %= 60;
+               snprintf(time_str, sizeof(time_str), "%dh %dm", h, m);
+       } else {  // More than ten hours: Xh.
+               int h = lrintf(estimated_seconds_left / 3600.0);
+               snprintf(time_str, sizeof(time_str), "%dh", h);
+       }
+       char buf[256];
+       snprintf(buf, sizeof(buf), "Disk free: %'.0f MB (approx. %s)", free_bytes / 1048576.0, time_str);
+
+       std::string label = buf;
+
+       post_to_main_thread([this, label] {
+               disk_free_label->setText(QString::fromStdString(label));
+               ui->menuBar->setCornerWidget(disk_free_label);  // Need to set this again for the sizing to come out right.
+       });
+}
+
+void MainWindow::exit_triggered()
+{
+       close();
+}
+
+void MainWindow::highlight_camera_input(int stream_idx)
+{
+       if (stream_idx == 0) {
+               ui->input1_frame->setStyleSheet("background: rgb(0,255,0)");
+       } else {
+               ui->input1_frame->setStyleSheet("");
+       }
+       if (stream_idx == 1) {
+               ui->input2_frame->setStyleSheet("background: rgb(0,255,0)");
+       } else {
+               ui->input2_frame->setStyleSheet("");
+       }
+       if (stream_idx == 2) {
+               ui->input3_frame->setStyleSheet("background: rgb(0,255,0)");
+       } else {
+               ui->input3_frame->setStyleSheet("");
+       }
+       if (stream_idx == 3) {
+               ui->input4_frame->setStyleSheet("background: rgb(0,255,0)");
+       } else {
+               ui->input4_frame->setStyleSheet("");
+       }
+}
+
+void MainWindow::set_output_status(const string &status)
+{
+       ui->live_label->setText(QString::fromStdString("Current output (" + status + ")"));
+
+       lock_guard<mutex> lock(queue_status_mu);
+       queue_status = status;
+}
+
+pair<string, string> MainWindow::get_queue_status() const
+{
+       lock_guard<mutex> lock(queue_status_mu);
+       return {queue_status, "text/plain"};
+}
diff --git a/futatabi/mainwindow.h b/futatabi/mainwindow.h
new file mode 100644 (file)
index 0000000..7f8c57a
--- /dev/null
@@ -0,0 +1,112 @@
+#ifndef MAINWINDOW_H
+#define MAINWINDOW_H
+
+#include "clip_list.h"
+#include "db.h"
+#include "state.pb.h"
+
+#include <mutex>
+#include <QLabel>
+#include <QMainWindow>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <string>
+#include <utility>
+
+namespace Ui {
+class MainWindow;
+}  // namespace Ui
+
+class Player;
+
+class MainWindow : public QMainWindow {
+       Q_OBJECT
+
+public:
+       MainWindow();
+
+       // HTTP callback. TODO: Perhaps this does not belong in MainWindow?
+       std::pair<std::string, std::string> get_queue_status() const;
+
+//private:
+       Ui::MainWindow *ui;
+
+private:
+       QLabel *disk_free_label;
+       Player *preview_player, *live_player;
+       DB db;
+
+       // State when doing a scrub operation on a timestamp with the mouse.
+       bool scrubbing = false;
+       int scrub_x_origin;  // In pixels on the viewport.
+       int64_t scrub_pts_origin;
+
+       // Which element (e.g. pts_in on clip 4) we are scrubbing.
+       enum ScrubType { SCRUBBING_CLIP_LIST, SCRUBBING_PLAYLIST } scrub_type;
+       int scrub_row;
+       int scrub_column;
+
+       // Used to keep track of small mouse wheel motions on the camera index in the playlist.
+       int last_mousewheel_camera_row = -1;
+       int leftover_angle_degrees = 0;
+
+       // Some operations, notably scrubbing and scrolling, happen in such large increments
+       // that we want to group them instead of saving to disk every single time.
+       // If such a change happens (ie., we get a callback from the model that it has changed)
+       // while currently_deferring_model_changes is set, we fire off this timer. If the timer
+       // manages to elapse before any other change happens, we store the deferred state.
+       // If another change of the same kind arrives first, we simply restart the timer;
+       // an unrelated change forces the deferred state to be stored immediately.
+       QTimer *defer_timeout;
+       std::string deferred_change_id;
+       StateProto deferred_state;
+
+       // Before a change that should be deferred (see above), currently_deferring_model_changes
+       // must be set to true, and current_change_id must be given contents describing what's
+       // changed to avoid accidental grouping.
+       bool currently_deferring_model_changes = false;
+       std::string current_change_id;
+
+       mutable std::mutex queue_status_mu;
+       std::string queue_status;  // Under queue_status_mu.
+
+       void cue_in_clicked();
+       void cue_out_clicked();
+       void queue_clicked();
+       void preview_clicked();
+       void preview_angle_clicked(unsigned stream_idx);
+       void play_clicked();
+       void live_player_clip_done();
+       std::pair<Clip, size_t> live_player_get_next_clip();
+       void live_player_clip_progress(const std::map<size_t, double> &progress);
+       void set_output_status(const std::string &status);
+       void playlist_duplicate();
+       void playlist_remove();
+       void playlist_move(int delta);
+
+       void defer_timer_expired();
+       void content_changed();  // In clip_list or play_list.
+       void state_changed(const StateProto &state);  // Called post-filtering.
+
+       enum Rounding { FIRST_AT_OR_AFTER, LAST_BEFORE };
+       void preview_single_frame(int64_t pts, unsigned stream_idx, Rounding rounding);
+
+       // Also covers when the playlist itself changes.
+       void playlist_selection_changed();
+
+       void clip_list_selection_changed(const QModelIndex &current, const QModelIndex &previous);
+
+       void resizeEvent(QResizeEvent *event) override;
+       bool eventFilter(QObject *watched, QEvent *event) override;
+
+       void report_disk_space(off_t free_bytes, double estimated_seconds_left);
+       void exit_triggered();
+
+       void highlight_camera_input(int stream_idx);
+
+private slots:
+       void relayout();
+};
+
+extern MainWindow *global_mainwindow;
+
+#endif
diff --git a/futatabi/mainwindow.ui b/futatabi/mainwindow.ui
new file mode 100644 (file)
index 0000000..dbdb622
--- /dev/null
@@ -0,0 +1,472 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>MainWindow</class>
+ <widget class="QMainWindow" name="MainWindow">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>1038</width>
+    <height>600</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Futatabi</string>
+  </property>
+  <widget class="QWidget" name="centralwidget">
+   <layout class="QGridLayout" name="gridLayout">
+    <item row="0" column="0">
+     <widget class="QSplitter" name="splitter">
+      <property name="orientation">
+       <enum>Qt::Horizontal</enum>
+      </property>
+      <widget class="QWidget" name="horizontalLayoutWidget">
+       <layout class="QVBoxLayout" name="clip_and_play_lists">
+        <item>
+         <widget class="QTableView" name="clip_list"/>
+        </item>
+        <item>
+         <layout class="QHBoxLayout" name="clip_list_buttons">
+          <item>
+           <widget class="QPushButton" name="queue_btn">
+            <property name="text">
+             <string>Queue (&amp;Q)</string>
+            </property>
+            <property name="icon">
+             <iconset theme="list-add">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="preview_btn">
+            <property name="text">
+             <string>Preview (&amp;W)</string>
+            </property>
+            <property name="icon">
+             <iconset theme="media-playback-start">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="cue_in_btn">
+            <property name="text">
+             <string>Cue in (&amp;A)</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="cue_out_btn">
+            <property name="text">
+             <string>Cue out (&amp;S)</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <spacer name="horizontalSpacer_2">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+            <property name="sizeHint" stdset="0">
+             <size>
+              <width>40</width>
+              <height>20</height>
+             </size>
+            </property>
+           </spacer>
+          </item>
+         </layout>
+        </item>
+        <item>
+         <widget class="QTableView" name="playlist">
+          <property name="selectionMode">
+           <enum>QAbstractItemView::ContiguousSelection</enum>
+          </property>
+          <property name="selectionBehavior">
+           <enum>QAbstractItemView::SelectRows</enum>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <layout class="QHBoxLayout" name="playlist_buttons">
+          <item>
+           <widget class="QPushButton" name="playlist_duplicate_btn">
+            <property name="text">
+             <string>Duplicate</string>
+            </property>
+            <property name="icon">
+             <iconset theme="list-add">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="playlist_remove_btn">
+            <property name="sizePolicy">
+             <sizepolicy hsizetype="Fixed" vsizetype="Fixed">
+              <horstretch>0</horstretch>
+              <verstretch>0</verstretch>
+             </sizepolicy>
+            </property>
+            <property name="text">
+             <string>Remove</string>
+            </property>
+            <property name="icon">
+             <iconset theme="list-remove">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="playlist_move_up_btn">
+            <property name="text">
+             <string>Move up</string>
+            </property>
+            <property name="icon">
+             <iconset theme="go-up">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QPushButton" name="playlist_move_down_btn">
+            <property name="text">
+             <string>Move down</string>
+            </property>
+            <property name="icon">
+             <iconset theme="go-down">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <spacer name="horizontalSpacer">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+            <property name="sizeHint" stdset="0">
+             <size>
+              <width>40</width>
+              <height>20</height>
+             </size>
+            </property>
+           </spacer>
+          </item>
+          <item>
+           <widget class="QPushButton" name="play_btn">
+            <property name="text">
+             <string>Play (space)</string>
+            </property>
+            <property name="icon">
+             <iconset theme="media-playback-start">
+              <normaloff>.</normaloff>.</iconset>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </item>
+       </layout>
+      </widget>
+      <widget class="QWidget" name="verticalLayoutWidget_4">
+       <layout class="QVBoxLayout" name="video_displays" stretch="1,2">
+        <item>
+         <layout class="QHBoxLayout" name="preview_and_live_panes">
+          <item>
+           <layout class="QVBoxLayout" name="preview_pane" stretch="1,0">
+            <item>
+             <widget class="JPEGFrameView" name="preview_display" native="true"/>
+            </item>
+            <item>
+             <layout class="QHBoxLayout" name="horizontalLayout_3">
+              <property name="spacing">
+               <number>0</number>
+              </property>
+              <item>
+               <widget class="QLabel" name="label_2">
+                <property name="text">
+                 <string>Preview output</string>
+                </property>
+                <property name="alignment">
+                 <set>Qt::AlignCenter</set>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QPushButton" name="preview_1_btn">
+                <property name="sizePolicy">
+                 <sizepolicy hsizetype="Maximum" vsizetype="Fixed">
+                  <horstretch>0</horstretch>
+                  <verstretch>0</verstretch>
+                 </sizepolicy>
+                </property>
+                <property name="maximumSize">
+                 <size>
+                  <width>20</width>
+                  <height>17</height>
+                 </size>
+                </property>
+                <property name="text">
+                 <string>1</string>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QPushButton" name="preview_2_btn">
+                <property name="sizePolicy">
+                 <sizepolicy hsizetype="Maximum" vsizetype="Fixed">
+                  <horstretch>0</horstretch>
+                  <verstretch>0</verstretch>
+                 </sizepolicy>
+                </property>
+                <property name="maximumSize">
+                 <size>
+                  <width>20</width>
+                  <height>17</height>
+                 </size>
+                </property>
+                <property name="text">
+                 <string>2</string>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QPushButton" name="preview_3_btn">
+                <property name="maximumSize">
+                 <size>
+                  <width>20</width>
+                  <height>17</height>
+                 </size>
+                </property>
+                <property name="text">
+                 <string>3</string>
+                </property>
+               </widget>
+              </item>
+              <item>
+               <widget class="QPushButton" name="preview_4_btn">
+                <property name="maximumSize">
+                 <size>
+                  <width>20</width>
+                  <height>17</height>
+                 </size>
+                </property>
+                <property name="text">
+                 <string>4</string>
+                </property>
+               </widget>
+              </item>
+             </layout>
+            </item>
+           </layout>
+          </item>
+          <item>
+           <layout class="QVBoxLayout" name="live_pane" stretch="1,0">
+            <item>
+             <widget class="JPEGFrameView" name="live_display" native="true"/>
+            </item>
+            <item>
+             <widget class="QLabel" name="live_label">
+              <property name="text">
+               <string>Current output (paused)</string>
+              </property>
+              <property name="alignment">
+               <set>Qt::AlignCenter</set>
+              </property>
+             </widget>
+            </item>
+           </layout>
+          </item>
+         </layout>
+        </item>
+        <item>
+         <layout class="QVBoxLayout" name="input_pane" stretch="1,0">
+          <item>
+           <layout class="QGridLayout" name="input_displays">
+            <property name="spacing">
+             <number>0</number>
+            </property>
+            <item row="0" column="0">
+             <widget class="QFrame" name="input1_frame">
+              <property name="frameShape">
+               <enum>QFrame::Box</enum>
+              </property>
+              <property name="frameShadow">
+               <enum>QFrame::Plain</enum>
+              </property>
+              <property name="lineWidth">
+               <number>0</number>
+              </property>
+              <layout class="QGridLayout" name="gridLayout_2">
+               <property name="leftMargin">
+                <number>3</number>
+               </property>
+               <property name="topMargin">
+                <number>3</number>
+               </property>
+               <property name="rightMargin">
+                <number>3</number>
+               </property>
+               <property name="bottomMargin">
+                <number>3</number>
+               </property>
+               <item row="0" column="0">
+                <widget class="JPEGFrameView" name="input1_display" native="true">
+                 <property name="autoFillBackground">
+                  <bool>true</bool>
+                 </property>
+                </widget>
+               </item>
+              </layout>
+             </widget>
+            </item>
+            <item row="1" column="0">
+             <widget class="QFrame" name="input3_frame">
+              <property name="frameShape">
+               <enum>QFrame::Box</enum>
+              </property>
+              <property name="frameShadow">
+               <enum>QFrame::Plain</enum>
+              </property>
+              <property name="lineWidth">
+               <number>0</number>
+              </property>
+              <layout class="QGridLayout" name="gridLayout_4">
+               <property name="leftMargin">
+                <number>3</number>
+               </property>
+               <property name="topMargin">
+                <number>3</number>
+               </property>
+               <property name="rightMargin">
+                <number>3</number>
+               </property>
+               <property name="bottomMargin">
+                <number>3</number>
+               </property>
+               <item row="0" column="0">
+                <widget class="JPEGFrameView" name="input3_display" native="true"/>
+               </item>
+              </layout>
+             </widget>
+            </item>
+            <item row="0" column="1">
+             <widget class="QFrame" name="input2_frame">
+              <property name="frameShape">
+               <enum>QFrame::Box</enum>
+              </property>
+              <property name="frameShadow">
+               <enum>QFrame::Plain</enum>
+              </property>
+              <property name="lineWidth">
+               <number>0</number>
+              </property>
+              <layout class="QGridLayout" name="gridLayout_3">
+               <property name="leftMargin">
+                <number>3</number>
+               </property>
+               <property name="topMargin">
+                <number>3</number>
+               </property>
+               <property name="rightMargin">
+                <number>3</number>
+               </property>
+               <property name="bottomMargin">
+                <number>3</number>
+               </property>
+               <item row="0" column="0">
+                <widget class="JPEGFrameView" name="input2_display" native="true">
+                 <property name="autoFillBackground">
+                  <bool>true</bool>
+                 </property>
+                </widget>
+               </item>
+              </layout>
+             </widget>
+            </item>
+            <item row="1" column="1">
+             <widget class="QFrame" name="input4_frame">
+              <property name="autoFillBackground">
+               <bool>true</bool>
+              </property>
+              <property name="frameShape">
+               <enum>QFrame::Box</enum>
+              </property>
+              <property name="frameShadow">
+               <enum>QFrame::Plain</enum>
+              </property>
+              <property name="lineWidth">
+               <number>0</number>
+              </property>
+              <layout class="QGridLayout" name="gridLayout_5">
+               <property name="leftMargin">
+                <number>3</number>
+               </property>
+               <property name="topMargin">
+                <number>3</number>
+               </property>
+               <property name="rightMargin">
+                <number>3</number>
+               </property>
+               <property name="bottomMargin">
+                <number>3</number>
+               </property>
+               <item row="0" column="0">
+                <widget class="JPEGFrameView" name="input4_display" native="true"/>
+               </item>
+              </layout>
+             </widget>
+            </item>
+           </layout>
+          </item>
+          <item>
+           <widget class="QLabel" name="label">
+            <property name="text">
+             <string>Current inputs</string>
+            </property>
+            <property name="alignment">
+             <set>Qt::AlignCenter</set>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </item>
+       </layout>
+      </widget>
+     </widget>
+    </item>
+   </layout>
+  </widget>
+  <widget class="QMenuBar" name="menuBar">
+   <property name="geometry">
+    <rect>
+     <x>0</x>
+     <y>0</y>
+     <width>1038</width>
+     <height>22</height>
+    </rect>
+   </property>
+   <widget class="QMenu" name="menuFile">
+    <property name="title">
+     <string>&amp;File</string>
+    </property>
+    <addaction name="exit_action"/>
+   </widget>
+   <addaction name="menuFile"/>
+  </widget>
+  <action name="exit_action">
+   <property name="text">
+    <string>E&amp;xit</string>
+   </property>
+  </action>
+ </widget>
+ <customwidgets>
+  <customwidget>
+   <class>JPEGFrameView</class>
+   <extends>QWidget</extends>
+   <header>jpeg_frame_view.h</header>
+  </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/futatabi/make-example-video.sh b/futatabi/make-example-video.sh
new file mode 100644 (file)
index 0000000..975c7a4
--- /dev/null
@@ -0,0 +1,8 @@
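+# Download a publicly available multi-angle clip, cut out the three angles as
+# separate MJPEG-encoded files (audio stripped), and finally mux them into a
+# single file with three video streams (multiangle.mp4), for use as example input.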
+youtube-dl 'https://www.youtube.com/watch?v=Wa2fBiCEzTc'
+FILE='MULTI ANGLE _ George Evans pops up with a 92nd-minute winner in Blackburn!-Wa2fBiCEzTc.mp4'
+ffmpeg -y -ss 0:03.290 -t 0:37 -i "$FILE" -c:v mjpeg -an angle1.mp4
+ffmpeg -y -ss 0:40 -t 0:40 -i "$FILE" -c:v mjpeg -an angle2.mp4
+ffmpeg -y -ss 1:12.880 -i "$FILE" -c:v mjpeg -an angle3.mp4
+ffmpeg -y -ss 0:07 -i ./angle3.mp4 -c:v copy -copyts -start_at_zero angle3-cut.mp4
+ffmpeg -y -copyts -i angle1.mp4 -i angle2.mp4 -i angle3-cut.mp4 -map 0:0 -map 1:0 -map 2:0 -c:v copy multiangle.mp4
+
diff --git a/futatabi/meson.build b/futatabi/meson.build
new file mode 100644 (file)
index 0000000..dd94984
--- /dev/null
@@ -0,0 +1,58 @@
+qt5 = import('qt5')
+protoc = find_program('protoc')
+
+epoxydep = dependency('epoxy')
+libavcodecdep = dependency('libavcodec')
+libavformatdep = dependency('libavformat')
+libavutildep = dependency('libavutil')
+libjpegdep = dependency('libjpeg')
+libswscaledep = dependency('libswscale')
+movitdep = dependency('movit')
+protobufdep = dependency('protobuf')
+qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
+sdl2_imagedep = dependency('SDL2_image')
+sdl2dep = dependency('sdl2')
+sqlite3dep = dependency('sqlite3')
+vadrmdep = dependency('libva-drm')
+vax11dep = dependency('libva-x11')
+x11dep = dependency('x11')
+
+# Protobuf compilation.
+gen = generator(protoc,
+  output    : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
+  arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+proto_generated = gen.process('state.proto', 'frame.proto')
+
+# Preprocess Qt as needed.
+moc_files = qt5.preprocess(
+  moc_headers: ['mainwindow.h', 'jpeg_frame_view.h', 'clip_list.h'],
+  ui_files: ['mainwindow.ui'],
+  dependencies: qt5deps)
+
+# Flow objects.
+srcs = ['flow.cpp', 'gpu_timers.cpp']
+
+# All the other files.
+srcs += ['main.cpp', 'player.cpp', 'video_stream.cpp', 'chroma_subsampler.cpp']
+srcs += ['vaapi_jpeg_decoder.cpp', 'db.cpp', 'ycbcr_converter.cpp', 'flags.cpp']
+srcs += ['mainwindow.cpp', 'jpeg_frame_view.cpp', 'clip_list.cpp', 'frame_on_disk.cpp']
+srcs += moc_files
+srcs += proto_generated
+
+# Shaders needed at runtime.
+shaders = ['chroma_subsample.vert', 'densify.vert', 'equations.vert', 'hole_fill.vert', 'motion_search.vert', 'sor.vert', 'splat.vert', 'vs.vert']
+shaders += ['add_base_flow.frag', 'blend.frag', 'chroma_subsample.frag', 'densify.frag', 'derivatives.frag', 'diffusivity.frag',
+  'equations.frag', 'gray.frag', 'hole_blend.frag', 'hole_fill.frag', 'motion_search.frag', 'prewarp.frag', 'resize_flow.frag',
+  'sobel.frag', 'sor.frag', 'splat.frag']
+
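+# Symlink the shaders into the build directory, so that the binaries can load
+# them at runtime when run from the build tree.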
+foreach shader : shaders
+  run_command('ln', '-s', join_paths(meson.current_source_dir(), shader), meson.current_build_dir())
+endforeach
+
+shader_srcs = bin2h_gen.process(shaders)
+srcs += shader_srcs
+
+executable('futatabi', srcs, dependencies: [shareddep, qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep, sqlite3dep, vax11dep, vadrmdep, x11dep, libavformatdep, libavcodecdep, libavutildep, libswscaledep], link_with: shared)
+executable('flow', 'flow_main.cpp', 'flow.cpp', 'gpu_timers.cpp', shader_srcs, dependencies: [shareddep, epoxydep, sdl2dep, sdl2_imagedep])
+executable('eval', 'eval.cpp', 'util.cpp')
+executable('vis', 'vis.cpp', 'util.cpp')
diff --git a/futatabi/motion_search.frag b/futatabi/motion_search.frag
new file mode 100644 (file)
index 0000000..eb4f7c7
--- /dev/null
@@ -0,0 +1,184 @@
+#version 450 core
+
+/*
+  The motion search is one of the two major components of DIS. It works more or less
+  like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
+  a grid, and for each patch, there's a search to try to find the most similar patch
+  in the other frame.
+
+  Unlike in a typical video codec, the DIS patch search is based on gradient descent;
+  conceptually, you start with an initial guess (the value from the previous level,
+  or the zero flow for the very first level), subtract the reference (“template”)
+  patch from the candidate, look at the gradient to see in what direction there is
+  a lower difference, and then inch a bit toward that direction. (There is seemingly
+  nothing like Adam, Momentum or similar, but the searched value is only in two
+  dimensions, so perhaps it doesn't matter as much.)
+
+  DIS does a tweak to this concept. Since the procedure as outlined above requires
+  computing the gradient of the candidate patch, it uses the reference patch as
+  candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
+  in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
+  all, the two patches are supposed to be quite similar, so their surroundings and
+  thus also gradients should also be quite similar.) It's not entirely clear whether
+  this is still a win on GPU, where calculations are much cheaper, especially
+  the way we parallelize the search, but we've kept it around for now.
+
+  The inverse search is explained and derived in the supplementary material of the
+  paper, section A. Do note that there's a typo; the text under equation 9 claims
+  that the matrix H is n x n (where presumably n is the patch size), while in reality,
+  it's 2x2.
+
+  Our GPU parallelization is fairly dumb right now; we do one patch per fragment
+  (ie., parallelize only over patches, not within each patch), which may not
+  be optimal. In particular, in the initial level, we only have 40 patches,
+  which is on the low side for a GPU, and the memory access patterns may also not
+  be ideal.
+ */
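+
+/*
+  For reference, a sketch of the math the loop below implements: with template
+  patch T, warped candidate I_w and per-pixel gradient S, each iteration is a
+  plain Gauss-Newton step on the sum-of-squared-differences cost:
+
+    du = sum over patch of S^T (I_w - T)   (plus the mean normalization
+                                            explained inside main())
+    u  = u - H^-1 du,   where H = sum over patch of S^T S
+ */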
+
+in vec3 flow_tc;
+in vec2 patch_center;
+flat in int ref_layer, search_layer;
+out vec3 out_flow;
+
+uniform sampler2DArray flow_tex, image_tex;
+uniform usampler2DArray grad_tex;  // Also contains the corresponding reference image.
+uniform vec2 inv_image_size, inv_prev_level_size;
+uniform uint patch_size;
+uniform uint num_iterations;
+
+vec3 unpack_gradients(uint v)
+{
+       uint vi = v & 0xffu;
+       uint xi = (v >> 8) & 0xfffu;
+       uint yi = v >> 20;
+       vec3 r = vec3(xi * (1.0f / 4095.0f) - 0.5f, yi * (1.0f / 4095.0f) - 0.5f, vi * (1.0f / 255.0f));
+       return r;
+}
+
+// Note: The third variable is the actual pixel value.
+vec3 get_gradients(vec3 tc)
+{
+       vec3 grad = unpack_gradients(texture(grad_tex, tc).x);
+
+       // Zero gradients outside the image. (We'd do this with a sampler,
+       // but we want the repeat behavior for the actual texels, in the
+       // z channel.)
+       if (any(lessThan(tc.xy, vec2(0.0f))) || any(greaterThan(tc.xy, vec2(1.0f)))) {
+               grad.xy = vec2(0.0f);
+       }
+
+       return grad;
+}
+
+void main()
+{
+       vec2 image_size = textureSize(grad_tex, 0).xy;
+
+       // Lock the patch center to an integer, so that we never get
+       // any bilinear artifacts for the gradient. (NOTE: This assumes an
+       // even patch size.) Then calculate the bottom-left texel of the patch.
+       vec2 base = (round(patch_center * image_size) - (0.5f * patch_size - 0.5f))
+               * inv_image_size;
+
+       // First, precompute the pseudo-Hessian for the template patch.
+       // This is the part where we really save by the inverse search
+       // (ie., we can compute it up-front instead of anew for each
+       // patch).
+       //
+       //  H = sum(S^T S)
+       //
+       // where S is the gradient at each point in the patch. Note that
+       // this is an outer product, so we get a (symmetric) 2x2 matrix,
+       // not a scalar.
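+       //
+       // Written out, with S = (g_x, g_y) at each pixel:
+       //
+       //   H = [ sum g_x^2     sum g_x g_y ]
+       //       [ sum g_x g_y   sum g_y^2   ]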
+       mat2 H = mat2(0.0f);
+       vec2 grad_sum = vec2(0.0f);  // Used for patch normalization.
+       float template_sum = 0.0f;
+       for (uint y = 0; y < patch_size; ++y) {
+               for (uint x = 0; x < patch_size; ++x) {
+                       vec2 tc = base + uvec2(x, y) * inv_image_size;
+                       vec3 grad = get_gradients(vec3(tc, ref_layer));
+                       H[0][0] += grad.x * grad.x;
+                       H[1][1] += grad.y * grad.y;
+                       H[0][1] += grad.x * grad.y;
+
+                       template_sum += grad.z;  // The actual template pixel value.
+                       grad_sum += grad.xy;
+               }
+       }
+       H[1][0] = H[0][1];
+
+       // Make sure we don't get a singular matrix even if e.g. the picture is
+       // all black. (The paper doesn't mention this, but the reference code
+       // does it, and it seems like a reasonable hack to avoid NaNs. With such
+       // an H, we'll go out-of-bounds pretty soon, though.)
+       if (determinant(H) < 1e-6) {
+               H[0][0] += 1e-6;
+               H[1][1] += 1e-6;
+       }
+
+       mat2 H_inv = inverse(H);
+
+       // Fetch the initial guess for the flow, and convert from the previous size to this one.
+       vec2 initial_u = texture(flow_tex, flow_tc).xy * (image_size * inv_prev_level_size);
+       vec2 u = initial_u;
+       float mean_diff, first_mean_diff;
+
+       for (uint i = 0; i < num_iterations; ++i) {
+               vec2 du = vec2(0.0, 0.0);
+               float warped_sum = 0.0f;
+               vec2 u_norm = u * inv_image_size;  // In [0..1] coordinates instead of pixels.
+               for (uint y = 0; y < patch_size; ++y) {
+                       for (uint x = 0; x < patch_size; ++x) {
+                               vec2 tc = base + uvec2(x, y) * inv_image_size;
+                               vec3 grad = get_gradients(vec3(tc, ref_layer));
+                               float t = grad.z;
+                               float warped = texture(image_tex, vec3(tc + u_norm, search_layer)).x;
+                               du += grad.xy * (warped - t);
+                               warped_sum += warped;
+                       }
+               }
+
+               // Subtract the mean for patch normalization. We've done our
+               // sums without subtracting the means (because we didn't know them
+               // beforehand), ie.:
+               //
+               //   sum(S^T * ((x + µ1) - (y + µ2))) = sum(S^T * (x - y)) + (µ1 - µ2) sum(S^T)
+               //
+               // which gives trivially
+               //
+               //   sum(S^T * (x - y)) = [what we calculated] - (µ1 - µ2) sum(S^T)
+               //
+               // so we can just subtract away the mean difference here.
+               mean_diff = (warped_sum - template_sum) * (1.0 / float(patch_size * patch_size));
+               du -= grad_sum * mean_diff;
+
+               if (i == 0) {
+                       first_mean_diff = mean_diff;
+               }
+
+               // Do the actual update.
+               u -= H_inv * du;
+       }
+
+       // Reject if we moved too far. Note that the paper says “too far” is the
+       // patch size, but the DIS code uses half of a patch size. The latter seems
+       // to give much better overall results.
+       //
+       // Also reject if the patch goes out-of-bounds (the paper does not mention this,
+       // but the code does, and it seems to be critical to avoid really bad behavior
+       // at the edges).
+       vec2 patch_center = (base * image_size - 0.5f) + patch_size * 0.5f + u;
+       if (length(u - initial_u) > (patch_size * 0.5f) ||
+           patch_center.x < -(patch_size * 0.5f) ||
+           image_size.x - patch_center.x < -(patch_size * 0.5f) ||
+           patch_center.y < -(patch_size * 0.5f) ||
+           image_size.y - patch_center.y < -(patch_size * 0.5f)) {
+               u = initial_u;
+               mean_diff = first_mean_diff;
+       }
+
+       // NOTE: The mean patch diff will be for the second-to-last patch,
+       // not the true position of du. But hopefully, it will be very close.
+       u *= inv_image_size;
+       out_flow = vec3(u.x, u.y, mean_diff);
+}
diff --git a/futatabi/motion_search.vert b/futatabi/motion_search.vert
new file mode 100644 (file)
index 0000000..d023276
--- /dev/null
@@ -0,0 +1,47 @@
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 flow_tc;
+out vec2 patch_center;
+flat out int ref_layer, search_layer;
+
+uniform sampler2DArray flow_tex;
+uniform vec2 out_flow_size;
+
+void main()
+{
+       // Patch placement: We want the outermost patches to have centers exactly in the
+       // image corners, so that the bottom-left patch has center (0,0) and the
+       // upper-right patch has center (1,1). The position we get in is _almost_ there;
+       // since the quad's corners are in (0,0) and (1,1), the fragment shader will get
+       // centers in x=0.5/w, x=1.5/w and so on (and similar for y).
+       //
+       // In other words, find some f(x) = ax + b so that
+       //
+       //   a 0.5 / w + b = 0
+       //   a (1.0 - 0.5 / w) + b = 1
+       //
+       // which gives
+       //
+       //   a = w / (w - 1)
+       //   b = -1 / (2 (w - 1))
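+       //
+       // Sanity check with w = 4: a = 4/3 and b = -1/6, so x = 0.5/4 = 0.125
+       // maps to 4/3 * 0.125 - 1/6 = 0, and x = 0.875 maps to
+       // 4/3 * 0.875 - 1/6 = 1, as intended.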
+       vec2 a = out_flow_size / (out_flow_size - 1);
+       vec2 b = -1.0 / (2 * (out_flow_size - 1.0));
+       patch_center = a * position + b;
+
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       flow_tc = vec3(position, gl_InstanceID);
+
+       gl_Layer = gl_InstanceID;
+
+       // Forward flow (0) goes from 0 to 1. Backward flow (1) goes from 1 to 0.
+       ref_layer = gl_InstanceID;
+       search_layer = 1 - gl_InstanceID;
+}
diff --git a/futatabi/player.cpp b/futatabi/player.cpp
new file mode 100644 (file)
index 0000000..b88aa56
--- /dev/null
@@ -0,0 +1,451 @@
+#include "player.h"
+
+#include "clip_list.h"
+#include "shared/context.h"
+#include "defs.h"
+#include "shared/ffmpeg_raii.h"
+#include "frame_on_disk.h"
+#include "shared/httpd.h"
+#include "jpeg_frame_view.h"
+#include "shared/mux.h"
+#include "shared/timebase.h"
+#include "video_stream.h"
+
+#include <algorithm>
+#include <chrono>
+#include <condition_variable>
+#include <movit/util.h>
+#include <mutex>
+#include <stdio.h>
+#include <thread>
+#include <vector>
+
+using namespace std;
+using namespace std::chrono;
+
+extern HTTPD *global_httpd;
+
+void Player::thread_func(bool also_output_to_stream)
+{
+       pthread_setname_np(pthread_self(), "Player");
+
+       QSurface *surface = create_surface();
+       QOpenGLContext *context = create_context(surface);
+       if (!make_current(context, surface)) {
+               printf("oops\n");
+               exit(1);
+       }
+
+       check_error();
+
+       // Create the VideoStream object, now that we have an OpenGL context.
+       if (also_output_to_stream) {
+               video_stream.reset(new VideoStream);
+               video_stream->start();
+       }
+
+       check_error();
+
+       constexpr double output_framerate = 60000.0 / 1001.0;  // FIXME: make configurable
+       int64_t pts = 0;
+       Clip next_clip;
+       size_t next_clip_idx = size_t(-1);
+       bool got_next_clip = false;
+       double next_clip_fade_time = -1.0;
+
+       for ( ;; ) {
+wait_for_clip:
+               bool clip_ready;
+               steady_clock::time_point before_sleep = steady_clock::now();
+
+               // Wait until we're supposed to play something.
+               {
+                       unique_lock<mutex> lock(queue_state_mu);
+                       clip_ready = new_clip_changed.wait_for(lock, milliseconds(100), [this] {
+                               return new_clip_ready && current_clip.pts_in != -1;
+                       });
+                       new_clip_ready = false;
+                       playing = true;
+               }
+
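+               // Advance the output pts by however long we actually slept,
+               // so that the stream's timeline keeps tracking wall-clock time
+               // even when nothing is playing.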
+               steady_clock::duration time_slept = steady_clock::now() - before_sleep;
+               pts += duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+
+               if (!clip_ready) {
+                       if (video_stream != nullptr) {
+                               video_stream->schedule_refresh_frame(steady_clock::now(), pts, /*display_func=*/nullptr, QueueSpotHolder());
+                       }
+                       continue;
+               }
+
+               Clip clip;
+               size_t clip_idx;
+               unsigned stream_idx;
+               {
+                       lock_guard<mutex> lock(mu);
+                       clip = current_clip;
+                       clip_idx = current_clip_idx;
+                       stream_idx = current_stream_idx;
+               }
+               steady_clock::time_point origin = steady_clock::now();  // TODO: Add a 100 ms buffer for ramp-up?
+               int64_t in_pts_origin = clip.pts_in;
+got_clip:
+               int64_t out_pts_origin = pts;
+
+               // Start playing exactly at a frame.
+               // TODO: Snap secondary (fade-to) clips in the same fashion
+               // so that we don't get jank here.
+               {
+                       lock_guard<mutex> lock(frame_mu);
+
+                       // Find the first frame such that frame.pts >= in_pts.
+                       auto it = lower_bound(frames[stream_idx].begin(),
+                               frames[stream_idx].end(),
+                               in_pts_origin,
+                               [](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+                       if (it != frames[stream_idx].end()) {
+                               in_pts_origin = it->pts;
+                       }
+               }
+
+               // TODO: Lock to a rational multiple of the frame rate if possible.
+               double speed = 0.5;
+
+               int64_t in_pts_start_next_clip = -1;
+               for (int frameno = 0; ; ++frameno) {  // Ends when the clip ends.
+                       double out_pts = out_pts_origin + TIMEBASE * frameno / output_framerate;
+                       steady_clock::time_point next_frame_start =
+                               origin + microseconds(lrint((out_pts - out_pts_origin) * 1e6 / TIMEBASE));
+                       int64_t in_pts = lrint(in_pts_origin + TIMEBASE * frameno * speed / output_framerate);
+                       pts = lrint(out_pts);
+
+                       if (in_pts >= clip.pts_out) {
+                               break;
+                       }
+
+                       steady_clock::duration time_behind = steady_clock::now() - next_frame_start;
+                       if (time_behind >= milliseconds(200)) {
+                               fprintf(stderr, "WARNING: %ld ms behind, dropping a frame (no matter the type).\n",
+                                       lrint(1e3 * duration<double>(time_behind).count()));
+                               continue;
+                       }
+
+                       double time_left_this_clip = double(clip.pts_out - in_pts) / TIMEBASE / speed;
+                       if (!got_next_clip && next_clip_callback != nullptr && time_left_this_clip <= clip.fade_time_seconds) {
+                               // Find the next clip so that we can begin a fade.
+                               tie(next_clip, next_clip_idx) = next_clip_callback();
+                               if (next_clip.pts_in != -1) {
+                                       got_next_clip = true;
+
+                                       double duration_next_clip = double(next_clip.pts_out - next_clip.pts_in) / TIMEBASE / speed;
+                                       next_clip_fade_time = std::min(time_left_this_clip, duration_next_clip);
+                                       in_pts_start_next_clip = next_clip.pts_in + lrint(next_clip_fade_time * TIMEBASE * speed);
+                               }
+                       }
+
+                       // pts not affected by the swapping below.
+                       int64_t in_pts_for_progress = in_pts, in_pts_secondary_for_progress = -1;
+
+                       int primary_stream_idx = stream_idx;
+                       FrameOnDisk secondary_frame;
+                       int secondary_stream_idx = -1;
+                       float fade_alpha = 0.0f;
+                       if (got_next_clip && time_left_this_clip <= next_clip_fade_time) {
+                               secondary_stream_idx = next_clip.stream_idx;
+                               int64_t in_pts_secondary = lrint(next_clip.pts_in + (next_clip_fade_time - time_left_this_clip) * TIMEBASE * speed);
+                               in_pts_secondary_for_progress = in_pts_secondary;
+                               fade_alpha = 1.0f - time_left_this_clip / next_clip_fade_time;
+
+                               // If more than half-way through the fade, interpolate the next clip
+                               // instead of the current one, since it's more visible.
+                               if (fade_alpha >= 0.5f) {
+                                       swap(primary_stream_idx, secondary_stream_idx);
+                                       swap(in_pts, in_pts_secondary);
+                                       fade_alpha = 1.0f - fade_alpha;
+                               }
+
+                               FrameOnDisk frame_lower, frame_upper;
+                               bool ok = find_surrounding_frames(in_pts_secondary, secondary_stream_idx, &frame_lower, &frame_upper);
+                               if (ok) {
+                                       secondary_frame = frame_lower;
+                               }
+                       }
+
+                       if (progress_callback != nullptr) {
+                               // NOTE: None of this will take into account any snapping done below.
+                               double played_this_clip = double(in_pts_for_progress - clip.pts_in) / TIMEBASE / speed;
+                               double total_length = double(clip.pts_out - clip.pts_in) / TIMEBASE / speed;
+                               map<size_t, double> progress{{ clip_idx, played_this_clip / total_length }};
+
+                               if (got_next_clip && time_left_this_clip <= next_clip_fade_time) {
+                                       double played_next_clip = double(in_pts_secondary_for_progress - next_clip.pts_in) / TIMEBASE / speed;
+                                       double total_next_length = double(next_clip.pts_out - next_clip.pts_in) / TIMEBASE / speed;
+                                       progress[next_clip_idx] = played_next_clip / total_next_length;
+                               }
+                               progress_callback(progress);
+                       }
+
+                       FrameOnDisk frame_lower, frame_upper;
+                       bool ok = find_surrounding_frames(in_pts, primary_stream_idx, &frame_lower, &frame_upper);
+                       if (!ok) {
+                               break;
+                       }
+
+                       {
+                               unique_lock<mutex> lock(queue_state_mu);
+                               if (video_stream == nullptr) {
+                                       // No queue, just wait until the right time and then show the frame.
+                                       new_clip_changed.wait_until(lock, next_frame_start, [this]{
+                                               return new_clip_ready || override_stream_idx != -1;
+                                       });
+                               } else {
+                                       // If the queue is full (which is really the state we'd like to be in),
+                                       // wait until there's room for one more frame (ie., one was output from
+                                       // VideoStream), or until there's a new clip we're supposed to play.
+                                       //
+                                       // In this case, we don't sleep until next_frame_start; the displaying is
+                                       // done by the queue.
+                                       new_clip_changed.wait(lock, [this]{
+                                               if (num_queued_frames < max_queued_frames) {
+                                                       return true;
+                                               }
+                                               return new_clip_ready || override_stream_idx != -1;
+                                       });
+                               }
+                               if (new_clip_ready) {
+                                       if (video_stream != nullptr) {
+                                               lock.unlock();  // Urg.
+                                               video_stream->clear_queue();
+                                               lock.lock();
+                                       }
+                                       got_next_clip = false;
+                                       goto wait_for_clip;
+                               }
+                               if (override_stream_idx != -1) {
+                                       stream_idx = override_stream_idx;
+                                       override_stream_idx = -1;
+                                       continue;
+                               }
+                       }
+
+                       if (frame_lower.pts == frame_upper.pts) {
+                               auto display_func = [this, primary_stream_idx, frame_lower, secondary_frame, fade_alpha]{
+                                       destination->setFrame(primary_stream_idx, frame_lower, secondary_frame, fade_alpha);
+                               };
+                               if (video_stream == nullptr) {
+                                       display_func();
+                               } else {
+                                       if (secondary_stream_idx == -1) {
+                                               video_stream->schedule_original_frame(
+                                                       next_frame_start, pts, display_func, QueueSpotHolder(this),
+                                                       frame_lower);
+                                       } else {
+                                               assert(secondary_frame.pts != -1);
+                                               video_stream->schedule_faded_frame(next_frame_start, pts, display_func,
+                                                       QueueSpotHolder(this), frame_lower,
+                                                       secondary_frame, fade_alpha);
+                                       }
+                               }
+                               continue;
+                       }
+
+                       // Snap to input frame: If we can do so with less than 1% jitter
+                       // (ie., move less than 1% of an _output_ frame), do so.
+                       // TODO: Snap secondary (fade-to) clips in the same fashion.
+                       bool snapped = false;
+                       for (int64_t snap_pts : { frame_lower.pts, frame_upper.pts }) {
+                               double snap_pts_as_frameno = (snap_pts - in_pts_origin) * output_framerate / TIMEBASE / speed;
+                               if (fabs(snap_pts_as_frameno - frameno) < 0.01) {
+                                       FrameOnDisk snap_frame = frame_lower;
+                                       snap_frame.pts = snap_pts;
+                                       auto display_func = [this, primary_stream_idx, snap_frame, secondary_frame, fade_alpha]{
+                                               destination->setFrame(primary_stream_idx, snap_frame, secondary_frame, fade_alpha);
+                                       };
+                                       if (video_stream == nullptr) {
+                                               display_func();
+                                       } else {
+                                               if (secondary_stream_idx == -1) {
+                                                       video_stream->schedule_original_frame(
+                                                               next_frame_start, pts, display_func,
+                                                               QueueSpotHolder(this), snap_frame);
+                                               } else {
+                                                       assert(secondary_frame.pts != -1);
+                                                       video_stream->schedule_faded_frame(
+                                                               next_frame_start, pts, display_func, QueueSpotHolder(this),
+                                                               snap_frame, secondary_frame, fade_alpha);
+                                               }
+                                       }
+                                       in_pts_origin += snap_pts - in_pts;
+                                       snapped = true;
+                                       break;
+                               }
+                       }
+                       if (snapped) {
+                               continue;
+                       }
+
+                       if (time_behind >= milliseconds(100)) {
+                               fprintf(stderr, "WARNING: %ld ms behind, dropping an interpolated frame.\n",
+                                       lrint(1e3 * duration<double>(time_behind).count()));
+                               continue;
+                       }
+
+                       double alpha = double(in_pts - frame_lower.pts) / (frame_upper.pts - frame_lower.pts);
+
+                       if (video_stream == nullptr) {
+                               // Previews don't do any interpolation.
+                               assert(secondary_stream_idx == -1);
+                               destination->setFrame(primary_stream_idx, frame_lower);
+                       } else {
+                               auto display_func = [this](shared_ptr<Frame> frame) {
+                                       destination->setFrame(frame);
+                               };
+                               video_stream->schedule_interpolated_frame(
+                                       next_frame_start, pts, display_func, QueueSpotHolder(this),
+                                       frame_lower, frame_upper, alpha,
+                                       secondary_frame, fade_alpha);
+                       }
+               }
+
+               // The clip ended.
+
+               // Last-ditch effort to get the next clip (if e.g. the fade time was zero seconds).
+               if (!got_next_clip && next_clip_callback != nullptr) {
+                       tie(next_clip, next_clip_idx) = next_clip_callback();
+                       if (next_clip.pts_in != -1) {
+                               got_next_clip = true;
+                               in_pts_start_next_clip = next_clip.pts_in;
+                       }
+               }
+
+               // Switch to next clip if we got it.
+               if (got_next_clip) {
+                       clip = next_clip;
+                       clip_idx = next_clip_idx;
+                       stream_idx = next_clip.stream_idx;  // Override is used for previews only, and next_clip is used for live only.
+                       if (done_callback != nullptr) {
+                               done_callback();
+                       }
+                       got_next_clip = false;
+
+                       // Start the next clip from the point where the fade went out.
+                       origin = steady_clock::now();
+                       in_pts_origin = in_pts_start_next_clip;
+                       goto got_clip;
+               }
+
+               {
+                       unique_lock<mutex> lock(queue_state_mu);
+                       playing = false;
+               }
+               if (done_callback != nullptr) {
+                       done_callback();
+               }
+       }
+}
+
+// Find the frame immediately before and after this point.
+bool Player::find_surrounding_frames(int64_t pts, int stream_idx, FrameOnDisk *frame_lower, FrameOnDisk *frame_upper)
+{
+       lock_guard<mutex> lock(frame_mu);
+
+       // Find the first frame such that frame.pts >= pts.
+       auto it = lower_bound(frames[stream_idx].begin(),
+               frames[stream_idx].end(),
+               pts,
+               [](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+       if (it == frames[stream_idx].end()) {
+               return false;
+       }
+       *frame_upper = *it;
+
+       // Find the last frame such that frame.pts <= pts (if any).
+       if (it == frames[stream_idx].begin()) {
+               *frame_lower = *it;
+       } else {
+               *frame_lower = *(it - 1);
+       }
+       assert(pts >= frame_lower->pts);
+       assert(pts <= frame_upper->pts);
+       return true;
+}
+
+Player::Player(JPEGFrameView *destination, bool also_output_to_stream)
+       : destination(destination)
+{
+       thread(&Player::thread_func, this, also_output_to_stream).detach();
+}
+
+void Player::play_clip(const Clip &clip, size_t clip_idx, unsigned stream_idx)
+{
+       {
+               lock_guard<mutex> lock(mu);
+               current_clip = clip;
+               current_stream_idx = stream_idx;
+               current_clip_idx = clip_idx;
+       }
+
+       {
+               lock_guard<mutex> lock(queue_state_mu);
+               new_clip_ready = true;
+               override_stream_idx = -1;
+               new_clip_changed.notify_all();
+       }
+}
+
+void Player::override_angle(unsigned stream_idx)
+{
+       // Corner case: If a new clip is waiting to be played, change its stream and then we're done.
+       {
+               unique_lock<mutex> lock(queue_state_mu);
+               if (new_clip_ready) {
+                       lock_guard<mutex> lock2(mu);
+                       current_stream_idx = stream_idx;
+                       return;
+               }
+       }
+
+       // If we are playing a clip, set override_stream_idx, and the player thread will
+       // pick it up and change its internal index.
+       {
+               unique_lock<mutex> lock(queue_state_mu);
+               if (playing) {
+                       override_stream_idx = stream_idx;
+                       new_clip_changed.notify_all();
+               }
+       }
+
+       // OK, so we're standing still, presumably at the end of a clip.
+       // Look at the current pts_out (if it exists), and show the closest
+       // thing we've got.
+       int64_t pts_out;
+       {
+               lock_guard<mutex> lock(mu);
+               if (current_clip.pts_out < 0) {
+                       return;
+               }
+               pts_out = current_clip.pts_out;
+       }
+
+       lock_guard<mutex> lock(frame_mu);
+       auto it = upper_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts_out,
+               [](int64_t pts, const FrameOnDisk &frame) { return pts < frame.pts; });
+       if (it == frames[stream_idx].end()) {
+               return;
+       }
+       destination->setFrame(stream_idx, *it);
+}
+
+void Player::take_queue_spot()
+{
+       unique_lock<mutex> lock(queue_state_mu);
+       ++num_queued_frames;
+}
+
+void Player::release_queue_spot()
+{
+       unique_lock<mutex> lock(queue_state_mu);
+       assert(num_queued_frames > 0);
+       --num_queued_frames;
+       new_clip_changed.notify_all();
+}
diff --git a/futatabi/player.h b/futatabi/player.h
new file mode 100644 (file)
index 0000000..c7f8e07
--- /dev/null
@@ -0,0 +1,82 @@
+#ifndef _PLAYER_H
+#define _PLAYER_H 1
+
+#include "clip_list.h"
+#include "frame_on_disk.h"
+#include "queue_spot_holder.h"
+
+extern "C" {
+#include <libavformat/avio.h>
+}
+
+#include <condition_variable>
+#include <functional>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <utility>
+
+class JPEGFrameView;
+class VideoStream;
+class QSurface;
+class QSurfaceFormat;
+
+class Player : public QueueInterface {
+public:
+       Player(JPEGFrameView *destination, bool also_output_to_stream);
+
+       void play_clip(const Clip &clip, size_t clip_idx, unsigned stream_idx);
+       void override_angle(unsigned stream_idx);  // For the current clip only.
+
+       // Not thread-safe to set concurrently with playing.
+       // Will be called back from the player thread.
+       using done_callback_func = std::function<void()>;
+       void set_done_callback(done_callback_func cb) { done_callback = cb; }
+
+       // Not thread-safe to set concurrently with playing.
+       // Will be called back from the player thread.
+       // The second element of the returned pair is the clip's position in the play list.
+       using next_clip_callback_func = std::function<std::pair<Clip, size_t>()>;
+       void set_next_clip_callback(next_clip_callback_func cb) { next_clip_callback = cb; }
+
+       // Not thread-safe to set concurrently with playing.
+       // Will be called back from the player thread.
+       using progress_callback_func = std::function<void(const std::map<size_t, double> &progress)>;
+       void set_progress_callback(progress_callback_func cb) { progress_callback = cb; }
+
+       // QueueInterface.
+       void take_queue_spot() override;
+       void release_queue_spot() override;
+
+private:
+       void thread_func(bool also_output_to_stream);
+       void open_output_stream();
+       static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+       int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+       // Find the frame immediately before and after this point.
+       // Returns false if pts is after the last frame.
+       bool find_surrounding_frames(int64_t pts, int stream_idx, FrameOnDisk *frame_lower, FrameOnDisk *frame_upper);
+
+       JPEGFrameView *destination;
+       done_callback_func done_callback;
+       next_clip_callback_func next_clip_callback;
+       progress_callback_func progress_callback;
+
+       std::mutex mu;
+       Clip current_clip;  // Under mu. Can have pts_in = -1 for no clip.
+       size_t current_clip_idx;  // Under mu.
+       unsigned current_stream_idx;  // Under mu.
+
+       std::mutex queue_state_mu;
+       std::condition_variable new_clip_changed;
+       bool new_clip_ready = false;  // Under queue_state_mu.
+       bool playing = false;  // Under queue_state_mu.
+       int override_stream_idx = -1;  // Under queue_state_mu.
+
+       std::unique_ptr<VideoStream> video_stream;  // Can be nullptr.
+
+       // Under queue_state_mu. Part of this class instead of VideoStream, so that
+       // we own its lock and can sleep on it.
+       size_t num_queued_frames = 0;
+       static constexpr size_t max_queued_frames = 10;
+};
+
+#endif  // !defined(_PLAYER_H)
diff --git a/futatabi/prewarp.frag b/futatabi/prewarp.frag
new file mode 100644 (file)
index 0000000..baf24d1
--- /dev/null
@@ -0,0 +1,22 @@
+#version 450 core
+
+// Warps I_1 according to the flow, then computes the mean and difference to I_0.
+
+in vec3 tc;
+out float I, I_t;
+out vec2 normalized_flow;
+
+uniform sampler2DArray image_tex, flow_tex;
+
+void main()
+{
+       vec3 flow = texture(flow_tex, tc).xyz;
+       flow.xy /= flow.z;  // Normalize the sum coming out of the densification.
+
+       float I_0 = texture(image_tex, tc).x;
+       float I_w = texture(image_tex, vec3(tc.xy + flow.xy, 1.0f - tc.z)).x;  // NOTE: This is effectively a reverse warp since texture() is a gather operation and flow is conceptually scatter.
+
+       I = 0.5f * (I_0 + I_w);
+       I_t = I_w - I_0;
+       normalized_flow = flow.xy * textureSize(image_tex, 0).xy;
+}
diff --git a/futatabi/queue_spot_holder.h b/futatabi/queue_spot_holder.h
new file mode 100644 (file)
index 0000000..b9dee06
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef _QUEUE_SPOT_HOLDER
+#define _QUEUE_SPOT_HOLDER 1
+
+// A RAII class to hold a shared resource, in our case an (unordered!) spot in a queue,
+// for as long as a frame is under computation.
+
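+// A usage sketch (the names here are for illustration only):
+//
+//   QueueSpotHolder spot(player);          // takes a spot in the constructor
+//   schedule_frame(..., std::move(spot));  // the spot travels with the frame
+//
+// The spot is released when the (moved-to) holder is destroyed.
+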
+class QueueInterface {
+public:
+       virtual ~QueueInterface() {}
+       virtual void take_queue_spot() = 0;
+       virtual void release_queue_spot() = 0;
+};
+
+class QueueSpotHolder {
+public:
+       QueueSpotHolder() : queue(nullptr) {}
+
+       explicit QueueSpotHolder(QueueInterface *queue) : queue(queue) {
+               queue->take_queue_spot();
+       }
+
+       QueueSpotHolder(QueueSpotHolder &&other) : queue(other.queue) {
+               other.queue = nullptr;
+       }
+
+       QueueSpotHolder &operator=(QueueSpotHolder &&other) {
+               queue = other.queue;
+               other.queue = nullptr;
+               return *this;
+       }
+
+       ~QueueSpotHolder() {
+               if (queue != nullptr) {
+                       queue->release_queue_spot();
+               }
+       }
+
+       // Movable only.
+       QueueSpotHolder(QueueSpotHolder &) = delete;
+       QueueSpotHolder &operator=(QueueSpotHolder &) = delete;
+
+private:
+       QueueInterface *queue;
+};
+
+#endif // !defined(_QUEUE_SPOT_HOLDER)
diff --git a/futatabi/resize_flow.frag b/futatabi/resize_flow.frag
new file mode 100644 (file)
index 0000000..4efc975
--- /dev/null
@@ -0,0 +1,12 @@
+#version 450 core
+
+in vec3 tc;
+out vec2 flow;
+
+uniform sampler2DArray flow_tex;
+uniform vec2 scale_factor;
+
+void main()
+{
+       flow = texture(flow_tex, tc).xy * scale_factor;
+}
diff --git a/futatabi/sobel.frag b/futatabi/sobel.frag
new file mode 100644 (file)
index 0000000..8c5c6ee
--- /dev/null
@@ -0,0 +1,64 @@
+#version 450 core
+
+in vec3 tc;
+out uint packed_gradients;
+
+uniform sampler2DArray tex;
+
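+// Bit layout (the inverse of unpack_gradients() in motion_search.frag):
+//
+//   bits  0..7   pixel value, 8-bit fixed point
+//   bits  8..19  x gradient, 12-bit fixed point, biased so that -0.5 maps to 0
+//   bits 20..31  y gradient, 12-bit fixed point, biased the same way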
+uint pack_gradients(float x, float y, float v)
+{
+       x = clamp(x, -0.5f, 0.5f);
+       y = clamp(y, -0.5f, 0.5f);
+
+       uint vi = uint(round(v * 255.0f));
+       uint xi = uint(round((x + 0.5f) * 4095.0f));
+       uint yi = uint(round((y + 0.5f) * 4095.0f));
+       return vi | (xi << 8) | (yi << 20);
+}
+
+void main()
+{
+       // There are two common Sobel filters, horizontal and vertical
+       // (see e.g. Wikipedia, or the OpenCV documentation):
+       //
+       //  [1 0 -1]     [-1 -2 -1]
+       //  [2 0 -2]     [ 0  0  0]
+       //  [1 0 -1]     [ 1  2  1]
+       // Horizontal     Vertical
+       //
+       // Note that Wikipedia and OpenCV give entirely opposite definitions
+       // with regards to sign! This appears to be an error in the OpenCV
+       // documentation, forgetting that for convolution, the filters must be
+       // flipped. We have to flip the vertical matrix again compared to
+       // Wikipedia, though, since we have bottom-left origin (y = up)
+       // and they define y as pointing downwards.
+       //
+       // Computing both directions at once allows us to get away with eight
+       // texture samples instead of twelve.
+
+       float top_left     = textureOffset(tex, tc, ivec2(-1,  1)).x;  // Note the bottom-left coordinate system.
+       float left         = textureOffset(tex, tc, ivec2(-1,  0)).x;
+       float bottom_left  = textureOffset(tex, tc, ivec2(-1, -1)).x;
+
+       float top          = textureOffset(tex, tc, ivec2( 0,  1)).x;
+       float bottom       = textureOffset(tex, tc, ivec2( 0, -1)).x;
+
+       float top_right    = textureOffset(tex, tc, ivec2( 1,  1)).x;
+       float right        = textureOffset(tex, tc, ivec2( 1,  0)).x;
+       float bottom_right = textureOffset(tex, tc, ivec2( 1, -1)).x;
+
+       vec2 gradients;
+       gradients.x = (top_right + 2.0f * right + bottom_right) - (top_left + 2.0f * left + bottom_left);
+       gradients.y = (top_left + 2.0f * top + top_right) - (bottom_left + 2.0f * bottom + bottom_right);
+
+       // Normalize so that we have a normalized unit of intensity levels per pixel.
+       gradients.x *= 0.125;
+       gradients.y *= 0.125;
+
+       // Also store the actual pixel value, so that we get it “for free”
+       // when we sample the gradients in motion_search.frag later.
+       float center = texture(tex, tc).x;
+
+       // Pack everything into a single 32-bit value, using simple fixed-point.
+       packed_gradients = pack_gradients(gradients.x, gradients.y, center);
+}
diff --git a/futatabi/sor.frag b/futatabi/sor.frag
new file mode 100644 (file)
index 0000000..9a8e1e4
--- /dev/null
@@ -0,0 +1,103 @@
+#version 450 core
+
+in vec3 tc, tc_left, tc_down;
+in vec3 equation_tc_assuming_left, equation_tc_assuming_right;
+in float element_x_idx, element_sum_idx;
+out vec2 diff_flow;
+
+uniform sampler2DArray diff_flow_tex, diffusivity_tex;
+uniform usampler2DArray equation_red_tex, equation_black_tex;
+uniform int phase;
+
+uniform int num_nonzero_phases;
+
+// See pack_floats_shared() in equations.frag.
+vec2 unpack_floats_shared(uint c)
+{
+       // Recover the exponent, and multiply it in. Add one because
+       // we have denormalized mantissas, then another one because we
+       // already reduced the exponent by one. Then subtract 20, because
+       // we are going to shift up the number by 20 below to recover the sign bits.
+       float normalizer = uintBitsToFloat(((c >> 1) & 0x7f800000u) - (18 << 23));
+       normalizer *= (1.0 / 2047.0);
+
+       // Shift the values up so that we recover the sign bit, then normalize.
+       float a = int(uint(c & 0x000fffu) << 20) * normalizer;
+       float b = int(uint(c & 0xfff000u) << 8) * normalizer;
+
+       return vec2(a, b);
+}
+
+float zero_if_outside_border(vec4 val)
+{
+       if (val.w < 1.0f) {
+               // We hit the border (or more like half-way to it), so zero smoothness.
+               return 0.0f;
+       } else {
+               return val.x;
+       }
+}
+
+void main()
+{
+       // Red-black SOR: Every other pass, we update every other element in a
+       // checkerboard pattern. This is rather suboptimal for the GPU, as it
+       // just immediately throws away half of the warp, but it helps convergence
+       // a _lot_ (rough testing indicates that five iterations of SOR is as good
+       // as ~50 iterations of Jacobi). We could probably do better by reorganizing
+       // the data into two-values-per-pixel, so-called “twinned buffering”;
+       // seemingly, it helps Haswell by ~15% on the SOR code, but GTX 950 not at all
+       // (at least not on 720p). Presumably the latter is already bandwidth bound.
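+       //
+       // For reference, the 2x2 update applied at the bottom is the standard
+       // SOR step
+       //
+       //   du += omega * ((b.x - A12 * dv) * inv_A11 - du)
+       //   dv += omega * ((b.y - A12 * du) * inv_A22 - dv)
+       //
+       // where the second line already uses the freshly updated du
+       // (Gauss-Seidel style).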
+       int color = int(round(element_sum_idx)) & 1;
+       if (color != phase) discard;
+
+       uvec4 equation;
+       vec3 equation_tc;
+       if ((int(round(element_x_idx)) & 1) == 0) {
+               equation_tc = equation_tc_assuming_left;
+       } else {
+               equation_tc = equation_tc_assuming_right;
+       }
+       if (phase == 0) {
+               equation = texture(equation_red_tex, equation_tc);
+       } else {
+               equation = texture(equation_black_tex, equation_tc);
+       }
+       float inv_A11 = uintBitsToFloat(equation.x);
+       float A12 = uintBitsToFloat(equation.y);
+       float inv_A22 = uintBitsToFloat(equation.z);
+       vec2 b = unpack_floats_shared(equation.w);
+
+       const float omega = 1.8;  // Marginally better than 1.6, it seems.
+
+       if (num_nonzero_phases == 0) {
+               // Simplified version of the code below, assuming diff_flow == 0.0f everywhere.
+               diff_flow.x = omega * b.x * inv_A11;
+               diff_flow.y = omega * b.y * inv_A22;
+       } else {
+               // Subtract the missing terms from the right-hand side
+               // (it couldn't be done earlier, because we didn't know
+               // the values of the neighboring pixels; they change for
+               // each SOR iteration).
+               float smooth_l = zero_if_outside_border(texture(diffusivity_tex, tc_left));
+               float smooth_r = zero_if_outside_border(textureOffset(diffusivity_tex, tc_left, ivec2(1, 0)));
+               float smooth_d = zero_if_outside_border(texture(diffusivity_tex, tc_down));
+               float smooth_u = zero_if_outside_border(textureOffset(diffusivity_tex, tc_down, ivec2(0, 1)));
+               b += smooth_l * textureOffset(diff_flow_tex, tc, ivec2(-1,  0)).xy;
+               b += smooth_r * textureOffset(diff_flow_tex, tc, ivec2( 1,  0)).xy;
+               b += smooth_d * textureOffset(diff_flow_tex, tc, ivec2( 0, -1)).xy;
+               b += smooth_u * textureOffset(diff_flow_tex, tc, ivec2( 0,  1)).xy;
+
+               if (num_nonzero_phases == 1) {
+                       diff_flow = vec2(0.0f);
+               } else {
+                       diff_flow = texture(diff_flow_tex, tc).xy;
+               }
+
+               // From https://en.wikipedia.org/wiki/Successive_over-relaxation.
+               float sigma_u = A12 * diff_flow.y;
+               diff_flow.x += omega * ((b.x - sigma_u) * inv_A11 - diff_flow.x);
+               float sigma_v = A12 * diff_flow.x;
+               diff_flow.y += omega * ((b.y - sigma_v) * inv_A22 - diff_flow.y);
+       }
+}
diff --git a/futatabi/sor.vert b/futatabi/sor.vert
new file mode 100644 (file)
index 0000000..c68b1db
--- /dev/null
@@ -0,0 +1,43 @@
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 tc, tc_left, tc_down;
+out vec3 equation_tc_assuming_left, equation_tc_assuming_right;
+out float element_x_idx;
+out float element_sum_idx;
+
+uniform sampler2DArray diff_flow_tex, diffusivity_tex;
+uniform usampler2DArray equation_red_tex;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       gl_Layer = gl_InstanceID;
+
+       tc = vec3(position, gl_InstanceID);
+       tc_left = vec3(tc.x - 0.5f / textureSize(diffusivity_tex, 0).x, tc.y, gl_InstanceID);
+       tc_down = vec3(tc.x, tc.y - 0.5f / textureSize(diffusivity_tex, 0).y, gl_InstanceID);
+
+       // The equation textures have half the horizontal width, so we need to adjust the texel centers.
+       // It becomes extra tricky since the SOR texture might be of odd size, and then
+       // the equation texture is not exactly half the size.
+       vec2 element_idx = position * textureSize(diff_flow_tex, 0).xy - 0.5f;
+       float equation_texel_number_assuming_left = element_idx.x / 2.0f;
+       float equation_texel_number_assuming_right = (element_idx.x - 1.0f) / 2.0f;
+       equation_tc_assuming_left.x = (equation_texel_number_assuming_left + 0.5f) / textureSize(equation_red_tex, 0).x;
+       equation_tc_assuming_right.x = (equation_texel_number_assuming_right + 0.5f) / textureSize(equation_red_tex, 0).x;
+       equation_tc_assuming_left.y = tc.y;
+       equation_tc_assuming_right.y = tc.y;
+       equation_tc_assuming_left.z = gl_InstanceID;
+       equation_tc_assuming_right.z = gl_InstanceID;
+
+       element_x_idx = element_idx.x;
+       element_sum_idx = element_idx.x + element_idx.y;
+}
diff --git a/futatabi/splat.frag b/futatabi/splat.frag
new file mode 100644 (file)
index 0000000..6e873bc
--- /dev/null
@@ -0,0 +1,18 @@
+#version 450 core
+
+in vec2 image_pos;
+flat in vec2 flow, I_0_check_offset, I_1_check_offset;
+out vec2 out_flow;
+
+uniform sampler2DArray gray_tex;
+
+void main()
+{
+       out_flow = flow;
+
+       // TODO: Check if we are sampling out-of-image.
+       float I_0 = texture(gray_tex, vec3(image_pos + I_0_check_offset, 0)).r;
+       float I_1 = texture(gray_tex, vec3(image_pos + I_1_check_offset, 1)).r;
+       float diff = abs(I_1 - I_0);
+       gl_FragDepth = 0.125 * diff;  // Make sure we stay well under the 1.0 maximum.
+}
diff --git a/futatabi/splat.vert b/futatabi/splat.vert
new file mode 100644 (file)
index 0000000..0846231
--- /dev/null
@@ -0,0 +1,51 @@
+#version 450 core
+
+layout(location=0) in vec2 position;
+out vec2 image_pos;
+flat out vec2 flow, I_0_check_offset, I_1_check_offset;
+
+uniform vec2 splat_size;  // In 0..1 coordinates.
+uniform vec2 inv_flow_size;
+uniform float alpha;
+uniform sampler2DArray flow_tex;  // 0 = forward flow, 1 = backward flow.
+
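+// Each instance splats one small quad. Instances [0, W*H) take their flow
+// vector from the forward layer; instances [W*H, 2*W*H) take theirs from the
+// backward layer, with the flow negated and the splat alpha mirrored.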
+void main()
+{
+       int instance = gl_InstanceID;
+       int num_pixels_per_layer = textureSize(flow_tex, 0).x * textureSize(flow_tex, 0).y;
+       int src_layer;
+       if (instance >= num_pixels_per_layer) {
+               instance -= num_pixels_per_layer;
+               src_layer = 1;
+       } else {
+               src_layer = 0;
+       }
+       int x = instance % textureSize(flow_tex, 0).x;
+       int y = instance / textureSize(flow_tex, 0).x;
+
+       // Find out where to splat this to.
+       vec2 full_flow = texelFetch(flow_tex, ivec3(x, y, src_layer), 0).xy;
+       float splat_alpha;
+       if (src_layer == 1) {  // Reverse flow.
+               full_flow = -full_flow;
+               splat_alpha = 1.0f - alpha;
+       } else {
+               splat_alpha = alpha;
+       }
+       full_flow *= inv_flow_size;
+
+       vec2 patch_center = (ivec2(x, y) + 0.5) * inv_flow_size + full_flow * splat_alpha;
+       image_pos = patch_center + splat_size * (position - 0.5);
+
+       flow = full_flow;
+       I_0_check_offset = full_flow * -alpha;
+       I_1_check_offset = full_flow * (1.0f - alpha);
+
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * image_pos.x - 1.0, 2.0 * image_pos.y - 1.0, -1.0, 1.0);
+}
diff --git a/futatabi/state.proto b/futatabi/state.proto
new file mode 100644 (file)
index 0000000..d76bf02
--- /dev/null
@@ -0,0 +1,19 @@
+syntax = "proto3";
+
+// Corresponds to struct Clip.
+message ClipProto {
+       int64 pts_in = 1;
+       int64 pts_out = 2;
+       repeated string description = 3;
+       int64 stream_idx = 4;
+       double fade_time_seconds = 5;
+}
+
+message ClipListProto {
+       repeated ClipProto clip = 1;
+}
+
+message StateProto {
+       ClipListProto clip_list = 1;
+       ClipListProto play_list = 2;
+}
diff --git a/futatabi/util.cpp b/futatabi/util.cpp
new file mode 100644 (file)
index 0000000..061408d
--- /dev/null
@@ -0,0 +1,25 @@
+#include "util.h"
+
+#include <assert.h>
+#include <memory>
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+Flow read_flow(const char *filename)
+{
+	FILE *flowfp = fopen(filename, "rb");
+	if (flowfp == nullptr) {
+		perror(filename);
+		abort();
+	}
+
+	uint32_t hdr, width, height;
+	fread(&hdr, sizeof(hdr), 1, flowfp);  // File magic; not validated here.
+	fread(&width, sizeof(width), 1, flowfp);
+	fread(&height, sizeof(height), 1, flowfp);
+
+	unique_ptr<Vec2[]> flow(new Vec2[width * height]);
+	fread(flow.get(), width * height * sizeof(Vec2), 1, flowfp);
+	fclose(flowfp);
+
+       Flow ret;
+       ret.width = width;
+       ret.height = height;
+       ret.flow = move(flow);
+       return ret;
+}
diff --git a/futatabi/util.h b/futatabi/util.h
new file mode 100644 (file)
index 0000000..4a0aed8
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _UTIL_H
+#define _UTIL_H 1
+
+#include <algorithm>
+#include <math.h>
+#include <memory>
+#include <stdint.h>
+
+struct Vec2 {
+       float du, dv;
+};
+
+struct Flow {
+       uint32_t width, height;
+       std::unique_ptr<Vec2[]> flow;
+};
+
+Flow read_flow(const char *filename);
+
+// du and dv are in pixels.
+inline void flow2rgb(float du, float dv, uint8_t *rr, uint8_t *gg, uint8_t *bb)
+{
+       float angle = atan2(dv, du);
+       float magnitude = std::min(hypot(du, dv) / 20.0, 1.0);
+
+       // HSV to RGB (from Wikipedia). Saturation is 1.
+       float c = magnitude;
+       float h = (angle + M_PI) * 6.0 / (2.0 * M_PI);
+       float X = c * (1.0 - fabs(fmod(h, 2.0) - 1.0));
+       float r = 0.0f, g = 0.0f, b = 0.0f;
+       if (h <= 1.0f) {
+               r = c;
+               g = X;
+       } else if (h <= 2.0f) {
+               r = X;
+               g = c;
+       } else if (h <= 3.0f) {
+               g = c;
+               b = X;
+       } else if (h <= 4.0f) {
+               g = X;
+               b = c;
+       } else if (h <= 5.0f) {
+               r = X;
+               b = c;
+       } else if (h <= 6.0f) {
+               r = c;
+               b = X;
+       } else {
+               // h is NaN, so black is fine.
+       }
+       float m = magnitude - c;
+       r += m;
+       g += m;
+       b += m;
+       r = std::max(std::min(r, 1.0f), 0.0f);
+       g = std::max(std::min(g, 1.0f), 0.0f);
+       b = std::max(std::min(b, 1.0f), 0.0f);
+       *rr = lrintf(r * 255.0f);
+       *gg = lrintf(g * 255.0f);
+       *bb = lrintf(b * 255.0f);
+}
+
+#endif  // !defined(_UTIL_H)
diff --git a/futatabi/vaapi_jpeg_decoder.cpp b/futatabi/vaapi_jpeg_decoder.cpp
new file mode 100644 (file)
index 0000000..2cd2f00
--- /dev/null
@@ -0,0 +1,568 @@
+#include "vaapi_jpeg_decoder.h"
+
+#include "jpeg_destroyer.h"
+#include "jpeg_frame.h"
+#include "shared/memcpy_interleaved.h"
+
+#include <X11/Xlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <jpeglib.h>
+#include <list>
+#include <mutex>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <unistd.h>
+#include <va/va.h>
+#include <va/va_drm.h>
+#include <va/va_x11.h>
+
+using namespace std;
+
+static unique_ptr<VADisplayWithCleanup> va_dpy;
+static VAConfigID config_id;
+static VAImageFormat uyvy_format;
+bool vaapi_jpeg_decoding_usable = false;
+
+struct VAResources {
+       unsigned width, height;
+       VASurfaceID surface;
+       VAContextID context;
+       VAImage image;
+};
+static list<VAResources> va_resources_freelist;
+static mutex va_resources_mutex;
+
+#define CHECK_VASTATUS(va_status, func)                                 \
+    if (va_status != VA_STATUS_SUCCESS) {                               \
+        fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+        exit(1);                                                        \
+    }
+
+#define CHECK_VASTATUS_RET(va_status, func)                             \
+    if (va_status != VA_STATUS_SUCCESS) {                               \
+        fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+        return nullptr;                                                 \
+    }
+
+// From libjpeg (although it's of course identical between implementations).
+static const int jpeg_natural_order[DCTSIZE2] = {
+        0,  1,  8, 16,  9,  2,  3, 10,
+       17, 24, 32, 25, 18, 11,  4,  5,
+       12, 19, 26, 33, 40, 48, 41, 34,
+       27, 20, 13,  6,  7, 14, 21, 28,
+       35, 42, 49, 56, 57, 50, 43, 36,
+       29, 22, 15, 23, 30, 37, 44, 51,
+       58, 59, 52, 45, 38, 31, 39, 46,
+       53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+VAResources get_va_resources(unsigned width, unsigned height)
+{
+       {
+               lock_guard<mutex> lock(va_resources_mutex);
+               for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
+                       if (it->width == width && it->height == height) {
+                               VAResources ret = *it;
+                               va_resources_freelist.erase(it);
+                               return ret;
+                       }
+               }
+       }
+
+       VAResources ret;
+
+       ret.width = width;
+       ret.height = height;
+
+       VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
+               width, height,
+               &ret.surface, 1, nullptr, 0);
+       CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+       va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
+       CHECK_VASTATUS(va_status, "vaCreateContext");
+
+       va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
+       CHECK_VASTATUS(va_status, "vaCreateImage");
+
+       return ret;
+}
+
+void release_va_resources(VAResources resources)
+{
+       lock_guard<mutex> lock(va_resources_mutex);
+       if (va_resources_freelist.size() > 10) {
+               auto it = va_resources_freelist.end();
+               --it;
+
+               VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
+               CHECK_VASTATUS(va_status, "vaDestroyImage");
+
+               va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
+               CHECK_VASTATUS(va_status, "vaDestroyContext");
+
+               va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
+               CHECK_VASTATUS(va_status, "vaDestroySurfaces");
+
+               va_resources_freelist.erase(it);
+       }
+
+       va_resources_freelist.push_front(resources);
+}
+
+// RAII wrapper to release VAResources on return (even on error).
+class ReleaseVAResources {
+public:
+       ReleaseVAResources(const VAResources &resources)
+               : resources(resources) {}
+       ~ReleaseVAResources()
+       {
+               if (!committed) {
+                       release_va_resources(resources);
+               }
+       }
+
+       void commit() { committed = true; }
+
+private:
+       const VAResources &resources;
+       bool committed = false;
+};
+
+VADisplayWithCleanup::~VADisplayWithCleanup()
+{
+       if (va_dpy != nullptr) {
+               vaTerminate(va_dpy);
+       }
+       if (x11_display != nullptr) {
+               XCloseDisplay(x11_display);
+       }
+       if (drm_fd != -1) {
+               close(drm_fd);
+       }
+}
+
+unique_ptr<VADisplayWithCleanup> va_open_display(const string &va_display)
+{
+       if (va_display.empty() || va_display[0] != '/') {  // An X display.
+               Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str());
+               if (x11_display == nullptr) {
+                       fprintf(stderr, "error: can't connect to X server!\n");
+                       return nullptr;
+               }
+
+               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
+               ret->x11_display = x11_display;
+               ret->va_dpy = vaGetDisplay(x11_display);
+               if (ret->va_dpy == nullptr) {
+                       return nullptr;
+               }
+               return ret;
+       } else {  // A DRM node on the filesystem (e.g. /dev/dri/renderD128).
+               int drm_fd = open(va_display.c_str(), O_RDWR);
+               if (drm_fd == -1) {
+                       perror(va_display.c_str());
+                       return nullptr;
+               }
+               unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
+               ret->drm_fd = drm_fd;
+               ret->va_dpy = vaGetDisplayDRM(drm_fd);
+               if (ret->va_dpy == nullptr) {
+                       return nullptr;
+               }
+               return ret;
+       }
+}
+
+unique_ptr<VADisplayWithCleanup> try_open_va(const string &va_display, string *error)
+{
+       unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
+       if (va_dpy == nullptr) {
+               if (error)
+                       *error = "Opening VA display failed";
+               return nullptr;
+       }
+       int major_ver, minor_ver;
+       VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
+       if (va_status != VA_STATUS_SUCCESS) {
+               char buf[256];
+               snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d\n", va_status);
+               if (error != nullptr)
+                       *error = buf;
+               return nullptr;
+       }
+
+       int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy);
+       unique_ptr<VAEntrypoint[]> entrypoints(new VAEntrypoint[num_entrypoints]);
+       if (entrypoints == nullptr) {
+               if (error != nullptr)
+                       *error = "Failed to allocate memory for VA entry points";
+               return nullptr;
+       }
+
+       vaQueryConfigEntrypoints(va_dpy->va_dpy, VAProfileJPEGBaseline, entrypoints.get(), &num_entrypoints);
+       for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
+               if (entrypoints[slice_entrypoint] != VAEntrypointVLD) {
+                       continue;
+               }
+
+		// We found a usable decoder, so return it.
+               return va_dpy;
+       }
+
+       if (error != nullptr)
+               *error = "Can't find VAEntrypointVLD for the JPEG profile";
+       return nullptr;
+}
+
+string get_usable_va_display()
+{
+       // Reduce the amount of chatter while probing,
+       // unless the user has specified otherwise.
+       bool need_env_reset = false;
+       if (getenv("LIBVA_MESSAGING_LEVEL") == nullptr) {
+               setenv("LIBVA_MESSAGING_LEVEL", "0", true);
+               need_env_reset = true;
+       }
+
+       // First try the default (ie., whatever $DISPLAY is set to).
+       unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va("", nullptr);
+       if (va_dpy != nullptr) {
+               if (need_env_reset) {
+                       unsetenv("LIBVA_MESSAGING_LEVEL");
+               }
+               return "";
+       }
+
+       fprintf(stderr, "The X11 display did not expose a VA-API JPEG decoder.\n");
+
+       // Try all /dev/dri/render* in turn. TODO: Accept /dev/dri/card*, too?
+       glob_t g;
+       int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
+       if (err != 0) {
+               fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
+       } else {
+               for (size_t i = 0; i < g.gl_pathc; ++i) {
+                       string path = g.gl_pathv[i];
+                       va_dpy = try_open_va(path, nullptr);
+                       if (va_dpy != nullptr) {
+                               fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
+                                       path.c_str());
+                               globfree(&g);
+                               if (need_env_reset) {
+                                       unsetenv("LIBVA_MESSAGING_LEVEL");
+                               }
+                               return path;
+                       }
+               }
+       }
+
+       fprintf(stderr, "No suitable VA-API JPEG decoders were found in /dev/dri; giving up.\n");
+       fprintf(stderr, "Note that if you are using an Intel CPU with an external GPU,\n");
+       fprintf(stderr, "you may need to enable the integrated Intel GPU in your BIOS\n");
+       fprintf(stderr, "to expose Quick Sync.\n");
+       return "none";
+}
+
+void init_jpeg_vaapi()
+{
+       string dpy = get_usable_va_display();
+       if (dpy == "none") {
+               return;
+       }
+
+       va_dpy = try_open_va(dpy, nullptr);
+       if (va_dpy == nullptr) {
+               return;
+       }
+
+       VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
+
+       VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD,
+               &attr, 1, &config_id);
+       CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+       int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
+       assert(num_formats > 0);
+
+       unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
+       va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
+       CHECK_VASTATUS(va_status, "vaQueryImageFormats");
+
+       bool found = false;
+       for (int i = 0; i < num_formats; ++i) {
+               // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
+               if (formats[i].fourcc == VA_FOURCC_UYVY) {
+                       memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat));
+                       found = true;
+                       break;
+               }
+       }
+       if (!found) {
+               return;
+       }
+
+       fprintf(stderr, "VA-API JPEG decoding initialized.\n");
+       vaapi_jpeg_decoding_usable = true;
+}
+
+class VABufferDestroyer {
+public:
+       VABufferDestroyer(VADisplay dpy, VABufferID buf)
+               : dpy(dpy), buf(buf) {}
+
+       ~VABufferDestroyer() {
+               VAStatus va_status = vaDestroyBuffer(dpy, buf);
+               CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+       }
+
+private:
+       VADisplay dpy;
+       VABufferID buf;
+};
+
+shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
+{
+       jpeg_decompress_struct dinfo;
+       jpeg_error_mgr jerr;
+       dinfo.err = jpeg_std_error(&jerr);
+       jpeg_create_decompress(&dinfo);
+       JPEGDestroyer destroy_dinfo(&dinfo);
+
+       jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
+       jpeg_read_header(&dinfo, true);
+
+       if (dinfo.num_components != 3) {
+               fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+                       dinfo.num_components,
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               return nullptr;
+       }
+       if (dinfo.comp_info[0].h_samp_factor != 2 ||
+           dinfo.comp_info[1].h_samp_factor != 1 ||
+           dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor ||
+           dinfo.comp_info[2].h_samp_factor != 1 ||
+           dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
+               fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+                       dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+                       dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+                       dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+               return nullptr;
+       }
+
+       // Picture parameters.
+       VAPictureParameterBufferJPEGBaseline pic_param;
+       memset(&pic_param, 0, sizeof(pic_param));
+       pic_param.picture_width = dinfo.image_width;
+       pic_param.picture_height = dinfo.image_height;
+       for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+               const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
+               pic_param.components[component_idx].component_id = comp->component_id;
+               pic_param.components[component_idx].h_sampling_factor = comp->h_samp_factor;
+               pic_param.components[component_idx].v_sampling_factor = comp->v_samp_factor;
+               pic_param.components[component_idx].quantiser_table_selector = comp->quant_tbl_no;
+       }
+       pic_param.num_components = dinfo.num_components;
+       pic_param.color_space = 0;  // YUV.
+       pic_param.rotation = VA_ROTATION_NONE;
+
+       VABufferID pic_param_buffer;
+       VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
+       CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
+
+       // Quantization matrices.
+       VAIQMatrixBufferJPEGBaseline iq;
+       memset(&iq, 0, sizeof(iq));
+
+       for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
+               const JQUANT_TBL *qtbl = dinfo.quant_tbl_ptrs[quant_tbl_idx];
+               if (qtbl == nullptr) {
+                       iq.load_quantiser_table[quant_tbl_idx] = 0;
+               } else {
+                       iq.load_quantiser_table[quant_tbl_idx] = 1;
+                       for (int i = 0; i < 64; ++i) {
+                               if (qtbl->quantval[i] > 255) {
+                                       fprintf(stderr, "Baseline JPEG only!\n");
+                                       return nullptr;
+                               }
+                               iq.quantiser_table[quant_tbl_idx][i] = qtbl->quantval[jpeg_natural_order[i]];
+                       }
+               }
+       }
+
+       VABufferID iq_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAIQMatrixBufferType, sizeof(iq), 1, &iq, &iq_buffer);
+       CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_iq(va_dpy->va_dpy, iq_buffer);
+
+       // Huffman tables (arithmetic is not supported).
+       VAHuffmanTableBufferJPEGBaseline huff;
+       memset(&huff, 0, sizeof(huff));
+
+       for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
+               const JHUFF_TBL *ac_hufftbl = dinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
+               const JHUFF_TBL *dc_hufftbl = dinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
+               if (ac_hufftbl == nullptr) {
+                       assert(dc_hufftbl == nullptr);
+                       huff.load_huffman_table[huff_tbl_idx] = 0;
+               } else {
+                       assert(dc_hufftbl != nullptr);
+                       huff.load_huffman_table[huff_tbl_idx] = 1;
+
+                       for (int i = 0; i < 16; ++i) {
+                               huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
+                       }
+                       for (int i = 0; i < 12; ++i) {
+                               huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
+                       }
+                       for (int i = 0; i < 16; ++i) {
+                               huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
+                       }
+                       for (int i = 0; i < 162; ++i) {
+                               huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
+                       }
+               }
+       }
+
+       VABufferID huff_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(huff), 1, &huff, &huff_buffer);
+       CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
+
+       // Slice parameters (metadata about the slice).
+       VASliceParameterBufferJPEGBaseline parms;
+       memset(&parms, 0, sizeof(parms));
+       parms.slice_data_size = dinfo.src->bytes_in_buffer;
+       parms.slice_data_offset = 0;
+       parms.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
+       parms.slice_horizontal_position = 0;
+       parms.slice_vertical_position = 0;
+       for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+               const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
+               parms.components[component_idx].component_selector = comp->component_id;
+               parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
+               parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
+               if (parms.components[component_idx].dc_table_selector > 1 ||
+                   parms.components[component_idx].ac_table_selector > 1) {
+                       fprintf(stderr, "Uses too many Huffman tables\n");
+                       return nullptr;
+               }
+       }
+       parms.num_components = dinfo.num_components;
+       parms.restart_interval = dinfo.restart_interval;
+       int horiz_mcus = (dinfo.image_width + (DCTSIZE * 2) - 1) / (DCTSIZE * 2);
+       int vert_mcus = (dinfo.image_height + DCTSIZE - 1) / DCTSIZE;
+       parms.num_mcus = horiz_mcus * vert_mcus;
+
+       VABufferID slice_param_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceParameterBufferType, sizeof(parms), 1, &parms, &slice_param_buffer);
+       CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
+
+       // The actual data. VA-API will destuff and all for us.
+       VABufferID data_buffer;
+       va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceDataBufferType, dinfo.src->bytes_in_buffer, 1, const_cast<unsigned char *>(dinfo.src->next_input_byte), &data_buffer);
+       CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+       VABufferDestroyer destroy_data(va_dpy->va_dpy, data_buffer);
+
+       VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height);
+       ReleaseVAResources release(resources);
+
+       va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
+       CHECK_VASTATUS_RET(va_status, "vaBeginPicture");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
+       CHECK_VASTATUS_RET(va_status, "vaRenderPicture(pic_param)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &iq_buffer, 1);
+       CHECK_VASTATUS_RET(va_status, "vaRenderPicture(iq)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
+       CHECK_VASTATUS_RET(va_status, "vaRenderPicture(huff)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
+       CHECK_VASTATUS_RET(va_status, "vaRenderPicture(slice_param)");
+       va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &data_buffer, 1);
+       CHECK_VASTATUS_RET(va_status, "vaRenderPicture(data)");
+       va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
+       CHECK_VASTATUS_RET(va_status, "vaEndPicture");
+
+       // vaDeriveImage() works, but the resulting image seems to live in
+       // uncached memory, which makes copying data out from it very, very slow.
+       // Thanks to FFmpeg for the observation that you can vaGetImage() the
+       // surface onto your own image (although then, it can't be planar, which
+       // is unfortunate for us).
+#if 0
+       VAImage image;
+	va_status = vaDeriveImage(va_dpy->va_dpy, resources.surface, &image);
+       CHECK_VASTATUS_RET(va_status, "vaDeriveImage");
+#else
+       va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
+       CHECK_VASTATUS_RET(va_status, "vaSyncSurface");
+
+       va_status = vaGetImage(va_dpy->va_dpy, resources.surface, 0, 0, dinfo.image_width, dinfo.image_height, resources.image.image_id);
+       CHECK_VASTATUS_RET(va_status, "vaGetImage");
+#endif
+
+       void *mapped;
+       va_status = vaMapBuffer(va_dpy->va_dpy, resources.image.buf, &mapped);
+       CHECK_VASTATUS_RET(va_status, "vaMapBuffer");
+
+       shared_ptr<Frame> frame(new Frame);
+#if 0
+       // 4:2:2 planar (for vaDeriveImage).
+       frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+       frame->cb.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
+       frame->cr.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
+       for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+               uint8_t *dptr;
+               size_t width;
+               if (component_idx == 0) {
+                       dptr = frame->y.get();
+                       width = dinfo.image_width;
+               } else if (component_idx == 1) {
+                       dptr = frame->cb.get();
+                       width = dinfo.image_width / 2;
+               } else if (component_idx == 2) {
+                       dptr = frame->cr.get();
+                       width = dinfo.image_width / 2;
+               } else {
+                       assert(false);
+               }
+               const uint8_t *sptr = (const uint8_t *)mapped + image.offsets[component_idx];
+               size_t spitch = image.pitches[component_idx];
+               for (size_t y = 0; y < dinfo.image_height; ++y) {
+                       memcpy(dptr + y * width, sptr + y * spitch, width);
+               }
+       }
+#else
+       // Convert Y'CbCr to separate Y' and CbCr.
+       frame->is_semiplanar = true;
+       frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+       frame->cbcr.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+       const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
+       if (resources.image.pitches[0] == dinfo.image_width * 2) {
+               memcpy_interleaved(frame->cbcr.get(), frame->y.get(), src, dinfo.image_width * dinfo.image_height * 2);
+       } else {
+               for (unsigned y = 0; y < dinfo.image_height; ++y) {
+                       memcpy_interleaved(frame->cbcr.get() + y * dinfo.image_width, frame->y.get() + y * dinfo.image_width,
+                                          src + y * resources.image.pitches[0], dinfo.image_width * 2);
+               }
+       }
+#endif
+       frame->width = dinfo.image_width;
+       frame->height = dinfo.image_height;
+       frame->chroma_subsampling_x = 2;
+       frame->chroma_subsampling_y = 1;
+       frame->pitch_y = dinfo.image_width;
+       frame->pitch_chroma = dinfo.image_width / 2;
+
+       va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
+       CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");
+
+       return frame;
+}
diff --git a/futatabi/vaapi_jpeg_decoder.h b/futatabi/vaapi_jpeg_decoder.h
new file mode 100644 (file)
index 0000000..4182cfc
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef _VAAPI_JPEG_DECODER_H
+#define _VAAPI_JPEG_DECODER_H 1
+
+#include <X11/Xlib.h>
+#include <memory>
+#include <string>
+#include <va/va.h>
+
+struct Frame;
+
+struct VADisplayWithCleanup {
+       ~VADisplayWithCleanup();
+
+       VADisplay va_dpy;
+       Display *x11_display = nullptr;
+       int drm_fd = -1;
+};
+std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display);  // Can return nullptr on failure.
+std::string get_usable_va_display();
+
+void init_jpeg_vaapi();
+std::shared_ptr<Frame> decode_jpeg_vaapi(const std::string &jpeg);
+
+extern bool vaapi_jpeg_decoding_usable;
+
+#endif  // !defined(_VAAPI_JPEG_DECODER_H)
diff --git a/futatabi/variational_refinement.txt b/futatabi/variational_refinement.txt
new file mode 100644 (file)
index 0000000..0392011
--- /dev/null
@@ -0,0 +1,537 @@
+Variational refinement -- an introduction and derivation
+
+The variational refinement is probably the most difficult part of the
+algorithm to understand, in part because the description in most papers
+are very heavy on notation and rather light on exposition. I've tried
+to give a somewhat friendlier introduction to this specific algorithm
+below.
+
+The general idea is fairly simple; we try to optimize the flow field
+as a whole, by minimizing some mathematical notion of badness expressed
+as an energy function. The one used in the dense inverse search paper
+[Kroeger16; see references below] has this form:
+
+  E(U) = int( σ Ψ(E_I) + γ Ψ(E_G) + α Ψ(E_S) ) dx
+
+where Ψ(a²) = sqrt(a² + ε²) for some small constant ε = 0.001, and
+σ, γ, α are empirically set weighting constants. (We'll get to what the
+different energy terms are in a minute.) The integral is, for all practical
+purposes, just a sum over all the pixels in the flow.
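+
+(To make this concrete, here is a tiny C++-style sketch -- not taken
+from the actual code, and all names are made up for illustration -- of
+the penalizer and the per-pixel integrand:
+
+  const float eps = 0.001f;
+
+  float psi(float a) { return sqrtf(a * a + eps * eps); }  // Psi(a²).
+
+  // The integrand at one pixel, given the three raw energy terms and weights.
+  float energy(float E_I, float E_G, float E_S,
+               float sigma, float gamma, float alpha)
+  {
+          return sigma * psi(E_I) + gamma * psi(E_G) + alpha * psi(E_S);
+  }
+
+E(U) is then simply the sum of energy() over all pixels.)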
+
+In general, such formulas are nonconvex and highly nonlinear, so we
+cannot hope to find a global minimum -- but if we start from the flow
+generated by the motion search, we can at least hope to make it somehow
+better by walking towards a local minimum. (In fact, there are many
+methods for optical flow that work _only_ by such minimization,
+so the word “refinement” is maybe not doing the method justice.
+One could just as well say that the motion search is a way of
+finding a reasonable starting point for the optimization.)
+
+The dense inverse search paper [Kroeger16; see references below] sets
+up the energy terms as described by some motion tensors and normalizations,
+then says simply that it is optimized by “θ_vo fixed point iterations
+and θ_vi iterations of Successive Over Relaxation (SOR) for the linear
+system”. It's not immediately obvious what this means, but it gives
+a reference to [Brox04]. However, that paper describes a numerical
+approximation scheme that is _far_ more complicated than what the DIS
+code actually does.
+
+Rather, one must look at the other main reference they are giving,
+which is [Weinzaepfel13], describing a system called DeepFlow.
+DIS borrows most of the exposition and code for its variational
+refinement from DeepFlow, just removing some terms and fixing up
+a few issues here and there. (There are some slight differences in
+the paper, like the use of ∂z instead of ∂t, but that looks mostly
+like an error to me.) Unfortunately, that paper in turn refers to
+[Brox11], which appears no more useful in clearing up the notation
+to me.
+
+However, digging down into the references, one finally finds [Zimmer11],
+which is where the tensor notation appears to come from. This allows
+us to look at the first term in the energy, E_I, which comes from the
+intensity constancy assumption. The basic idea is that optical flow,
+nearly by definition, should preserve intensity after the warp:
+
+  I_0(x + u) = I_1(x) 
+
+where I_0 is the first picture, I_1 is the second, x is any 2D
+coordinate and u is the flow at x (which we are optimizing over).
+In general, we'll be optimizing over the entire field of u
+(potentially hundreds of thousands of values), but we'll be looking
+mostly at individual points, so we'll skip the coordinates when we
+can (e.g., we write u instead of u(x, y)). u is of course the 2D
+flow, although often, we'll write its components separately as u and v
+instead of as a vector u.
+
+Before we go further, we need to add some more notation:
+
+  * I_x is the partial derivative of I with respect to x (at some
+    point), and similarly for I_y. These do not depend on u,
+    so they can be precalculated before the optimization.
+  * I_xx is the double partial derivative of I, and similarly for
+    I_yy and I_xy (the latter is the same as I_yx).
+  * I_t is the temporal derivative of I, ie. in practice just
+    I_t(x) = I_1(x) - I_0(x).
+
+Returning now to our original assertion:
+
+  I_0(x + u) = I_1(x)
+
+Classically in optical flow, one assumes that the flow is smooth
+and linear around the point x, which allows one to approximate this
+equation by
+
+  I_x u + I_y v + I_t = 0
+
+This is usually simply called “the optical flow constraint”,
+and gives rise to a very natural part of the energy:
+
+  E_I = I_x u + I_y v + I_t
+
+Remember that we send E_I through the function Ψ(a²) = sqrt(a² + ε²),
+so clearly Ψ(E_I) will be minimized if indeed E_I is zero.
+
+At this point, many papers start talking about Euler-Lagrange
+multivariate equations, which is a fairly daunting concept
+(at least the Wikipedia page is suitable for scaring small children).
+However, for the first two terms, we don't need its general form,
+and it reduces to something much simpler; just differentiate the energy
+by u and equate the result to zero (finding some minimum; it can't be
+a maximum, since *wave hands intensely*). Then differentiate the energy
+by v and set that to zero, too; now you have two equations in two
+unknowns (or, since we're optimizing over a field, maybe 500k
+equations in 500k unknowns -- although the equation set will be
+very sparse), which is hopefully solvable using linear methods.
+We'll look at what this gives for E_I in a moment, then try to apply
+the same notions to E_G and E_S later.
+
+First we modify E_I a bit by adding some normalization:
+
+  E_I = β_0 (I_x u + I_y v + I_t)
+
+where β_0 = 1/(abs(∇I)² + 0.01). Note that β_0 depends on I only,
+so for the purposes of optimizing u, it's a constant and can be
+precomputed across I. (β_0 will, of course, depend on x, but so
+do all the other terms in the equation.)
+
+Now we give it to Maple, differentiating first by u and then by v:
+
+> M := (u,v) -> B_0 * (I_x * u + I_y * v + I_t);
+                   M := (u, v) -> B_0 (I_x u + I_y v + I_t)
+
+> diff(sqrt(M(u,v)^2 + e), u);                  
+                           2
+                        B_0  (I_x u + I_y v + I_t) I_x
+                     ------------------------------------
+                         2                      2     1/2
+                     (B_0  (I_x u + I_y v + I_t)  + e)
+
+> diff(sqrt(M(u,v)^2 + e), v);
+                           2
+                        B_0  (I_x u + I_y v + I_t) I_y
+                     ------------------------------------
+                         2                      2     1/2
+                     (B_0  (I_x u + I_y v + I_t)  + e)
+
+
+So these are the two expressions to be set to zero (for each
+point). We'll notice immediately that this isn't very linear
+in u and v, so here's where the “fixed point iterations” come in;
+we simply assume that our previous values for u and v are
+approximately good enough for the denominator, and optimize
+them in the numerator only. Then we get new values that are
+hopefully a bit closer, which we can then use for the
+denominator, and so on. (This is seemingly an old technique;
+[Brox05] cites [Ciarlet78]. It is justifiable in the sense
+that the only thing really held constant is the derivative
+of the penalizer.) In other words, if we define the constant
+
+  k1 = β_0² / sqrt(β_0² (I_x u' + I_y v' + I_t)² + ε²)
+
+(where u' and v' are the guesses for u and v from the previous
+iteration)
+
+we have the much more manageable
+
+  k1 I_x²    u + k1 I_x I_y v = - k1 I_t I_x
+  k1 I_x I_y u + k1 I_y²    v = - k1 I_t I_y
+
+ie., two linear equations in u and v. Now, you will notice two
+immediate problems with this equation set:
+
+  * The factor k1 is completely useless, since it's just multiplied
+    in everywhere.
+  * The set of equations is collinear (the determinant of the matrix
+    is zero), and thus there is an infinite number of possible
+    solutions; this is the so-called “aperture problem”.
+    It shouldn't be surprising, though, as we cannot expect that
+    starting with a single constraint should allow us to solve
+    for two unknowns.
+
+However, both problems will go away as soon as we start adding
+more terms, so let's look at the gradient constancy term E_G next.
+It is fairly similar to the brightness constancy term, except it
+uses the (spatial) gradient instead of intensity:
+
+  ∇I_0(x + u) = ∇I_1(x)
+
+or equivalently (by definition):
+
+  (∂I/∂x)_0(x + u) = (∂I/∂x)_1(x)
+  (∂I/∂y)_0(x + u) = (∂I/∂y)_1(x)
+
+The idea is that this is more robust to changes in lighting.
+It doesn't replace the intensity term, but augments it; the weighting
+constants σ and γ control their relative importance. Also note that
+this actually gives us two independent equations, unlike the brightness
+constancy term.
+
+However, it is not obvious at all how to discretize this. In particular,
+most papers, including [Brox04], appear to want _not_ to make any linear
+assumptions about the flow in this case, and end up with tons of terms.
+(The DIS and DeepFlow papers do, again, use some tensor notation that
+I do not understand, but I'm not convinced it actually contains any
+of the discretization.)
+
+Yet more paper searching eventually turns up [Fahad07], which simply
+states that the discretized versions of these equations are:
+
+  I_xx u + I_xy v + I_xt = 0
+  I_yx u + I_yy v + I_yt = 0.
+
+which seems to match well what the DIS code uses. Note that even though
+this is an equation set equal to zero, we can't just solve it directly;
+we need to make (penalized, normalized) energy terms and add them to
+the other terms. This gives
+  
+  E_G = β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt)
+
+with normalization terms
+
+  β_x = 1 / (abs(∇(I_x))² + 0.01)  (∇(I_x) is the gradient of ∂I/∂x)
+  β_y = 1 / (abs(∇(I_y))² + 0.01)
+
+(The DIS paper writes ∇I_dx and ∇I_dy instead of ∇I_x and ∇I_y, but I believe
+that's a typo; the DeepFlow paper says ∇I_x and ∇I_y.)
+
+The papers both write that Ψ(E_G) is used, which would mean that the penalized
+term is
+
+  E_G = sqrt((β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt))² + ε²)
+
+but that isn't what the code actually does. Instead, it seems that the two
+terms are squared independently:
+  
+  E_G = sqrt((β_x (I_xx u + I_xy v + I_xt))² + (β_y (I_yx u + I_yy v + I_yt))² + ε²)
+
+Both are solvable just fine, and it probably does not matter all that much
+which we use in practice (although [Zimmer11] suggests that if we are using
+multichannel images, we should penalize the three channels separately),
+but we follow what the code actually does here.
+
+We can differentiate them and equate them to zero as before:
+
+> M_x := (u,v) -> B_x * (I_xx * u + I_xy * v + I_xt);
+                      M_x := (u, v) -> B_x (I_xx u + I_xy v + I_xt)
+
+> M_y := (u,v) -> B_y * (I_xy * u + I_yy * v + I_yt);
+                      M_y := (u, v) -> B_y (I_xy u + I_yy v + I_yt)
+
+> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), u);        
+                                     2             2
+       2 (I_xx u + I_xy v + I_xt) B_x  I_xx + 2 B_y  (I_xy u + I_yy v + I_yt) I_xy
+       ---------------------------------------------------------------------------
+                                  2    2      2                         2     1/2
+       2 ((I_xx u + I_xy v + I_xt)  B_x  + B_y  (I_xy u + I_yy v + I_yt)  + e)
+
+> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), v);
+                                     2             2
+       2 (I_xx u + I_xy v + I_xt) B_x  I_xy + 2 B_y  (I_xy u + I_yy v + I_yt) I_yy
+       ---------------------------------------------------------------------------
+                                  2    2      2                         2     1/2
+       2 ((I_xx u + I_xy v + I_xt)  B_x  + B_y  (I_xy u + I_yy v + I_yt)  + e)
+
+Using the same fixed-point scheme where we hold the terms in the
+denominator constant and equal to last iteration's values, we get
+a new common constant
+
+  k2 = 1 / sqrt(β_x² (I_xx u' + I_xy v' + I_xt)² + β_y² (I_xy u' + I_yy v' + I_yt)² + ε²)
+
+and for brevity
+
+  k_x = k2 β_x²
+  k_y = k2 β_y²
+
+and thus, collecting terms for u and v, we get the two equations:
+
+  (k_x I_xx² + k_y I_xy²)         u + (k_x I_xx I_xy + k_y I_xy I_yy) v = - k_x I_xx I_xt - k_y I_xy I_yt
+  (k_x I_xx I_xy + k_y I_xy I_yy) u + (k_x I_xy² + k_y I_yy²)         v = - k_x I_xy I_xt - k_y I_yy I_yt
+
+which is linear in u and v, not collinear (unless we are extremely
+unlucky), and can be easily solved.
+
+Of course, for optimizing the weighted sum σ Ψ(E_I) + γ Ψ(E_G),
+we just add the two equation sets pairwise with appropriate weights.
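+
+In C++-style pseudocode, one fixed point iteration for a single pixel
+could look something like the sketch below (a simplification for
+illustration; all names are assumptions, and the real solver also mixes
+in the smoothness term and solves everything jointly, as described
+later):
+
+  // Previous guesses u, v; derivatives Ix, Iy, It, Ixx, Ixy, Iyy, Ixt, Iyt;
+  // normalizations b0, bx, by; weights sigma, gamma. sq(a) is a * a.
+  float k1 = b0 * b0 / sqrtf(b0 * b0 * sq(Ix * u + Iy * v + It) + eps * eps);
+  float k2 = 1.0f / sqrtf(bx * bx * sq(Ixx * u + Ixy * v + Ixt) +
+                          by * by * sq(Ixy * u + Iyy * v + Iyt) + eps * eps);
+  float kx = k2 * bx * bx, ky = k2 * by * by;
+
+  // Weighted sum of the two 2x2 equation sets: A [u v]^T = rhs.
+  float A11 = sigma * k1 * Ix * Ix + gamma * (kx * Ixx * Ixx + ky * Ixy * Ixy);
+  float A12 = sigma * k1 * Ix * Iy + gamma * (kx * Ixx * Ixy + ky * Ixy * Iyy);
+  float A22 = sigma * k1 * Iy * Iy + gamma * (kx * Ixy * Ixy + ky * Iyy * Iyy);
+  float rhs1 = -sigma * k1 * It * Ix - gamma * (kx * Ixx * Ixt + ky * Ixy * Iyt);
+  float rhs2 = -sigma * k1 * It * Iy - gamma * (kx * Ixy * Ixt + ky * Iyy * Iyt);
+
+  // Solve the 2x2 system with Cramer's rule for the new guess.
+  float det = A11 * A22 - A12 * A12;
+  u = (rhs1 * A22 - rhs2 * A12) / det;
+  v = (A11 * rhs2 - A12 * rhs1) / det;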
+
+There's a small discrepancy here: The equations suggest that we should
+be be squaring the normalization terms β_0², β_x², β_y²; however, the
+code does not appear to do so. It's possible that they were intended to be
+added outside of the penalization, e.g. Ψ(a²) = sqrt(β a² + ε²), but given
+that these come from [Zimmer11], which mentions nothing of the sort,
+I'll just have to assume that this is an implementation mishap.
+
+The final smoothness term is the one that binds the flow field together as a whole
+so that we don't have WxH completely independent equations (with its positive
+and negative sides, of course). It is the simplest in terms of notation,
+but it requires the full power of the Euler-Lagrange equations to minimize,
+so we'll need to figure that part out.
+
+  E_S = abs(∇u)² + abs(∇v)²
+
+or
+
+  E_S = (u_x² + u_y²) + (v_x² + v_y²)
+
+The penalized form used in the DeepFlow code, contrary to what you'd expect
+from the paper, is:
+
+  E_S = sqrt(u_x² + u_y² + v_x² + v_y² + ε²)
+
+How would one go about minimizing such an expression by u? (We'll deal with v
+later.) It's perhaps no big surprise that the expression involves double
+derivatives, but the full form involves the Euler-Lagrange equations.
+They allow us to minimize expressions that contain x, y, u(x, y) _and_ the partial
+derivatives u_x(x, y) and u_y(x, y), although the answer becomes a differential
+equation.
+
+The Wikipedia page is, unfortunately, not very beginner-friendly,
+but the general idea is: Differentiate the expression by u_x
+(yes, differentiating by a partial derivative!), negate it, and then
+differentiate the result by x. Then do the same thing by u_y and y,
+add the two results together and equate to zero. Mathematically
+(https://en.wikipedia.org/wiki/Euler%E2%80%93Lagrange_equation#Several_functions_of_several_variables_with_single_derivative):
+
+  ∂E/∂u - ∂/∂x (∂E/∂u_x) - ∂/∂y (∂E/∂u_y) = 0
+
+The first term disappears, since we don't have a non-differentiated
+u(x, y) in E_S. (Previously, the two _other_ terms would disappear,
+because we didn't have u_x or u_y in E_I or E_G.) This means we get
+
+  - ∂/∂x (u_x / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) - ∂/∂y (u_y / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) = 0
+
+(We don't remove the minus signs since this is supposed to be added to
+all the other terms.)
+
+This is what's called an _anisotropic diffusion_ (or Perona–Malik diffusion)
+equation, and is extensively described in literature. It has the effect of
+smoothing the flow more in some places than others; in particular, it does
+not smooth as strongly near edges, so it is edge-preserving. (It's a bit odd to
+call it anisotropic, since it does smooth equally in all directions;
+[Brox05] calls it vector-valued diffusion.)
+
+We'd love to use our usual trick of keeping the nonlinear terms in the denominator
+constant, but alas, we can't do that yet, since it's under the differentiation
+operator; this factor has to be discretized together with u before we can treat
+it as a constant. So instead, we'll define it as a function (called the
+_diffusivity_ at the given point):
+
+  g(x, y) = 1 / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)
+
+which gives us
+
+  - ∂/∂x ( g(x, y) u_x ) - ∂/∂y ( g(x, y) u_y ) = 0
+
+We'll also have a similar equation for minimizing v, of course:
+
+  - ∂/∂x ( g(x, y) v_x ) - ∂/∂y ( g(x, y) v_y ) = 0
+
+There's no normalization term β here, unlike the other terms; DeepFlow2
+adds one, but we're not including it here.
+
+At this point, we make a tweak. This seemingly goes back to at least
+[Brox04], which also makes the same tweak to all the other terms
+(which we don't, but see below). We split u (and v) into something
+based on the original value plus a differential du (and dv), and then
+solve for du (or dv) instead. (In math-speak, we are moving to an
+implicit method, which is often more numerically stable.) In other words,
+
+  u(x, y) = u0(x, y) + du(x, y)
+
+where u0(x, y) is the initial guess for the flow. (It's not the value
+from the previous iteration but the very first guess, for reasons that
+will become clear later. [Brox04] differs here, but it does a number of things
+differently in the numerics anyway.)
+
+This gives us:
+
+  - ∂/∂x ( g(x, y) (u0 + du)_x ) - ∂/∂y ( g(x, y) (u0 + du)_y ) = 0
+
+or
+
+  - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y ) = ∂/∂x ( g(x, y) u0_x ) + ∂/∂y ( g(x, y) u0_y )
+
+where the right-hand side is effectively a constant for these purposes
+(although it still needs to be calculated anew for each iteration,
+since g(x, y) changes).
+
+Of course, now we have a different problem; all the other terms are
+formulated in terms of u and v, not du and dv. DeepFlow solves this
+by not searching for the flow between I_0 and I_1, but between I_0 and
+a pre-warped I_1. In other words, before any of the derivatives involving
+I_t are calculated, we calculate an I_w with bilinear interpolation:
+
+  I_w(x, y) = I_1(x + u0(x, y), y + v0(x, y))
+
+and then redefine I_t (occasionally called I_z) as
+
+  I_t(x, y) = I_w(x, y) - I_0(x, y)
+
+Note that the plus sign effectively means inverting the flow, so if
+the u0 and v0 were already correctly estimated, perfectly smooth and linear
+everywhere, I_w = I_0. (All spatial derivatives are calculated on the mean
+between I_0 and I_w; the paper doesn't mention this.) After this, all the
+equations for E_I and E_G earlier will still hold, they will just be
+calculating du and dv instead. Note that this means we have three values
+for the flow; there's u0 for the initial guess, du for the current guess
+of delta from u0 (which makes u0 + du the current guess of the flow),
+and du' for the previous guess of delta from u0. (The initial values for
+du' and dv' will be zero.)
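+
+As a small illustration, the pre-warp itself is straightforward
+(a CPU-style sketch, assuming some hypothetical bilerp() helper doing
+bilinear interpolation with edge clamping; the real code does this on
+the GPU, where the texture sampler provides the bilinear filtering):
+
+  for (unsigned y = 0; y < height; ++y) {
+          for (unsigned x = 0; x < width; ++x) {
+                  // I_w = I_1 sampled where the (inverted) flow points to.
+                  I_w[y][x] = bilerp(I_1, x + u0[y][x], y + v0[y][x]);
+                  I_t[y][x] = I_w[y][x] - I_0[y][x];
+          }
+  }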
+
+Now back to our equations, as we look at practical implementation:
+
+  - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y ) = ∂/∂x ( g(x, y) u0_x ) + ∂/∂y ( g(x, y) u0_y )
+  - ∂/∂x ( g(x, y) dv_x ) - ∂/∂y ( g(x, y) dv_y ) = ∂/∂x ( g(x, y) v0_x ) + ∂/∂y ( g(x, y) v0_y )
+
+We can discretize the left-hand and right-hand side identically (they differ
+only in signs and in variable), so let's look only at
+
+  - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y )
+
+[Brox05] equation (2.14) (which refers to a 1998 book, although I couldn't
+immediately find the equation in question in that book) discretizes this as
+
+  - 1/2 (g(x+1, y) + g(x, y)) (du(x+1, y) - du(x, y))
+  + 1/2 (g(x-1, y) + g(x, y)) (du(x, y) - du(x-1, y))
+  - 1/2 (g(x, y+1) + g(x, y)) (du(x, y+1) - du(x, y))
+  + 1/2 (g(x, y-1) + g(x, y)) (du(x, y) - du(x, y-1))
+
+It also mentions that it would be better to sample g at the half-way points,
+e.g. g(x+0.5, y), but that raises the question of exactly how we'd do that, and
+DeepFlow doesn't seem to care, so we stick with their version.
+
+Now we can finally let g use the values of the flow (note that this is the
+actual flow u and v, not du and dv!) from the previous iteration, as before:
+
+  g(x, y) = 1 / sqrt(u'_x² + u'_y² + v'_x² + v'_y² + ε²)
+
+The single derivatives in g(x) are approximated by standard central differences
+(see https://en.wikipedia.org/wiki/Finite_difference_coefficient), e.g.
+
+  u_x(x, y) = 1/2 (u(x + 1, y) - u(x - 1, y))
+
+although the derivatives of I are using the fancier
+
+  I_x(x, y) = 1/12 (I(x - 2, y) - 8 I(x - 1, y) + 8 I(x + 1, y) - I(x + 2, y))
+
+I assume this is because I_x derivatives are calculated only once, so we can
+afford more accurate derivatives (or possibly simply because of influence
+from earlier papers).
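+
+Putting the diffusivity together in code form (again just a sketch with
+made-up names; u and v are the full flow from the previous iteration):
+
+  float ux = 0.5f * (u[y][x + 1] - u[y][x - 1]);
+  float uy = 0.5f * (u[y + 1][x] - u[y - 1][x]);
+  float vx = 0.5f * (v[y][x + 1] - v[y][x - 1]);
+  float vy = 0.5f * (v[y + 1][x] - v[y - 1][x]);
+  float g = 1.0f / sqrtf(ux * ux + uy * uy + vx * vx + vy * vy + eps * eps);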
+
+Let's now define a smoothness constant between the neighbors (x,y) and (x1,y1):
+
+  s(x1, y1) = 1/2 (g(x, y) + g(x1, y1))
+
+Collecting all the du(x, y) terms of the discretized equation above,
+ignoring the right-hand side, which is just a constant for us anyway:
+
+  - s(x+1, y) (du(x+1, y) - du(x, y))
+  + s(x-1, y) (du(x, y) - du(x-1, y))
+  - s(x, y+1) (du(x, y+1) - du(x, y))
+  + s(x, y-1) (du(x, y) - du(x, y-1)) = C
+
+  - s(x+1, y) du(x+1, y) + s(x+1, y) du(x, y)
+  + s(x-1, y) du(x, y) - s(x-1, y) du(x-1, y)
+  - s(x, y+1) du(x, y+1) + s(x, y+1) du(x, y)
+  + s(x, y-1) du(x, y) - s(x, y-1) du(x, y-1) = C
+
+  (s(x+1, y) + s(x-1, y) + s(x, y+1) + s(x, y-1)) du(x, y) =
+  s(x+1, y) du(x+1, y) + s(x-1, y) du(x-1, y) + s(x, y+1) du(x, y+1) + s(x, y-1) du(x, y-1) + C
+
+It is interesting to note that if s = 1 uniformly, which would be the case
+without our penalizer Ψ(a²), we would have the familiar discrete Laplacian,
+where du(x, y) would seek to simply become the average of its four immediate
+neighbors.
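+
+(In code form, the gathered smoothness part looks something like this
+sketch, with s_e/s_w/s_n/s_s standing for s(x+1, y), s(x-1, y),
+s(x, y+1) and s(x, y-1), and C for everything else in the equation:
+
+  float s_sum = s_e + s_w + s_n + s_s;
+  float neighbors = s_e * du[y][x + 1] + s_w * du[y][x - 1]
+                  + s_n * du[y + 1][x] + s_s * du[y - 1][x];
+  // s_sum * du[y][x] = neighbors + C; a Jacobi-style update would be:
+  du_next[y][x] = (neighbors + C) / s_sum;
+
+The real solver, of course, also folds the E_I and E_G contributions
+into the diagonal and the right-hand side.)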
+
+Now our equation system is finally complete and linear, and the rest is
+fairly pedestrian. The last term connects all the unknowns together,
+but we still solve them mostly as 2x2 matrices. The most basic iterative
+method is Jacobi, where we solve du(x, y) and dv(x, y) using the
+previous iteration's value for all other du/dv values. (That this converges
+at all is beyond this text to prove, but it does. Not that we bother
+iterating until it converges; a few iterations are good enough.)
+Gauss-Seidel iterations improve on this by (surprisingly!) using this
+iteration's computed du/dv values where they're already available; this
+improves convergence, but is hard to parallelize.
+
+Successive over-relaxation (SOR) improves further on this, in that it
+assumes that the solution moves towards the right value, so why not
+just go a bit further? That is, if Gauss-Seidel would tell you to increase
+the flow by 1.0 pixel to the right, perhaps go 1.5 pixels to the right
+instead (this value is called ω). Again, the convergence proof is beyond the
+scope here, but SOR converges for any ω between 1 and 2 (1 gives plain
+Gauss-Seidel, and over 2, we risk overshooting and never converging). Optimal
+ω depends on the equation system; DIS uses ω = 1.6, which presumably was
+measured, while we do ω = 1.8 (seems to be marginally better after some
+light testing).
+
+Efficient GPU implementation of SOR is not trivial; as noted before,
+Gauss-Seidel is inherently serial, which is a poor match for the GPU.
+Worse, doing SOR with Jacobi as base instead of Gauss-Seidel makes for
+an algorithm which simply does not converge. We solve this by using a
+method called red-black SOR (not to be confused with red-black binary
+trees). Conceptually, it assigns every unknown a color, with every other
+being red or black (similar to a checkerboard). Since red values now
+only depend on black values and vice versa, one can do all red values
+in parallel, then all black values, and so on. (This is equivalent to
+reordering the equation set; different such orderings can have different
+convergence speeds.)
+
+Our GPU SOR implementation is not overly efficient, so essentially one such
+half-iteration of red-black SOR costs the same as one full iteration of
+Jacobi, but convergence is so much faster that it's worth it. Generally
+speaking, Gauss-Seidel converges twice as fast as Jacobi (ie., if Jacobi
+converges in N iterations, Gauss-Seidel does so in N/2), but SOR converges
+_geometrically_ faster, ie., in O(√N) iterations.
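+
+A sketch of one red-black half-iteration over du only (omega and the
+hypothetical solve_local(), which computes the Gauss-Seidel value for a
+single point, are assumptions for illustration; the real solver updates
+du/dv pairs together):
+
+  void sor_half_iteration(int color, float omega)  // color: 0 = red, 1 = black.
+  {
+          for (unsigned y = 1; y < height - 1; ++y) {
+                  // Every other point on each row; all points of one color
+                  // are independent, so this parallelizes trivially.
+                  for (unsigned x = 1 + ((y + color) & 1); x < width - 1; x += 2) {
+                          float new_du = solve_local(x, y);
+                          du[y][x] += omega * (new_du - du[y][x]);
+                  }
+          }
+  }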
+
+Do note that the DeepFlow code does not fully use SOR or even Gauss-Seidel;
+it solves every 2x2 block (ie., single du/dv pair) using Cramer's rule,
+and then pushes that vector 60% further, SOR-style. This would be clearly
+more accurate if we didn't have SOR in the mix (since du and dv would
+converge immediately relative to each other, bar Cramer's numerical issues),
+but I'm not sure whether it's better given SOR. (DIS changes this to a more
+traditional SOR formulation, which we also use. It doesn't seem to be much
+different in practical testing; perhaps minutely worse, but I haven't done
+a deep analysis here.)
+
+And that's it. References:
+
+[Brox04]: Brox, Bruhn, Papenberg, Weickert: “High Accuracy Optical Flow
+  Estimation Based on a Theory for Warping”, in Proceedings of the European
+  Conference on Computer Vision (ECCV), 2004
+[Brox05]: Brox: “From Pixels to Regions: Partial Differential Equations in
+  Image Analysis”, PhD thesis, 2005
+[Brox11]: Brox, Malik: “Large Displacement Optical Flow: Descriptor Matching in
+  Variational Motion Estimation”, IEEE Transactions on Pattern Analysis and
+  Machine Intelligence, 2011
+[Ciarlet78]: Ciarlet: “The Finite Element Method for Elliptic Problems”, 1978
+[Fahad07]: Fahad, Morris: “Multiple Combined Constraints for Optical Flow
+  Estimation”, in Proceedings of the 3rd International Conference on Advances
+  in Visual Computing (ISVC), 2007
+[Kroeger16]: Kroeger, Timofte, Dai, van Gool: “Fast Optical Flow using Dense
+  Inverse Search”, in Proceedings of the European Conference on Computer Vision
+  (ECCV), 2016
+[Weinzaepfel13]: Weinzaepfel, Revaud, Harchaoui, Schmid: “DeepFlow: Large
+  displacement optical flow with deep matching”, in IEEE International Conference
+  on Computer Vision (ICCV), 2013
+[Zimmer11]: Zimmer, Bruhn, Weickert: “Optic Flow in Harmony”, International
+  Journal of Computer Vision, 2011
diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
new file mode 100644 (file)
index 0000000..8f11714
--- /dev/null
@@ -0,0 +1,657 @@
+#include "video_stream.h"
+
+extern "C" {
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+}
+
+#include "chroma_subsampler.h"
+#include "shared/context.h"
+#include "flags.h"
+#include "flow.h"
+#include "shared/httpd.h"
+#include "jpeg_frame_view.h"
+#include "movit/util.h"
+#include "shared/mux.h"
+#include "player.h"
+#include "util.h"
+#include "ycbcr_converter.h"
+
+#include <epoxy/glx.h>
+#include <jpeglib.h>
+#include <unistd.h>
+
+using namespace std;
+using namespace std::chrono;
+
+extern HTTPD *global_httpd;
+
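+// A libjpeg destination manager that writes the compressed output into
+// a growable std::vector<uint8_t> instead of to a file.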
+struct VectorDestinationManager {
+       jpeg_destination_mgr pub;
+       std::vector<uint8_t> dest;
+
+       VectorDestinationManager()
+       {
+               pub.init_destination = init_destination_thunk;
+               pub.empty_output_buffer = empty_output_buffer_thunk;
+               pub.term_destination = term_destination_thunk;
+       }
+
+       static void init_destination_thunk(j_compress_ptr ptr)
+       {
+               ((VectorDestinationManager *)(ptr->dest))->init_destination();
+       }
+
+       inline void init_destination()
+       {
+               make_room(0);
+       }
+
+       static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
+       {
+               return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
+       }
+
+       inline bool empty_output_buffer()
+       {
+               make_room(dest.size());  // Should ignore pub.free_in_buffer!
+               return true;
+       }
+
+       inline void make_room(size_t bytes_used)
+       {
+               dest.resize(bytes_used + 4096);
+               dest.resize(dest.capacity());
+               pub.next_output_byte = dest.data() + bytes_used;
+               pub.free_in_buffer = dest.size() - bytes_used;
+       }
+
+       static void term_destination_thunk(j_compress_ptr ptr)
+       {
+               ((VectorDestinationManager *)(ptr->dest))->term_destination();
+       }
+
+       inline void term_destination()
+       {
+               dest.resize(dest.size() - pub.free_in_buffer);
+       }
+};
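+// The thunks above cast ptr->dest (a jpeg_destination_mgr *) back to the enclosing
+// VectorDestinationManager, which is only valid because “pub” is the first member
+// of a standard-layout type; hence the assert below.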
+static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
+
+vector<uint8_t> encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height)
+{
+       VectorDestinationManager dest;
+
+       jpeg_compress_struct cinfo;
+       jpeg_error_mgr jerr;
+       cinfo.err = jpeg_std_error(&jerr);
+       jpeg_create_compress(&cinfo);
+
+       cinfo.dest = (jpeg_destination_mgr *)&dest;
+       cinfo.input_components = 3;
+       cinfo.in_color_space = JCS_RGB;
+       jpeg_set_defaults(&cinfo);
+       constexpr int quality = 90;
+       jpeg_set_quality(&cinfo, quality, /*force_baseline=*/false);
+
+       cinfo.image_width = width;
+       cinfo.image_height = height;
+       cinfo.raw_data_in = true;
+       jpeg_set_colorspace(&cinfo, JCS_YCbCr);
+       cinfo.comp_info[0].h_samp_factor = 2;
+       cinfo.comp_info[0].v_samp_factor = 1;
+       cinfo.comp_info[1].h_samp_factor = 1;
+       cinfo.comp_info[1].v_samp_factor = 1;
+       cinfo.comp_info[2].h_samp_factor = 1;
+       cinfo.comp_info[2].v_samp_factor = 1;
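+       // 2x1 sampling for Y' against 1x1 for Cb/Cr gives 4:2:2, matching the
+       // half-width chroma planes this function is fed.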
+       cinfo.CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
+       jpeg_start_compress(&cinfo, true);
+
+       JSAMPROW yptr[8], cbptr[8], crptr[8];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       for (unsigned y = 0; y < height; y += 8) {
+               for (unsigned yy = 0; yy < 8; ++yy) {
+                       yptr[yy] = const_cast<JSAMPROW>(&y_data[(y + yy) * width]);
+                       cbptr[yy] = const_cast<JSAMPROW>(&cb_data[(y + yy) * width / 2]);
+                       crptr[yy] = const_cast<JSAMPROW>(&cr_data[(y + yy) * width / 2]);
+               }
+
+               jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
+       }
+
+       jpeg_finish_compress(&cinfo);
+       jpeg_destroy_compress(&cinfo);
+
+       return move(dest.dest);
+}
+
+VideoStream::VideoStream()
+{
+       ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
+       ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));
+
+       GLuint input_tex[num_interpolate_slots], gray_tex[num_interpolate_slots];
+       GLuint fade_y_output_tex[num_interpolate_slots], fade_cbcr_output_tex[num_interpolate_slots];
+       GLuint cb_tex[num_interpolate_slots], cr_tex[num_interpolate_slots];
+
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, input_tex);
+       glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, gray_tex);
+       glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_y_output_tex);
+       glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_cbcr_output_tex);
+       glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cb_tex);
+       glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cr_tex);
+       check_error();
+
+       constexpr size_t width = 1280, height = 720;  // FIXME: adjustable width, height
+       int levels = find_num_levels(width, height);
+       for (size_t i = 0; i < num_interpolate_slots; ++i) {
+               glTextureStorage3D(input_tex[i], levels, GL_RGBA8, width, height, 2);
+               check_error();
+               glTextureStorage3D(gray_tex[i], levels, GL_R8, width, height, 2);
+               check_error();
+               glTextureStorage2D(fade_y_output_tex[i], 1, GL_R8, width, height);
+               check_error();
+               glTextureStorage2D(fade_cbcr_output_tex[i], 1, GL_RG8, width, height);
+               check_error();
+               glTextureStorage2D(cb_tex[i], 1, GL_R8, width / 2, height);
+               check_error();
+               glTextureStorage2D(cr_tex[i], 1, GL_R8, width / 2, height);
+               check_error();
+
+               unique_ptr<InterpolatedFrameResources> resource(new InterpolatedFrameResources);
+               resource->owner = this;
+               resource->input_tex = input_tex[i];
+               resource->gray_tex = gray_tex[i];
+               resource->fade_y_output_tex = fade_y_output_tex[i];
+               resource->fade_cbcr_output_tex = fade_cbcr_output_tex[i];
+               resource->cb_tex = cb_tex[i];
+               resource->cr_tex = cr_tex[i];
+               glCreateFramebuffers(2, resource->input_fbos);
+               check_error();
+               glCreateFramebuffers(1, &resource->fade_fbo);
+               check_error();
+
+               glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
+               check_error();
+               glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
+               check_error();
+               glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
+               check_error();
+               glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
+               check_error();
+               glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT0, fade_y_output_tex[i], 0);
+               check_error();
+               glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT1, fade_cbcr_output_tex[i], 0);
+               check_error();
+
+               GLuint bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+               glNamedFramebufferDrawBuffers(resource->input_fbos[0], 2, bufs);
+               check_error();
+               glNamedFramebufferDrawBuffers(resource->input_fbos[1], 2, bufs);
+               check_error();
+               glNamedFramebufferDrawBuffers(resource->fade_fbo, 2, bufs);
+               check_error();
+
+               glCreateBuffers(1, &resource->pbo);
+               check_error();
+               glNamedBufferStorage(resource->pbo, width * height * 4, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+               check_error();
+               resource->pbo_contents = glMapNamedBufferRange(resource->pbo, 0, width * height * 4, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+               interpolate_resources.push_back(move(resource));
+       }
+
+       check_error();
+
+       OperatingPoint op;
+       if (global_flags.interpolation_quality == 1) {
+               op = operating_point1;
+       } else if (global_flags.interpolation_quality == 2) {
+               op = operating_point2;
+       } else if (global_flags.interpolation_quality == 3) {
+               op = operating_point3;
+       } else if (global_flags.interpolation_quality == 4) {
+               op = operating_point4;
+       } else {
+               assert(false);
+       }
+
+       compute_flow.reset(new DISComputeFlow(width, height, op));
+       interpolate.reset(new Interpolate(op, /*split_ycbcr_output=*/true));
+       interpolate_no_split.reset(new Interpolate(op, /*split_ycbcr_output=*/false));
+       chroma_subsampler.reset(new ChromaSubsampler);
+       check_error();
+
+       // The “last frame” is initially black.
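+       // (Y' = 16, Cb = Cr = 128 is black in limited range; see the full_range
+       // discussion in ycbcr_converter.cpp.)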
+       unique_ptr<uint8_t[]> y(new uint8_t[1280 * 720]);
+       unique_ptr<uint8_t[]> cb_or_cr(new uint8_t[640 * 720]);
+       memset(y.get(), 16, 1280 * 720);
+       memset(cb_or_cr.get(), 128, 640 * 720);
+       last_frame = encode_jpeg(y.get(), cb_or_cr.get(), cb_or_cr.get(), 1280, 720);
+}
+
+VideoStream::~VideoStream() {}
+
+void VideoStream::start()
+{
+       AVFormatContext *avctx = avformat_alloc_context();
+       avctx->oformat = av_guess_format("nut", nullptr, nullptr);
+
+       uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+       avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
+       avctx->pb->write_data_type = &VideoStream::write_packet2_thunk;
+       avctx->pb->ignore_boundary_point = 1;
+
+       Mux::Codec video_codec = Mux::CODEC_MJPEG;
+
+       avctx->flags = AVFMT_FLAG_CUSTOM_IO;
+
+       string video_extradata;
+
+       constexpr int width = 1280, height = 720;  // Doesn't matter for MJPEG.
+       stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, /*audio_codec_parameters=*/nullptr,
+               AVCOL_SPC_BT709, Mux::WITHOUT_AUDIO,
+               COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}));
+
+       encode_thread = thread(&VideoStream::encode_thread_func, this);
+}
+
+void VideoStream::stop()
+{
+       encode_thread.join();
+}
+
+void VideoStream::clear_queue()
+{
+       deque<QueuedFrame> q;
+
+       {
+               unique_lock<mutex> lock(queue_lock);
+               q = move(frame_queue);
+       }
+
+       // These are not RAII-ed, unfortunately, so we'll need to clean them ourselves.
+       // Note that release_texture() is thread-safe.
+       for (const QueuedFrame &qf : q) {
+               if (qf.type == QueuedFrame::INTERPOLATED ||
+                   qf.type == QueuedFrame::FADED_INTERPOLATED) {
+                       compute_flow->release_texture(qf.flow_tex);
+               }
+               if (qf.type == QueuedFrame::INTERPOLATED) {
+                       interpolate->release_texture(qf.output_tex);
+                       interpolate->release_texture(qf.cbcr_tex);
+               }
+       }
+
+       // Destroy q outside the mutex; destroying it while holding queue_lock would
+       // deadlock, since IFRReleaser re-takes the lock to return each resource slot.
+}
+
+void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
+                                          int64_t output_pts, function<void()> &&display_func,
+                                          QueueSpotHolder &&queue_spot_holder,
+                                          FrameOnDisk frame)
+{
+       fprintf(stderr, "output_pts=%ld  original      input_pts=%ld\n", output_pts, frame.pts);
+
+       // Preload the file from disk, so that the encoder thread does not get stalled.
+       // TODO: Consider sending it through the queue instead.
+       (void)frame_reader.read_frame(frame);
+
+       QueuedFrame qf;
+       qf.local_pts = local_pts;
+       qf.type = QueuedFrame::ORIGINAL;
+       qf.output_pts = output_pts;
+       qf.frame1 = frame;
+       qf.display_func = move(display_func);
+       qf.queue_spot_holder = move(queue_spot_holder);
+
+       unique_lock<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
+void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64_t output_pts,
+                                       function<void()> &&display_func,
+                                       QueueSpotHolder &&queue_spot_holder,
+                                       FrameOnDisk frame1_spec, FrameOnDisk frame2_spec,
+                                       float fade_alpha)
+{
+       fprintf(stderr, "output_pts=%ld  faded         input_pts=%ld,%ld  fade_alpha=%.2f\n", output_pts, frame1_spec.pts, frame2_spec.pts, fade_alpha);
+
+       // Get the temporary OpenGL resources we need for doing the fade.
+       // (We share these with interpolated frames, which is slightly
+       // overkill, but there's no need to waste resources on keeping
+       // separate pools around.)
+       BorrowedInterpolatedFrameResources resources;
+       {
+               unique_lock<mutex> lock(queue_lock);
+               if (interpolate_resources.empty()) {
+                       fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
+                       return;
+               }
+               resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
+               interpolate_resources.pop_front();
+       }
+
+       bool did_decode;
+
+       shared_ptr<Frame> frame1 = decode_jpeg_with_cache(frame1_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+       shared_ptr<Frame> frame2 = decode_jpeg_with_cache(frame2_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+
+       ycbcr_semiplanar_converter->prepare_chain_for_fade(frame1, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, 1280, 720);
+
+       QueuedFrame qf;
+       qf.local_pts = local_pts;
+       qf.type = QueuedFrame::FADED;
+       qf.output_pts = output_pts;
+       qf.frame1 = frame1_spec;
+       qf.display_func = move(display_func);
+       qf.queue_spot_holder = move(queue_spot_holder);
+
+       qf.secondary_frame = frame2_spec;
+
+       // Subsample and split Cb/Cr.
+       chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+
+       // Read it down (asynchronously) to the CPU.
+       glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
+       check_error();
+       glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+       check_error();
+       glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
+       check_error();
+       glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3 - 640 * 720, BUFFER_OFFSET(1280 * 720 + 640 * 720));
+       check_error();
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+       // Set a fence we can wait for to make sure the CPU sees the read.
+       glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+       check_error();
+       qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       check_error();
+       qf.resources = move(resources);
+       qf.local_pts = local_pts;
+
+       unique_lock<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
+void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts,
+                                              int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
+                                              QueueSpotHolder &&queue_spot_holder,
+                                              FrameOnDisk frame1, FrameOnDisk frame2,
+                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha)
+{
+       if (secondary_frame.pts != -1) {
+               fprintf(stderr, "output_pts=%ld  interpolated  input_pts1=%ld input_pts2=%ld alpha=%.3f  secondary_pts=%ld  fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
+       } else {
+               fprintf(stderr, "output_pts=%ld  interpolated  input_pts1=%ld input_pts2=%ld alpha=%.3f\n", output_pts, frame1.pts, frame2.pts, alpha);
+       }
+
+       // Get the temporary OpenGL resources we need for doing the interpolation.
+       BorrowedInterpolatedFrameResources resources;
+       {
+               unique_lock<mutex> lock(queue_lock);
+               if (interpolate_resources.empty()) {
+                       fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
+                       return;
+               }
+               resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
+               interpolate_resources.pop_front();
+       }
+
+       QueuedFrame qf;
+       qf.type = (secondary_frame.pts == -1) ? QueuedFrame::INTERPOLATED : QueuedFrame::FADED_INTERPOLATED;
+       qf.output_pts = output_pts;
+       qf.display_decoded_func = move(display_func);
+       qf.queue_spot_holder = move(queue_spot_holder);
+       qf.local_pts = local_pts;
+
+       check_error();
+
+       // Convert frame0 and frame1 to OpenGL textures.
+       for (size_t frame_no = 0; frame_no < 2; ++frame_no) {
+               FrameOnDisk frame_spec = frame_no == 1 ? frame2 : frame1;
+               bool did_decode;
+               shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+               ycbcr_converter->prepare_chain_for_conversion(frame)->render_to_fbo(resources->input_fbos[frame_no], 1280, 720);
+       }
+
+       glGenerateTextureMipmap(resources->input_tex);
+       check_error();
+       glGenerateTextureMipmap(resources->gray_tex);
+       check_error();
+
+       // Compute the interpolated frame.
+       qf.flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+       check_error();
+
+       if (secondary_frame.pts != -1) {
+               // Fade. First kick off the interpolation.
+               tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, 1280, 720, alpha);
+               check_error();
+
+               // Now decode the image we are fading against.
+               bool did_decode;
+               shared_ptr<Frame> frame2 = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+
+               // Then fade against it, putting it into the fade Y' and CbCr textures.
+               ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, 1280, 720);
+
+               // Subsample and split Cb/Cr.
+               chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+
+               interpolate_no_split->release_texture(qf.output_tex);
+       } else {
+               tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, 1280, 720, alpha);
+               check_error();
+
+               // Subsample and split Cb/Cr.
+               chroma_subsampler->subsample_chroma(qf.cbcr_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+       }
+
+       // We could have released qf.flow_tex here, but to make sure we don't cause a stall
+       // when trying to reuse it for the next frame, we can just as well hold on to it
+       // and release it only when the readback is done.
+
+       // Read it down (asynchronously) to the CPU.
+       glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
+       check_error();
+       if (secondary_frame.pts != -1) {
+               glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+       } else {
+               glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+       }
+       check_error();
+       glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
+       check_error();
+       glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3 - 640 * 720, BUFFER_OFFSET(1280 * 720 + 640 * 720));
+       check_error();
+       glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+       // Set a fence we can wait for to make sure the CPU sees the read.
+       glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+       check_error();
+       qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+       check_error();
+       qf.resources = move(resources);
+
+       unique_lock<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
+void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
+                                         int64_t output_pts, function<void()> &&display_func,
+                                         QueueSpotHolder &&queue_spot_holder)
+{
+       QueuedFrame qf;
+       qf.type = QueuedFrame::REFRESH;
+       qf.output_pts = output_pts;
+       qf.display_func = move(display_func);
+       qf.queue_spot_holder = move(queue_spot_holder);
+
+       unique_lock<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
+namespace {
+
+shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
+{
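+       // The mapped PBO holds planar data: a full-resolution Y' plane, then the
+       // half-width (4:2:2) Cb and Cr planes, matching the offsets passed to
+       // glGetTextureImage() in the schedule_* functions above.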
+       size_t chroma_width = width / 2;
+
+       const uint8_t *y = (const uint8_t *)contents;
+       const uint8_t *cb = (const uint8_t *)contents + width * height;
+       const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;
+
+       shared_ptr<Frame> frame(new Frame);
+       frame->y.reset(new uint8_t[width * height]);
+       frame->cb.reset(new uint8_t[chroma_width * height]);
+       frame->cr.reset(new uint8_t[chroma_width * height]);
+       for (unsigned yy = 0; yy < height; ++yy) {
+               memcpy(frame->y.get() + width * yy, y + width * yy, width);
+               memcpy(frame->cb.get() + chroma_width * yy, cb + chroma_width * yy, chroma_width);
+               memcpy(frame->cr.get() + chroma_width * yy, cr + chroma_width * yy, chroma_width);
+       }
+       frame->is_semiplanar = false;
+       frame->width = width;
+       frame->height = height;
+       frame->chroma_subsampling_x = 2;
+       frame->chroma_subsampling_y = 1;
+       frame->pitch_y = width;
+       frame->pitch_chroma = chroma_width;
+       return frame;
+}
+
+}  // namespace
+
+void VideoStream::encode_thread_func()
+{
+       pthread_setname_np(pthread_self(), "VideoStream");
+       QSurface *surface = create_surface();
+       QOpenGLContext *context = create_context(surface);
+       bool ok = make_current(context, surface);
+       if (!ok) {
+               fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
+               exit(1);
+       }
+
+       for ( ;; ) {
+               QueuedFrame qf;
+               {
+                       unique_lock<mutex> lock(queue_lock);
+
+                       // Wait until we have a frame to play.
+                       queue_changed.wait(lock, [this]{
+                               return !frame_queue.empty();
+                       });
+                       steady_clock::time_point frame_start = frame_queue.front().local_pts;
+
+                       // Now sleep until the frame is supposed to start (the usual case),
+                       // _or_ clear_queue() happened.
+                       bool aborted = queue_changed.wait_until(lock, frame_start, [this, frame_start]{
+                               return frame_queue.empty() || frame_queue.front().local_pts != frame_start;
+                       });
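+                       // wait_until() with a predicate returns the predicate's final
+                       // value, so “true” here means the queue changed under us,
+                       // not that the deadline passed.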
+                       if (aborted) {
+                               // clear_queue() happened, so don't play this frame after all.
+                               continue;
+                       }
+                       qf = move(frame_queue.front());
+                       frame_queue.pop_front();
+               }
+
+               if (qf.type == QueuedFrame::ORIGINAL) {
+                       // Send the JPEG frame on, unchanged.
+                       string jpeg = frame_reader.read_frame(qf.frame1);
+                       AVPacket pkt;
+                       av_init_packet(&pkt);
+                       pkt.stream_index = 0;
+                       pkt.data = (uint8_t *)jpeg.data();
+                       pkt.size = jpeg.size();
+                       stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+
+                       last_frame.assign(&jpeg[0], &jpeg[0] + jpeg.size());
+               } else if (qf.type == QueuedFrame::FADED) {
+                       glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+
+                       shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, 1280, 720);
+
+                       // Now JPEG encode it, and send it on to the stream.
+                       vector<uint8_t> jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), 1280, 720);
+
+                       AVPacket pkt;
+                       av_init_packet(&pkt);
+                       pkt.stream_index = 0;
+                       pkt.data = (uint8_t *)jpeg.data();
+                       pkt.size = jpeg.size();
+                       stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+                       last_frame = move(jpeg);
+               } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
+                       glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+
+                       // Send it on to display.
+                       shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, 1280, 720);
+                       if (qf.display_decoded_func != nullptr) {
+                               qf.display_decoded_func(frame);
+                       }
+
+                       // Now JPEG encode it, and send it on to the stream.
+                       vector<uint8_t> jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), 1280, 720);
+                       compute_flow->release_texture(qf.flow_tex);
+                       if (qf.type != QueuedFrame::FADED_INTERPOLATED) {
+                               interpolate->release_texture(qf.output_tex);
+                               interpolate->release_texture(qf.cbcr_tex);
+                       }
+
+                       AVPacket pkt;
+                       av_init_packet(&pkt);
+                       pkt.stream_index = 0;
+                       pkt.data = (uint8_t *)jpeg.data();
+                       pkt.size = jpeg.size();
+                       stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+                       last_frame = move(jpeg);
+               } else if (qf.type == QueuedFrame::REFRESH) {
+                       AVPacket pkt;
+                       av_init_packet(&pkt);
+                       pkt.stream_index = 0;
+                       pkt.data = (uint8_t *)last_frame.data();
+                       pkt.size = last_frame.size();
+                       stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+               } else {
+                       assert(false);
+               }
+               if (qf.display_func != nullptr) {
+                       qf.display_func();
+               }
+       }
+}
+
+int VideoStream::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+       VideoStream *video_stream = (VideoStream *)opaque;
+       return video_stream->write_packet2(buf, buf_size, type, time);
+}
+
+int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+       if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
+               seen_sync_markers = true;
+       } else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
+               // We don't know if this is a keyframe or not (the muxer could
+               // avoid marking it), so we just have to make the best of it.
+               type = AVIO_DATA_MARKER_SYNC_POINT;
+       }
+
+       if (type == AVIO_DATA_MARKER_HEADER) {
+               stream_mux_header.append((char *)buf, buf_size);
+               global_httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
+       } else {
+               global_httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+       }
+       return buf_size;
+}
diff --git a/futatabi/video_stream.h b/futatabi/video_stream.h
new file mode 100644 (file)
index 0000000..d0634e0
--- /dev/null
@@ -0,0 +1,147 @@
+#ifndef _VIDEO_STREAM_H
+#define _VIDEO_STREAM_H 1
+
+#include <epoxy/gl.h>
+#include <stdint.h>
+
+extern "C" {
+#include <libavformat/avio.h>
+}
+
+#include "frame_on_disk.h"
+#include "jpeg_frame_view.h"
+#include "shared/ref_counted_gl_sync.h"
+#include "queue_spot_holder.h"
+
+#include <chrono>
+#include <condition_variable>
+#include <deque>
+#include <functional>
+#include <movit/effect_chain.h>
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+#include <mutex>
+#include <string>
+#include <thread>
+
+class ChromaSubsampler;
+class DISComputeFlow;
+class Interpolate;
+class Mux;
+class QSurface;
+class QSurfaceFormat;
+class YCbCrConverter;
+
+class VideoStream {
+public:
+       VideoStream();
+       ~VideoStream();
+       void start();
+       void stop();
+       void clear_queue();
+
+       // “display_func” is called after the frame has been calculated (if needed)
+       // and has gone out to the stream.
+       void schedule_original_frame(std::chrono::steady_clock::time_point,
+                                    int64_t output_pts, std::function<void()> &&display_func,
+                                    QueueSpotHolder &&queue_spot_holder,
+                                    FrameOnDisk frame);
+       void schedule_faded_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+                                 std::function<void()> &&display_func,
+                                 QueueSpotHolder &&queue_spot_holder,
+                                 FrameOnDisk frame1, FrameOnDisk frame2,
+                                 float fade_alpha);
+       void schedule_interpolated_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+                                 std::function<void(std::shared_ptr<Frame>)> &&display_func,
+                                 QueueSpotHolder &&queue_spot_holder,
+                                 FrameOnDisk frame1, FrameOnDisk frame2,
+                                 float alpha, FrameOnDisk secondary_frame = {},  // Empty = no secondary (fade) frame.
+                                 float fade_alpha = 0.0f);
+       void schedule_refresh_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+                                   std::function<void()> &&display_func,
+                                   QueueSpotHolder &&queue_spot_holder);
+
+private:
+       FrameReader frame_reader;
+
+       void encode_thread_func();
+       std::thread encode_thread;
+
+       static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+       int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+       // Allocated at the very start; if the pool is empty, we start dropping frames
+       // (so that we don't build up an infinite interpolation backlog).
+       struct InterpolatedFrameResources {
+               VideoStream *owner;  // Used only for IFRReleaser, below.
+
+               GLuint input_tex;  // Layered (contains both input frames), Y'CbCr.
+               GLuint gray_tex;  // Same, but Y only.
+               GLuint input_fbos[2];  // For rendering to the two layers of input_tex.
+
+               // Destination textures and FBO if there is a fade.
+               GLuint fade_y_output_tex, fade_cbcr_output_tex;
+               GLuint fade_fbo;
+
+               GLuint cb_tex, cr_tex;  // Subsampled, final output.
+
+               GLuint pbo;  // For reading the data back.
+               void *pbo_contents;  // Persistently mapped.
+       };
+       std::mutex queue_lock;
+       std::deque<std::unique_ptr<InterpolatedFrameResources>> interpolate_resources;  // Under <queue_lock>.
+       static constexpr size_t num_interpolate_slots = 15;  // Should be larger than Player::max_queued_frames, or we risk mass-dropping frames.
+
+       struct IFRReleaser {
+               void operator() (InterpolatedFrameResources *ifr) const
+               {
+                       if (ifr != nullptr) {
+                               std::unique_lock<std::mutex> lock(ifr->owner->queue_lock);
+                               ifr->owner->interpolate_resources.emplace_back(ifr);
+                       }
+               }
+       };
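+       // A unique_ptr with IFRReleaser as its deleter: letting a borrowed slot go
+       // out of scope automatically returns it to <interpolate_resources>.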
+       using BorrowedInterpolatedFrameResources = std::unique_ptr<InterpolatedFrameResources, IFRReleaser>;
+
+       struct QueuedFrame {
+               std::chrono::steady_clock::time_point local_pts;
+
+               int64_t output_pts;
+               enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH } type;
+               FrameOnDisk frame1;  // The only frame for original frames.
+
+               // For fades only (including fades against interpolated frames).
+               FrameOnDisk secondary_frame;
+
+               // For interpolated frames only.
+               FrameOnDisk frame2;
+               float alpha;
+               BorrowedInterpolatedFrameResources resources;
+               RefCountedGLsync fence;  // Set when the interpolated image is read back to the CPU.
+               GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else.
+               FrameOnDisk id;
+
+               std::function<void()> display_func;  // Called when the image is done decoding.
+               std::function<void(std::shared_ptr<Frame>)> display_decoded_func;  // Same, except for INTERPOLATED and FADED_INTERPOLATED.
+
+               QueueSpotHolder queue_spot_holder;
+       };
+       std::deque<QueuedFrame> frame_queue;  // Under <queue_lock>.
+       std::condition_variable queue_changed;
+
+       std::unique_ptr<Mux> stream_mux;  // To HTTP.
+       std::string stream_mux_header;
+       bool seen_sync_markers = false;
+
+       std::unique_ptr<YCbCrConverter> ycbcr_converter;
+       std::unique_ptr<YCbCrConverter> ycbcr_semiplanar_converter;
+
+       // Frame interpolation.
+       std::unique_ptr<DISComputeFlow> compute_flow;
+       std::unique_ptr<Interpolate> interpolate, interpolate_no_split;
+       std::unique_ptr<ChromaSubsampler> chroma_subsampler;
+
+       std::vector<uint8_t> last_frame;
+};
+
+#endif  // !defined(_VIDEO_STREAM_H)
diff --git a/futatabi/vis.cpp b/futatabi/vis.cpp
new file mode 100644 (file)
index 0000000..c67a0cc
--- /dev/null
@@ -0,0 +1,35 @@
+// Visualize a .flo file.
+
+#include "util.h"
+
+#include <assert.h>
+#include <memory>
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+       if (argc != 3) {
+               fprintf(stderr, "Usage: ./vis input.flo out.ppm\n");
+               exit(1);
+       }
+
+       Flow flow = read_flow(argv[1]);
+
+       FILE *fp = fopen(argv[2], "wb");
+       if (fp == nullptr) {
+               perror(argv[2]);
+               exit(1);
+       }
+       fprintf(fp, "P6\n%d %d\n255\n", flow.width, flow.height);
+       for (unsigned y = 0; y < unsigned(flow.height); ++y) {
+               for (unsigned x = 0; x < unsigned(flow.width); ++x) {
+                       float du = flow.flow[y * flow.width + x].du;
+                       float dv = flow.flow[y * flow.width + x].dv;
+
+                       uint8_t r, g, b;
+                       flow2rgb(du, dv, &r, &g, &b);
+                       putc(r, fp);
+                       putc(g, fp);
+                       putc(b, fp);
+               }
+       }
+       fclose(fp);
+}
diff --git a/futatabi/vs.vert b/futatabi/vs.vert
new file mode 100644 (file)
index 0000000..61ad91a
--- /dev/null
@@ -0,0 +1,20 @@
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 tc;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       tc.xy = position;
+       tc.z = gl_InstanceID;
+
+       gl_Layer = gl_InstanceID;
+}
diff --git a/futatabi/ycbcr_converter.cpp b/futatabi/ycbcr_converter.cpp
new file mode 100644 (file)
index 0000000..694ba97
--- /dev/null
@@ -0,0 +1,189 @@
+#include "ycbcr_converter.h"
+
+#include "jpeg_frame.h"
+
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+
+using namespace std;
+using namespace movit;
+
+namespace {
+
+void setup_outputs(YCbCrConverter::OutputMode output_mode, const ImageFormat &output_format, const YCbCrFormat &ycbcr_output_format, EffectChain *chain)
+{
+       if (output_mode == YCbCrConverter::OUTPUT_TO_RGBA) {
+               chain->add_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+               chain->set_output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT);
+       } else if (output_mode == YCbCrConverter::OUTPUT_TO_SEMIPLANAR) {
+               chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR);
+               chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
+       } else {
+               assert(output_mode == YCbCrConverter::OUTPUT_TO_DUAL_YCBCR);
+
+               // One full Y'CbCr texture (for interpolation), one that's just Y (throwing away the
+               // Cb and Cr channels). The second copy is sort of redundant, but it's the easiest way
+               // of getting the gray data into a layered texture.
+               chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+               chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+               chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
+       }
+}
+
+}  // namespace
+
+YCbCrConverter::YCbCrConverter(YCbCrConverter::OutputMode output_mode, ResourcePool *resource_pool)
+{
+       ImageFormat inout_format;
+       inout_format.color_space = COLORSPACE_sRGB;
+       inout_format.gamma_curve = GAMMA_sRGB;
+
+       ycbcr_format.luma_coefficients = YCBCR_REC_709;
+       ycbcr_format.num_levels = 256;
+       ycbcr_format.chroma_subsampling_x = 2;
+       ycbcr_format.chroma_subsampling_y = 1;
+       ycbcr_format.cb_x_position = 0.0f;  // H.264 -- _not_ JPEG, even though our input is MJPEG-encoded
+       ycbcr_format.cb_y_position = 0.5f;  // Irrelevant.
+       ycbcr_format.cr_x_position = 0.0f;
+       ycbcr_format.cr_y_position = 0.5f;
+
+       // This is a hack. Even though we're sending MJPEG around, which is
+       // full-range, it's mostly transporting signals from limited-range
+       // sources with no conversion, so we ought to have set this to false.
+       // However, in the off chance that we're actually getting real MJPEG,
+       // we don't want to crush its blacks (or whites) by clamping. All of
+       // our processing is fades, so if we're in limited-range input, we'll
+       // stay in limited-range output. (Fading between limited-range and
+       // full-range sources will be broken, of course.) There will be some
+       // slight confusion in the parts of the algorithms dealing with RGB,
+       // but they're small and we'll manage.
+       ycbcr_format.full_range = true;
+
+       YCbCrFormat ycbcr_output_format = ycbcr_format;
+       ycbcr_output_format.chroma_subsampling_x = 1;
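+       // The Y'CbCr chains thus output chroma unsubsampled (4:4:4); getting back
+       // down to 4:2:2 for encoding is ChromaSubsampler's job (see video_stream.cpp).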
+
+       // Planar Y'CbCr decoding chain.
+       planar_chain.reset(new EffectChain(1280, 720, resource_pool));
+       ycbcr_planar_input = (YCbCrInput *)planar_chain->add_input(new YCbCrInput(inout_format, ycbcr_format, 1280, 720, YCBCR_INPUT_PLANAR));
+       setup_outputs(output_mode, inout_format, ycbcr_output_format, planar_chain.get());
+       planar_chain->set_dither_bits(8);
+       planar_chain->finalize();
+
+       // Semiplanar Y'CbCr decoding chain (for images coming from VA-API).
+       semiplanar_chain.reset(new EffectChain(1280, 720, resource_pool));
+       ycbcr_semiplanar_input = (YCbCrInput *)semiplanar_chain->add_input(new YCbCrInput(inout_format, ycbcr_format, 1280, 720, YCBCR_INPUT_SPLIT_Y_AND_CBCR));
+       setup_outputs(output_mode, inout_format, ycbcr_output_format, semiplanar_chain.get());
+       semiplanar_chain->set_dither_bits(8);
+       semiplanar_chain->finalize();
+
+       // Fade chains.
+       for (bool first_input_is_semiplanar : { false, true }) {
+               for (bool second_input_is_semiplanar : { false, true }) {
+                       FadeChain &fade_chain = fade_chains[first_input_is_semiplanar][second_input_is_semiplanar];
+                       fade_chain.chain.reset(new EffectChain(1280, 720, resource_pool));
+                       fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+                               new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+                                       first_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+                       fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+                               new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+                                       second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+                       fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
+                               new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+                       setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
+                       fade_chain.chain->set_dither_bits(8);
+                       fade_chain.chain->finalize();
+               }
+       }
+
+// Fade from interleaved chain (i.e., the first input is interleaved, since it comes
+       // directly from the GPU anyway).
+       for (bool second_input_is_semiplanar : { false, true }) {
+               FadeChain &fade_chain = interleaved_fade_chains[second_input_is_semiplanar];
+               fade_chain.chain.reset(new EffectChain(1280, 720, resource_pool));
+
+               ycbcr_format.chroma_subsampling_x = 1;
+               fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+                       new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+                               YCBCR_INPUT_INTERLEAVED));
+
+               ycbcr_format.chroma_subsampling_x = 2;
+               fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+                       new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+                               second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+
+               fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
+                       new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+               setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
+               fade_chain.chain->set_dither_bits(8);
+               fade_chain.chain->finalize();
+       }
+}
+
+EffectChain *YCbCrConverter::prepare_chain_for_conversion(shared_ptr<Frame> frame)
+{
+       if (frame->is_semiplanar) {
+               setup_input_for_frame(frame, ycbcr_format, ycbcr_semiplanar_input);
+               return semiplanar_chain.get();
+       } else {
+               setup_input_for_frame(frame, ycbcr_format, ycbcr_planar_input);
+               return planar_chain.get();
+       }
+}
+
+EffectChain *YCbCrConverter::prepare_chain_for_fade(shared_ptr<Frame> frame, shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+       const FadeChain &fade_chain = fade_chains[frame->is_semiplanar][secondary_frame->is_semiplanar];
+       setup_input_for_frame(frame, ycbcr_format, fade_chain.input[0]);
+       setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
+       bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
+       ok |= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+       assert(ok);
+       return fade_chain.chain.get();
+}
+
+EffectChain *YCbCrConverter::prepare_chain_for_fade_from_texture(GLuint tex, std::shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+       const FadeChain &fade_chain = interleaved_fade_chains[secondary_frame->is_semiplanar];
+       {
+               YCbCrFormat format_copy = ycbcr_format;
+               format_copy.chroma_subsampling_x = 1;
+               format_copy.chroma_subsampling_y = 1;
+               fade_chain.input[0]->change_ycbcr_format(format_copy);
+
+               fade_chain.input[0]->set_width(1280);  // FIXME
+               fade_chain.input[0]->set_height(720);
+               fade_chain.input[0]->set_texture_num(0, tex);
+
+               glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+               glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+               glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+               glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+       }
+       setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
+       bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
+       ok |= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+       assert(ok);
+       return fade_chain.chain.get();
+}
+
+void setup_input_for_frame(shared_ptr<Frame> frame, const YCbCrFormat &ycbcr_format, YCbCrInput *input)
+{
+       YCbCrFormat format_copy = ycbcr_format;
+       format_copy.chroma_subsampling_x = frame->chroma_subsampling_x;
+       format_copy.chroma_subsampling_y = frame->chroma_subsampling_y;
+       input->change_ycbcr_format(format_copy);
+
+       input->set_width(frame->width);
+       input->set_height(frame->height);
+       input->set_pixel_data(0, frame->y.get());
+       input->set_pitch(0, frame->pitch_y);
+       if (frame->is_semiplanar) {
+               input->set_pixel_data(1, frame->cbcr.get());
+               input->set_pitch(1, frame->pitch_chroma);
+       } else {
+               input->set_pixel_data(1, frame->cb.get());
+               input->set_pixel_data(2, frame->cr.get());
+               input->set_pitch(1, frame->pitch_chroma);
+               input->set_pitch(2, frame->pitch_chroma);
+       }
+}
diff --git a/futatabi/ycbcr_converter.h b/futatabi/ycbcr_converter.h
new file mode 100644 (file)
index 0000000..459377c
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef _YCBCR_CONVERTER_H
+#define _YCBCR_CONVERTER_H 1
+
+#include <epoxy/gl.h>
+#include <memory>
+#include <movit/ycbcr_input.h>
+
+namespace movit {
+
+class EffectChain;
+class MixEffect;
+class ResourcePool;
+struct YCbCrFormat;
+
+}  // namespace movit
+
+struct Frame;
+
+class YCbCrConverter {
+public:
+       enum OutputMode {
+               OUTPUT_TO_RGBA,         // One texture (bottom-left origin): RGBA
+               OUTPUT_TO_SEMIPLANAR,   // Two textures (top-left origin):   Y, CbCr
+               OUTPUT_TO_DUAL_YCBCR    // Two textures (top-left origin):   Y'CbCr, Y'CbCr
+       };
+       YCbCrConverter(OutputMode output_mode, movit::ResourcePool *resource_pool);
+
+       // Returns the appropriate chain for rendering.
+       movit::EffectChain *prepare_chain_for_conversion(std::shared_ptr<Frame> frame);
+       movit::EffectChain *prepare_chain_for_fade(std::shared_ptr<Frame> frame, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+
+       // <tex> must be interleaved Y'CbCr.
+       movit::EffectChain *prepare_chain_for_fade_from_texture(GLuint tex, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+
+private:
+       movit::YCbCrFormat ycbcr_format;
+
+       // Effectively only converts from 4:2:2 to 4:4:4.
+       // TODO: Have a separate version with ResampleEffect, for scaling?
+       std::unique_ptr<movit::EffectChain> planar_chain, semiplanar_chain;
+       movit::YCbCrInput *ycbcr_planar_input, *ycbcr_semiplanar_input;
+
+       // These do fades, parametrized on whether the two inputs are planar
+       // or semiplanar.
+       struct FadeChain {
+               std::unique_ptr<movit::EffectChain> chain;
+               movit::YCbCrInput *input[2];
+               movit::MixEffect *mix_effect;
+       };
+       FadeChain fade_chains[2][2];
+
+       // These do fades, where the first input is interleaved and the second is
+       // either planar or semiplanar.
+       FadeChain interleaved_fade_chains[2];
+};
+
+// TODO: make private
+void setup_input_for_frame(std::shared_ptr<Frame> frame, const movit::YCbCrFormat &ycbcr_format, movit::YCbCrInput *input);
+
+#endif  // !defined(_YCBCR_CONVERTER_H)
diff --git a/meson.build b/meson.build
index d35dcb5661a7c0b3a66d880a87003f79de3d131c..296615e1ec7632879505ddebdfe45a2080088a81 100644 (file)
@@ -1,55 +1,21 @@
 project('nageru', 'cpp', default_options: ['buildtype=debugoptimized'])
-qt5 = import('qt5')
-protoc = find_program('protoc')
+
 cxx = meson.get_compiler('cpp')
 
 # Use lld if we can; it links a lot faster than ld.bfd or gold.
-nageru_link_args = []
 code = '''#include <stdio.h>
 int main() { printf("Hello, world!\n"); return 0; }
 '''
 if cxx.links(code, args: '-fuse-ld=lld', name: 'check for LLD')
-       nageru_link_args += '-fuse-ld=lld'
+       add_project_link_arguments('-fuse-ld=lld')
 endif
 
-embedded_bmusb = get_option('embedded_bmusb')
-
-alsadep = dependency('alsa')
-bmusbdep = dependency('bmusb', required: not embedded_bmusb)
-dldep = cxx.find_library('dl')
-epoxydep = dependency('epoxy')
-libavcodecdep = dependency('libavcodec')
-libavformatdep = dependency('libavformat')
-libavresampledep = dependency('libavresample')
-libavutildep = dependency('libavutil')
-libjpegdep = dependency('libjpeg')
-libmicrohttpddep = dependency('libmicrohttpd')
-libswscaledep = dependency('libswscale')
-libusbdep = dependency('libusb-1.0')
-luajitdep = dependency('luajit')
-movitdep = dependency('movit')
-protobufdep = dependency('protobuf')
-qcustomplotdep = cxx.find_library('qcustomplot')
-qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
-threaddep = dependency('threads')
-vadrmdep = dependency('libva-drm')
-vax11dep = dependency('libva-x11')
-x11dep = dependency('x11')
-x264dep = dependency('x264')
-zitaresamplerdep = cxx.find_library('zita-resampler')
-
-srcs = []
-nageru_deps = [qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep,
-       vax11dep, vadrmdep, x11dep, libavformatdep, libavresampledep, libavcodecdep, libavutildep,
-       libswscaledep, libusbdep, luajitdep, dldep, x264dep, alsadep, zitaresamplerdep,
-       qcustomplotdep, threaddep]
-nageru_include_dirs = []
-nageru_link_with = []
-nageru_build_rpath = ''
-nageru_install_rpath = ''
-
-kaeru_link_with = []
-kaeru_extra_deps = []
+# Add the right MOVIT_SHADER_DIR definition.
+r = run_command('pkg-config', '--variable=shaderdir', 'movit')
+if r.returncode() != 0
+       error('Movit pkg-config installation is broken.')
+endif
+add_project_arguments('-DMOVIT_SHADER_DIR="' + r.stdout().strip() + '"', language: 'cpp')
 
 # DeckLink has these issues, and we include it from various places.
 if cxx.has_argument('-Wno-non-virtual-dtor')
@@ -62,191 +28,13 @@ if cxx.has_argument('-Wno-deprecated-declarations')
        add_project_arguments('-Wno-deprecated-declarations', language: 'cpp')
 endif
 
-# Add the right MOVIT_SHADER_DIR definition.
-r = run_command('pkg-config', '--variable=shaderdir', 'movit')
-if r.returncode() != 0
-       error('Movit pkg-config installation is broken.')
-endif
-add_project_arguments('-DMOVIT_SHADER_DIR="' + r.stdout().strip() + '"', language: 'cpp')
-
-# CEF.
-exe_dir = join_paths(get_option('prefix'), 'lib/nageru')
-cef_dir = get_option('cef_dir')
-cef_build_type = get_option('cef_build_type')
-have_cef = (cef_dir != '')
-if have_cef
+# This needs to be done before declaring any build targets.
+if get_option('cef_dir') != ''
        add_project_arguments('-DHAVE_CEF=1', language: 'cpp')
-
-       system_cef = (cef_build_type == 'system')
-       if system_cef
-               cef_lib_dir = cef_dir
-               cef_resource_dir = '/usr/share/cef/Resources'
-       else
-               cef_lib_dir = join_paths(cef_dir, cef_build_type)
-               cef_resource_dir = join_paths(cef_dir, 'Resources')
-
-               nageru_include_dirs += include_directories(cef_dir)
-               nageru_include_dirs += include_directories(join_paths(cef_dir, 'include'))
-               nageru_build_rpath = cef_lib_dir
-               nageru_install_rpath = '$ORIGIN/'
-       endif
-
-       cefdep = cxx.find_library('cef')
-       nageru_deps += cefdep
-
-       # CEF wrapper library; not built as part of the CEF binary distribution,
-       # but should be if CEF is installed as a system library.
-       if system_cef
-               cefdlldep = cxx.find_library('cef_dll_wrapper')
-               nageru_deps += cefdlldep
-       else
-               cmake = find_program('cmake')
-               cef_compile_script = find_program('scripts/compile_cef_dll_wrapper.sh')
-
-               cef_dll_target = custom_target('libcef_dll_wrapper',
-                       input: join_paths(cef_dir, 'libcef_dll/CMakeLists.txt'),
-                       output: ['libcef_dll_wrapper.a', 'cef-stamp'],
-                       command: [cef_compile_script, '@BUILD_DIR@', cef_dir, cmake, '@OUTPUT@'])
-
-               # Putting the .a in sources seemingly hits a bug where the .a files get sorted
-               # in the wrong order. This is a workaround; see
-               # https://github.com/mesonbuild/meson/issues/3613#issuecomment-408276296 .
-               cefdlldep = declare_dependency(sources: cef_dll_target[1], link_args: cef_dll_target.full_path())
-               nageru_deps += cefdlldep
-       endif
-
-       cef_libs = ['libEGL.so', 'libGLESv2.so', 'natives_blob.bin', 'snapshot_blob.bin', 'v8_context_snapshot.bin']
-       cef_resources = ['cef.pak', 'cef_100_percent.pak', 'cef_200_percent.pak', 'cef_extensions.pak', 'devtools_resources.pak']
-       if not get_option('cef_no_icudtl')
-               cef_resources += ['icudtl.dat']
-       endif
-       if cef_build_type != 'system'
-               cef_libs += ['libcef.so']
-       endif
-
-       # Symlink the files into the build directory, so that running nageru without ninja install works.
-       run_command('mkdir', join_paths(meson.current_build_dir(), 'locales/'))
-       foreach file : cef_libs
-               run_command('ln', '-s', join_paths(cef_lib_dir, file), meson.current_build_dir())
-               install_data(join_paths(cef_lib_dir, file), install_dir: exe_dir)
-       endforeach
-       foreach file : cef_resources
-               run_command('ln', '-s', join_paths(cef_resource_dir, file), meson.current_build_dir())
-               install_data(join_paths(cef_resource_dir, file), install_dir: exe_dir)
-       endforeach
-       run_command('ln', '-s', join_paths(cef_resource_dir, 'locales/en-US.pak'), join_paths(meson.current_build_dir(), 'locales/'))
-       install_data(join_paths(cef_resource_dir, 'locales/en-US.pak'), install_dir: join_paths(exe_dir, 'locales'))
 endif
 
-# bmusb.
-if embedded_bmusb
-       bmusb_dir = include_directories('bmusb')
-       nageru_include_dirs += bmusb_dir
-
-       bmusb = static_library('bmusb', 'bmusb/bmusb.cpp', 'bmusb/fake_capture.cpp',
-               dependencies: [libusbdep],
-               include_directories: [bmusb_dir])
-       nageru_link_with += bmusb
-       kaeru_link_with += bmusb
-else
-       nageru_deps += bmusbdep
-       kaeru_extra_deps += bmusbdep
-endif
-
-# Protobuf compilation.
-gen = generator(protoc, \
-       output    : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
-       arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
-proto_generated = gen.process(['state.proto', 'midi_mapping.proto', 'json.proto'])
-protobuf_lib = static_library('protobufs', proto_generated, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-protobuf_hdrs = declare_dependency(sources: proto_generated)
-nageru_link_with += protobuf_lib
-
-# Preprocess Qt as needed.
-qt_files = qt5.preprocess(
-       moc_headers: ['aboutdialog.h', 'analyzer.h', 'clickable_label.h', 'compression_reduction_meter.h', 'correlation_meter.h',
-               'ellipsis_label.h', 'glwidget.h', 'input_mapping_dialog.h', 'lrameter.h', 'mainwindow.h', 'midi_mapping_dialog.h',
-               'nonlinear_fader.h', 'vumeter.h'],
-       ui_files: ['aboutdialog.ui', 'analyzer.ui', 'audio_expanded_view.ui', 'audio_miniview.ui', 'display.ui',
-               'input_mapping.ui', 'mainwindow.ui', 'midi_mapping.ui'],
-       dependencies: qt5deps)
-
-# Qt objects.
-srcs += ['glwidget.cpp', 'mainwindow.cpp', 'vumeter.cpp', 'lrameter.cpp', 'compression_reduction_meter.cpp',
-       'correlation_meter.cpp', 'aboutdialog.cpp', 'analyzer.cpp', 'input_mapping_dialog.cpp', 'midi_mapping_dialog.cpp',
-       'nonlinear_fader.cpp', 'context_menus.cpp', 'vu_common.cpp', 'piecewise_interpolator.cpp', 'midi_mapper.cpp']
-
-# Auxiliary objects used for nearly everything.
-aux_srcs = ['metrics.cpp', 'flags.cpp']
-aux = static_library('aux', aux_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += aux
-
-# Audio objects.
-audio_mixer_srcs = ['audio_mixer.cpp', 'alsa_input.cpp', 'alsa_pool.cpp', 'ebu_r128_proc.cc', 'stereocompressor.cpp',
-       'resampling_queue.cpp', 'flags.cpp', 'correlation_measurer.cpp', 'filter.cpp', 'input_mapping.cpp']
-audio = static_library('audio', audio_mixer_srcs, dependencies: [nageru_deps, protobuf_hdrs], include_directories: nageru_include_dirs)
-nageru_link_with += audio
-
-# Mixer objects.
-srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
-       'context.cpp', 'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
-       'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
-
-# Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
-stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
-       'metacube2.cpp', 'mux.cpp', 'audio_encoder.cpp', 'ffmpeg_raii.cpp', 'ffmpeg_util.cpp', 'httpd.cpp', 'ffmpeg_capture.cpp',
-       'print_latency.cpp', 'basic_stats.cpp', 'ref_counted_frame.cpp']
-stream = static_library('stream', stream_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += stream
-
-# DeckLink.
-srcs += ['decklink_capture.cpp', 'decklink_util.cpp', 'decklink_output.cpp', 'memcpy_interleaved.cpp',
-       'decklink/DeckLinkAPIDispatch.cpp']
-decklink_dir = include_directories('decklink')
-nageru_include_dirs += decklink_dir
-
-# CEF input.
-if have_cef
-       srcs += ['nageru_cef_app.cpp', 'cef_capture.cpp']
-endif
-
-srcs += qt_files
-srcs += proto_generated
-
-# Everything except main.cpp. (We do this because if you specify a .cpp file in
-# both Nageru and Kaeru, it gets compiled twice. In the older Makefiles, Kaeru
-# depended on a smaller set of objects.)
-core = static_library('core', srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += core
-
-# Nageru executable; it goes into /usr/lib/nageru since CEF files go there, too
-# (we can't put them straight into /usr/bin).
-executable('nageru', 'main.cpp',
-       dependencies: nageru_deps,
-       include_directories: nageru_include_dirs,
-       link_with: nageru_link_with,
-       link_args: nageru_link_args,
-       build_rpath: nageru_build_rpath,
-       install_rpath: nageru_install_rpath,
-       install: true,
-       install_dir: exe_dir
-)
-meson.add_install_script('scripts/setup_nageru_symlink.sh')
-
-# Kaeru executable.
-executable('kaeru', 'kaeru.cpp',
-       dependencies: [nageru_deps, kaeru_extra_deps],
-       include_directories: nageru_include_dirs,
-       link_with: [stream, aux, kaeru_link_with],
-       link_args: nageru_link_args,
-       install: true)
-
-# Audio mixer microbenchmark.
-executable('benchmark_audio_mixer', 'benchmark_audio_mixer.cpp', dependencies: nageru_deps, include_directories: nageru_include_dirs, link_args: nageru_link_args, link_with: [audio, aux])
+top_include = include_directories('.')
 
-# These are needed for a default run.
-data_files = ['theme.lua', 'simple.lua', 'bg.jpeg', 'akai_midimix.midimapping']
-install_data(data_files, install_dir: join_paths(get_option('prefix'), 'share/nageru'))
-foreach file : data_files
-       run_command('ln', '-s', join_paths(meson.current_source_dir(), file), meson.current_build_dir())
-endforeach
+subdir('shared')
+subdir('nageru')
+subdir('futatabi')
similarity index 100%
rename from aboutdialog.cpp
rename to nageru/aboutdialog.cpp
similarity index 100%
rename from aboutdialog.h
rename to nageru/aboutdialog.h
similarity index 99%
rename from aboutdialog.ui
rename to nageru/aboutdialog.ui
index 7035dc6043602bce644371fcd14eb89187073e0d..57caf43bcb9cc4cea93923f1087137ccda5d4950 100644 (file)
@@ -17,7 +17,7 @@
    <item>
     <widget class="QLabel" name="label">
      <property name="text">
-      <string>&lt;p&gt;&lt;b&gt;Nageru 1.7.4&lt;/b&gt;&lt;/p&gt;
+      <string>&lt;p&gt;&lt;b&gt;Nageru 1.7.5&lt;/b&gt;&lt;/p&gt;
 
 &lt;p&gt;Realtime video mixer&lt;/p&gt;</string>
      </property>
similarity index 99%
rename from alsa_input.cpp
rename to nageru/alsa_input.cpp
index 08a67f7fa7837ccabeb18a414eeaacf61d404bb3..90a440f68bd84c1e65be1cc6a1609df2501c9999 100644 (file)
@@ -9,7 +9,7 @@
 
 #include "alsa_pool.h"
 #include "bmusb/bmusb.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 
 using namespace std;
 using namespace std::chrono;
similarity index 100%
rename from alsa_input.h
rename to nageru/alsa_input.h
similarity index 100%
rename from alsa_output.cpp
rename to nageru/alsa_output.cpp
similarity index 100%
rename from alsa_output.h
rename to nageru/alsa_output.h
similarity index 100%
rename from alsa_pool.cpp
rename to nageru/alsa_pool.cpp
similarity index 100%
rename from alsa_pool.h
rename to nageru/alsa_pool.h
similarity index 99%
rename from analyzer.cpp
rename to nageru/analyzer.cpp
index b24b46a26ff52457ae3893f7cbf6b1253a1b2f24..bdb80bc31b52c46d98ee4540220c606e706fd594 100644 (file)
@@ -9,7 +9,7 @@
 #include <movit/resource_pool.h>
 #include <movit/util.h>
 
-#include "context.h"
+#include "shared/context.h"
 #include "flags.h"
 #include "mixer.h"
 #include "ui_analyzer.h"
similarity index 100%
rename from analyzer.h
rename to nageru/analyzer.h
similarity index 100%
rename from analyzer.ui
rename to nageru/analyzer.ui
similarity index 99%
rename from audio_encoder.cpp
rename to nageru/audio_encoder.cpp
index e33d2181b46a5465a648bceead3636ba9872b535..f9faf42c349fdd926eb9f54263eb2f04d81240c7 100644 (file)
@@ -22,8 +22,8 @@ extern "C" {
 #include <vector>
 
 #include "defs.h"
-#include "mux.h"
-#include "timebase.h"
+#include "shared/mux.h"
+#include "shared/timebase.h"
 
 using namespace std;
 
similarity index 97%
rename from audio_encoder.h
rename to nageru/audio_encoder.h
index 93adbafce954d6188f3d5ca86770ff7dace53f6c..d28d7eee3c04ff1d0ee89ac38e62cdae22260402 100644 (file)
@@ -15,7 +15,7 @@ extern "C" {
 #include <libavutil/frame.h>
 }
 
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 
 class Mux;
 
similarity index 100%
rename from audio_miniview.ui
rename to nageru/audio_miniview.ui
similarity index 99%
rename from audio_mixer.cpp
rename to nageru/audio_mixer.cpp
index 9e7dd59a0dbc64ab6398e30824c8dae6d676b912..f63fe242460fcfbc1fa51f3b5f23809d627e68ab 100644 (file)
 #include <limits>
 #include <utility>
 
-#include "db.h"
+#include "decibel.h"
 #include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "state.pb.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 
 using namespace bmusb;
 using namespace std;
similarity index 99%
rename from audio_mixer.h
rename to nageru/audio_mixer.h
index 9793646c9bac3493651e39643a610c7df75c5d0f..a7ab9a5a0d60378e632fbd34a7105865c05b0ef0 100644 (file)
@@ -23,7 +23,7 @@
 
 #include "alsa_pool.h"
 #include "correlation_measurer.h"
-#include "db.h"
+#include "decibel.h"
 #include "defs.h"
 #include "ebu_r128_proc.h"
 #include "filter.h"
similarity index 99%
rename from basic_stats.cpp
rename to nageru/basic_stats.cpp
index 937e3021c5cdf1188a23c4b086d90e894bb75b01..9e07a1c417bfce08d64e703c7b1d0f127bd48199 100644 (file)
@@ -1,5 +1,5 @@
 #include "basic_stats.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 
 #include <assert.h>
 #include <sys/resource.h>
similarity index 100%
rename from basic_stats.h
rename to nageru/basic_stats.h
similarity index 99%
rename from benchmark_audio_mixer.cpp
rename to nageru/benchmark_audio_mixer.cpp
index b47c3405eb7002fe0ec5001d93a60059eedcf42e..7ec3450dc60d942d617cbfeff0210f58a9b4ddc5 100644 (file)
 #include <vector>
 
 #include "audio_mixer.h"
-#include "db.h"
+#include "decibel.h"
 #include "defs.h"
 #include "input_mapping.h"
 #include "resampling_queue.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 
 #define NUM_BENCHMARK_CARDS 4
 #define NUM_WARMUP_FRAMES 100
similarity index 100%
rename from bg.jpeg
rename to nageru/bg.jpeg
diff --git a/nageru/bmusb b/nageru/bmusb
new file mode 160000 (submodule)
index 0000000..5163d25
--- /dev/null
@@ -0,0 +1 @@
+Subproject commit 5163d25c65c3028090db1aea6587ec2fb4cb823e
diff --git a/nageru/cbcr_subsample.frag b/nageru/cbcr_subsample.frag
new file mode 100644 (file)
index 0000000..3714f99
--- /dev/null
@@ -0,0 +1,9 @@
+#version 130
+
+in vec2 tc0, tc1;
+uniform sampler2D cbcr_tex;
+out vec4 FragColor, FragColor2;
+void main() {
+       FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1));
+       FragColor2 = FragColor;
+}
diff --git a/nageru/cbcr_subsample.vert b/nageru/cbcr_subsample.vert
new file mode 100644 (file)
index 0000000..982cb87
--- /dev/null
@@ -0,0 +1,21 @@
+#version 130
+
+in vec2 position;
+in vec2 texcoord;
+out vec2 tc0, tc1;
+uniform vec2 foo_chroma_offset_0;
+uniform vec2 foo_chroma_offset_1;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       vec2 flipped_tc = texcoord;
+       tc0 = flipped_tc + foo_chroma_offset_0;
+       tc1 = flipped_tc + foo_chroma_offset_1;
+}
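
The glOrtho comment in this shader can be checked by hand: multiplying that matrix by (x, y, 0, 1) gives (2x - 1, 2y - 1, -1, 1), which is exactly what the gl_Position line computes. A small self-contained verification (illustrative only, not part of the diff):

#include <stdio.h>

int main()
{
	// glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) matrix applied to (x, y, 0, 1).
	const float m[4][4] = {{2, 0, 0, -1}, {0, 2, 0, -1}, {0, 0, -2, -1}, {0, 0, 0, 1}};
	const float v[4] = {0.25f, 0.75f, 0.0f, 1.0f};
	float out[4] = {0.0f, 0.0f, 0.0f, 0.0f};
	for (int i = 0; i < 4; ++i)
		for (int j = 0; j < 4; ++j)
			out[i] += m[i][j] * v[j];
	printf("%.2f %.2f %.2f %.2f\n", out[0], out[1], out[2], out[3]);  // -0.50 0.50 -1.00 1.00
}
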
similarity index 100%
rename from cef_capture.cpp
rename to nageru/cef_capture.cpp
similarity index 100%
rename from cef_capture.h
rename to nageru/cef_capture.h
similarity index 72%
rename from chroma_subsampler.cpp
rename to nageru/chroma_subsampler.cpp
index 96adef1eedb9d360aea76136a17abafa760e8f29..14cb4d29641de6a149bf5a89189ea979bde67dc8 100644 (file)
@@ -7,6 +7,9 @@
 #include <movit/resource_pool.h>
 #include <movit/util.h>
 
+#include "embedded_files.h"
+#include "shared/read_file.h"
+
 using namespace movit;
 using namespace std;
 
@@ -69,37 +72,8 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
        // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
 
        // Cb/Cr shader.
-       string cbcr_vert_shader =
-               "#version 130 \n"
-               " \n"
-               "in vec2 position; \n"
-               "in vec2 texcoord; \n"
-               "out vec2 tc0, tc1; \n"
-               "uniform vec2 foo_chroma_offset_0; \n"
-               "uniform vec2 foo_chroma_offset_1; \n"
-               " \n"
-               "void main() \n"
-               "{ \n"
-               "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
-               "    // \n"
-               "    //   2.000  0.000  0.000 -1.000 \n"
-               "    //   0.000  2.000  0.000 -1.000 \n"
-               "    //   0.000  0.000 -2.000 -1.000 \n"
-               "    //   0.000  0.000  0.000  1.000 \n"
-               "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
-               "    vec2 flipped_tc = texcoord; \n"
-               "    tc0 = flipped_tc + foo_chroma_offset_0; \n"
-               "    tc1 = flipped_tc + foo_chroma_offset_1; \n"
-               "} \n";
-       string cbcr_frag_shader =
-               "#version 130 \n"
-               "in vec2 tc0, tc1; \n"
-               "uniform sampler2D cbcr_tex; \n"
-               "out vec4 FragColor, FragColor2; \n"
-               "void main() { \n"
-               "    FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n"
-               "    FragColor2 = FragColor; \n"
-               "} \n";
+       string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
+       string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
        cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
        check_error();
        cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
@@ -115,45 +89,8 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
        check_error();
 
        // Same, for UYVY conversion.
-       string uyvy_vert_shader =
-               "#version 130 \n"
-               " \n"
-               "in vec2 position; \n"
-               "in vec2 texcoord; \n"
-               "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
-               "uniform vec2 foo_luma_offset_0; \n"
-               "uniform vec2 foo_luma_offset_1; \n"
-               "uniform vec2 foo_chroma_offset_0; \n"
-               "uniform vec2 foo_chroma_offset_1; \n"
-               " \n"
-               "void main() \n"
-               "{ \n"
-               "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
-               "    // \n"
-               "    //   2.000  0.000  0.000 -1.000 \n"
-               "    //   0.000  2.000  0.000 -1.000 \n"
-               "    //   0.000  0.000 -2.000 -1.000 \n"
-               "    //   0.000  0.000  0.000  1.000 \n"
-               "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
-               "    vec2 flipped_tc = texcoord; \n"
-               "    y_tc0 = flipped_tc + foo_luma_offset_0; \n"
-               "    y_tc1 = flipped_tc + foo_luma_offset_1; \n"
-               "    cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n"
-               "    cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n"
-               "} \n";
-       string uyvy_frag_shader =
-               "#version 130 \n"
-               "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
-               "uniform sampler2D y_tex, cbcr_tex; \n"
-               "out vec4 FragColor; \n"
-               "void main() { \n"
-               "    float y0 = texture(y_tex, y_tc0).r; \n"
-               "    float y1 = texture(y_tex, y_tc1).r; \n"
-               "    vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n"
-               "    vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n"
-               "    vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n"
-               "    FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n"
-               "} \n";
+       string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
+       string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
 
        uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
        check_error();
@@ -186,43 +123,7 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
 
        // v210 compute shader.
        if (v210Converter::has_hardware_support()) {
-               string v210_shader_src = R"(#version 150
-#extension GL_ARB_compute_shader : enable
-#extension GL_ARB_shader_image_load_store : enable
-layout(local_size_x=2, local_size_y=16) in;
-layout(r16) uniform restrict readonly image2D in_y;
-uniform sampler2D in_cbcr;  // Of type RG16.
-layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
-uniform float inv_width, inv_height;
-
-void main()
-{
-       int xb = int(gl_GlobalInvocationID.x);  // X block number.
-       int y = int(gl_GlobalInvocationID.y);  // Y (actual line).
-       float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height;  // Y float coordinate.
-
-       // Load and scale CbCr values, sampling in-between the texels to get
-       // to (left/4 + center/2 + right/4).
-       vec2 pix_cbcr[3];
-       for (int i = 0; i < 3; ++i) {
-               vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
-               vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
-               pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
-       }
-
-       // Load and scale the Y values. Note that we use integer coordinates here,
-       // so we don't need to offset by 0.5.
-       float pix_y[6];
-       for (int i = 0; i < 6; ++i) {
-               pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
-       }
-
-       imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0],      pix_cbcr[0].y, 1.0));
-       imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1],      pix_cbcr[1].x, pix_y[2],      1.0));
-       imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3],      pix_cbcr[2].x, 1.0));
-       imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4],      pix_cbcr[2].y, pix_y[5],      1.0));
-}
-)";
+               string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
                GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
                check_error();
                v210_program_num = glCreateProgram();
similarity index 100%
rename from clickable_label.h
rename to nageru/clickable_label.h
similarity index 100%
rename from context.h
rename to nageru/context.h
similarity index 100%
rename from context_menus.cpp
rename to nageru/context_menus.cpp
similarity index 100%
rename from context_menus.h
rename to nageru/context_menus.h
similarity index 74%
rename from db.h
rename to nageru/decibel.h
index 53261abd058bd0b80f8881a182a4f93d2e3474d0..acbb5d622c7e109bc3b40fb13b49b1dcf5f94b4c 100644 (file)
--- a/db.h
@@ -1,5 +1,5 @@
-#ifndef _DB_H
-#define _DB_H 1
+#ifndef _DECIBEL_H
+#define _DECIBEL_H 1
 
 // Utility routines for working with decibels.
 
@@ -8,4 +8,4 @@
 static inline double from_db(double db) { return pow(10.0, db / 20.0); }
 static inline double to_db(double val) { return 20.0 * log10(val); }
 
-#endif  // !defined(_DB_H)
+#endif  // !defined(_DECIBEL_H)
similarity index 99%
rename from decklink_capture.cpp
rename to nageru/decklink_capture.cpp
index 0f48e3e77f48ffe18c74175675060b52ea02f50b..ea55b4668f3c83e4b5119c7ad63f91bf0d0ecaf9 100644 (file)
@@ -21,7 +21,7 @@
 #include "bmusb/bmusb.h"
 #include "decklink_util.h"
 #include "flags.h"
-#include "memcpy_interleaved.h"
+#include "shared/memcpy_interleaved.h"
 #include "v210_converter.h"
 
 #define FRAME_SIZE (8 << 20)  // 8 MB.
similarity index 100%
rename from decklink_capture.h
rename to nageru/decklink_capture.h
similarity index 99%
rename from decklink_output.cpp
rename to nageru/decklink_output.cpp
index 28f433a8872945b3191608cc7283eea5fea4669e..bd59b32a5a3abb0eb762c89ed49fcc8991bc75fc 100644 (file)
@@ -12,9 +12,9 @@
 #include "decklink_output.h"
 #include "decklink_util.h"
 #include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "print_latency.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "v210_converter.h"
 
 using namespace movit;
similarity index 98%
rename from decklink_output.h
rename to nageru/decklink_output.h
index 44eb86dea9860e6480c4552aeaf352a5990bf979..90b89a22a18f1f6e0de7a9352d94d437415f79f3 100644 (file)
 #include "DeckLinkAPITypes.h"
 #include "LinuxCOM.h"
 
-#include "context.h"
+#include "shared/context.h"
 #include "print_latency.h"
 #include "quittable_sleeper.h"
 #include "ref_counted_frame.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 
 namespace movit {
 
similarity index 100%
rename from decklink_util.cpp
rename to nageru/decklink_util.cpp
similarity index 100%
rename from decklink_util.h
rename to nageru/decklink_util.h
similarity index 53%
rename from defs.h
rename to nageru/defs.h
index 7b8cc696de1120de66ce04a1b2d29138089d024c..a990330759348796474d219bd1a7b3c4d405effa 100644 (file)
--- a/defs.h
@@ -3,13 +3,6 @@
 
 #include <libavformat/version.h>
 
-// This flag is only supported in FFmpeg 3.3 and up, and we only require 3.1.
-#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 71, 100)
-#define MUX_SKIP_TRAILER "+skip_trailer"
-#else
-#define MUX_SKIP_TRAILER ""
-#endif
-
 #define OUTPUT_FREQUENCY 48000  // Currently needs to be exactly 48000, since bmusb outputs in that.
 #define MAX_FPS 60
 #define FAKE_FPS 25  // Must be an integer.
 #define LOCAL_DUMP_SUFFIX ".nut"
 #define DEFAULT_STREAM_MUX_NAME "nut"  // Only for HTTP. Local dump guesses from LOCAL_DUMP_SUFFIX.
 #define DEFAULT_HTTPD_PORT 9095
-#define MUX_OPTS { \
-       /* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \
-       { "movflags", "empty_moov+frag_keyframe+default_base_moof" MUX_SKIP_TRAILER }, \
-       \
-       /* Make for somewhat less bursty stream output when using .mov. */ \
-       { "frag_duration", "125000" }, \
-       \
-       /* Keep nut muxer from using unlimited amounts of memory. */ \
-       { "write_index", "0" } \
-}
-
-// In bytes. Beware, if too small, stream clients will start dropping data.
-// For mov, you want this at 10MB or so (for the reason mentioned above),
-// but for nut, there's no flushing, so such a large mux buffer would cause
-// the output to be very uneven.
-#define MUX_BUFFER_SIZE 10485760
+
+#include "shared/shared_defs.h"
 
 // In number of frames. Comes in addition to any internal queues in x264
 // (frame threading, lookahead, etc.).
similarity index 100%
rename from display.ui
rename to nageru/display.ui
similarity index 100%
rename from ebu_r128_proc.cc
rename to nageru/ebu_r128_proc.cc
similarity index 100%
rename from ebu_r128_proc.h
rename to nageru/ebu_r128_proc.h
similarity index 100%
rename from ellipsis_label.h
rename to nageru/ellipsis_label.h
diff --git a/nageru/embedded_files.h b/nageru/embedded_files.h
new file mode 100644 (file)
index 0000000..cbc14e3
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _EMBEDDED_FILES_H
+#define _EMBEDDED_FILES_H 1
+
+// Files that are embedded into the binary as part of the build process.
+// They are used as a backup if the files are not available on disk
+// (which is typically the case if the program is installed, as opposed to
+// being run during development).
+
+#include <stddef.h>
+
+extern const unsigned char *_binary_cbcr_subsample_vert_data;
+extern const size_t _binary_cbcr_subsample_vert_size;
+extern const unsigned char *_binary_cbcr_subsample_frag_data;
+extern const size_t _binary_cbcr_subsample_frag_size;
+extern const unsigned char *_binary_uyvy_subsample_vert_data;
+extern const size_t _binary_uyvy_subsample_vert_size;
+extern const unsigned char *_binary_uyvy_subsample_frag_data;
+extern const size_t _binary_uyvy_subsample_frag_size;
+extern const unsigned char *_binary_v210_subsample_comp_data;
+extern const size_t _binary_v210_subsample_comp_size;
+extern const unsigned char *_binary_timecode_vert_data;
+extern const size_t _binary_timecode_vert_size;
+extern const unsigned char *_binary_timecode_frag_data;
+extern const size_t _binary_timecode_frag_size;
+extern const unsigned char *_binary_timecode_10bit_frag_data;
+extern const size_t _binary_timecode_10bit_frag_size;
+
+#endif  // !defined(_EMBEDDED_FILES_H)
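
The shared/read_file.h helper that consumes these symbols is referenced by the new call sites but is not shown in this diff. A minimal sketch of what such a loader could look like, assuming only the call shape seen in chroma_subsampler.cpp (filename, embedded data pointer, embedded size), with the on-disk copy preferred during development:

#include <stddef.h>
#include <stdio.h>
#include <string>

// Hypothetical sketch, not the actual shared/read_file.h: try the on-disk
// file first (the development case), and fall back to the blob that the
// build embedded into the binary (the installed case).
inline std::string read_file(const std::string &filename,
                             const unsigned char *embedded_data,
                             const size_t embedded_size)
{
	FILE *fp = fopen(filename.c_str(), "rb");
	if (fp == nullptr) {
		return std::string(reinterpret_cast<const char *>(embedded_data), embedded_size);
	}
	std::string contents;
	char buf[4096];
	size_t len;
	while ((len = fread(buf, 1, sizeof(buf), fp)) > 0) {
		contents.append(buf, len);
	}
	fclose(fp);
	return contents;
}
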
similarity index 99%
rename from ffmpeg_capture.cpp
rename to nageru/ffmpeg_capture.cpp
index 7bd927801359b5906db8cec4641557c1898a82ed..7081da823d61f952b2140fe980136b6a021272ec 100644 (file)
@@ -28,12 +28,12 @@ extern "C" {
 #include <vector>
 
 #include "bmusb/bmusb.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "ffmpeg_util.h"
 #include "flags.h"
 #include "image_input.h"
 #include "ref_counted_frame.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 
 #define FRAME_SIZE (8 << 20)  // 8 MB.
 
similarity index 99%
rename from ffmpeg_capture.h
rename to nageru/ffmpeg_capture.h
index 8a513df971fa20f2cba71e25ac603242b9be3be3..31e94ab41fefc103b150b5319877554450dfa720 100644 (file)
@@ -38,7 +38,7 @@ extern "C" {
 }
 
 #include "bmusb/bmusb.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "ref_counted_frame.h"
 #include "quittable_sleeper.h"
 
similarity index 100%
rename from ffmpeg_util.cpp
rename to nageru/ffmpeg_util.cpp
similarity index 100%
rename from ffmpeg_util.h
rename to nageru/ffmpeg_util.h
similarity index 100%
rename from filter.cpp
rename to nageru/filter.cpp
similarity index 100%
rename from filter.h
rename to nageru/filter.h
similarity index 100%
rename from flags.cpp
rename to nageru/flags.cpp
similarity index 100%
rename from flags.h
rename to nageru/flags.h
similarity index 99%
rename from glwidget.cpp
rename to nageru/glwidget.cpp
index bf537de2dbdb0806d2c41fec0b8ebf3fcd74d8e7..e6c2c582853a1bfe894e7df5dabc465104acbdc1 100644 (file)
 #include <utility>
 
 #include "audio_mixer.h"
-#include "context.h"
+#include "shared/context.h"
 #include "context_menus.h"
 #include "flags.h"
 #include "mainwindow.h"
 #include "mixer.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 
 class QMouseEvent;
 
similarity index 100%
rename from glwidget.h
rename to nageru/glwidget.h
similarity index 99%
rename from image_input.cpp
rename to nageru/image_input.cpp
index 2bf4a237bb86758f92cbdcfba808c514c7c19b82..5f695702e7ffcbc3adb63127298afffec7b93f92 100644 (file)
@@ -30,7 +30,7 @@ extern "C" {
 #include <utility>
 #include <vector>
 
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "ffmpeg_util.h"
 #include "flags.h"
 
similarity index 100%
rename from image_input.h
rename to nageru/image_input.h
similarity index 100%
rename from input_mapping.cpp
rename to nageru/input_mapping.cpp
similarity index 100%
rename from input_mapping.h
rename to nageru/input_mapping.h
similarity index 100%
rename from input_mapping.ui
rename to nageru/input_mapping.ui
similarity index 99%
rename from input_mapping_dialog.cpp
rename to nageru/input_mapping_dialog.cpp
index 23e889531e354fc04589b0fc09ac72c84ff4d648..b4565152a4af01b290941025633e7224b8c4d4dd 100644 (file)
@@ -22,7 +22,7 @@
 
 #include "alsa_pool.h"
 #include "defs.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
 #include "ui_input_mapping.h"
 
 using namespace std;
similarity index 100%
rename from input_state.h
rename to nageru/input_state.h
similarity index 100%
rename from json.proto
rename to nageru/json.proto
similarity index 97%
rename from kaeru.cpp
rename to nageru/kaeru.cpp
index b3a9bb3e43f92e09501198cd54c190d3a5579538..390e3cc5f7754c092baa32131a79f595943b00d1 100644 (file)
--- a/kaeru.cpp
@@ -6,9 +6,9 @@
 #include "flags.h"
 #include "ffmpeg_capture.h"
 #include "mixer.h"
-#include "mux.h"
+#include "shared/mux.h"
 #include "quittable_sleeper.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "x264_encoder.h"
 
 #include <assert.h>
@@ -72,7 +72,8 @@ unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x
        string video_extradata = x264_encoder->get_global_headers();
 
        unique_ptr<Mux> mux;
-       mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(), COARSE_TIMEBASE,
+       mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(),
+               get_color_space(global_flags.ycbcr_rec709_coefficients), Mux::WITH_AUDIO, COARSE_TIMEBASE,
                /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, { &stream_mux_metrics }));
        stream_mux_metrics.init({{ "destination", "http" }});
        return mux;
similarity index 100%
rename from lrameter.cpp
rename to nageru/lrameter.cpp
similarity index 100%
rename from lrameter.h
rename to nageru/lrameter.h
similarity index 98%
rename from main.cpp
rename to nageru/main.cpp
index c1a52c0489a6d7b4e9054da338635290846cf1cb..d5654a5a07c4ef26bb8924b7fa8cd7f1362cc827 100644 (file)
--- a/main.cpp
@@ -24,7 +24,7 @@ extern "C" {
 #ifdef HAVE_CEF
 #include "nageru_cef_app.h"
 #endif
-#include "context.h"
+#include "shared/context.h"
 #include "flags.h"
 #include "image_input.h"
 #include "mainwindow.h"
@@ -68,7 +68,6 @@ int main(int argc, char *argv[])
                // We normally use EGL for zerocopy, but if we use VA against DRM
                // instead of against X11, we turn it off, and then don't need EGL.
                setenv("QT_XCB_GL_INTEGRATION", "xcb_egl", 0);
-               using_egl = true;
        }
        setlinebuf(stdout);
 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
similarity index 99%
rename from mainwindow.cpp
rename to nageru/mainwindow.cpp
index b542c1093243dc15af5ae006c0b274b5e9c06352..11e63913ea59734e9831ef5e625be3154020f704 100644 (file)
@@ -50,7 +50,7 @@
 #include "clickable_label.h"
 #include "context_menus.h"
 #include "correlation_meter.h"
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
 #include "ellipsis_label.h"
 #include "flags.h"
 #include "glwidget.h"
@@ -61,7 +61,7 @@
 #include "midi_mapping_dialog.h"
 #include "mixer.h"
 #include "nonlinear_fader.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
 #include "ui_audio_expanded_view.h"
 #include "ui_audio_miniview.h"
 #include "ui_display.h"
similarity index 100%
rename from mainwindow.h
rename to nageru/mainwindow.h
similarity index 100%
rename from mainwindow.ui
rename to nageru/mainwindow.ui
diff --git a/nageru/meson.build b/nageru/meson.build
new file mode 100644 (file)
index 0000000..c66ea54
--- /dev/null
@@ -0,0 +1,231 @@
+qt5 = import('qt5')
+protoc = find_program('protoc')
+cxx = meson.get_compiler('cpp')
+
+embedded_bmusb = get_option('embedded_bmusb')
+
+alsadep = dependency('alsa')
+bmusbdep = dependency('bmusb', required: not embedded_bmusb)
+dldep = cxx.find_library('dl')
+epoxydep = dependency('epoxy')
+libavcodecdep = dependency('libavcodec')
+libavformatdep = dependency('libavformat')
+libavresampledep = dependency('libavresample')
+libavutildep = dependency('libavutil')
+libjpegdep = dependency('libjpeg')
+libswscaledep = dependency('libswscale')
+libusbdep = dependency('libusb-1.0')
+luajitdep = dependency('luajit')
+movitdep = dependency('movit')
+protobufdep = dependency('protobuf')
+qcustomplotdep = cxx.find_library('qcustomplot')
+qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
+threaddep = dependency('threads')
+vadrmdep = dependency('libva-drm')
+vax11dep = dependency('libva-x11')
+x11dep = dependency('x11')
+x264dep = dependency('x264')
+zitaresamplerdep = cxx.find_library('zita-resampler')
+
+srcs = []
+nageru_deps = [shareddep, qt5deps, libjpegdep, movitdep, protobufdep,
+       vax11dep, vadrmdep, x11dep, libavformatdep, libavresampledep, libavcodecdep, libavutildep,
+       libswscaledep, libusbdep, luajitdep, dldep, x264dep, alsadep, zitaresamplerdep,
+       qcustomplotdep, threaddep]
+nageru_include_dirs = []
+nageru_link_with = []
+nageru_build_rpath = ''
+nageru_install_rpath = ''
+
+kaeru_link_with = []
+kaeru_extra_deps = []
+
+# CEF.
+exe_dir = join_paths(get_option('prefix'), 'lib/nageru')
+cef_dir = get_option('cef_dir')
+cef_build_type = get_option('cef_build_type')
+have_cef = (cef_dir != '')
+if have_cef
+       # This is done in the top-level file; just kept here for reference.
+       # add_project_arguments('-DHAVE_CEF=1', language: 'cpp')
+
+       system_cef = (cef_build_type == 'system')
+       if system_cef
+               cef_lib_dir = cef_dir
+               cef_resource_dir = '/usr/share/cef/Resources'
+       else
+               cef_lib_dir = join_paths(cef_dir, cef_build_type)
+               cef_resource_dir = join_paths(cef_dir, 'Resources')
+
+               nageru_include_dirs += include_directories(cef_dir)
+               nageru_include_dirs += include_directories(join_paths(cef_dir, 'include'))
+               nageru_build_rpath = cef_lib_dir
+               nageru_install_rpath = '$ORIGIN/'
+       endif
+
+       cefdep = cxx.find_library('cef')
+       nageru_deps += cefdep
+
+       # CEF wrapper library; not built as part of the CEF binary distribution,
+       # but should be if CEF is installed as a system library.
+       if system_cef
+               cefdlldep = cxx.find_library('cef_dll_wrapper')
+               nageru_deps += cefdlldep
+       else
+               cmake = find_program('cmake')
+               cef_compile_script = find_program('scripts/compile_cef_dll_wrapper.sh')
+
+               cef_dll_target = custom_target('libcef_dll_wrapper',
+                       input: join_paths(cef_dir, 'libcef_dll/CMakeLists.txt'),
+                       output: ['libcef_dll_wrapper.a', 'cef-stamp'],
+                       command: [cef_compile_script, '@BUILD_DIR@', cef_dir, cmake, '@OUTPUT@'])
+
+               # Putting the .a in sources seemingly hits a bug where the .a files get sorted
+               # in the wrong order. This is a workaround; see
+               # https://github.com/mesonbuild/meson/issues/3613#issuecomment-408276296 .
+               cefdlldep = declare_dependency(sources: cef_dll_target[1], link_args: cef_dll_target.full_path())
+               nageru_deps += cefdlldep
+       endif
+
+       cef_libs = ['libEGL.so', 'libGLESv2.so', 'natives_blob.bin', 'snapshot_blob.bin', 'v8_context_snapshot.bin']
+       cef_resources = ['cef.pak', 'cef_100_percent.pak', 'cef_200_percent.pak', 'cef_extensions.pak', 'devtools_resources.pak']
+       if not get_option('cef_no_icudtl')
+               cef_resources += ['icudtl.dat']
+       endif
+       if cef_build_type != 'system'
+               cef_libs += ['libcef.so']
+       endif
+
+       # Symlink the files into the build directory, so that running nageru without ninja install works.
+       run_command('mkdir', join_paths(meson.current_build_dir(), 'locales/'))
+       foreach file : cef_libs
+               run_command('ln', '-s', join_paths(cef_lib_dir, file), meson.current_build_dir())
+               install_data(join_paths(cef_lib_dir, file), install_dir: exe_dir)
+       endforeach
+       foreach file : cef_resources
+               run_command('ln', '-s', join_paths(cef_resource_dir, file), meson.current_build_dir())
+               install_data(join_paths(cef_resource_dir, file), install_dir: exe_dir)
+       endforeach
+       run_command('ln', '-s', join_paths(cef_resource_dir, 'locales/en-US.pak'), join_paths(meson.current_build_dir(), 'locales/'))
+       install_data(join_paths(cef_resource_dir, 'locales/en-US.pak'), install_dir: join_paths(exe_dir, 'locales'))
+endif
+
+# bmusb.
+if embedded_bmusb
+       bmusb_dir = include_directories('bmusb')
+       nageru_include_dirs += bmusb_dir
+
+       bmusb = static_library('bmusb', 'bmusb/bmusb.cpp', 'bmusb/fake_capture.cpp',
+               dependencies: [libusbdep],
+               include_directories: [bmusb_dir])
+       nageru_link_with += bmusb
+       kaeru_link_with += bmusb
+else
+       nageru_deps += bmusbdep
+       kaeru_extra_deps += bmusbdep
+endif
+
+# Protobuf compilation.
+gen = generator(protoc, \
+       output    : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
+       arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+proto_generated = gen.process(['state.proto', 'midi_mapping.proto', 'json.proto'])
+protobuf_lib = static_library('protobufs', proto_generated, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+protobuf_hdrs = declare_dependency(sources: proto_generated)
+nageru_link_with += protobuf_lib
+
+# Preprocess Qt as needed.
+qt_files = qt5.preprocess(
+       moc_headers: ['aboutdialog.h', 'analyzer.h', 'clickable_label.h', 'compression_reduction_meter.h', 'correlation_meter.h',
+               'ellipsis_label.h', 'glwidget.h', 'input_mapping_dialog.h', 'lrameter.h', 'mainwindow.h', 'midi_mapping_dialog.h',
+               'nonlinear_fader.h', 'vumeter.h'],
+       ui_files: ['aboutdialog.ui', 'analyzer.ui', 'audio_expanded_view.ui', 'audio_miniview.ui', 'display.ui',
+               'input_mapping.ui', 'mainwindow.ui', 'midi_mapping.ui'],
+       dependencies: qt5deps)
+
+# Qt objects.
+srcs += ['glwidget.cpp', 'mainwindow.cpp', 'vumeter.cpp', 'lrameter.cpp', 'compression_reduction_meter.cpp',
+       'correlation_meter.cpp', 'aboutdialog.cpp', 'analyzer.cpp', 'input_mapping_dialog.cpp', 'midi_mapping_dialog.cpp',
+       'nonlinear_fader.cpp', 'context_menus.cpp', 'vu_common.cpp', 'piecewise_interpolator.cpp', 'midi_mapper.cpp']
+
+# Auxiliary objects used for nearly everything.
+aux_srcs = ['flags.cpp']
+aux = static_library('aux', aux_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += aux
+
+# Audio objects.
+audio_mixer_srcs = ['audio_mixer.cpp', 'alsa_input.cpp', 'alsa_pool.cpp', 'ebu_r128_proc.cc', 'stereocompressor.cpp',
+       'resampling_queue.cpp', 'flags.cpp', 'correlation_measurer.cpp', 'filter.cpp', 'input_mapping.cpp']
+audio = static_library('audio', audio_mixer_srcs, dependencies: [nageru_deps, protobuf_hdrs], include_directories: nageru_include_dirs)
+nageru_link_with += audio
+
+# Mixer objects.
+srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
+       'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
+       'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
+
+# Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
+stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
+       'audio_encoder.cpp', 'ffmpeg_util.cpp', 'ffmpeg_capture.cpp',
+       'print_latency.cpp', 'basic_stats.cpp', 'ref_counted_frame.cpp']
+stream = static_library('stream', stream_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += stream
+
+# DeckLink.
+srcs += ['decklink_capture.cpp', 'decklink_util.cpp', 'decklink_output.cpp',
+       'decklink/DeckLinkAPIDispatch.cpp']
+decklink_dir = include_directories('decklink')
+nageru_include_dirs += decklink_dir
+
+# CEF input.
+if have_cef
+       srcs += ['nageru_cef_app.cpp', 'cef_capture.cpp']
+endif
+
+srcs += qt_files
+srcs += proto_generated
+
+# Shaders needed at runtime.
+shaders = ['cbcr_subsample.vert', 'cbcr_subsample.frag', 'uyvy_subsample.vert', 'uyvy_subsample.frag', 'v210_subsample.comp', 'timecode.vert', 'timecode.frag', 'timecode_10bit.frag']
+foreach shader : shaders
+	run_command('ln', '-s', join_paths(meson.current_source_dir(), shader), meson.current_build_dir())
+endforeach
+
+shader_srcs = bin2h_gen.process(shaders)
+srcs += shader_srcs
+
+# Everything except main.cpp. (We do this because if you specify a .cpp file in
+# both Nageru and Kaeru, it gets compiled twice. In the older Makefiles, Kaeru
+# depended on a smaller set of objects.)
+core = static_library('core', srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += core
+
+# Nageru executable; it goes into /usr/lib/nageru since CEF files go there, too
+# (we can't put them straight into /usr/bin).
+executable('nageru', 'main.cpp',
+       dependencies: nageru_deps,
+       include_directories: nageru_include_dirs,
+       link_with: nageru_link_with,
+       build_rpath: nageru_build_rpath,
+       install_rpath: nageru_install_rpath,
+       install: true,
+       install_dir: exe_dir
+)
+meson.add_install_script('scripts/setup_nageru_symlink.sh')
+
+# Kaeru executable.
+executable('kaeru', 'kaeru.cpp',
+       dependencies: [nageru_deps, kaeru_extra_deps],
+       include_directories: nageru_include_dirs,
+       link_with: [stream, aux, kaeru_link_with],
+       install: true)
+
+# Audio mixer microbenchmark.
+executable('benchmark_audio_mixer', 'benchmark_audio_mixer.cpp', dependencies: nageru_deps, include_directories: nageru_include_dirs, link_with: [audio, aux])
+
+# These are needed for a default run.
+data_files = ['theme.lua', 'simple.lua', 'bg.jpeg', 'akai_midimix.midimapping']
+install_data(data_files, install_dir: join_paths(get_option('prefix'), 'share/nageru'))
+foreach file : data_files
+       run_command('ln', '-s', join_paths(meson.current_source_dir(), file), meson.current_build_dir())
+endforeach
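
The bin2h_gen generator consumed above is presumably defined in shared/meson.build, which is not part of this excerpt. Each shader is both symlinked into the build directory (for running uninstalled) and embedded via shared/bin2h.cpp (shown further down in this diff), whose basename sanitization maps non-alphanumerics to '_'. A short illustration of how that loop would derive the symbol names declared in embedded_files.h:

#include <ctype.h>
#include <stdio.h>
#include <string>

int main()
{
	// Mirrors the sanitization loop in shared/bin2h.cpp; the resulting
	// "_binary_..." prefix is an assumption based on embedded_files.h.
	std::string basename = "cbcr_subsample.vert";
	for (char &ch : basename) {
		if (!isalpha(ch) && !isdigit(ch)) {
			ch = '_';
		}
	}
	printf("_binary_%s_data\n", basename.c_str());  // _binary_cbcr_subsample_vert_data
}
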
similarity index 100%
rename from midi_mapper.cpp
rename to nageru/midi_mapper.cpp
similarity index 100%
rename from midi_mapper.h
rename to nageru/midi_mapper.h
similarity index 100%
rename from midi_mapping.proto
rename to nageru/midi_mapping.proto
similarity index 100%
rename from midi_mapping.ui
rename to nageru/midi_mapping.ui
similarity index 99%
rename from midi_mapping_dialog.cpp
rename to nageru/midi_mapping_dialog.cpp
index 05508e4e244224bc33f129d9f93a873931b88a99..a3286b41dd3102d677661bc5efd465635181f8df 100644 (file)
@@ -20,7 +20,7 @@
 
 #include "midi_mapper.h"
 #include "midi_mapping.pb.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
 #include "ui_midi_mapping.h"
 
 class QObject;
similarity index 99%
rename from mixer.cpp
rename to nageru/mixer.cpp
index 294040f1bfd6fc6dde71a793af91c536e2c9f12e..953fd8133db59410ab2973d7592a05e5d133bc09 100644 (file)
--- a/mixer.cpp
 #include "cef_capture.h"
 #endif
 #include "chroma_subsampler.h"
-#include "context.h"
+#include "shared/context.h"
 #include "decklink_capture.h"
 #include "decklink_output.h"
 #include "defs.h"
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
 #include "ffmpeg_capture.h"
 #include "flags.h"
 #include "input_mapping.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "mjpeg_encoder.h"
 #include "pbo_frame_allocator.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 #include "resampling_queue.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "timecode_renderer.h"
 #include "v210_converter.h"
 #include "va_display_with_cleanup.h"
similarity index 99%
rename from mixer.h
rename to nageru/mixer.h
index 84313e5130b0816b2a997ee9265d637781cb3aed..b4ed76f4575ff0403b4865a470af8925d548ddd1 100644 (file)
--- a/mixer.h
 #include "audio_mixer.h"
 #include "bmusb/bmusb.h"
 #include "defs.h"
-#include "httpd.h"
+#include "shared/httpd.h"
 #include "input_state.h"
 #include "libusb.h"
 #include "pbo_frame_allocator.h"
 #include "ref_counted_frame.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 #include "theme.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "video_encoder.h"
 #include "ycbcr_interpretation.h"
 
similarity index 99%
rename from mjpeg_encoder.cpp
rename to nageru/mjpeg_encoder.cpp
index 740b059211c6b2b097497d82ffe003fd5cdbd658..3587d78dcf3fa51a61a92af89ec362b3297b2d3e 100644 (file)
@@ -12,12 +12,12 @@ extern "C" {
 }
 
 #include "defs.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "flags.h"
-#include "httpd.h"
-#include "memcpy_interleaved.h"
+#include "shared/httpd.h"
+#include "shared/memcpy_interleaved.h"
 #include "pbo_frame_allocator.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "va_display_with_cleanup.h"
 
 #include <va/va.h>
similarity index 99%
rename from mjpeg_encoder.h
rename to nageru/mjpeg_encoder.h
index ab8632a7f9c5c809ee33f075c20a546219a74891..3ce34396efa4253290efe66e1a59a2574ef757b8 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _MJPEG_ENCODER_H
 #define _MJPEG_ENCODER_H 1
 
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "ref_counted_frame.h"
 
 extern "C" {
similarity index 100%
rename from nageru_cef_app.cpp
rename to nageru/nageru_cef_app.cpp
similarity index 100%
rename from nageru_cef_app.h
rename to nageru/nageru_cef_app.h
similarity index 100%
rename from nonlinear_fader.h
rename to nageru/nonlinear_fader.h
similarity index 99%
rename from print_latency.cpp
rename to nageru/print_latency.cpp
index 72440ae0fe41a3ec13df76c595450ca409090aec..21098a7a5997fac8743008fe285c412566ae0e57 100644 (file)
@@ -1,7 +1,7 @@
 #include "print_latency.h"
 
 #include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "mixer.h"
 
 #include <stdio.h>
similarity index 97%
rename from print_latency.h
rename to nageru/print_latency.h
index d80ac88e96a25ac04f6f64a27f921edd9c45ff59..a9fb267949437773969685fb401856c384d14c9e 100644 (file)
@@ -10,7 +10,7 @@
 #include <vector>
 
 #include "ref_counted_frame.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 
 // Since every output frame is based on multiple input frames, we need
 // more than one start timestamp; one for each input.
similarity index 99%
rename from quicksync_encoder.cpp
rename to nageru/quicksync_encoder.cpp
index bca7ffd91983e73ed819d7889c9925cd58acae2f..b5d7c2d9dccf428665c954b161a57e6f76c3570c 100644 (file)
@@ -45,16 +45,16 @@ extern "C" {
 }  // namespace
 
 #include "audio_encoder.h"
-#include "context.h"
+#include "shared/context.h"
 #include "defs.h"
-#include "disk_space_estimator.h"
-#include "ffmpeg_raii.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
 #include "flags.h"
-#include "mux.h"
+#include "shared/mux.h"
 #include "print_latency.h"
 #include "quicksync_encoder_impl.h"
 #include "ref_counted_frame.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "x264_encoder.h"
 
 using namespace movit;
@@ -1813,8 +1813,8 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
        {
                lock_guard<mutex> lock(file_audio_encoder_mutex);
                AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
-               file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
-                       std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+               file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), get_color_space(global_flags.ycbcr_rec709_coefficients), Mux::WITH_AUDIO, TIMEBASE,
+                       std::bind(&DiskSpaceEstimator::report_append, disk_space_estimator, filename, _1),
                        Mux::WRITE_BACKGROUND,
                        { &current_file_mux_metrics, &total_mux_metrics }));
        }
similarity index 98%
rename from quicksync_encoder.h
rename to nageru/quicksync_encoder.h
index 110d615ed05e846b85aae14ad4422a64166092f2..4f71f90067d3d8349f7f7ad11fe163c2b670dcf5 100644 (file)
@@ -43,7 +43,7 @@ extern "C" {
 #include <libavformat/avformat.h>
 }
 
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 
 class DiskSpaceEstimator;
 class Mux;
similarity index 99%
rename from quicksync_encoder_impl.h
rename to nageru/quicksync_encoder_impl.h
index 0317b6af0393723bb1ca38ac0100947d8c800c85..5e215e5b691bfa3f81ff317ce58404415c8d4c3d 100644 (file)
@@ -17,9 +17,9 @@
 
 #include "audio_encoder.h"
 #include "defs.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "print_latency.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
 #include "va_display_with_cleanup.h"
 
 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
similarity index 100%
rename from ref.raw
rename to nageru/ref.raw
similarity index 100%
rename from resampling_queue.h
rename to nageru/resampling_queue.h
similarity index 100%
rename from simple.lua
rename to nageru/simple.lua
similarity index 100%
rename from state.proto
rename to nageru/state.proto
similarity index 100%
rename from stereocompressor.h
rename to nageru/stereocompressor.h
similarity index 100%
rename from theme.cpp
rename to nageru/theme.cpp
similarity index 100%
rename from theme.h
rename to nageru/theme.h
similarity index 100%
rename from theme.lua
rename to nageru/theme.lua
diff --git a/nageru/timecode.frag b/nageru/timecode.frag
new file mode 100644 (file)
index 0000000..01333de
--- /dev/null
@@ -0,0 +1,13 @@
+#version 130
+
+in vec2 tc0;
+uniform sampler2D tex;
+out vec4 Y, CbCr, YCbCr;
+
+void main() {
+	vec4 gray = texture(tex, tc0);
+	gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0;  // Limited-range Y'CbCr.
+	CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0);
+       Y = gray.rrra;
+       YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a);
+}
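
The "Limited-range Y'CbCr" line maps full-scale gray in [0, 1] onto the 8-bit broadcast range, 16 (black) to 235 (white), with chroma parked at the neutral value 128. A self-contained check of that affine map (illustrative only, not part of the diff):

#include <stdio.h>

// Same scaling as in timecode.frag: 0.0 maps to code value 16, 1.0 to 235.
static float to_limited_range(float gray)
{
	return gray * ((235.0f - 16.0f) / 255.0f) + 16.0f / 255.0f;
}

int main()
{
	printf("%.1f\n", to_limited_range(0.0f) * 255.0f);  // 16.0
	printf("%.1f\n", to_limited_range(1.0f) * 255.0f);  // 235.0
}
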
diff --git a/nageru/timecode.vert b/nageru/timecode.vert
new file mode 100644 (file)
index 0000000..ab4d42e
--- /dev/null
@@ -0,0 +1,17 @@
+#version 130
+
+in vec2 position;
+in vec2 texcoord;
+out vec2 tc0;
+
+void main()
+{
+    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+    //
+    //   2.000  0.000  0.000 -1.000
+    //   0.000  2.000  0.000 -1.000
+    //   0.000  0.000 -2.000 -1.000
+    //   0.000  0.000  0.000  1.000
+    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+    tc0 = texcoord;
+}
diff --git a/nageru/timecode_10bit.frag b/nageru/timecode_10bit.frag
new file mode 100644 (file)
index 0000000..d9b7e69
--- /dev/null
@@ -0,0 +1,13 @@
+#version 130
+
+in vec2 tc0;
+uniform sampler2D tex;
+out vec4 Y, CbCr, YCbCr;
+
+void main() {
+	vec4 gray = texture(tex, tc0);
+	gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0;  // Limited-range Y'CbCr.
+	CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0);
+       Y = gray.rrra;
+       YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a);
+}
similarity index 80%
rename from timecode_renderer.cpp
rename to nageru/timecode_renderer.cpp
index a923acd4f3554d1a6d653d9e7ca045dbd24d4dd3..2ada19abb910e2381c9fa2eeaf19f56625dd9740 100644 (file)
@@ -14,6 +14,8 @@
 #include <sys/time.h>
 
 #include "flags.h"
+#include "embedded_files.h"
+#include "shared/read_file.h"
 
 using namespace std;
 using namespace movit;
@@ -21,44 +23,13 @@ using namespace movit;
 TimecodeRenderer::TimecodeRenderer(movit::ResourcePool *resource_pool, unsigned display_width, unsigned display_height)
        : resource_pool(resource_pool), display_width(display_width), display_height(display_height), height(28)
 {
-       string vert_shader =
-               "#version 130 \n"
-               " \n"
-               "in vec2 position; \n"
-               "in vec2 texcoord; \n"
-               "out vec2 tc0; \n"
-               " \n"
-               "void main() \n"
-               "{ \n"
-               "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
-               "    // \n"
-               "    //   2.000  0.000  0.000 -1.000 \n"
-               "    //   0.000  2.000  0.000 -1.000 \n"
-               "    //   0.000  0.000 -2.000 -1.000 \n"
-               "    //   0.000  0.000  0.000  1.000 \n"
-               "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
-               "    tc0 = texcoord; \n"
-               "} \n";
-       string frag_shader =
-               "#version 130 \n"
-               "in vec2 tc0; \n"
-               "uniform sampler2D tex; \n"
-               "out vec4 Y, CbCr, YCbCr; \n"
-               "void main() { \n"
-               "    vec4 gray = texture(tex, tc0); \n";
+       string vert_shader = read_file("timecode.vert", _binary_timecode_vert_data, _binary_timecode_vert_size);
+       string frag_shader;
        if (global_flags.ten_bit_output) {
-               frag_shader +=
-                       "    gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0; \n"  // Limited-range Y'CbCr.
-                       "    CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0); \n";
+               frag_shader = read_file("timecode_10bit.frag", _binary_timecode_10bit_frag_data, _binary_timecode_10bit_frag_size);
        } else {
-               frag_shader +=
-                       "    gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0; \n"  // Limited-range Y'CbCr.
-                       "    CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0); \n";
+               frag_shader = read_file("timecode.frag", _binary_timecode_frag_data, _binary_timecode_frag_size);
        }
-       frag_shader +=
-               "    Y = gray.rrra; \n"
-               "    YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a); \n"
-               "} \n";
 
        vector<string> frag_shader_outputs;
        program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs);
similarity index 100%
rename from tweaked_inputs.cpp
rename to nageru/tweaked_inputs.cpp
similarity index 100%
rename from tweaked_inputs.h
rename to nageru/tweaked_inputs.h
diff --git a/nageru/uyvy_subsample.frag b/nageru/uyvy_subsample.frag
new file mode 100644 (file)
index 0000000..80e05b0
--- /dev/null
@@ -0,0 +1,13 @@
+#version 130
+
+in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1;
+uniform sampler2D y_tex, cbcr_tex;
+out vec4 FragColor;
+void main() {
+       float y0 = texture(y_tex, y_tc0).r;
+       float y1 = texture(y_tex, y_tc1).r;
+       vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg;
+       vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg;
+       vec2 cbcr = 0.5 * (cbcr0 + cbcr1);
+       FragColor = vec4(cbcr.g, y0, cbcr.r, y1);
+}
diff --git a/nageru/uyvy_subsample.vert b/nageru/uyvy_subsample.vert
new file mode 100644 (file)
index 0000000..04c1e82
--- /dev/null
@@ -0,0 +1,25 @@
+#version 130
+
+in vec2 position;
+in vec2 texcoord;
+out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1;
+uniform vec2 foo_luma_offset_0;
+uniform vec2 foo_luma_offset_1;
+uniform vec2 foo_chroma_offset_0;
+uniform vec2 foo_chroma_offset_1;
+
+void main()
+{
+       // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+       //
+       //   2.000  0.000  0.000 -1.000
+       //   0.000  2.000  0.000 -1.000
+       //   0.000  0.000 -2.000 -1.000
+       //   0.000  0.000  0.000  1.000
+       gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+       vec2 flipped_tc = texcoord;
+       y_tc0 = flipped_tc + foo_luma_offset_0;
+       y_tc1 = flipped_tc + foo_luma_offset_1;
+       cbcr_tc0 = flipped_tc + foo_chroma_offset_0;
+       cbcr_tc1 = flipped_tc + foo_chroma_offset_1;
+}
similarity index 100%
rename from v210_converter.cpp
rename to nageru/v210_converter.cpp
similarity index 100%
rename from v210_converter.h
rename to nageru/v210_converter.h
diff --git a/nageru/v210_subsample.comp b/nageru/v210_subsample.comp
new file mode 100644 (file)
index 0000000..fccbbc9
--- /dev/null
@@ -0,0 +1,37 @@
+#version 150
+#extension GL_ARB_compute_shader : enable
+#extension GL_ARB_shader_image_load_store : enable
+
+layout(local_size_x=2, local_size_y=16) in;
+layout(r16) uniform restrict readonly image2D in_y;
+uniform sampler2D in_cbcr;  // Of type RG16.
+layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
+uniform float inv_width, inv_height;
+
+void main()
+{
+       int xb = int(gl_GlobalInvocationID.x);  // X block number.
+       int y = int(gl_GlobalInvocationID.y);  // Y (actual line).
+       float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height;  // Y float coordinate.
+
+       // Load and scale CbCr values, sampling in-between the texels to get
+       // to (left/4 + center/2 + right/4).
+       vec2 pix_cbcr[3];
+       for (int i = 0; i < 3; ++i) {
+               vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
+               vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
+               pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
+       }
+
+       // Load and scale the Y values. Note that we use integer coordinates here,
+       // so we don't need to offset by 0.5.
+       float pix_y[6];
+       for (int i = 0; i < 6; ++i) {
+               pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
+       }
+
+       imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0],      pix_cbcr[0].y, 1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1],      pix_cbcr[1].x, pix_y[2],      1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3],      pix_cbcr[2].x, 1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4],      pix_cbcr[2].y, pix_y[5],      1.0));
+}
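
The "sampling in-between the texels" trick in this shader relies on linear texture filtering (assuming in_cbcr is bound with GL_LINEAR): a fetch exactly on a texel boundary returns the mean of the two neighboring texels, so the two taps per chroma sample collapse into the [1/4, 1/2, 1/4] kernel the comment describes. The equivalent computation on the CPU, as an illustrative sketch:

// c[] holds one line of chroma samples; i is the center texel.
static float chroma_lowpass(const float *c, int i)
{
	float a = 0.5f * (c[i - 1] + c[i]);   // linear fetch on the left boundary
	float b = 0.5f * (c[i] + c[i + 1]);   // linear fetch on the right boundary
	return 0.5f * (a + b);                // == c[i-1]/4 + c[i]/2 + c[i+1]/4
}
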
similarity index 97%
rename from video_encoder.cpp
rename to nageru/video_encoder.cpp
index 2b8fcd5edec9e0a9ea3a79aed79d39d02494375d..bd93ae265369ec7076ee445c5bb38edb7f290298 100644 (file)
@@ -13,12 +13,12 @@ extern "C" {
 
 #include "audio_encoder.h"
 #include "defs.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
 #include "flags.h"
-#include "httpd.h"
-#include "mux.h"
+#include "shared/httpd.h"
+#include "shared/mux.h"
 #include "quicksync_encoder.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "x264_encoder.h"
 
 class RefCountedFrame;
@@ -193,7 +193,9 @@ void VideoEncoder::open_output_stream()
                video_extradata = x264_encoder->get_global_headers();
        }
 
-       stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_codec_parameters().get(), COARSE_TIMEBASE,
+       stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_codec_parameters().get(),
+               get_color_space(global_flags.ycbcr_rec709_coefficients),
+               Mux::WITH_AUDIO, COARSE_TIMEBASE,
                /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, { &stream_mux_metrics }));
        stream_mux_metrics.init({{ "destination", "http" }});
 }
similarity index 98%
rename from video_encoder.h
rename to nageru/video_encoder.h
index 21595a380c992b7df76c0d3531e0cee72b5b2cfe..76dd92fa12cb12ef7dc2d439f9f335d75dcaf6b4 100644 (file)
@@ -20,8 +20,8 @@ extern "C" {
 #include <libavformat/avio.h>
 }
 
-#include "mux.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/mux.h"
+#include "shared/ref_counted_gl_sync.h"
 
 class AudioEncoder;
 class DiskSpaceEstimator;
similarity index 100%
rename from vu_common.cpp
rename to nageru/vu_common.cpp
similarity index 100%
rename from vu_common.h
rename to nageru/vu_common.h
similarity index 100%
rename from vumeter.cpp
rename to nageru/vumeter.cpp
similarity index 100%
rename from vumeter.h
rename to nageru/vumeter.h
similarity index 100%
rename from x264_dynamic.cpp
rename to nageru/x264_dynamic.cpp
similarity index 100%
rename from x264_dynamic.h
rename to nageru/x264_dynamic.h
similarity index 99%
rename from x264_encoder.cpp
rename to nageru/x264_encoder.cpp
index 8463d1bae285744a420aa47a1155504eb3f07b00..52710088d1e722fce482a06b3f555c9afe3b6862 100644 (file)
 
 #include "defs.h"
 #include "flags.h"
-#include "metrics.h"
-#include "mux.h"
+#include "shared/metrics.h"
+#include "shared/mux.h"
 #include "print_latency.h"
-#include "timebase.h"
+#include "shared/timebase.h"
 #include "x264_dynamic.h"
 #include "x264_speed_control.h"
 
similarity index 99%
rename from x264_encoder.h
rename to nageru/x264_encoder.h
index 687bf718679c316a3b2e6a24a41c8e449d79c776..7b8751715250d5f34147f7f11d9a8d83df409037 100644 (file)
@@ -33,7 +33,7 @@ extern "C" {
 #include <movit/image_format.h>
 
 #include "defs.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "print_latency.h"
 #include "x264_dynamic.h"
 
similarity index 99%
rename from x264_speed_control.cpp
rename to nageru/x264_speed_control.cpp
index 719cf28d3ef5a47773bd133206a283ba18b1579f..5240347d8375806647ca347be6d0154fb5836da3 100644 (file)
@@ -11,7 +11,7 @@
 #include <type_traits>
 
 #include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
 
 using namespace std;
 using namespace std::chrono;
similarity index 99%
rename from x264_speed_control.h
rename to nageru/x264_speed_control.h
index b0a1739d022e462c48c7e80e7b73b905fcdb6411..7559b87318c2064f56031c5abdcf3e23a534984b 100644 (file)
@@ -55,7 +55,7 @@ extern "C" {
 #include <x264.h>
 }
 
-#include "metrics.h"
+#include "shared/metrics.h"
 #include "x264_dynamic.h"
 
 class X264SpeedControl {
diff --git a/shared/bin2h.cpp b/shared/bin2h.cpp
new file mode 100644 (file)
index 0000000..a396afe
--- /dev/null
@@ -0,0 +1,53 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+       if (argc != 4) {
+               fprintf(stderr, "Usage: bin2h INFILE BASENAME OUTFILE\n");
+               return 1;
+       }
+
+       string basename = argv[2];
+       for (char &ch : basename) {
+		if (!isalpha((unsigned char)ch) && !isdigit((unsigned char)ch)) {
+                       ch = '_';
+               }
+       }
+
+       FILE *infp = fopen(argv[1], "rb");
+       if (infp == nullptr) {
+               perror(argv[1]);
+               exit(1);
+       }
+
+       FILE *outfp = fopen(argv[3], "w");
+       if (outfp == nullptr) {
+               perror(argv[3]);
+               exit(1);
+       }
+
+       fprintf(outfp, "// Generated by bin2h.cpp from %s. Do not edit by hand.\n", argv[1]);
+       fprintf(outfp, "#include <stddef.h>\n");
+       fprintf(outfp, "unsigned char _binary_%s[] = {", basename.c_str());
+
+       size_t num_bytes = 0;
+       while (!feof(infp)) {
+               if (num_bytes++ % 16 == 0) {
+                       fprintf(outfp, "\n\t");
+               }
+               int ch = getc(infp);
+               if (ch == -1) {
+                       break;
+               }
+               fprintf(outfp, "0x%02x, ", ch);
+       }
+       fprintf(outfp, "\n};\n");
+       fprintf(outfp, "unsigned char *_binary_%s_data = _binary_%s;\n", basename.c_str(), basename.c_str());
+       fprintf(outfp, "size_t _binary_%s_size = sizeof(_binary_%s);\n", basename.c_str(), basename.c_str());
+       return 0;
+}
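To make bin2h's output concrete: for a hypothetical three-byte input file foo.frag (bytes 'a', 'b', 'c'), the fprintf calls above would emit an OUTFILE along these lines, with non-alphanumeric basename characters turned into underscores and sixteen bytes per line:

// Generated by bin2h.cpp from foo.frag. Do not edit by hand.
#include <stddef.h>
unsigned char _binary_foo_frag[] = {
	0x61, 0x62, 0x63,
};
unsigned char *_binary_foo_frag_data = _binary_foo_frag;
size_t _binary_foo_frag_size = sizeof(_binary_foo_frag);

The _data/_size pair is the interface shared/read_file.h (further down) consumes as its compiled-in fallback.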
similarity index 70%
rename from context.cpp
rename to shared/context.cpp
index eb62183a3b3a8c9c7c9065f6d212a21b92e8257e..0b17bfa56893ec065b5f96efbe1a95a1a6a687c7 100644 (file)
@@ -1,18 +1,34 @@
-#include <stdio.h>
-
-#include <string>
-
 #include <QGL>
 #include <QOffscreenSurface>
 #include <QOpenGLContext>
 #include <QSurface>
 #include <QSurfaceFormat>
+#include <stdio.h>
+#include <string>
 
 QGLWidget *global_share_widget = nullptr;
-bool using_egl = false;
 
 using namespace std;
 
+QSurface *create_surface()
+{
+       QSurfaceFormat fmt;
+       fmt.setDepthBufferSize(0);
+       fmt.setStencilBufferSize(0);
+       fmt.setProfile(QSurfaceFormat::CoreProfile);
+       fmt.setMajorVersion(4);
+       fmt.setMinorVersion(5);
+       fmt.setSwapInterval(0);
+       QOffscreenSurface *surface = new QOffscreenSurface;
+       surface->setFormat(fmt);
+       surface->create();
+       if (!surface->isValid()) {
+               fprintf(stderr, "ERROR: surface not valid!\n");
+               exit(1);
+       }
+       return surface;
+}
+
 QSurface *create_surface(const QSurfaceFormat &format)
 {
        QOffscreenSurface *surface = new QOffscreenSurface;
diff --git a/shared/context.h b/shared/context.h
new file mode 100644 (file)
index 0000000..aebba96
--- /dev/null
@@ -0,0 +1,17 @@
+
+// Needs to be in its own file because Qt and libepoxy seemingly don't coexist well
+// within the same file.
+
+class QSurface;
+class QOpenGLContext;
+class QSurfaceFormat;
+class QGLWidget;
+
+extern bool using_egl;
+extern QGLWidget *global_share_widget;
+QSurface *create_surface();
+QSurface *create_surface(const QSurfaceFormat &format);
+QSurface *create_surface_with_same_format(const QSurface *surface);
+QOpenGLContext *create_context(const QSurface *surface);
+bool make_current(QOpenGLContext *context, QSurface *surface);
+void delete_context(QOpenGLContext *context);
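A sketch of how these entry points compose for an offscreen worker-thread context; this is a hypothetical call site (run_gl_worker is invented), with error handling in the style of create_surface() above:

#include <stdio.h>
#include <stdlib.h>

#include <QOpenGLContext>
#include <QSurface>

#include "shared/context.h"

void run_gl_worker()
{
	QSurface *surface = create_surface();  // 4.5 core, no depth/stencil, swap interval 0.
	QOpenGLContext *context = create_context(surface);
	if (!make_current(context, surface)) {
		fprintf(stderr, "ERROR: couldn't make the OpenGL context current\n");
		exit(1);
	}
	// ... issue OpenGL calls on this thread ...
	delete_context(context);
}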
similarity index 65%
rename from disk_space_estimator.cpp
rename to shared/disk_space_estimator.cpp
index 86e5e877f65aeaad39cd7423cc3c0577ed499a94..da55ee1e46b5018a0f8e381752bf661e55c11cc1 100644 (file)
@@ -1,12 +1,14 @@
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
 
+#include <memory>
 #include <stdio.h>
 #include <sys/stat.h>
 #include <sys/statfs.h>
-#include <memory>
 
-#include "metrics.h"
-#include "timebase.h"
+#include "shared/metrics.h"
+#include "shared/timebase.h"
+
+using namespace std;
 
 DiskSpaceEstimator::DiskSpaceEstimator(DiskSpaceEstimator::callback_t callback)
        : callback(callback)
@@ -14,15 +16,32 @@ DiskSpaceEstimator::DiskSpaceEstimator(DiskSpaceEstimator::callback_t callback)
        global_metrics.add("disk_free_bytes", &metric_disk_free_bytes, Metrics::TYPE_GAUGE);
 }
 
-void DiskSpaceEstimator::report_write(const std::string &filename, uint64_t pts)
+void DiskSpaceEstimator::report_write(const string &filename, off_t bytes, uint64_t pts)
+{
+       total_size += bytes;
+       report_write_internal(filename, total_size, pts);
+}
+
+void DiskSpaceEstimator::report_append(const string &filename, uint64_t pts)
 {
        if (filename != last_filename) {
                last_filename = filename;
                measure_points.clear();
        }
 
+       struct stat st;
+       if (stat(filename.c_str(), &st) == -1) {
+               perror(filename.c_str());
+               return;
+       }
+
+       report_write_internal(filename, st.st_size, pts);
+}
+
+void DiskSpaceEstimator::report_write_internal(const string &filename, off_t file_size, uint64_t pts)
+{
        // Reject points that are out-of-order (happens with B-frames).
-       if (!measure_points.empty() && pts < measure_points.back().pts) {
+       if (!measure_points.empty() && pts <= measure_points.back().pts) {
                return;
        }
 
@@ -31,12 +50,6 @@ void DiskSpaceEstimator::report_write(const std::string &filename, uint64_t pts)
                measure_points.pop_front();
        }
 
-       struct stat st;
-       if (stat(filename.c_str(), &st) == -1) {
-               perror(filename.c_str());
-               return;
-       }
-
        struct statfs fst;
        if (statfs(filename.c_str(), &fst) == -1) {
                perror(filename.c_str());
@@ -47,7 +60,7 @@ void DiskSpaceEstimator::report_write(const std::string &filename, uint64_t pts)
        metric_disk_free_bytes = free_bytes;
 
        if (!measure_points.empty()) {
-               double bytes_per_second = double(st.st_size - measure_points.front().size) /
+               double bytes_per_second = double(file_size - measure_points.front().size) /
                        (pts - measure_points.front().pts) * TIMEBASE;
                double seconds_left = free_bytes / bytes_per_second;
 
@@ -58,7 +71,7 @@ void DiskSpaceEstimator::report_write(const std::string &filename, uint64_t pts)
                }
        }
 
-       measure_points.push_back({ pts, st.st_size });
+       measure_points.push_back({ pts, file_size });
 }
 
 DiskSpaceEstimator *global_disk_space_estimator = nullptr;  // Created in MainWindow::MainWindow().
similarity index 61%
rename from disk_space_estimator.h
rename to shared/disk_space_estimator.h
index 73b392cc586e43c57c82fa4c6195d20d294c82bf..163b7efd555dd81ac2df84a5fc8591f63b393673 100644 (file)
@@ -9,35 +9,46 @@
 //
 // The bitrate is measured over a simple 30-second sliding window.
 
-#include <stdint.h>
-#include <sys/types.h>
 #include <atomic>
 #include <deque>
 #include <functional>
+#include <stdint.h>
 #include <string>
+#include <sys/types.h>
 
-#include "timebase.h"
+#include "shared/timebase.h"
 
-class DiskSpaceEstimator
-{
+class DiskSpaceEstimator {
 public:
        typedef std::function<void(off_t free_bytes, double estimated_seconds_left)> callback_t;
        DiskSpaceEstimator(callback_t callback);
 
+       // Report that a video frame with the given pts and size has just been
+       // written (possibly appended) to the given file.
+       //
+       // <pts> is taken to be in TIMEBASE units (see shared/timebase.h).
+       void report_write(const std::string &filename, off_t bytes, uint64_t pts);
+
        // Report that a video frame with the given pts has just been written
        // to the given file, so the estimator should stat the file and see
        // by how much it grew since last time. Called by the Mux object
        // responsible for writing to the stream on disk.
        //
        // If the filename changed since last time, the estimation is reset.
-       // <pts> is taken to be in TIMEBASE units (see timebase.h).
-       void report_write(const std::string &filename, uint64_t pts);
+       // <pts> is taken to be in TIMEBASE units (see shared/timebase.h).
+       //
+       // You should probably not mix this and report_write() on the same
+       // object. Really, report_write() matches Futatabi's controlled writes
+       // to a custom format, and report_append() matches Nageru's use of Mux
+       // (where we don't see the bytes flowing past).
+       void report_append(const std::string &filename, uint64_t pts);
 
 private:
        static constexpr int64_t window_length = 30 * TIMEBASE;
 
+       void report_write_internal(const std::string &filename, off_t file_size, uint64_t pts);
+
        callback_t callback;
-       std::string last_filename;
 
        struct MeasurePoint {
                uint64_t pts;
@@ -46,6 +57,9 @@ private:
        std::deque<MeasurePoint> measure_points;
        uint64_t last_pts_reported = 0;
 
+       off_t total_size = 0;  // For report_write().
+       std::string last_filename;  // For report_append().
+
        // Metrics.
        std::atomic<int64_t> metric_disk_free_bytes{-1};
 };
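The estimate itself is simple rate math over the sliding window: bytes_per_second = (file_size - oldest.size) / (pts - oldest.pts) * TIMEBASE, then seconds_left = free_bytes / bytes_per_second. A standalone numeric sketch with invented figures, assuming the usual TIMEBASE value of 120000 from shared/timebase.h:

#include <stdint.h>
#include <stdio.h>

int main()
{
	const int64_t TIMEBASE = 120000;           // As in shared/timebase.h.
	const int64_t pts_delta = 30 * TIMEBASE;   // A full 30-second window.
	const double size_delta = 90e6;            // File grew 90 MB over the window.
	const double free_bytes = 60e9;            // 60 GB free on the filesystem.

	double bytes_per_second = size_delta / pts_delta * TIMEBASE;  // 3 MB/sec.
	double seconds_left = free_bytes / bytes_per_second;          // 20000 sec, ~5.5 hours.
	printf("%.0f bytes/sec -> %.0f seconds left\n", bytes_per_second, seconds_left);
	return 0;
}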
similarity index 100%
rename from ffmpeg_raii.cpp
rename to shared/ffmpeg_raii.cpp
similarity index 100%
rename from ffmpeg_raii.h
rename to shared/ffmpeg_raii.h
similarity index 95%
rename from httpd.cpp
rename to shared/httpd.cpp
index 8d33e75a4f6b5bde8e6f8b79aa23ffa007e8351f..a2668f5987c352d2d3c7c501a86cd86828ffe682 100644 (file)
--- a/httpd.cpp
@@ -1,22 +1,22 @@
-#include "httpd.h"
+#include "shared/httpd.h"
 
 #include <assert.h>
 #include <byteswap.h>
 #include <endian.h>
+#include <memory>
 #include <microhttpd.h>
 #include <netinet/in.h>
 #include <stdio.h>
 #include <string.h>
 #include <sys/time.h>
 #include <time.h>
-#include <memory>
 extern "C" {
 #include <libavutil/avutil.h>
 }
 
-#include "defs.h"
-#include "metacube2.h"
-#include "metrics.h"
+#include "shared/shared_defs.h"
+#include "shared/metacube2.h"
+#include "shared/metrics.h"
 
 struct MHD_Connection;
 struct MHD_Response;
@@ -79,8 +79,8 @@ int HTTPD::answer_to_connection_thunk(void *cls, MHD_Connection *connection,
 
 int HTTPD::answer_to_connection(MHD_Connection *connection,
                                 const char *url, const char *method,
-                               const char *version, const char *upload_data,
-                               size_t *upload_data_size, void **con_cls)
+                                const char *version, const char *upload_data,
+                                size_t *upload_data_size, void **con_cls)
 {
        // See if the URL ends in “.metacube”.
        HTTPD::Stream::Framing framing;
@@ -173,7 +173,7 @@ ssize_t HTTPD::Stream::reader_callback_thunk(void *cls, uint64_t pos, char *buf,
 ssize_t HTTPD::Stream::reader_callback(uint64_t pos, char *buf, size_t max)
 {
        unique_lock<mutex> lock(buffer_mutex);
-       has_buffered_data.wait(lock, [this]{ return should_quit || !buffered_data.empty(); });
+       has_buffered_data.wait(lock, [this] { return should_quit || !buffered_data.empty(); });
        if (should_quit) {
                return 0;
        }
@@ -272,7 +272,7 @@ void HTTPD::Stream::add_data(const char *buf, size_t buf_size, HTTPD::Stream::Da
                buffered_data.emplace_back((char *)&packet, sizeof(packet));
        }
 
-       has_buffered_data.notify_all(); 
+       has_buffered_data.notify_all();
 }
 
 void HTTPD::Stream::stop()
similarity index 95%
rename from httpd.h
rename to shared/httpd.h
index 1ff5c51108facf03ca1b0d8fc2f076a63938e595..8c3c8105c5b959155abbd19d95b10a1fcfc0ffa6 100644 (file)
--- a/httpd.h
@@ -3,16 +3,16 @@
 
 // A class dealing with stream output to HTTP.
 
-#include <stddef.h>
-#include <stdint.h>
-#include <sys/types.h>
 #include <atomic>
 #include <condition_variable>
 #include <deque>
 #include <functional>
 #include <mutex>
 #include <set>
+#include <stddef.h>
+#include <stdint.h>
 #include <string>
+#include <sys/types.h>
 #include <unordered_map>
 #include <utility>
 
@@ -47,14 +47,16 @@ public:
                NO_CORS_POLICY,
                ALLOW_ALL_ORIGINS
        };
-       void add_endpoint(const std::string &url, const EndpointCallback &callback, CORSPolicy cors_policy) {
+       void add_endpoint(const std::string &url, const EndpointCallback &callback, CORSPolicy cors_policy)
+       {
                endpoints[url] = Endpoint{ callback, cors_policy };
        }
 
        void start(int port);
        void stop();
        void add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
-       int64_t get_num_connected_clients() const {
+       int64_t get_num_connected_clients() const
+       {
                return metric_num_connected_clients.load();
        }
 
@@ -71,7 +73,6 @@ private:
 
        static void free_stream(void *cls);
 
-
        class Stream {
        public:
                enum Framing {
@@ -118,7 +119,7 @@ private:
        std::string header[NUM_STREAM_TYPES];
 
        // Metrics.
-       std::atomic<int64_t> metric_num_connected_clients{0};
+       std::atomic<int64_t> metric_num_connected_clients{ 0 };
 };
 
 #endif  // !defined(_HTTPD_H)
similarity index 95%
rename from memcpy_interleaved.cpp
rename to shared/memcpy_interleaved.cpp
index 9a41cdd53ac1e5ba93330c9657a2d670171e910c..9634fd26b88ddcb95ca4d5c5afcedb79cb21747c 100644 (file)
@@ -1,6 +1,6 @@
-#include <cstdint>
 #include <algorithm>
 #include <assert.h>
+#include <cstdint>
 #if __SSE2__
 #include <immintrin.h>
 #endif
@@ -56,9 +56,9 @@ size_t memcpy_interleaved_fastpath(uint8_t *dest1, uint8_t *dest2, const uint8_t
        assert(((limit - src) % 64) == 0);
 
 #if __AVX2__
-       const __m256i * __restrict in = (const __m256i *)src;
-       __m256i * __restrict out1 = (__m256i *)dest1;
-       __m256i * __restrict out2 = (__m256i *)dest2;
+       const __m256i *__restrict in = (const __m256i *)src;
+       __m256i *__restrict out1 = (__m256i *)dest1;
+       __m256i *__restrict out2 = (__m256i *)dest2;
 
        __m256i shuffle_cw = _mm256_set_epi8(
                15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
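For reference, the operation this AVX2 fast path accelerates is a plain byte de-interleave (e.g., splitting UYVY-style data into separate chroma and luma planes). A scalar sketch of the same semantics, with an invented function name:

#include <stddef.h>
#include <stdint.h>

// Even source bytes go to dest1, odd ones to dest2. The vectorized versions
// above do the same thing many bytes at a time via shuffles.
static void memcpy_interleaved_scalar(uint8_t *dest1, uint8_t *dest2,
                                      const uint8_t *src, size_t n)
{
	for (size_t i = 0; i < n / 2; ++i) {
		dest1[i] = src[i * 2 + 0];
		dest2[i] = src[i * 2 + 1];
	}
}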
diff --git a/shared/meson.build b/shared/meson.build
new file mode 100644 (file)
index 0000000..acb2c9a
--- /dev/null
@@ -0,0 +1,14 @@
+qt5 = import('qt5')
+qt5deps = dependency('qt5', modules: ['OpenGL'])
+libmicrohttpddep = dependency('libmicrohttpd')
+
+srcs = ['memcpy_interleaved.cpp', 'metacube2.cpp', 'ffmpeg_raii.cpp', 'mux.cpp', 'metrics.cpp', 'context.cpp', 'httpd.cpp', 'disk_space_estimator.cpp', 'read_file.cpp']
+shared = static_library('shared', srcs, include_directories: top_include, dependencies: [qt5deps, libmicrohttpddep])
+shareddep = declare_dependency(
+   include_directories: top_include,
+   link_with: shared)
+
+bin2h = executable('bin2h', 'bin2h.cpp')
bin2h_gen = generator(bin2h,
+  output    : ['@PLAINNAME@.cpp'],
+  arguments : ['@INPUT@', '@PLAINNAME@', '@OUTPUT@'])
similarity index 100%
rename from metacube2.cpp
rename to shared/metacube2.cpp
similarity index 100%
rename from metacube2.h
rename to shared/metacube2.h
similarity index 99%
rename from metrics.cpp
rename to shared/metrics.cpp
index 86c3d591872a4a38d53d34a55216b66e4d6d1405..24b61fe56de7cb0574dab74670101393ae645dab 100644 (file)
@@ -1,4 +1,4 @@
-#include "metrics.h"
+#include "shared/metrics.h"
 
 #include <assert.h>
 #include <math.h>
similarity index 100%
rename from metrics.h
rename to shared/metrics.h
similarity index 88%
rename from mux.cpp
rename to shared/mux.cpp
index b1b9db683081528e8aa15c640288e0d8a2d5b9e6..4970bcea81bcd684eb019d5484586d888c757a80 100644 (file)
--- a/mux.cpp
@@ -1,12 +1,12 @@
-#include "mux.h"
+#include "shared/mux.h"
 
+#include <algorithm>
 #include <assert.h>
+#include <mutex>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <algorithm>
-#include <mutex>
 #include <string>
 #include <utility>
 #include <vector>
@@ -21,10 +21,9 @@ extern "C" {
 #include <libavutil/rational.h>
 }
 
-#include "defs.h"
-#include "flags.h"
-#include "metrics.h"
-#include "timebase.h"
+#include "shared/metrics.h"
+#include "shared/shared_defs.h"
+#include "shared/timebase.h"
 
 using namespace std;
 
@@ -48,7 +47,7 @@ struct PacketBefore {
        const AVFormatContext * const ctx;
 };
 
-Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const vector<MuxMetrics *> &metrics)
+Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecParameters *audio_codecpar, AVColorSpace color_space, WithAudio with_audio, int time_base, function<void(int64_t)> write_callback, WriteStrategy write_strategy, const vector<MuxMetrics *> &metrics)
        : write_strategy(write_strategy), avctx(avctx), write_callback(write_callback), metrics(metrics)
 {
        avstream_video = avformat_new_stream(avctx, nullptr);
@@ -60,10 +59,12 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const
        avstream_video->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
        if (video_codec == CODEC_H264) {
                avstream_video->codecpar->codec_id = AV_CODEC_ID_H264;
-       } else {
-               assert(video_codec == CODEC_NV12);
+       } else if (video_codec == CODEC_NV12) {
                avstream_video->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
                avstream_video->codecpar->codec_tag = avcodec_pix_fmt_to_codec_tag(AV_PIX_FMT_NV12);
+       } else {
+               assert(video_codec == CODEC_MJPEG);
+               avstream_video->codecpar->codec_id = AV_CODEC_ID_MJPEG;
        }
        avstream_video->codecpar->width = width;
        avstream_video->codecpar->height = height;
@@ -77,11 +78,7 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const
        avstream_video->codecpar->color_primaries = AVCOL_PRI_BT709;  // RGB colorspace (inout_format.color_space).
        avstream_video->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1;  // Gamma curve (inout_format.gamma_curve).
        // YUV colorspace (output_ycbcr_format.luma_coefficients).
-       if (global_flags.ycbcr_rec709_coefficients) {
-               avstream_video->codecpar->color_space = AVCOL_SPC_BT709;
-       } else {
-               avstream_video->codecpar->color_space = AVCOL_SPC_SMPTE170M;
-       }
+       avstream_video->codecpar->color_space = color_space;
        avstream_video->codecpar->color_range = AVCOL_RANGE_MPEG;  // Full vs. limited range (output_ycbcr_format.full_range).
        avstream_video->codecpar->chroma_location = AVCHROMA_LOC_LEFT;  // Chroma sample location. See chroma_offset_0[] in Mixer::subsample_chroma().
        avstream_video->codecpar->field_order = AV_FIELD_PROGRESSIVE;
@@ -92,15 +89,20 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const
                memcpy(avstream_video->codecpar->extradata, video_extradata.data(), video_extradata.size());
        }
 
-       avstream_audio = avformat_new_stream(avctx, nullptr);
-       if (avstream_audio == nullptr) {
-               fprintf(stderr, "avformat_new_stream() failed\n");
-               exit(1);
-       }
-       avstream_audio->time_base = AVRational{1, time_base};
-       if (avcodec_parameters_copy(avstream_audio->codecpar, audio_codecpar) < 0) {
-               fprintf(stderr, "avcodec_parameters_copy() failed\n");
-               exit(1);
+       if (with_audio == WITH_AUDIO) {
+               avstream_audio = avformat_new_stream(avctx, nullptr);
+               if (avstream_audio == nullptr) {
+                       fprintf(stderr, "avformat_new_stream() failed\n");
+                       exit(1);
+               }
+               avstream_audio->time_base = AVRational{1, time_base};
+               if (avcodec_parameters_copy(avstream_audio->codecpar, audio_codecpar) < 0) {
+                       fprintf(stderr, "avcodec_parameters_copy() failed\n");
+                       exit(1);
+               }
+       } else {
+               assert(with_audio == WITHOUT_AUDIO);
+               avstream_audio = nullptr;
        }
 
        AVDictionary *options = NULL;
@@ -172,7 +174,8 @@ void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational t
                lock_guard<mutex> lock(mu);
                if (write_strategy == WriteStrategy::WRITE_BACKGROUND) {
                        packet_queue.push_back(QueuedPacket{ av_packet_clone(&pkt_copy), pts });
-                       if (plug_count == 0) packet_queue_ready.notify_all();
+                       if (plug_count == 0)
+                               packet_queue_ready.notify_all();
                } else if (plug_count > 0) {
                        packet_queue.push_back(QueuedPacket{ av_packet_clone(&pkt_copy), pts });
                } else {
@@ -197,7 +200,7 @@ void Mux::write_packet_or_die(const AVPacket &pkt, int64_t unscaled_pts)
        int64_t old_pos = avctx->pb->pos;
        if (av_interleaved_write_frame(avctx, const_cast<AVPacket *>(&pkt)) < 0) {
                fprintf(stderr, "av_interleaved_write_frame() failed\n");
-               exit(1);
+               abort();
        }
        avio_flush(avctx->pb);
        for (MuxMetrics *metric : metrics) {
@@ -238,6 +241,8 @@ void Mux::unplug()
 
 void Mux::thread_func()
 {
+       pthread_setname_np(pthread_self(), "Mux");
+
        unique_lock<mutex> lock(mu);
        for ( ;; ) {
                packet_queue_ready.wait(lock, [this]() {
similarity index 87%
rename from mux.h
rename to shared/mux.h
index 9614bffbb5ae6603d1ef0d50353ef61b27715679..62cd37cf80791f061182ebf50498756948f24a78 100644 (file)
--- a/mux.h
@@ -18,7 +18,7 @@ extern "C" {
 #include <thread>
 #include <vector>
 
-#include "timebase.h"
+#include "shared/timebase.h"
 
 struct MuxMetrics {
        // “written” will usually be equal video + audio + mux overhead,
@@ -37,11 +37,25 @@ struct MuxMetrics {
        }
 };
 
+inline AVColorSpace get_color_space(bool ycbcr_rec709_coefficients)
+{
+       if (ycbcr_rec709_coefficients) {
+               return AVCOL_SPC_BT709;
+       } else {
+               return AVCOL_SPC_SMPTE170M;
+       }
+}
+
 class Mux {
 public:
        enum Codec {
                CODEC_H264,
                CODEC_NV12,  // Uncompressed 4:2:0.
+               CODEC_MJPEG
+       };
+       enum WithAudio {
+               WITH_AUDIO,
+               WITHOUT_AUDIO
        };
        enum WriteStrategy {
                // add_packet() will write the packet immediately, unless plugged.
@@ -60,7 +74,7 @@ public:
        // the just-written frame. (write_callback can be nullptr.)
        // Does not take ownership of <metrics>; elements in there, if any,
        // will be added to.
-       Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
+       Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, AVColorSpace color_space, WithAudio with_audio, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
        ~Mux();
        void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase = { 1, TIMEBASE }, int stream_index_override = -1);
 
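Put together with the new enums, a muxer without an audio stream (the MJPEG export case) could be constructed roughly as below. This is an illustrative sketch, not the actual Nageru/Futatabi call site; make_mjpeg_mux and all parameter values are assumptions:

#include <memory>

#include "shared/mux.h"

std::unique_ptr<Mux> make_mjpeg_mux(AVFormatContext *avctx, int width, int height)
{
	return std::unique_ptr<Mux>(new Mux(
		avctx, width, height, Mux::CODEC_MJPEG,
		/*video_extradata=*/"", /*audio_codecpar=*/nullptr,
		get_color_space(/*ycbcr_rec709_coefficients=*/false),
		Mux::WITHOUT_AUDIO, COARSE_TIMEBASE,
		/*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, /*metrics=*/{}));
}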
diff --git a/shared/read_file.cpp b/shared/read_file.cpp
new file mode 100644 (file)
index 0000000..2310303
--- /dev/null
@@ -0,0 +1,52 @@
+#include "shared/read_file.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+string read_file(const string &filename, const unsigned char *start, const size_t size)
+{
+       FILE *fp = fopen(filename.c_str(), "r");
+       if (fp == nullptr) {
+               // Fall back to the version we compiled in. (We prefer disk if we can,
+               // since that makes it possible to work on shaders without recompiling
+               // all the time.)
+               if (start != nullptr) {
+                       return string(reinterpret_cast<const char *>(start),
+                               reinterpret_cast<const char *>(start) + size);
+               }
+
+               perror(filename.c_str());
+               exit(1);
+       }
+
+       int ret = fseek(fp, 0, SEEK_END);
+       if (ret == -1) {
+               perror("fseek(SEEK_END)");
+               exit(1);
+       }
+
+	long disk_size = ftell(fp);
+
+       ret = fseek(fp, 0, SEEK_SET);
+       if (ret == -1) {
+               perror("fseek(SEEK_SET)");
+               exit(1);
+       }
+
+       string str;
+       str.resize(disk_size);
+	ret = fread(&str[0], disk_size, 1, fp);
+	if (ret != 1) {
+		// fread() never returns -1; it returns the number of complete
+		// elements read (0 or 1 here), whether it hit an error or EOF.
+		fprintf(stderr, "Short read when trying to read %ld bytes from %s\n",
+			disk_size, filename.c_str());
+		exit(1);
+	}
+       fclose(fp);
+
+       return str;
+}
+
diff --git a/shared/read_file.h b/shared/read_file.h
new file mode 100644 (file)
index 0000000..27022ab
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _READ_FILE_H
+#define _READ_FILE_H 1
+
+#include <string>
+
+#include <stddef.h>
+
+// Read the contents of <filename> and return it as a string.
+// If the file does not exist, which is typical outside of development,
+// return the given memory area instead (presumably created by bin2h).
+
+std::string read_file(const std::string &filename, const unsigned char *start = nullptr, const size_t size = 0);
+
+#endif
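Tying read_file() back to bin2h above: the fallback pointer/size pair is exactly the _data/_size symbols the generated files export. A hypothetical consumer (the shader name and loader function are invented for illustration):

#include <stddef.h>
#include <string>

#include "shared/read_file.h"

// Symbols emitted by bin2h for a hypothetical motion_search.frag input.
extern unsigned char *_binary_motion_search_frag_data;
extern size_t _binary_motion_search_frag_size;

std::string load_motion_search_shader()
{
	// Prefer the on-disk copy while developing; otherwise use the compiled-in one.
	return read_file("motion_search.frag",
	                 _binary_motion_search_frag_data,
	                 _binary_motion_search_frag_size);
}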
similarity index 95%
rename from ref_counted_gl_sync.h
rename to shared/ref_counted_gl_sync.h
index 8b6db680a2c31ffa66ce36c799f1475769937033..5604e9e6f8f9a2707c9d2f0e0170d04c3efc688c 100644 (file)
@@ -16,7 +16,7 @@ class RefCountedGLsync : public RefCountedGLsyncBase {
 public:
        RefCountedGLsync() {}
 
-       RefCountedGLsync(GLenum condition, GLbitfield flags) 
+       RefCountedGLsync(GLenum condition, GLbitfield flags)
                : RefCountedGLsyncBase(locked_glFenceSync(condition, flags), glDeleteSync) {}
 
 private:
diff --git a/shared/shared_defs.h b/shared/shared_defs.h
new file mode 100644 (file)
index 0000000..fc3daa1
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _SHARED_DEFS_H
+#define _SHARED_DEFS_H 1
+
+// This flag is only supported in FFmpeg 3.3 and up, and we only require 3.1.
+#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 71, 100)
+#define MUX_SKIP_TRAILER "+skip_trailer"
+#else
+#define MUX_SKIP_TRAILER ""
+#endif
+
+#define MUX_OPTS { \
+       /* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \
+       { "movflags", "empty_moov+frag_keyframe+default_base_moof" MUX_SKIP_TRAILER }, \
+       \
+       /* Make for somewhat less bursty stream output when using .mov. */ \
+       { "frag_duration", "125000" }, \
+       \
+       /* Keep nut muxer from using unlimited amounts of memory. */ \
+       { "write_index", "0" } \
+}
+
+// In bytes. Beware, if too small, stream clients will start dropping data.
+// For mov, you want this at 10MB or so (for the reason mentioned above),
+// but for nut, there's no flushing, so such a large mux buffer would cause
+// the output to be very uneven.
+#define MUX_BUFFER_SIZE 10485760
+
+#endif  // !defined(_SHARED_DEFS_H)
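MUX_OPTS expands to a brace-enclosed list of key/value pairs, so a caller can materialize it into whatever container it likes before handing the options to FFmpeg. A sketch of one plausible consumer (make_mux_options is invented; the real option handling lives in the Mux constructor, which is not part of this hunk):

extern "C" {
#include <libavformat/avformat.h>  // Must come first: shared_defs.h tests LIBAVFORMAT_VERSION_INT.
#include <libavutil/dict.h>
}

#include <string>
#include <utility>
#include <vector>

#include "shared/shared_defs.h"

AVDictionary *make_mux_options()
{
	AVDictionary *options = nullptr;
	const std::vector<std::pair<std::string, std::string>> opts = MUX_OPTS;
	for (const auto &opt : opts) {
		av_dict_set(&options, opt.first.c_str(), opt.second.c_str(), 0);
	}
	return options;  // Pass to avformat_write_header(), then av_dict_free().
}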
similarity index 93%
rename from timebase.h
rename to shared/timebase.h
index dbc4402c1820d454528bd952feeb7c90ef1b8dd2..532ec86395c4fa08f08d53dbcc12daa7093ba127 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _TIMEBASE_H
 #define _TIMEBASE_H 1
 
+#include <ratio>
+
 // Common timebase that allows us to represent one frame exactly in all the
 // relevant frame rates:
 //
@@ -22,4 +24,6 @@
 // but can do at least 50 and 60 precisely, and months of streaming.
 #define COARSE_TIMEBASE 300
 
+using TimebaseRatio = std::ratio<1, TIMEBASE>;
+
 #endif  // !defined(_TIMEBASE_H)
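The new TimebaseRatio alias presumably exists so that pts arithmetic can lean on std::chrono instead of hand-rolled conversions. A small sketch of that use; the TimebasePTS and pts_to_ms names are invented here:

#include <chrono>
#include <stdint.h>

#include "shared/timebase.h"

// One tick == one TIMEBASE unit, so duration_cast does the unit conversion.
using TimebasePTS = std::chrono::duration<int64_t, TimebaseRatio>;

std::chrono::milliseconds pts_to_ms(int64_t pts)
{
	return std::chrono::duration_cast<std::chrono::milliseconds>(TimebasePTS(pts));
}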