[submodule "bmusb"]
- path = bmusb
+ path = nageru/bmusb
url = http://git.sesse.net/bmusb
+Nageru 1.7.5, November 11th, 2018
+
+ - Fix a bug where --record-x264-video would not work when VA-API was
+ not present, making the option rather useless (broken in 1.7.2).
+ Bug reported by Peter De Schrijver.
+
+ - The build system has been switched to Meson; see the README for new
+ build instructions.
+
+ - Various smaller fixes.
+
+
Nageru 1.7.4, August 31st, 2018
- Rework the x264 speedcontrol presets, again. (They earlier assumed
meson obj && cd obj && ninja
-To start it, just hook up your equipment, and then type “./nageru”.
+To start it, just hook up your equipment, and then type “cd nageru && ./nageru”.
It is strongly recommended to have the rights to run at real-time priority;
it will make the USB3 threads do so, which will make them a lot more stable.
+++ /dev/null
-Subproject commit e9aa80b8e6a09cf2c8b3458a1c395b6f1b6a58cc
--- /dev/null
+#version 450 core
+
+in vec3 tc;
+out vec2 diff_flow;
+
+uniform sampler2DArray diff_flow_tex;
+
+void main()
+{
+ diff_flow = texture(diff_flow_tex, tc).xy;
+}
--- /dev/null
+#version 450 core
+
+in vec3 tc;
+
+#ifdef SPLIT_YCBCR_OUTPUT
+out float Y;
+out vec2 CbCr;
+#else
+out vec4 rgba;
+#endif
+
+uniform sampler2DArray image_tex;
+uniform sampler2D flow_tex;
+uniform float alpha;
+
+void main()
+{
+	vec2 flow = texture(flow_tex, tc.xy).xy;
+	vec4 I_0 = texture(image_tex, vec3(tc.xy - alpha * flow, 0));
+	vec4 I_1 = texture(image_tex, vec3(tc.xy + (1.0f - alpha) * flow, 1));
+
+	// Occlusion reasoning:
+
+	vec2 size = textureSize(image_tex, 0).xy;
+
+	// Follow the flow back to the initial point (where we sample I_0 from), then forward again.
+	// See how well we match the point we started at, which is our flow consistency.
+	float d0 = alpha * length(size * (texture(flow_tex, vec2(tc.xy - alpha * flow)).xy - flow));
+
+	// Same for d1.
+	float d1 = (1.0f - alpha) * length(size * (texture(flow_tex, vec2(tc.xy + (1.0f - alpha) * flow)).xy - flow));
+
+	vec4 result;
+	if (max(d0, d1) < 3.0f) {  // Arbitrary constant, not all that tuned. The UW paper says 1.0 is fine for ground truth.
+		// Both are visible, so blend.
+		result = I_0 + alpha * (I_1 - I_0);
+	} else if (d0 < d1) {
+		// Only I_0 passes the consistency check; the point is likely occluded in I_1.
+		result = I_0;
+	} else {
+		// Only I_1 passes; the point is likely occluded in I_0.
+		result = I_1;
+	}
+
+#ifdef SPLIT_YCBCR_OUTPUT
+	Y = result.r;
+	CbCr = result.gb;
+#else
+	rgba = result;
+#endif
+}
--- /dev/null
+#version 450 core
+in vec2 tc0, tc1;
+uniform sampler2D cbcr_tex;
+out float Cb, Cr;
+void main() {
+ vec2 result = 0.5 * (texture(cbcr_tex, tc0).rg + texture(cbcr_tex, tc1).rg);
+ Cb = result.r;
+ Cr = result.g;
+}
+
--- /dev/null
+#version 450 core
+
+layout(location=0) in vec2 position;
+out vec2 tc0, tc1;
+uniform vec2 chroma_offset_0;
+uniform vec2 chroma_offset_1;
+
+void main()
+{
+ // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+ //
+ // 2.000 0.000 0.000 -1.000
+ // 0.000 2.000 0.000 -1.000
+ // 0.000 0.000 -2.000 -1.000
+ // 0.000 0.000 0.000 1.000
+ gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+ vec2 flipped_tc = position;
+ tc0 = flipped_tc + chroma_offset_0;
+ tc1 = flipped_tc + chroma_offset_1;
+}
+
--- /dev/null
+#include "chroma_subsampler.h"
+
+#include <movit/util.h>
+#include <string>
+
+#include "embedded_files.h"
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+string read_file(const string &filename, const unsigned char *start = nullptr, const size_t size = 0);
+GLuint compile_shader(const string &shader_src, GLenum type);
+GLuint link_program(GLuint vs_obj, GLuint fs_obj);
+void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler);
+
+extern GLuint linear_sampler;
+
+ChromaSubsampler::ChromaSubsampler()
+{
+ // Set up stuff for 4:2:2 conversion.
+ //
+ // Note: Due to the horizontally co-sited chroma/luma samples in H.264
+ // (chroma position is left for horizontal),
+ // we need to be a bit careful in our subsampling. A diagram will make
+ // this clearer, showing some luma and chroma samples:
+ //
+ // a b c d
+ // +---+---+---+---+
+ // | | | | |
+ // | Y | Y | Y | Y |
+ // | | | | |
+ // +---+---+---+---+
+ //
+ // +-------+-------+
+ // | | |
+ // | C | C |
+ // | | |
+ // +-------+-------+
+ //
+ // Clearly, the rightmost chroma sample here needs to be equivalent to
+ // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
+ // of course, but as long as the upsampling is not going to be equally
+ // sophisticated, it's probably not worth it.) If we sample once with
+ // no mipmapping, we get just c, ie., no actual filtering in the
+ // horizontal direction. (For the vertical direction, we can just
+ // sample in the middle to get the right filtering.) One could imagine
+ // we could use mipmapping (assuming we can create mipmaps cheaply),
+ // but then, what we'd get is this:
+ //
+ // (a+b)/2 (c+d)/2
+ // +-------+-------+
+ // | | |
+ // | Y | Y |
+ // | | |
+ // +-------+-------+
+ //
+ // +-------+-------+
+ // | | |
+ // | C | C |
+ // | | |
+ // +-------+-------+
+ //
+ // which ends up sampling equally from a and b, which clearly isn't right. Instead,
+ // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
+ // source pixels.
+ //
+ // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
+ // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
+ // exactly what we want.
+ //
+ // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
+
+ cbcr_vs_obj = compile_shader(read_file("chroma_subsample.vert", _binary_chroma_subsample_vert_data, _binary_chroma_subsample_vert_size), GL_VERTEX_SHADER);
+ cbcr_fs_obj = compile_shader(read_file("chroma_subsample.frag", _binary_chroma_subsample_frag_data, _binary_chroma_subsample_frag_size), GL_FRAGMENT_SHADER);
+ cbcr_program = link_program(cbcr_vs_obj, cbcr_fs_obj);
+
+ // Set up the VAO containing all the required position data.
+ glCreateVertexArrays(1, &vao);
+ glBindVertexArray(vao);
+
+ float vertices[] = {
+ 0.0f, 2.0f,
+ 0.0f, 0.0f,
+ 2.0f, 0.0f
+ };
+ glCreateBuffers(1, &vbo);
+ glNamedBufferData(vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+ glBindBuffer(GL_ARRAY_BUFFER, vbo);
+
+ GLint position_attrib = 0; // Hard-coded in every vertex shader.
+ glEnableVertexArrayAttrib(vao, position_attrib);
+ glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+ uniform_cbcr_tex = glGetUniformLocation(cbcr_program, "cbcr_tex");
+ uniform_chroma_offset_0 = glGetUniformLocation(cbcr_program, "chroma_offset_0");
+ uniform_chroma_offset_1 = glGetUniformLocation(cbcr_program, "chroma_offset_1");
+}
+
+ChromaSubsampler::~ChromaSubsampler()
+{
+ glDeleteProgram(cbcr_program);
+ check_error();
+ glDeleteBuffers(1, &vbo);
+ check_error();
+ glDeleteVertexArrays(1, &vao);
+ check_error();
+}
+
+void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint cb_tex, GLuint cr_tex)
+{
+	glUseProgram(cbcr_program);
+	bind_sampler(cbcr_program, uniform_cbcr_tex, 0, cbcr_tex, linear_sampler);
+	// The two offsets place the bilinear samples exactly in-between source pixels
+	// (see the diagram in the constructor); averaging them in the fragment shader
+	// yields the desired b/4 + c/2 + d/4 filter for left-sited (H.264) chroma.
+	// -0.0f / width is written that way for symmetry with -1.0f / width, not a typo.
+	glProgramUniform2f(cbcr_program, uniform_chroma_offset_0, -1.0f / width, 0.0f);
+	glProgramUniform2f(cbcr_program, uniform_chroma_offset_1, -0.0f / width, 0.0f);
+
+	// Output is half-width (4:2:2), rendered into both targets at once.
+	glViewport(0, 0, width / 2, height);
+	fbos.render_to(cb_tex, cr_tex);
+
+	glBindVertexArray(vao);
+	glDrawArrays(GL_TRIANGLES, 0, 3);
+}
--- /dev/null
+#ifndef _CHROMA_SUBSAMPLER_H
+#define _CHROMA_SUBSAMPLER_H 1
+
+#include "flow.h"
+
+#include <epoxy/gl.h>
+
+class ChromaSubsampler {
+public:
+ ChromaSubsampler();
+ ~ChromaSubsampler();
+
+ // Subsamples chroma (packed Cb and Cr) 2x1 to yield chroma suitable for
+ // planar 4:2:2. Chroma positioning is left (H.264 convention).
+ // width and height are the dimensions (in pixels) of the input texture.
+ void subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint cb_tex, GLuint cr_tex);
+
+private:
+ PersistentFBOSet<2> fbos;
+
+ GLuint vao;
+ GLuint vbo; // Holds position data.
+
+ GLuint cbcr_vs_obj, cbcr_fs_obj, cbcr_program;
+ GLuint uniform_cbcr_tex;
+ GLuint uniform_chroma_offset_0, uniform_chroma_offset_1;
+};
+
+#endif // !defined(_CHROMA_SUBSAMPLER_H)
--- /dev/null
+#include "clip_list.h"
+
+#include "mainwindow.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+
+#include <math.h>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+string pts_to_string(int64_t pts)
+{
+ int64_t t = lrint((pts / double(TIMEBASE)) * 1e3); // In milliseconds.
+ int ms = t % 1000;
+ t /= 1000;
+ int sec = t % 60;
+ t /= 60;
+ int min = t % 60;
+ t /= 60;
+ int hour = t;
+
+ char buf[256];
+ snprintf(buf, sizeof(buf), "%d:%02d:%02d.%03d", hour, min, sec, ms);
+ return buf;
+}
+
+string duration_to_string(int64_t pts_diff)
+{
+ int64_t t = lrint((pts_diff / double(TIMEBASE)) * 1e3); // In milliseconds.
+ int ms = t % 1000;
+ t /= 1000;
+ int sec = t % 60;
+ t /= 60;
+ int min = t;
+
+ char buf[256];
+ snprintf(buf, sizeof(buf), "%d:%02d.%03d", min, sec, ms);
+ return buf;
+}
+
+int ClipList::rowCount(const QModelIndex &parent) const
+{
+ if (parent.isValid())
+ return 0;
+ return clips.size();
+}
+
+int PlayList::rowCount(const QModelIndex &parent) const
+{
+ if (parent.isValid())
+ return 0;
+ return clips.size();
+}
+
+int ClipList::columnCount(const QModelIndex &parent) const
+{
+ if (parent.isValid())
+ return 0;
+ return int(Column::NUM_COLUMNS);
+}
+
+int PlayList::columnCount(const QModelIndex &parent) const
+{
+ if (parent.isValid())
+ return 0;
+ return int(Column::NUM_COLUMNS);
+}
+
+QVariant ClipList::data(const QModelIndex &parent, int role) const
+{
+ if (!parent.isValid())
+ return QVariant();
+ const int row = parent.row(), column = parent.column();
+ if (size_t(row) >= clips.size())
+ return QVariant();
+
+ if (role == Qt::TextAlignmentRole) {
+ switch (Column(column)) {
+ case Column::IN:
+ case Column::OUT:
+ case Column::DURATION:
+ return Qt::AlignRight + Qt::AlignVCenter;
+ default:
+ return Qt::AlignLeft + Qt::AlignVCenter;
+ }
+ }
+
+ if (role != Qt::DisplayRole && role != Qt::EditRole)
+ return QVariant();
+
+ switch (Column(column)) {
+ case Column::IN:
+ return QString::fromStdString(pts_to_string(clips[row].pts_in));
+ case Column::OUT:
+ if (clips[row].pts_out >= 0) {
+ return QString::fromStdString(pts_to_string(clips[row].pts_out));
+ } else {
+ return QVariant();
+ }
+ case Column::DURATION:
+ if (clips[row].pts_out >= 0) {
+ return QString::fromStdString(duration_to_string(clips[row].pts_out - clips[row].pts_in));
+ } else {
+ return QVariant();
+ }
+ case Column::CAMERA_1:
+ case Column::CAMERA_2:
+ case Column::CAMERA_3:
+ case Column::CAMERA_4: {
+ unsigned stream_idx = column - int(Column::CAMERA_1);
+ return QString::fromStdString(clips[row].descriptions[stream_idx]);
+ }
+ default:
+ return "";
+ }
+}
+
+QVariant PlayList::data(const QModelIndex &parent, int role) const
+{
+ if (!parent.isValid())
+ return QVariant();
+ const int row = parent.row(), column = parent.column();
+ if (size_t(row) >= clips.size())
+ return QVariant();
+
+ if (role == Qt::TextAlignmentRole) {
+ switch (Column(column)) {
+ case Column::PLAYING:
+ return Qt::AlignCenter;
+ case Column::IN:
+ case Column::OUT:
+ case Column::DURATION:
+ case Column::FADE_TIME:
+ return Qt::AlignRight + Qt::AlignVCenter;
+ case Column::CAMERA:
+ return Qt::AlignCenter;
+ default:
+ return Qt::AlignLeft + Qt::AlignVCenter;
+ }
+ }
+ if (role == Qt::BackgroundRole) {
+ if (Column(column) == Column::PLAYING) {
+ auto it = current_progress.find(row);
+ if (it != current_progress.end()) {
+ double play_progress = it->second;
+
+ // This only really works well for the first column, for whatever odd Qt reason.
+ QLinearGradient grad(QPointF(0, 0), QPointF(1, 0));
+ grad.setCoordinateMode(grad.QGradient::ObjectBoundingMode);
+ grad.setColorAt(0.0f, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.2f));
+ grad.setColorAt(play_progress, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.2f));
+ if (play_progress + 0.01f <= 1.0f) {
+ grad.setColorAt(play_progress + 0.01f, QColor::fromRgbF(0.0f, 0.0f, 1.0f, 0.0f));
+ }
+ return QBrush(grad);
+ } else {
+ return QVariant();
+ }
+ } else {
+ return QVariant();
+ }
+ }
+
+ if (role != Qt::DisplayRole && role != Qt::EditRole)
+ return QVariant();
+
+ switch (Column(column)) {
+ case Column::PLAYING:
+ return current_progress.count(row) ? "→" : "";
+ case Column::IN:
+ return QString::fromStdString(pts_to_string(clips[row].pts_in));
+ case Column::OUT:
+ if (clips[row].pts_out >= 0) {
+ return QString::fromStdString(pts_to_string(clips[row].pts_out));
+ } else {
+ return QVariant();
+ }
+ case Column::DURATION:
+ if (clips[row].pts_out >= 0) {
+ return QString::fromStdString(duration_to_string(clips[row].pts_out - clips[row].pts_in));
+ } else {
+ return QVariant();
+ }
+ case Column::CAMERA:
+ return qlonglong(clips[row].stream_idx + 1);
+ case Column::DESCRIPTION:
+ return QString::fromStdString(clips[row].descriptions[clips[row].stream_idx]);
+ case Column::FADE_TIME: {
+ stringstream ss;
+ ss.imbue(locale("C"));
+ ss.precision(3);
+ ss << fixed << clips[row].fade_time_seconds;
+ return QString::fromStdString(ss.str());
+ }
+ default:
+ return "";
+ }
+}
+
+QVariant ClipList::headerData(int section, Qt::Orientation orientation, int role) const
+{
+ if (role != Qt::DisplayRole)
+ return QVariant();
+ if (orientation != Qt::Horizontal)
+ return QVariant();
+
+ switch (Column(section)) {
+ case Column::IN:
+ return "In";
+ case Column::OUT:
+ return "Out";
+ case Column::DURATION:
+ return "Duration";
+ case Column::CAMERA_1:
+ return "Camera 1";
+ case Column::CAMERA_2:
+ return "Camera 2";
+ case Column::CAMERA_3:
+ return "Camera 3";
+ case Column::CAMERA_4:
+ return "Camera 4";
+ default:
+ return "";
+ }
+}
+
+QVariant PlayList::headerData(int section, Qt::Orientation orientation, int role) const
+{
+ if (role != Qt::DisplayRole)
+ return QVariant();
+ if (orientation != Qt::Horizontal)
+ return QVariant();
+
+ switch (Column(section)) {
+ case Column::PLAYING:
+ return "";
+ case Column::IN:
+ return "In";
+ case Column::OUT:
+ return "Out";
+ case Column::DURATION:
+ return "Duration";
+ case Column::CAMERA:
+ return "Camera";
+ case Column::DESCRIPTION:
+ return "Description";
+ case Column::FADE_TIME:
+ return "Fade time";
+ default:
+ return "";
+ }
+}
+
+Qt::ItemFlags ClipList::flags(const QModelIndex &index) const
+{
+ if (!index.isValid())
+ return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+ const int row = index.row(), column = index.column();
+ if (size_t(row) >= clips.size())
+ return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+
+ switch (Column(column)) {
+ case Column::CAMERA_1:
+ case Column::CAMERA_2:
+ case Column::CAMERA_3:
+ case Column::CAMERA_4:
+ return Qt::ItemIsEnabled | Qt::ItemIsSelectable | Qt::ItemIsEditable | Qt::ItemIsDragEnabled;
+ default:
+ return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+ }
+}
+
+Qt::ItemFlags PlayList::flags(const QModelIndex &index) const
+{
+	// Invalid indices and out-of-range rows get the default (non-editable) flags.
+	if (!index.isValid())
+		return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+	const int row = index.row(), column = index.column();
+	if (size_t(row) >= clips.size())
+		return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+
+	switch (Column(column)) {
+	case Column::DESCRIPTION:
+	case Column::CAMERA:
+	case Column::FADE_TIME:
+		// The only user-editable columns (see setData()).
+		return Qt::ItemIsEnabled | Qt::ItemIsSelectable | Qt::ItemIsEditable;
+	default:
+		return Qt::ItemIsEnabled | Qt::ItemIsSelectable;
+	}
+}
+
+bool ClipList::setData(const QModelIndex &index, const QVariant &value, int role)
+{
+ if (!index.isValid() || role != Qt::EditRole) {
+ return false;
+ }
+
+ const int row = index.row(), column = index.column();
+ if (size_t(row) >= clips.size())
+ return false;
+
+ switch (Column(column)) {
+ case Column::CAMERA_1:
+ case Column::CAMERA_2:
+ case Column::CAMERA_3:
+ case Column::CAMERA_4: {
+ unsigned stream_idx = column - int(Column::CAMERA_1);
+ clips[row].descriptions[stream_idx] = value.toString().toStdString();
+ emit_data_changed(row);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool PlayList::setData(const QModelIndex &index, const QVariant &value, int role)
+{
+ if (!index.isValid() || role != Qt::EditRole) {
+ return false;
+ }
+
+ const int row = index.row(), column = index.column();
+ if (size_t(row) >= clips.size())
+ return false;
+
+ switch (Column(column)) {
+ case Column::DESCRIPTION:
+ clips[row].descriptions[clips[row].stream_idx] = value.toString().toStdString();
+ emit_data_changed(row);
+ return true;
+ case Column::CAMERA: {
+ bool ok;
+ int camera_idx = value.toInt(&ok);
+ if (!ok || camera_idx < 1 || camera_idx > NUM_CAMERAS) {
+ return false;
+ }
+ clips[row].stream_idx = camera_idx - 1;
+ emit_data_changed(row);
+ return true;
+ }
+ case Column::FADE_TIME: {
+ bool ok;
+ double val = value.toDouble(&ok);
+ if (!ok || !(val >= 0.0)) {
+ return false;
+ }
+ clips[row].fade_time_seconds = val;
+ emit_data_changed(row);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+void ClipList::add_clip(const Clip &clip)
+{
+ beginInsertRows(QModelIndex(), clips.size(), clips.size());
+ clips.push_back(clip);
+ endInsertRows();
+ emit any_content_changed();
+}
+
+void PlayList::add_clip(const Clip &clip)
+{
+ beginInsertRows(QModelIndex(), clips.size(), clips.size());
+ clips.push_back(clip);
+ endInsertRows();
+ emit any_content_changed();
+}
+
+void PlayList::duplicate_clips(size_t first, size_t last)
+{
+	beginInsertRows(QModelIndex(), first, last);
+	// Inserting a range of a vector into itself is undefined behavior (the
+	// source iterators are invalidated by the reallocation/shift), so copy
+	// the [first, last] range out first and insert the copy.
+	vector<Clip> duplicates(clips.begin() + first, clips.begin() + last + 1);
+	clips.insert(clips.begin() + first, duplicates.begin(), duplicates.end());
+	endInsertRows();
+	emit any_content_changed();
+}
+
+void PlayList::erase_clips(size_t first, size_t last)
+{
+ beginRemoveRows(QModelIndex(), first, last);
+ clips.erase(clips.begin() + first, clips.begin() + last + 1);
+ endRemoveRows();
+ emit any_content_changed();
+}
+
+void PlayList::move_clips(size_t first, size_t last, int delta)
+{
+ if (delta == -1) {
+ beginMoveRows(QModelIndex(), first, last, QModelIndex(), first - 1);
+ rotate(clips.begin() + first - 1, clips.begin() + first, clips.begin() + last + 1);
+ } else {
+ beginMoveRows(QModelIndex(), first, last, QModelIndex(), first + (last - first + 1) + 1);
+ first = clips.size() - first - 1;
+ last = clips.size() - last - 1;
+ rotate(clips.rbegin() + last - 1, clips.rbegin() + last, clips.rbegin() + first + 1);
+ }
+ endMoveRows();
+ emit any_content_changed();
+}
+
+void ClipList::emit_data_changed(size_t row)
+{
+	// Signals that every column in <row> may have changed.
+	// NOTE(review): the bottom-right index uses int(Column::NUM_COLUMNS), which
+	// is one past the last valid (0-based) column; NUM_COLUMNS - 1 may have been
+	// intended — confirm whether Qt's handling of the invalid index is relied on.
+	emit dataChanged(index(row, 0), index(row, int(Column::NUM_COLUMNS)));
+	emit any_content_changed();
+}
+
+void PlayList::emit_data_changed(size_t row)
+{
+ emit dataChanged(index(row, 0), index(row, int(Column::NUM_COLUMNS)));
+ emit any_content_changed();
+}
+
+void PlayList::set_currently_playing(int index, double progress)
+{
+ int old_index = currently_playing_index;
+ int column = int(Column::PLAYING);
+ if (index != old_index) {
+ currently_playing_index = index;
+ play_progress = progress;
+ if (old_index != -1) {
+ emit dataChanged(this->index(old_index, column), this->index(old_index, column));
+ }
+ if (index != -1) {
+ emit dataChanged(this->index(index, column), this->index(index, column));
+ }
+ } else if (index != -1 && fabs(progress - play_progress) > 1e-3) {
+ play_progress = progress;
+ emit dataChanged(this->index(index, column), this->index(index, column));
+ }
+}
+
+void PlayList::set_progress(const map<size_t, double> &progress)
+{
+ const int column = int(Column::PLAYING);
+ map<size_t, double> old_progress = move(this->current_progress);
+ this->current_progress = progress;
+
+ for (auto it : old_progress) {
+ size_t index = it.first;
+ if (current_progress.count(index) == 0) {
+ emit dataChanged(this->index(index, column), this->index(index, column));
+ }
+ }
+ for (auto it : current_progress) {
+ size_t index = it.first;
+ emit dataChanged(this->index(index, column), this->index(index, column));
+ }
+}
+
+namespace {
+
+Clip deserialize_clip(const ClipProto &clip_proto)
+{
+ Clip clip;
+ clip.pts_in = clip_proto.pts_in();
+ clip.pts_out = clip_proto.pts_out();
+ for (int camera_idx = 0; camera_idx < min(clip_proto.description_size(), NUM_CAMERAS); ++camera_idx) {
+ clip.descriptions[camera_idx] = clip_proto.description(camera_idx);
+ }
+ clip.stream_idx = clip_proto.stream_idx();
+ clip.fade_time_seconds = clip_proto.fade_time_seconds();
+ return clip;
+}
+
+void serialize_clip(const Clip &clip, ClipProto *clip_proto)
+{
+ clip_proto->set_pts_in(clip.pts_in);
+ clip_proto->set_pts_out(clip.pts_out);
+ for (int camera_idx = 0; camera_idx < NUM_CAMERAS; ++camera_idx) {
+ *clip_proto->add_description() = clip.descriptions[camera_idx];
+ }
+ clip_proto->set_stream_idx(clip.stream_idx);
+ clip_proto->set_fade_time_seconds(clip.fade_time_seconds);
+}
+
+} // namespace
+
+ClipList::ClipList(const ClipListProto &serialized)
+{
+ for (const ClipProto &clip_proto : serialized.clip()) {
+ clips.push_back(deserialize_clip(clip_proto));
+ }
+}
+
+ClipListProto ClipList::serialize() const
+{
+ ClipListProto ret;
+ for (const Clip &clip : clips) {
+ serialize_clip(clip, ret.add_clip());
+ }
+ return ret;
+}
+
+PlayList::PlayList(const ClipListProto &serialized)
+{
+ for (const ClipProto &clip_proto : serialized.clip()) {
+ clips.push_back(deserialize_clip(clip_proto));
+ }
+}
+
+ClipListProto PlayList::serialize() const
+{
+ ClipListProto ret;
+ for (const Clip &clip : clips) {
+ serialize_clip(clip, ret.add_clip());
+ }
+ return ret;
+}
--- /dev/null
+#ifndef _CLIP_LIST_H
+#define _CLIP_LIST_H 1
+
+#include "defs.h"
+#include "state.pb.h"
+
+#include <QAbstractTableModel>
+#include <stdint.h>
+#include <map>
+#include <string>
+#include <vector>
+
+struct Clip {
+ int64_t pts_in = -1, pts_out = -1; // pts_in is inclusive, pts_out is exclusive.
+ std::string descriptions[NUM_CAMERAS];
+ unsigned stream_idx = 0; // For the playlist only.
+ double fade_time_seconds = 0.5; // For the playlist only.
+};
+
+class DataChangedReceiver {
+public:
+ virtual ~DataChangedReceiver() {}
+ virtual void emit_data_changed(size_t row) = 0;
+};
+
+// Like a smart pointer to a Clip, but emits dataChanged when it goes out of scope.
+struct ClipProxy {
+public:
+ ClipProxy(Clip &clip, DataChangedReceiver *clip_list, size_t row)
+ : clip(clip), clip_list(clip_list), row(row) {}
+ ~ClipProxy()
+ {
+ if (clip_list != nullptr) {
+ clip_list->emit_data_changed(row);
+ }
+ }
+ Clip *operator->() { return &clip; }
+ Clip &operator*() { return clip; }
+
+private:
+ Clip &clip;
+ DataChangedReceiver *clip_list;
+ size_t row;
+};
+
+class ClipList : public QAbstractTableModel, public DataChangedReceiver {
+ Q_OBJECT
+
+public:
+ explicit ClipList(const ClipListProto &serialized);
+
+ enum class Column {
+ IN,
+ OUT,
+ DURATION,
+ CAMERA_1,
+ CAMERA_2,
+ CAMERA_3,
+ CAMERA_4,
+ NUM_COLUMNS
+ };
+
+ int rowCount(const QModelIndex &parent) const override;
+ int columnCount(const QModelIndex &parent) const override;
+ QVariant data(const QModelIndex &parent, int role) const override;
+ QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
+ Qt::ItemFlags flags(const QModelIndex &index) const override;
+ bool setData(const QModelIndex &index, const QVariant &value, int role = Qt::EditRole) override;
+
+ void add_clip(const Clip &clip);
+ size_t size() const { return clips.size(); }
+ bool empty() const { return clips.empty(); }
+
+ ClipProxy mutable_clip(size_t index) { return ClipProxy(clips[index], this, index); }
+ const Clip *clip(size_t index) const { return &clips[index]; }
+
+ ClipProxy mutable_back() { return mutable_clip(size() - 1); }
+ const Clip *back() const { return clip(size() - 1); }
+
+ ClipListProto serialize() const;
+
+ void emit_data_changed(size_t row) override;
+
+signals:
+ void any_content_changed();
+
+private:
+ std::vector<Clip> clips;
+};
+
+class PlayList : public QAbstractTableModel, public DataChangedReceiver {
+ Q_OBJECT
+
+public:
+ explicit PlayList(const ClipListProto &serialized);
+
+ enum class Column {
+ PLAYING,
+ IN,
+ OUT,
+ DURATION,
+ CAMERA,
+ DESCRIPTION,
+ FADE_TIME,
+ NUM_COLUMNS
+ };
+
+ int rowCount(const QModelIndex &parent) const override;
+ int columnCount(const QModelIndex &parent) const override;
+ QVariant data(const QModelIndex &parent, int role) const override;
+ QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
+ Qt::ItemFlags flags(const QModelIndex &index) const override;
+ bool setData(const QModelIndex &index, const QVariant &value, int role = Qt::EditRole) override;
+
+ void add_clip(const Clip &clip);
+
+ // <last> is inclusive in all of these.
+ void duplicate_clips(size_t first, size_t last);
+ void erase_clips(size_t first, size_t last);
+ // <delta> is -1 to move upwards, +1 to move downwards.
+ void move_clips(size_t first, size_t last, int delta);
+
+ size_t size() const { return clips.size(); }
+ bool empty() const { return clips.empty(); }
+
+ ClipProxy mutable_clip(size_t index) { return ClipProxy(clips[index], this, index); }
+ const Clip *clip(size_t index) const { return &clips[index]; }
+
+ ClipProxy mutable_back() { return mutable_clip(size() - 1); }
+ const Clip *back() const { return clip(size() - 1); }
+
+ // TODO: Move these out of PlayList.
+ void set_currently_playing(int index, double progress); // -1 = none.
+ int get_currently_playing() const { return currently_playing_index; }
+
+ void set_progress(const std::map<size_t, double> &progress);
+
+ ClipListProto serialize() const;
+
+ void emit_data_changed(size_t row) override;
+
+signals:
+ void any_content_changed();
+
+private:
+ std::vector<Clip> clips;
+ int currently_playing_index = -1;
+ double play_progress = 0.0;
+ std::map<size_t, double> current_progress;
+};
+
+#endif // !defined (_CLIP_LIST_H)
--- /dev/null
+#include "db.h"
+
+#include "frame.pb.h"
+
+#include <string>
+
+using namespace std;
+
+DB::DB(const string &filename)
+{
+ int ret = sqlite3_open(filename.c_str(), &db);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "%s: %s\n", filename.c_str(), sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ sqlite3_exec(db, R"(
+ CREATE TABLE IF NOT EXISTS state (state BLOB);
+ )", nullptr, nullptr, nullptr); // Ignore errors.
+
+ sqlite3_exec(db, R"(
+ DROP TABLE file;
+ )", nullptr, nullptr, nullptr); // Ignore errors.
+
+ sqlite3_exec(db, R"(
+ DROP TABLE frame;
+ )", nullptr, nullptr, nullptr); // Ignore errors.
+
+ sqlite3_exec(db, R"(
+ CREATE TABLE IF NOT EXISTS filev2 (
+ file INTEGER NOT NULL PRIMARY KEY,
+ filename VARCHAR NOT NULL UNIQUE,
+ size BIGINT NOT NULL,
+ frames BLOB NOT NULL
+ );
+ )", nullptr, nullptr, nullptr); // Ignore errors.
+
+ sqlite3_exec(db, "PRAGMA journal_mode=WAL", nullptr, nullptr, nullptr); // Ignore errors.
+ sqlite3_exec(db, "PRAGMA synchronous=NORMAL", nullptr, nullptr, nullptr); // Ignore errors.
+}
+
+StateProto DB::get_state()
+{
+ StateProto state;
+
+ sqlite3_stmt *stmt;
+ int ret = sqlite3_prepare_v2(db, "SELECT state FROM state", -1, &stmt, 0);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "SELECT prepare: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_step(stmt);
+ if (ret == SQLITE_ROW) {
+ bool ok = state.ParseFromArray(sqlite3_column_blob(stmt, 0), sqlite3_column_bytes(stmt, 0));
+ if (!ok) {
+ fprintf(stderr, "State in database is corrupted!\n");
+ exit(1);
+ }
+ } else if (ret != SQLITE_DONE) {
+ fprintf(stderr, "SELECT step: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_finalize(stmt);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "SELECT finalize: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ return state;
+}
+
+void DB::store_state(const StateProto &state)
+{
+ string serialized;
+ state.SerializeToString(&serialized);
+
+ int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_exec(db, "DELETE FROM state", nullptr, nullptr, nullptr);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "DELETE: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ sqlite3_stmt *stmt;
+ ret = sqlite3_prepare_v2(db, "INSERT INTO state VALUES (?)", -1, &stmt, 0);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ sqlite3_bind_blob(stmt, 1, serialized.data(), serialized.size(), SQLITE_STATIC);
+
+ ret = sqlite3_step(stmt);
+ if (ret == SQLITE_ROW) {
+ fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_finalize(stmt);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+}
+
+vector<DB::FrameOnDiskAndStreamIdx> DB::load_frame_file(const string &filename, size_t size, unsigned filename_idx)
+{
+ FileContentsProto file_contents;
+
+ sqlite3_stmt *stmt;
+ int ret = sqlite3_prepare_v2(db, "SELECT frames FROM filev2 WHERE filename=? AND size=?", -1, &stmt, 0);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "SELECT prepare: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+ sqlite3_bind_int64(stmt, 2, size);
+
+ ret = sqlite3_step(stmt);
+ if (ret == SQLITE_ROW) {
+ bool ok = file_contents.ParseFromArray(sqlite3_column_blob(stmt, 0), sqlite3_column_bytes(stmt, 0));
+ if (!ok) {
+ fprintf(stderr, "Frame list in database is corrupted!\n");
+ exit(1);
+ }
+ } else if (ret != SQLITE_DONE) {
+ fprintf(stderr, "SELECT step: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ ret = sqlite3_finalize(stmt);
+ if (ret != SQLITE_OK) {
+ fprintf(stderr, "SELECT finalize: %s\n", sqlite3_errmsg(db));
+ exit(1);
+ }
+
+ vector<FrameOnDiskAndStreamIdx> frames;
+ for (const StreamContentsProto &stream : file_contents.stream()) {
+ FrameOnDiskAndStreamIdx frame;
+ frame.stream_idx = stream.stream_idx();
+ for (int i = 0; i < stream.pts_size(); ++i) {
+ frame.frame.filename_idx = filename_idx;
+ frame.frame.pts = stream.pts(i);
+ frame.frame.offset = stream.offset(i);
+ frame.frame.size = stream.file_size(i);
+ frames.push_back(frame);
+ }
+ }
+
+ return frames;
+}
+
+// Store the frame index for one frame file into the database, replacing
+// any existing row with the same filename. All frames are serialized into
+// a single protobuf blob (one StreamContentsProto per stream index seen).
+// Runs in a transaction; any SQLite error is fatal (we exit).
+void DB::store_frame_file(const string &filename, size_t size, const vector<FrameOnDiskAndStreamIdx> &frames)
+{
+	int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	// Delete any existing instances with this filename.
+	sqlite3_stmt *stmt;
+
+	ret = sqlite3_prepare_v2(db, "DELETE FROM filev2 WHERE filename=?", -1, &stmt, 0);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "DELETE prepare: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+
+	// A DELETE returns no rows, so anything but SQLITE_DONE (including
+	// SQLITE_BUSY or a hard error) is a failure.
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_DONE) {
+		fprintf(stderr, "DELETE step: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	ret = sqlite3_finalize(stmt);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "DELETE finalize: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	// Create the protobuf blob for the new row.
+	FileContentsProto file_contents;
+	unordered_set<unsigned> seen_stream_idx;  // Usually only one.
+	for (const FrameOnDiskAndStreamIdx &frame : frames) {
+		seen_stream_idx.insert(frame.stream_idx);
+	}
+	for (unsigned stream_idx : seen_stream_idx) {
+		StreamContentsProto *stream = file_contents.add_stream();
+		stream->set_stream_idx(stream_idx);
+		// Reserve generously; each stream holds at most frames.size() entries.
+		stream->mutable_pts()->Reserve(frames.size());
+		stream->mutable_offset()->Reserve(frames.size());
+		stream->mutable_file_size()->Reserve(frames.size());
+		for (const FrameOnDiskAndStreamIdx &frame : frames) {
+			if (frame.stream_idx != stream_idx) {
+				continue;
+			}
+			stream->add_pts(frame.frame.pts);
+			stream->add_offset(frame.frame.offset);
+			stream->add_file_size(frame.frame.size);
+		}
+	}
+	string serialized;
+	file_contents.SerializeToString(&serialized);
+
+	// Insert the new row.
+	ret = sqlite3_prepare_v2(db, "INSERT INTO filev2 (filename, size, frames) VALUES (?, ?, ?)", -1, &stmt, 0);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+	sqlite3_bind_int64(stmt, 2, size);
+	sqlite3_bind_blob(stmt, 3, serialized.data(), serialized.size(), SQLITE_STATIC);
+
+	// As above: an INSERT must complete with SQLITE_DONE.
+	ret = sqlite3_step(stmt);
+	if (ret != SQLITE_DONE) {
+		fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	ret = sqlite3_finalize(stmt);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	// Commit.
+	ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+}
+
+// Delete index rows for any frame files no longer present on disk.
+// “used_filenames” is the authoritative list of files that should be kept;
+// everything else is removed from filev2. Runs in a transaction, using a
+// temporary table for an efficient NOT IN anti-join.
+void DB::clean_unused_frame_files(const vector<string> &used_filenames)
+{
+	int ret = sqlite3_exec(db, "BEGIN", nullptr, nullptr, nullptr);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "BEGIN: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	ret = sqlite3_exec(db, R"(
+		CREATE TEMPORARY TABLE used_filenames ( filename VARCHAR NOT NULL PRIMARY KEY )
+	)", nullptr, nullptr, nullptr);
+
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "CREATE TEMPORARY TABLE: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	// Insert the new rows, reusing one prepared statement for all of them.
+	sqlite3_stmt *stmt;
+	ret = sqlite3_prepare_v2(db, "INSERT INTO used_filenames (filename) VALUES (?)", -1, &stmt, 0);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "INSERT prepare: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	for (const string &filename : used_filenames) {
+		sqlite3_bind_text(stmt, 1, filename.data(), filename.size(), SQLITE_STATIC);
+
+		// An INSERT must complete with SQLITE_DONE; anything else is an error.
+		ret = sqlite3_step(stmt);
+		if (ret != SQLITE_DONE) {
+			fprintf(stderr, "INSERT step: %s\n", sqlite3_errmsg(db));
+			exit(1);
+		}
+
+		// sqlite3_reset() never returns SQLITE_ROW; it returns SQLITE_OK
+		// on success and an error code otherwise, so that is what we check.
+		ret = sqlite3_reset(stmt);
+		if (ret != SQLITE_OK) {
+			fprintf(stderr, "INSERT reset: %s\n", sqlite3_errmsg(db));
+			exit(1);
+		}
+	}
+
+	ret = sqlite3_finalize(stmt);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "INSERT finalize: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	ret = sqlite3_exec(db, R"(
+		DELETE FROM filev2 WHERE filename NOT IN ( SELECT filename FROM used_filenames )
+	)", nullptr, nullptr, nullptr);
+
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "DELETE: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	ret = sqlite3_exec(db, R"(
+		DROP TABLE used_filenames
+	)", nullptr, nullptr, nullptr);
+
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "DROP TABLE: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+
+	// Commit.
+	ret = sqlite3_exec(db, "COMMIT", nullptr, nullptr, nullptr);
+	if (ret != SQLITE_OK) {
+		fprintf(stderr, "COMMIT: %s\n", sqlite3_errmsg(db));
+		exit(1);
+	}
+}
--- /dev/null
+#ifndef DB_H
+#define DB_H 1
+
+#include "state.pb.h"
+
+#include <sqlite3.h>
+#include <string>
+#include <vector>
+
+#include "frame_on_disk.h"
+
+// SQLite-backed persistence for Futatabi: global application state plus an
+// index of the frames contained in each frame file on disk.
+// Noncopyable, since it owns the underlying sqlite3 handle.
+class DB {
+public:
+	explicit DB(const std::string &filename);
+	DB(const DB &) = delete;
+
+	// Load/store the global application state as a protobuf blob.
+	StateProto get_state();
+	void store_state(const StateProto &state);
+
+	// A frame on disk, together with which input stream it belongs to.
+	struct FrameOnDiskAndStreamIdx {
+		FrameOnDisk frame;
+		unsigned stream_idx;
+	};
+	// “filename_idx” (renamed from “frame_idx” to match the implementation)
+	// is copied into each returned FrameOnDisk.
+	std::vector<FrameOnDiskAndStreamIdx> load_frame_file(const std::string &filename, size_t size, unsigned filename_idx);  // Empty = none found, or there were no frames.
+	void store_frame_file(const std::string &filename, size_t size, const std::vector<FrameOnDiskAndStreamIdx> &frames);
+	void clean_unused_frame_files(const std::vector<std::string> &used_filenames);
+
+private:
+	StateProto state;
+	sqlite3 *db;
+};
+
+#endif // !defined(DB_H)
--- /dev/null
+#ifndef DEFS_H
+#define DEFS_H 1
+
+// Global compile-time constants for Futatabi.
+// (Include guard renamed from _DEFS_H; identifiers starting with an
+// underscore followed by an uppercase letter are reserved in C++.)
+
+#define MAX_STREAMS 16
+#define CACHE_SIZE_MB 2048
+#define NUM_CAMERAS 4
+#define MUX_BUFFER_SIZE 10485760
+
+#define DEFAULT_HTTPD_PORT 9095
+
+#endif  // !defined(DEFS_H)
--- /dev/null
+#version 450 core
+
+// Splat pass fragment shader (paired with splat.vert): each covered pixel
+// accumulates this patch's flow vector (flow_du) and a weight, where the
+// weight measures how well the flow explains the intensity difference at
+// this pixel. Summed with additive blending; normalized on next read.
+
+in vec2 image_pos;
+flat in int image0_layer, image1_layer;
+flat in vec2 flow_du;
+flat in float mean_diff;
+out vec3 flow_contribution;
+
+uniform sampler2DArray image_tex;
+
+void main()
+{
+	// Equation (3) from the paper. We're using additive blending, so the
+	// sum will happen automatically for us, and normalization happens on
+	// next read.
+	//
+	// Note that equation (2) says 1 for the minimum error, but the code says 2.0.
+	// And it says L2 norm, but really, the code does absolute value even for
+	// L2 error norm (it uses a square root formula for L1 norm).
+	float diff = texture(image_tex, vec3(image_pos, image0_layer)).x - texture(image_tex, vec3(image_pos + flow_du, image1_layer)).x;
+	diff -= mean_diff;
+	float weight = 1.0 / max(abs(diff), 2.0 / 255.0);
+	flow_contribution = vec3(flow_du.x * weight, flow_du.y * weight, weight);
+}
--- /dev/null
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+// Splat pass vertex shader: draws one instanced quad per (patch, direction).
+// gl_InstanceID is decoded into a patch (x, y) position and a layer
+// (0 = forward flow, 1 = backward flow); the quad is positioned over the
+// patch's area in the image, slightly enlarged.
+
+layout(location=0) in vec2 position;
+out vec2 image_pos;
+flat out vec2 flow_du;
+flat out float mean_diff;
+flat out int image0_layer, image1_layer;
+
+uniform vec2 patch_size;  // In 0..1 coordinates.
+uniform sampler2DArray flow_tex;
+
+void main()
+{
+	// Decode gl_InstanceID: instances [0, num_patches) are layer 0,
+	// [num_patches, 2*num_patches) are layer 1, row-major within a layer.
+	int num_patches = textureSize(flow_tex, 0).x * textureSize(flow_tex, 0).y;
+	int patch_layer = gl_InstanceID / num_patches;
+	int patch_x = gl_InstanceID % textureSize(flow_tex, 0).x;
+	int patch_y = (gl_InstanceID % num_patches) / textureSize(flow_tex, 0).x;
+
+	// Convert the patch index to being the full 0..1 range, to match where
+	// the motion search puts the patches. We don't bother with the locking
+	// to texel centers, though.
+	vec2 patch_center = ivec2(patch_x, patch_y) / (textureSize(flow_tex, 0).xy - 1.0);
+
+	// Increase the patch size a bit; since patch spacing is not necessarily
+	// an integer number of pixels, and we don't use conservative rasterization,
+	// we could be missing the outer edges of the patch. And it seemingly helps
+	// a little bit in general to have some more candidates as well -- although
+	// this is measured without variational refinement, so it might be moot
+	// with it.
+	//
+	// This maps [0.0,1.0] to [-0.25,1.25], ie. extends the patch by 25% in
+	// all directions.
+	vec2 grown_pos = (position * 1.5) - 0.25;
+
+	image_pos = patch_center + patch_size * (grown_pos - 0.5f);
+
+	// Find the flow value for this patch, and send it on to the fragment shader.
+	vec3 flow_du_and_mean_diff = texelFetch(flow_tex, ivec3(patch_x, patch_y, patch_layer), 0).xyz;
+	flow_du = flow_du_and_mean_diff.xy;
+	mean_diff = flow_du_and_mean_diff.z;
+
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * image_pos.x - 1.0, 2.0 * image_pos.y - 1.0, -1.0, 1.0);
+	gl_Layer = patch_layer;
+
+	// Forward flow (0) goes from 0 to 1. Backward flow (1) goes from 1 to 0.
+	image0_layer = patch_layer;
+	image1_layer = 1 - patch_layer;
+}
--- /dev/null
+#version 450 core
+
+// Compute the first derivatives (image gradient) of the input, using a
+// five-point central-difference stencil (weights ±2/3 at distance 1 and
+// ∓1/12 at distance 2) in each direction, plus the normalization factor
+// β₀ = 1 / (‖∇I‖² + ζ²) used by the variational refinement.
+
+in vec3 tc;
+out vec2 derivatives;
+out float beta_0;
+
+uniform sampler2DArray tex;
+
+void main()
+{
+	float x_m2 = textureOffset(tex, tc, ivec2(-2,  0)).x;
+	float x_m1 = textureOffset(tex, tc, ivec2(-1,  0)).x;
+	float x_p1 = textureOffset(tex, tc, ivec2( 1,  0)).x;
+	float x_p2 = textureOffset(tex, tc, ivec2( 2,  0)).x;
+
+	float y_m2 = textureOffset(tex, tc, ivec2( 0, -2)).x;
+	float y_m1 = textureOffset(tex, tc, ivec2( 0, -1)).x;
+	float y_p1 = textureOffset(tex, tc, ivec2( 0,  1)).x;
+	float y_p2 = textureOffset(tex, tc, ivec2( 0,  2)).x;
+
+	derivatives.x = (x_p1 - x_m1) * (2.0/3.0) + (x_m2 - x_p2) * (1.0/12.0);
+	derivatives.y = (y_p1 - y_m1) * (2.0/3.0) + (y_m2 - y_p2) * (1.0/12.0);
+
+	// The nudge term in the square root in the DeepFlow paper is ζ² = 0.1² = 0.01.
+	// But this is assuming a 0..255 level. Given the nonlinearities in the expression
+	// where β_0 appears, there's no 100% equivalent way to adjust this
+	// constant that I can see, but taking it to (0.1/255)² ~= 1.53e-7 ~=
+	// 1e-7 ought to be good enough. I guess the basic idea is that it
+	// will only matter for near-zero derivatives anyway. I am a tiny
+	// bit worried about fp16 precision when storing these numbers, but OK.
+	beta_0 = 1.0 / (derivatives.x * derivatives.x + derivatives.y * derivatives.y + 1e-7);
+}
--- /dev/null
+#version 450 core
+
+// Compute the smoothness-term diffusivity for each pixel:
+// g = α / sqrt(‖∇u‖² + ‖∇v‖² + ε²), with the flow gradient estimated by
+// central differences. When zero_diff_flow is set, the incremental flow
+// (diff_flow_tex) is known to be all zero and is skipped entirely.
+
+in vec3 tc;
+out float g;
+const float eps_sq = 0.001 * 0.001;
+
+uniform sampler2DArray flow_tex, diff_flow_tex;
+
+// Relative weighting of smoothness term.
+uniform float alpha;
+
+uniform bool zero_diff_flow;
+
+// This must be a macro, since the offset needs to be a constant expression.
+#define get_flow(x_offs, y_offs) \
+	(textureOffset(flow_tex, tc, ivec2((x_offs), (y_offs))).xy + \
+	textureOffset(diff_flow_tex, tc, ivec2((x_offs), (y_offs))).xy)
+
+#define get_flow_no_diff(x_offs, y_offs) \
+	textureOffset(flow_tex, tc, ivec2((x_offs), (y_offs))).xy
+
+float diffusivity(float u_x, float u_y, float v_x, float v_y)
+{
+	return alpha * inversesqrt(u_x * u_x + u_y * u_y + v_x * v_x + v_y * v_y + eps_sq);
+}
+
+void main()
+{
+	// Find diffusivity (g) for this pixel, using central differences.
+	if (zero_diff_flow) {
+		vec2 uv_x = get_flow_no_diff(1, 0) - get_flow_no_diff(-1,  0);
+		vec2 uv_y = get_flow_no_diff(0, 1) - get_flow_no_diff( 0, -1);
+		g = diffusivity(uv_x.x, uv_y.x, uv_x.y, uv_y.y);
+	} else {
+		vec2 uv_x = get_flow(1, 0) - get_flow(-1,  0);
+		vec2 uv_y = get_flow(0, 1) - get_flow( 0, -1);
+		g = diffusivity(uv_x.x, uv_y.x, uv_x.y, uv_y.y);
+	}
+}
--- /dev/null
+#ifndef EMBEDDED_FILES_H
+#define EMBEDDED_FILES_H 1
+
+// Files that are embedded into the binary as part of the build process.
+// They are used as a backup if the files are not available on disk
+// (which is typically the case if the program is installed, as opposed to
+// being run during development).
+//
+// (Include guard renamed from _EMBEDDED_FILES_H; identifiers starting with
+// an underscore followed by an uppercase letter are reserved in C++. The
+// _binary_* symbol names themselves cannot be changed — they are emitted by
+// the build step that embeds the files.)
+//
+// NOTE(review): whether these symbols are genuine pointer/size variables or
+// objcopy-style address symbols depends on the generating build rule —
+// confirm against the Meson setup if this ever misbehaves.
+
+#include <stddef.h>
+
+extern const unsigned char *_binary_add_base_flow_frag_data;
+extern const size_t _binary_add_base_flow_frag_size;
+extern const unsigned char *_binary_blend_frag_data;
+extern const size_t _binary_blend_frag_size;
+extern const unsigned char *_binary_chroma_subsample_frag_data;
+extern const size_t _binary_chroma_subsample_frag_size;
+extern const unsigned char *_binary_chroma_subsample_vert_data;
+extern const size_t _binary_chroma_subsample_vert_size;
+extern const unsigned char *_binary_densify_frag_data;
+extern const size_t _binary_densify_frag_size;
+extern const unsigned char *_binary_densify_vert_data;
+extern const size_t _binary_densify_vert_size;
+extern const unsigned char *_binary_derivatives_frag_data;
+extern const size_t _binary_derivatives_frag_size;
+extern const unsigned char *_binary_diffusivity_frag_data;
+extern const size_t _binary_diffusivity_frag_size;
+extern const unsigned char *_binary_equations_frag_data;
+extern const size_t _binary_equations_frag_size;
+extern const unsigned char *_binary_equations_vert_data;
+extern const size_t _binary_equations_vert_size;
+extern const unsigned char *_binary_gray_frag_data;
+extern const size_t _binary_gray_frag_size;
+extern const unsigned char *_binary_hole_blend_frag_data;
+extern const size_t _binary_hole_blend_frag_size;
+extern const unsigned char *_binary_hole_fill_frag_data;
+extern const size_t _binary_hole_fill_frag_size;
+extern const unsigned char *_binary_hole_fill_vert_data;
+extern const size_t _binary_hole_fill_vert_size;
+extern const unsigned char *_binary_motion_search_frag_data;
+extern const size_t _binary_motion_search_frag_size;
+extern const unsigned char *_binary_motion_search_vert_data;
+extern const size_t _binary_motion_search_vert_size;
+extern const unsigned char *_binary_prewarp_frag_data;
+extern const size_t _binary_prewarp_frag_size;
+extern const unsigned char *_binary_resize_flow_frag_data;
+extern const size_t _binary_resize_flow_frag_size;
+extern const unsigned char *_binary_sobel_frag_data;
+extern const size_t _binary_sobel_frag_size;
+extern const unsigned char *_binary_sor_frag_data;
+extern const size_t _binary_sor_frag_size;
+extern const unsigned char *_binary_sor_vert_data;
+extern const size_t _binary_sor_vert_size;
+extern const unsigned char *_binary_splat_frag_data;
+extern const size_t _binary_splat_frag_size;
+extern const unsigned char *_binary_splat_vert_data;
+extern const size_t _binary_splat_vert_size;
+extern const unsigned char *_binary_vs_vert_data;
+extern const size_t _binary_vs_vert_size;
+
+#endif  // !defined(EMBEDDED_FILES_H)
--- /dev/null
+#version 450 core
+
+// Build the per-pixel 2x2 linear systems for variational refinement.
+// Each fragment covers two horizontally adjacent input pixels (one “red”
+// and one “black”, for the red-black SOR solver), hence the paired
+// texture coordinates and the two uvec4 outputs.
+
+in vec3 tc0, tc_left0, tc_down0;
+in vec3 tc1, tc_left1, tc_down1;
+in float line_offset;
+out uvec4 equation_red, equation_black;
+
+uniform sampler2DArray I_x_y_tex, I_t_tex;
+uniform sampler2DArray diff_flow_tex, base_flow_tex;
+uniform sampler2DArray beta_0_tex;
+uniform sampler2DArray diffusivity_tex;
+
+// Relative weighting of intensity term.
+uniform float delta;
+
+// Relative weighting of gradient term.
+uniform float gamma;
+
+uniform bool zero_diff_flow;
+
+// Similar to packHalf2x16, but the two values share exponent, and are stored
+// as 12-bit fixed point numbers multiplied by that exponent (the leading one
+// can't be implicit in this kind of format). This allows us to store a much
+// greater range of numbers (8-bit, ie., full fp32 range), and also gives us an
+// extra mantissa bit. (Well, ostensibly two, but because the numbers have to
+// be stored denormalized, we only really gain one.)
+//
+// The price we pay is that if the numbers are of very different magnitudes,
+// the smaller number gets less precision.
+uint pack_floats_shared(float a, float b)
+{
+	float greatest = max(abs(a), abs(b));
+
+	// Find the exponent, increase it by one, and negate it.
+	// E.g., if the nonbiased exponent is 3, the number is between
+	// 2^3 and 2^4, so our normalization factor to get within -1..1
+	// is going to be 2^-4.
+	//
+	// exponent -= 127;
+	// exponent = -(exponent + 1);
+	// exponent += 127;
+	//
+	// is the same as
+	//
+	// exponent = 252 - exponent;
+	uint e = floatBitsToUint(greatest) & 0x7f800000u;
+	float normalizer = uintBitsToFloat((252 << 23) - e);
+
+	// The exponent is the same range as fp32, so just copy it
+	// verbatim, shifted up to where the sign bit used to be.
+	e <<= 1;
+
+	// Quantize to 12 bits.
+	uint qa = uint(int(round(a * (normalizer * 2047.0))));
+	uint qb = uint(int(round(b * (normalizer * 2047.0))));
+
+	return (qa & 0xfffu) | ((qb & 0xfffu) << 12) | e;
+}
+
+// Border texels are written with zero alpha; treat their smoothness
+// contribution as zero so the border does not pull on the solution.
+float zero_if_outside_border(vec4 val)
+{
+	if (val.w < 1.0f) {
+		// We hit the border (or more like half-way to it), so zero smoothness.
+		return 0.0f;
+	} else {
+		return val.x;
+	}
+}
+
+// Assemble the 2x2 linear system A [du dv]^T = b for one pixel, combining
+// the intensity (E_I), gradient (E_G) and smoothness (E_S) terms. The
+// result is packed into a uvec4 as (1/A11, A12, 1/A22, packed(b1, b2));
+// the reciprocals save divisions in the SOR solver.
+uvec4 compute_equation(vec3 tc, vec3 tc_left, vec3 tc_down)
+{
+	// Read the flow (on top of the u0/v0 flow).
+	float du, dv;
+	if (zero_diff_flow) {
+		du = dv = 0.0f;
+	} else {
+		vec2 diff_flow = texture(diff_flow_tex, tc).xy;
+		du = diff_flow.x;
+		dv = diff_flow.y;
+	}
+
+	// Read the first derivatives.
+	vec2 I_x_y = texture(I_x_y_tex, tc).xy;
+	float I_x = I_x_y.x;
+	float I_y = I_x_y.y;
+	float I_t = texture(I_t_tex, tc).x;
+
+	// E_I term. Note that we don't square β_0, in line with DeepFlow;
+	// it's probably an error (see variational_refinement.txt),
+	// but squaring it seems to give worse results.
+	float beta_0 = texture(beta_0_tex, tc).x;
+	float k1 = delta * beta_0 * inversesqrt(beta_0 * (I_x * du + I_y * dv + I_t) * (I_x * du + I_y * dv + I_t) + 1e-6);
+	float A11 = k1 * I_x * I_x;
+	float A12 = k1 * I_x * I_y;
+	float A22 = k1 * I_y * I_y;
+	float b1 = -k1 * I_t * I_x;
+	float b2 = -k1 * I_t * I_y;
+
+	// Compute the second derivatives. First I_xx and I_xy.
+	vec2 I_x_y_m2 = textureOffset(I_x_y_tex, tc, ivec2(-2,  0)).xy;
+	vec2 I_x_y_m1 = textureOffset(I_x_y_tex, tc, ivec2(-1,  0)).xy;
+	vec2 I_x_y_p1 = textureOffset(I_x_y_tex, tc, ivec2( 1,  0)).xy;
+	vec2 I_x_y_p2 = textureOffset(I_x_y_tex, tc, ivec2( 2,  0)).xy;
+	vec2 I_xx_yx = (I_x_y_p1 - I_x_y_m1) * (2.0/3.0) + (I_x_y_m2 - I_x_y_p2) * (1.0/12.0);
+	float I_xx = I_xx_yx.x;
+	float I_xy = I_xx_yx.y;
+
+	// And now I_yy; I_yx = I_xy, bar rounding differences, so we don't
+	// bother computing it. We still have to sample the x component,
+	// though, but we can throw it away immediately.
+	float I_y_m2 = textureOffset(I_x_y_tex, tc, ivec2(0, -2)).y;
+	float I_y_m1 = textureOffset(I_x_y_tex, tc, ivec2(0, -1)).y;
+	float I_y_p1 = textureOffset(I_x_y_tex, tc, ivec2(0,  1)).y;
+	float I_y_p2 = textureOffset(I_x_y_tex, tc, ivec2(0,  2)).y;
+	float I_yy = (I_y_p1 - I_y_m1) * (2.0/3.0) + (I_y_m2 - I_y_p2) * (1.0/12.0);
+
+	// Finally I_xt and I_yt. (We compute these as I_tx and I_yt.)
+	vec2 I_t_m2 = textureOffset(I_t_tex, tc, ivec2(-2,  0)).xy;
+	vec2 I_t_m1 = textureOffset(I_t_tex, tc, ivec2(-1,  0)).xy;
+	vec2 I_t_p1 = textureOffset(I_t_tex, tc, ivec2( 1,  0)).xy;
+	vec2 I_t_p2 = textureOffset(I_t_tex, tc, ivec2( 2,  0)).xy;
+	vec2 I_tx_ty = (I_t_p1 - I_t_m1) * (2.0/3.0) + (I_t_m2 - I_t_p2) * (1.0/12.0);
+	float I_xt = I_tx_ty.x;
+	float I_yt = I_tx_ty.y;
+
+	// E_G term. Same normalization as beta_0 (see derivatives.frag).
+	float beta_x = 1.0 / (I_xx * I_xx + I_xy * I_xy + 1e-7);
+	float beta_y = 1.0 / (I_xy * I_xy + I_yy * I_yy + 1e-7);
+	float k2 = gamma * inversesqrt(
+		beta_x * (I_xx * du + I_xy * dv + I_xt) * (I_xx * du + I_xy * dv + I_xt) +
+		beta_y * (I_xy * du + I_yy * dv + I_yt) * (I_xy * du + I_yy * dv + I_yt) +
+		1e-6);
+	float k_x = k2 * beta_x;
+	float k_y = k2 * beta_y;
+	A11 += k_x * I_xx * I_xx + k_y * I_xy * I_xy;
+	A12 += k_x * I_xx * I_xy + k_y * I_xy * I_yy;
+	A22 += k_x * I_xy * I_xy + k_y * I_yy * I_yy;
+	b1 -= k_x * I_xx * I_xt + k_y * I_xy * I_yt;
+	b2 -= k_x * I_xy * I_xt + k_y * I_yy * I_yt;
+
+	// E_S term, sans the part on the right-hand side that deals with
+	// the neighboring pixels. The gamma is multiplied in in smoothness.frag.
+	//
+	// Note that we sample in-between two texels, which gives us the 0.5 *
+	// (x[-1] + x[0]) part for free. If one of the texels is a border
+	// texel, it will have zero alpha, and zero_if_outside_border() will
+	// set smoothness to zero.
+	float smooth_l = zero_if_outside_border(texture(diffusivity_tex, tc_left));
+	float smooth_r = zero_if_outside_border(textureOffset(diffusivity_tex, tc_left, ivec2(1, 0)));
+	float smooth_d = zero_if_outside_border(texture(diffusivity_tex, tc_down));
+	float smooth_u = zero_if_outside_border(textureOffset(diffusivity_tex, tc_down, ivec2(0, 1)));
+	A11 += smooth_l + smooth_r + smooth_d + smooth_u;
+	A22 += smooth_l + smooth_r + smooth_d + smooth_u;
+
+	// Laplacian of (u0, v0).
+	vec2 laplacian =
+		smooth_l * textureOffset(base_flow_tex, tc, ivec2(-1,  0)).xy +
+		smooth_r * textureOffset(base_flow_tex, tc, ivec2( 1,  0)).xy +
+		smooth_d * textureOffset(base_flow_tex, tc, ivec2( 0, -1)).xy +
+		smooth_u * textureOffset(base_flow_tex, tc, ivec2( 0,  1)).xy -
+		(smooth_l + smooth_r + smooth_d + smooth_u) * texture(base_flow_tex, tc).xy;
+	b1 += laplacian.x;
+	b2 += laplacian.y;
+
+	// Encode the equation down into four uint32s.
+	uvec4 ret;
+	ret.x = floatBitsToUint(1.0 / A11);
+	ret.y = floatBitsToUint(A12);
+	ret.z = floatBitsToUint(1.0 / A22);
+	ret.w = pack_floats_shared(b1, b2);
+	return ret;
+}
+
+void main()
+{
+	uvec4 eq0 = compute_equation(tc0, tc_left0, tc_down0);
+	uvec4 eq1 = compute_equation(tc1, tc_left1, tc_down1);
+
+	// On odd lines, the right-hand pixel is the red one; on even lines,
+	// it is the left-hand pixel (standard red-black checkerboard layout).
+	bool odd_line = (int(round(line_offset)) & 1) == 1;
+	equation_red = odd_line ? eq1 : eq0;
+	equation_black = odd_line ? eq0 : eq1;
+}
--- /dev/null
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+// Vertex shader for equations.frag. Each output fragment covers two
+// horizontally adjacent input pixels, so we compute texture coordinates
+// for both (tc0/tc1, at ∓half a texel), plus coordinates straddling the
+// left and lower neighbors for the smoothness term. One instance per
+// layer (forward/backward flow), routed via gl_Layer.
+
+layout(location=0) in vec2 position;
+out vec3 tc0, tc_left0, tc_down0;
+out vec3 tc1, tc_left1, tc_down1;
+out float line_offset;
+
+uniform sampler2DArray diffusivity_tex;
+
+void main()
+{
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+	gl_Layer = gl_InstanceID;
+
+	const vec2 half_texel = 0.5f / textureSize(diffusivity_tex, 0).xy;
+
+	vec2 tc = position;
+	vec2 tc_left = vec2(tc.x - half_texel.x, tc.y);
+	vec2 tc_down = vec2(tc.x, tc.y - half_texel.y);
+
+	// Adjust for different texel centers.
+	tc0 = vec3(tc.x - half_texel.x, tc.y, gl_InstanceID);
+	tc_left0 = vec3(tc_left.x - half_texel.x, tc_left.y, gl_InstanceID);
+	tc_down0 = vec3(tc_down.x - half_texel.x, tc_down.y, gl_InstanceID);
+
+	tc1 = vec3(tc.x + half_texel.x, tc.y, gl_InstanceID);
+	tc_left1 = vec3(tc_left.x + half_texel.x, tc_left.y, gl_InstanceID);
+	tc_down1 = vec3(tc_down.x + half_texel.x, tc_down.y, gl_InstanceID);
+
+	line_offset = position.y * textureSize(diffusivity_tex, 0).y - 0.5f;
+}
--- /dev/null
+// Evaluate a .flo file against ground truth,
+// outputting the average end-point error.
+
+#include "util.h"
+
+#include <assert.h>
+#include <memory>
+#include <stdio.h>
+
+using namespace std;
+
+double eval_flow(const char *filename1, const char *filename2);
+
+// Evaluate (flow, ground truth) file pairs given on the command line and
+// print the average end-point error over all pairs.
+int main(int argc, char **argv)
+{
+	// Require at least one complete pair; otherwise we would divide by
+	// zero below, and with an odd number of arguments, the loop would
+	// read argv[i + 1] past the end of the argument list.
+	if (argc < 3 || argc % 2 == 0) {
+		fprintf(stderr, "Usage: %s FLOW1.flo GT1.flo [FLOW2.flo GT2.flo ...]\n", argv[0]);
+		return 1;
+	}
+
+	double sum_epe = 0.0;
+	int num_flows = 0;
+	for (int i = 1; i < argc; i += 2) {
+		sum_epe += eval_flow(argv[i], argv[i + 1]);
+		++num_flows;
+	}
+	printf("Average EPE: %.2f pixels\n", sum_epe / num_flows);
+}
+
+double eval_flow(const char *filename1, const char *filename2)
+{
+ Flow flow = read_flow(filename1);
+ Flow gt = read_flow(filename2);
+
+ double sum = 0.0;
+ for (unsigned y = 0; y < unsigned(flow.height); ++y) {
+ for (unsigned x = 0; x < unsigned(flow.width); ++x) {
+ float du = flow.flow[y * flow.width + x].du;
+ float dv = flow.flow[y * flow.width + x].dv;
+ float gt_du = gt.flow[y * flow.width + x].du;
+ float gt_dv = gt.flow[y * flow.width + x].dv;
+ sum += hypot(du - gt_du, dv - gt_dv);
+ }
+ }
+ return sum / (flow.width * flow.height);
+}
--- /dev/null
+#include "flags.h"
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <utility>
+
+using namespace std;
+
+// The single global flag instance, filled in by parse_flags().
+Flags global_flags;
+
+// Long options that have no corresponding short option.
+// Values start at 1000 so they can never collide with the char values
+// getopt_long() returns for short options.
+enum LongOption {
+	OPTION_HELP = 1000,
+	OPTION_SLOW_DOWN_INPUT = 1001,
+	OPTION_HTTP_PORT = 1002
+};
+
+// Print command-line usage information to stderr.
+void usage()
+{
+	fprintf(stderr, "Usage: futatabi [OPTION]... SOURCE_URL\n");
+	fprintf(stderr, "\n");
+	fprintf(stderr, "      --help                      print usage information\n");
+	fprintf(stderr, "      --slow-down-input           slow down input to realtime (default on if no\n");
+	fprintf(stderr, "                                    source URL given)\n");
+	fprintf(stderr, "  -q, --interpolation-quality N   1 = fastest\n");
+	fprintf(stderr, "                                  2 = default (realtime 720p on fast embedded GPUs)\n");
+	fprintf(stderr, "                                  3 = good (realtime 720p on GTX 970 or so)\n");
+	fprintf(stderr, "                                  4 = best (not realtime on any current GPU)\n");
+	fprintf(stderr, "  -d, --working-directory DIR     where to store frames and database\n");
+	fprintf(stderr, "      --http-port PORT            which port to listen on for output\n");
+}
+
+// Parse the command line into global_flags. Exits with usage information
+// on --help or on any invalid option/value.
+void parse_flags(int argc, char * const argv[])
+{
+	static const option long_options[] = {
+		{ "help", no_argument, 0, OPTION_HELP },
+		{ "slow-down-input", no_argument, 0, OPTION_SLOW_DOWN_INPUT },
+		{ "interpolation-quality", required_argument, 0, 'q' },
+		{ "working-directory", required_argument, 0, 'd' },
+		{ "http-port", required_argument, 0, OPTION_HTTP_PORT },
+		{ 0, 0, 0, 0 }
+	};
+	for ( ;; ) {
+		int option_index = 0;
+		int c = getopt_long(argc, argv, "q:d:", long_options, &option_index);
+
+		if (c == -1) {
+			break;
+		}
+		switch (c) {
+		case OPTION_SLOW_DOWN_INPUT:
+			global_flags.slow_down_input = true;
+			break;
+		case 'q':
+			global_flags.interpolation_quality = atoi(optarg);
+			break;
+		case 'd':
+			global_flags.working_directory = optarg;
+			break;
+		case OPTION_HTTP_PORT:
+			global_flags.http_port = atoi(optarg);
+			break;
+		case OPTION_HELP:
+			usage();
+			exit(0);
+		default:
+			// Note: option_index indexes long_options[], not argv[], so it
+			// cannot be used to name the bad argument. getopt_long() has
+			// already advanced optind past the offending element.
+			fprintf(stderr, "Unknown option '%s'\n", argv[optind - 1]);
+			fprintf(stderr, "\n");
+			usage();
+			exit(1);
+		}
+	}
+
+	if (global_flags.interpolation_quality < 1 || global_flags.interpolation_quality > 4) {
+		fprintf(stderr, "Interpolation quality must be 1, 2, 3 or 4.\n");
+		usage();
+		exit(1);
+	}
+}
--- /dev/null
+#ifndef FLAGS_H
+#define FLAGS_H
+
+// <stdint.h> is needed for uint16_t below; previously, this header relied
+// on some other include pulling it in transitively.
+#include <stdint.h>
+#include <string>
+
+#include "defs.h"
+
+// All command-line-configurable settings, with their default values.
+// (Include guard renamed from _FLAGS_H; identifiers starting with an
+// underscore followed by an uppercase letter are reserved in C++.)
+struct Flags {
+	std::string stream_source;
+	std::string working_directory = ".";
+	bool slow_down_input = false;
+	int interpolation_quality = 2;  // 1..4; range-checked in parse_flags().
+	uint16_t http_port = DEFAULT_HTTPD_PORT;
+};
+extern Flags global_flags;
+
+void usage();
+void parse_flags(int argc, char * const argv[]);
+
+#endif  // !defined(FLAGS_H)
--- /dev/null
+#define NO_SDL_GLEXT 1
+
+#include "flow.h"
+
+#include "embedded_files.h"
+#include "gpu_timers.h"
+#include "util.h"
+#include "shared/read_file.h"
+
+#include <algorithm>
+#include <assert.h>
+#include <deque>
+#include <dlfcn.h>
+#include <epoxy/gl.h>
+#include <map>
+#include <memory>
+#include <stack>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <vector>
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+// Weighting constants for the different parts of the variational refinement.
+// These don't correspond 1:1 to the values given in the DIS paper,
+// since we have different normalizations and ranges in some cases.
+// These are found through a simple grid search on some MPI-Sintel data,
+// although the error (EPE) seems to be fairly insensitive to the precise values.
+// Only the relative values matter, so we fix alpha (the smoothness constant)
+// at unity and tweak the others.
+//
+// TODO: Maybe this should not be global.
+float vr_alpha = 1.0f, vr_delta = 0.25f, vr_gamma = 0.25f;
+
+// Some global OpenGL objects.
+// TODO: These should really be part of DISComputeFlow.
+GLuint nearest_sampler, linear_sampler, zero_border_sampler;
+GLuint vertex_vbo;
+
+// Number of pyramid levels needed to reduce a width x height image down
+// to 1x1, halving (with truncation) at every level. E.g. 1280x720 -> 11.
+int find_num_levels(int width, int height)
+{
+	int levels = 1;
+	for (int size = max(width, height); size > 1; size >>= 1) {
+		++levels;
+	}
+	return levels;
+}
+
+// Compile a single shader of the given type (GL_VERTEX_SHADER etc.).
+// Any nonempty compile log is printed to stderr; if compilation fails,
+// the shader source is printed with line numbers and the program exits.
+GLuint compile_shader(const string &shader_src, GLenum type)
+{
+	GLuint obj = glCreateShader(type);
+	const GLchar* source[] = { shader_src.data() };
+	const GLint length[] = { (GLint)shader_src.size() };
+	glShaderSource(obj, 1, source, length);
+	glCompileShader(obj);
+
+	GLchar info_log[4096];
+	GLsizei log_length = sizeof(info_log) - 1;
+	glGetShaderInfoLog(obj, log_length, &log_length, info_log);
+	info_log[log_length] = 0;
+	if (strlen(info_log) > 0) {
+		fprintf(stderr, "Shader compile log: %s\n", info_log);
+	}
+
+	GLint status;
+	glGetShaderiv(obj, GL_COMPILE_STATUS, &status);
+	if (status == GL_FALSE) {
+		// Add some line numbers to easier identify compile errors.
+		string src_with_lines = "/*   1 */ ";
+		size_t lineno = 1;
+		for (char ch : shader_src) {
+			src_with_lines.push_back(ch);
+			if (ch == '\n') {
+				char buf[32];
+				snprintf(buf, sizeof(buf), "/* %3zu */ ", ++lineno);
+				src_with_lines += buf;
+			}
+		}
+
+		fprintf(stderr, "Failed to compile shader:\n%s\n", src_with_lines.c_str());
+		exit(1);
+	}
+
+	return obj;
+}
+
+// Link a vertex and a fragment shader into a program object.
+// On link failure, prints the info log and exits.
+GLuint link_program(GLuint vs_obj, GLuint fs_obj)
+{
+	GLuint program = glCreateProgram();
+	glAttachShader(program, vs_obj);
+	glAttachShader(program, fs_obj);
+	glLinkProgram(program);
+	GLint success;
+	glGetProgramiv(program, GL_LINK_STATUS, &success);
+	if (success == GL_FALSE) {
+		GLchar error_log[1024] = {0};
+		glGetProgramInfoLog(program, 1024, nullptr, error_log);
+		fprintf(stderr, "Error linking program: %s\n", error_log);
+		exit(1);
+	}
+	return program;
+}
+
+// Bind “tex” with “sampler” to the given texture unit and point the given
+// sampler uniform at that unit. A location of -1 (uniform not found or
+// optimized out) makes this a no-op.
+void bind_sampler(GLuint program, GLint location, GLuint texture_unit, GLuint tex, GLuint sampler)
+{
+	if (location != -1) {
+		glBindTextureUnit(texture_unit, tex);
+		glBindSampler(texture_unit, sampler);
+		glProgramUniform1i(program, location, texture_unit);
+	}
+}
+
+// Bind (creating and caching on first use) an FBO whose color attachments
+// are exactly the given textures, with draw buffers set up in order.
+template<size_t num_elements>
+void PersistentFBOSet<num_elements>::render_to(const array<GLuint, num_elements> &textures)
+{
+	auto it = fbos.find(textures);
+	if (it != fbos.end()) {
+		glBindFramebuffer(GL_FRAMEBUFFER, it->second);
+		return;
+	}
+
+	GLuint fbo;
+	glCreateFramebuffers(1, &fbo);
+	GLenum bufs[num_elements];
+	for (size_t i = 0; i < num_elements; ++i) {
+		glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+		bufs[i] = GL_COLOR_ATTACHMENT0 + i;
+	}
+	glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
+
+	fbos[textures] = fbo;
+	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+}
+
+// Same as PersistentFBOSet::render_to(), but the FBO additionally has the
+// given depth renderbuffer attached; the cache key includes it.
+template<size_t num_elements>
+void PersistentFBOSetWithDepth<num_elements>::render_to(GLuint depth_rb, const array<GLuint, num_elements> &textures)
+{
+	auto key = make_pair(depth_rb, textures);
+
+	auto it = fbos.find(key);
+	if (it != fbos.end()) {
+		glBindFramebuffer(GL_FRAMEBUFFER, it->second);
+		return;
+	}
+
+	GLuint fbo;
+	glCreateFramebuffers(1, &fbo);
+	GLenum bufs[num_elements];
+	glNamedFramebufferRenderbuffer(fbo, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rb);
+	for (size_t i = 0; i < num_elements; ++i) {
+		glNamedFramebufferTexture(fbo, GL_COLOR_ATTACHMENT0 + i, textures[i], 0);
+		bufs[i] = GL_COLOR_ATTACHMENT0 + i;
+	}
+	glNamedFramebufferDrawBuffers(fbo, num_elements, bufs);
+
+	fbos[key] = fbo;
+	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+}
+
+// Compile and set up the grayscale conversion pass (gray.frag), which
+// turns the input image into single-channel intensity for the flow search.
+GrayscaleConversion::GrayscaleConversion()
+{
+	gray_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	gray_fs_obj = compile_shader(read_file("gray.frag", _binary_gray_frag_data, _binary_gray_frag_size), GL_FRAGMENT_SHADER);
+	gray_program = link_program(gray_vs_obj, gray_fs_obj);
+
+	// Set up the VAO containing all the required position/texcoord data.
+	glCreateVertexArrays(1, &gray_vao);
+	glBindVertexArray(gray_vao);
+
+	GLint position_attrib = glGetAttribLocation(gray_program, "position");
+	glEnableVertexArrayAttrib(gray_vao, position_attrib);
+	// NOTE(review): assumes the shared vertex_vbo is bound as GL_ARRAY_BUFFER
+	// at this point — confirm against the GL init code.
+	glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+
+	uniform_tex = glGetUniformLocation(gray_program, "tex");
+}
+
+// Run the grayscale pass: render “tex” into “gray_tex”, one instance
+// (and thus one output layer) per input layer.
+void GrayscaleConversion::exec(GLint tex, GLint gray_tex, int width, int height, int num_layers)
+{
+	glUseProgram(gray_program);
+	bind_sampler(gray_program, uniform_tex, 0, tex, nearest_sampler);
+
+	glViewport(0, 0, width, height);
+	fbos.render_to(gray_tex);
+	glBindVertexArray(gray_vao);
+	glDisable(GL_BLEND);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and set up the gradient pass (sobel.frag).
+Sobel::Sobel()
+{
+	sobel_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	sobel_fs_obj = compile_shader(read_file("sobel.frag", _binary_sobel_frag_data, _binary_sobel_frag_size), GL_FRAGMENT_SHADER);
+	sobel_program = link_program(sobel_vs_obj, sobel_fs_obj);
+
+	uniform_tex = glGetUniformLocation(sobel_program, "tex");
+}
+
+// Compute gradients of “tex_view” into “grad_tex” at the given pyramid
+// level size, one instanced draw per layer.
+void Sobel::exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers)
+{
+	glUseProgram(sobel_program);
+	bind_sampler(sobel_program, uniform_tex, 0, tex_view, nearest_sampler);
+
+	glViewport(0, 0, level_width, level_height);
+	fbos.render_to(grad_tex);
+	glDisable(GL_BLEND);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the patch-based motion-search pass, and cache the uniform
+// locations it needs. <op> provides the tunables (patch size, iteration count)
+// used later in exec().
+MotionSearch::MotionSearch(const OperatingPoint &op)
+	: op(op)
+{
+	motion_vs_obj = compile_shader(read_file("motion_search.vert", _binary_motion_search_vert_data, _binary_motion_search_vert_size), GL_VERTEX_SHADER);
+	motion_fs_obj = compile_shader(read_file("motion_search.frag", _binary_motion_search_frag_data, _binary_motion_search_frag_size), GL_FRAGMENT_SHADER);
+	motion_search_program = link_program(motion_vs_obj, motion_fs_obj);
+
+	uniform_inv_image_size = glGetUniformLocation(motion_search_program, "inv_image_size");
+	uniform_inv_prev_level_size = glGetUniformLocation(motion_search_program, "inv_prev_level_size");
+	uniform_out_flow_size = glGetUniformLocation(motion_search_program, "out_flow_size");
+	uniform_image_tex = glGetUniformLocation(motion_search_program, "image_tex");
+	uniform_grad_tex = glGetUniformLocation(motion_search_program, "grad_tex");
+	uniform_flow_tex = glGetUniformLocation(motion_search_program, "flow_tex");
+	uniform_patch_size = glGetUniformLocation(motion_search_program, "patch_size");
+	uniform_num_iterations = glGetUniformLocation(motion_search_program, "num_iterations");
+}
+
+// Search for per-patch motion at the current pyramid level, using the flow
+// from the previous (coarser) level as the starting guess (hence the separate
+// prev_level dimensions). Output is one flow vector per patch: the viewport
+// is width_patches x height_patches, not the level size.
+//
+// NOTE(review): this pass does not touch GL_BLEND itself; it relies on an
+// earlier pass (e.g. Sobel) having left blending disabled -- confirm.
+void MotionSearch::exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers)
+{
+	glUseProgram(motion_search_program);
+
+	bind_sampler(motion_search_program, uniform_image_tex, 0, tex_view, linear_sampler);
+	bind_sampler(motion_search_program, uniform_grad_tex, 1, grad_tex, nearest_sampler);
+	bind_sampler(motion_search_program, uniform_flow_tex, 2, flow_tex, linear_sampler);
+
+	glProgramUniform2f(motion_search_program, uniform_inv_image_size, 1.0f / level_width, 1.0f / level_height);
+	glProgramUniform2f(motion_search_program, uniform_inv_prev_level_size, 1.0f / prev_level_width, 1.0f / prev_level_height);
+	glProgramUniform2f(motion_search_program, uniform_out_flow_size, width_patches, height_patches);
+	glProgramUniform1ui(motion_search_program, uniform_patch_size, op.patch_size_pixels);
+	glProgramUniform1ui(motion_search_program, uniform_num_iterations, op.search_iterations);
+
+	glViewport(0, 0, width_patches, height_patches);
+	fbos.render_to(flow_out_tex);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the densification pass (turn sparse per-patch flow into a
+// dense per-pixel flow field). <op> supplies patch_size_pixels for exec().
+Densify::Densify(const OperatingPoint &op)
+	: op(op)
+{
+	densify_vs_obj = compile_shader(read_file("densify.vert", _binary_densify_vert_data, _binary_densify_vert_size), GL_VERTEX_SHADER);
+	densify_fs_obj = compile_shader(read_file("densify.frag", _binary_densify_frag_data, _binary_densify_frag_size), GL_FRAGMENT_SHADER);
+	densify_program = link_program(densify_vs_obj, densify_fs_obj);
+
+	uniform_patch_size = glGetUniformLocation(densify_program, "patch_size");
+	uniform_image_tex = glGetUniformLocation(densify_program, "image_tex");
+	uniform_flow_tex = glGetUniformLocation(densify_program, "flow_tex");
+}
+
+// Densify the sparse per-patch flow into <dense_flow_tex>. One quad instance
+// is drawn per patch per layer; overlapping patches are accumulated with
+// additive blending (GL_ONE, GL_ONE) into a texture cleared to zero here,
+// so the result is a (weighted) sum to be normalized downstream.
+void Densify::exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers)
+{
+	glUseProgram(densify_program);
+
+	bind_sampler(densify_program, uniform_image_tex, 0, tex_view, linear_sampler);
+	bind_sampler(densify_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
+
+	// Patch size expressed in normalized (0..1) level coordinates.
+	glProgramUniform2f(densify_program, uniform_patch_size,
+		float(op.patch_size_pixels) / level_width,
+		float(op.patch_size_pixels) / level_height);
+
+	glViewport(0, 0, level_width, level_height);
+	glEnable(GL_BLEND);
+	glBlendFunc(GL_ONE, GL_ONE);
+	fbos.render_to(dense_flow_tex);
+	glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
+	glClear(GL_COLOR_BUFFER_BIT);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width_patches * height_patches * num_layers);
+}
+
+// Compile and link the prewarp pass (shared vs.vert plus prewarp.frag).
+Prewarp::Prewarp()
+{
+	prewarp_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	prewarp_fs_obj = compile_shader(read_file("prewarp.frag", _binary_prewarp_frag_data, _binary_prewarp_frag_size), GL_FRAGMENT_SHADER);
+	prewarp_program = link_program(prewarp_vs_obj, prewarp_fs_obj);
+
+	uniform_image_tex = glGetUniformLocation(prewarp_program, "image_tex");
+	uniform_flow_tex = glGetUniformLocation(prewarp_program, "flow_tex");
+}
+
+// Prewarp pass: renders three outputs at once (MRT) -- I, I_t and a
+// normalized base flow -- from the image and the dense flow. See the
+// caller's comments for how these feed variational refinement.
+void Prewarp::exec(GLuint tex_view, GLuint flow_tex, GLuint I_tex, GLuint I_t_tex, GLuint normalized_flow_tex, int level_width, int level_height, int num_layers)
+{
+	glUseProgram(prewarp_program);
+
+	bind_sampler(prewarp_program, uniform_image_tex, 0, tex_view, linear_sampler);
+	bind_sampler(prewarp_program, uniform_flow_tex, 1, flow_tex, nearest_sampler);
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	fbos.render_to(I_tex, I_t_tex, normalized_flow_tex);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the first-derivatives pass (shared vs.vert plus
+// derivatives.frag).
+Derivatives::Derivatives()
+{
+	derivatives_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	derivatives_fs_obj = compile_shader(read_file("derivatives.frag", _binary_derivatives_frag_data, _binary_derivatives_frag_size), GL_FRAGMENT_SHADER);
+	derivatives_program = link_program(derivatives_vs_obj, derivatives_fs_obj);
+
+	uniform_tex = glGetUniformLocation(derivatives_program, "tex");
+}
+
+// Compute first derivatives of <input_tex>, writing two MRT outputs:
+// I_x/I_y packed into <I_x_y_tex> and a beta_0 term into <beta_0_tex>.
+void Derivatives::exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers)
+{
+	glUseProgram(derivatives_program);
+
+	bind_sampler(derivatives_program, uniform_tex, 0, input_tex, nearest_sampler);
+
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	fbos.render_to(I_x_y_tex, beta_0_tex);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the diffusivity pass (shared vs.vert plus
+// diffusivity.frag), used by the variational refinement.
+ComputeDiffusivity::ComputeDiffusivity()
+{
+	diffusivity_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	diffusivity_fs_obj = compile_shader(read_file("diffusivity.frag", _binary_diffusivity_frag_data, _binary_diffusivity_frag_size), GL_FRAGMENT_SHADER);
+	diffusivity_program = link_program(diffusivity_vs_obj, diffusivity_fs_obj);
+
+	uniform_flow_tex = glGetUniformLocation(diffusivity_program, "flow_tex");
+	uniform_diff_flow_tex = glGetUniformLocation(diffusivity_program, "diff_flow_tex");
+	uniform_alpha = glGetUniformLocation(diffusivity_program, "alpha");
+	uniform_zero_diff_flow = glGetUniformLocation(diffusivity_program, "zero_diff_flow");
+}
+
+// Compute the per-pixel diffusivity term into <diffusivity_tex>.
+// <zero_diff_flow> is true on the first outer iteration (see the caller),
+// when diff_flow_tex still contains garbage and must be treated as zero.
+void ComputeDiffusivity::exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
+{
+	glUseProgram(diffusivity_program);
+
+	bind_sampler(diffusivity_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+	bind_sampler(diffusivity_program, uniform_diff_flow_tex, 1, diff_flow_tex, nearest_sampler);
+	glProgramUniform1f(diffusivity_program, uniform_alpha, vr_alpha);
+	glProgramUniform1i(diffusivity_program, uniform_zero_diff_flow, zero_diff_flow);
+
+	glViewport(0, 0, level_width, level_height);
+
+	glDisable(GL_BLEND);
+	fbos.render_to(diffusivity_tex);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the equation-setup pass for variational refinement,
+// and cache the locations of its many input textures and constants.
+SetupEquations::SetupEquations()
+{
+	equations_vs_obj = compile_shader(read_file("equations.vert", _binary_equations_vert_data, _binary_equations_vert_size), GL_VERTEX_SHADER);
+	equations_fs_obj = compile_shader(read_file("equations.frag", _binary_equations_frag_data, _binary_equations_frag_size), GL_FRAGMENT_SHADER);
+	equations_program = link_program(equations_vs_obj, equations_fs_obj);
+
+	uniform_I_x_y_tex = glGetUniformLocation(equations_program, "I_x_y_tex");
+	uniform_I_t_tex = glGetUniformLocation(equations_program, "I_t_tex");
+	uniform_diff_flow_tex = glGetUniformLocation(equations_program, "diff_flow_tex");
+	uniform_base_flow_tex = glGetUniformLocation(equations_program, "base_flow_tex");
+	uniform_beta_0_tex = glGetUniformLocation(equations_program, "beta_0_tex");
+	uniform_diffusivity_tex = glGetUniformLocation(equations_program, "diffusivity_tex");
+	uniform_gamma = glGetUniformLocation(equations_program, "gamma");
+	uniform_delta = glGetUniformLocation(equations_program, "delta");
+	uniform_zero_diff_flow = glGetUniformLocation(equations_program, "zero_diff_flow");
+}
+
+// Build the 2x2 equation system per pixel, split into separate red and
+// black textures (for the red/black SOR solver); hence the half-width
+// viewport: each output texel covers two horizontally adjacent pixels.
+// The diffusivity is sampled with the zero-border sampler so gradients
+// vanish outside the image.
+void SetupEquations::exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint base_flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers)
+{
+	glUseProgram(equations_program);
+
+	bind_sampler(equations_program, uniform_I_x_y_tex, 0, I_x_y_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_I_t_tex, 1, I_t_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_diff_flow_tex, 2, diff_flow_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_base_flow_tex, 3, base_flow_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_beta_0_tex, 4, beta_0_tex, nearest_sampler);
+	bind_sampler(equations_program, uniform_diffusivity_tex, 5, diffusivity_tex, zero_border_sampler);
+	glProgramUniform1f(equations_program, uniform_delta, vr_delta);
+	glProgramUniform1f(equations_program, uniform_gamma, vr_gamma);
+	glProgramUniform1i(equations_program, uniform_zero_diff_flow, zero_diff_flow);
+
+	glViewport(0, 0, (level_width + 1) / 2, level_height);
+	glDisable(GL_BLEND);
+	fbos.render_to(equation_red_tex, equation_black_tex);
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the red/black SOR solver pass and cache its uniforms.
+SOR::SOR()
+{
+	sor_vs_obj = compile_shader(read_file("sor.vert", _binary_sor_vert_data, _binary_sor_vert_size), GL_VERTEX_SHADER);
+	sor_fs_obj = compile_shader(read_file("sor.frag", _binary_sor_frag_data, _binary_sor_frag_size), GL_FRAGMENT_SHADER);
+	sor_program = link_program(sor_vs_obj, sor_fs_obj);
+
+	uniform_diff_flow_tex = glGetUniformLocation(sor_program, "diff_flow_tex");
+	uniform_equation_red_tex = glGetUniformLocation(sor_program, "equation_red_tex");
+	uniform_equation_black_tex = glGetUniformLocation(sor_program, "equation_black_tex");
+	uniform_diffusivity_tex = glGetUniformLocation(sor_program, "diffusivity_tex");
+	uniform_phase = glGetUniformLocation(sor_program, "phase");
+	uniform_num_nonzero_phases = glGetUniformLocation(sor_program, "num_nonzero_phases");
+}
+
+// Run <num_iterations> red/black SOR iterations in place on <diff_flow_tex>.
+// Each iteration is a red pass (phase 0) followed by a black pass (phase 1),
+// with glTextureBarrier() between dependent passes.
+//
+// <zero_diff_flow> means diff_flow_tex is uninitialized on entry; in that
+// case num_nonzero_phases steps 0 -> 1 -> 2 across the first iteration so
+// the shader never reads garbage: the first red pass treats everything as
+// zero, the first black pass can read only the freshly written red texels,
+// and from then on both phases are valid.
+void SOR::exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer)
+{
+	glUseProgram(sor_program);
+
+	bind_sampler(sor_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+	bind_sampler(sor_program, uniform_diffusivity_tex, 1, diffusivity_tex, zero_border_sampler);
+	bind_sampler(sor_program, uniform_equation_red_tex, 2, equation_red_tex, nearest_sampler);
+	bind_sampler(sor_program, uniform_equation_black_tex, 3, equation_black_tex, nearest_sampler);
+
+	if (!zero_diff_flow) {
+		glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
+	}
+
+	// NOTE: We bind to the texture we are rendering from, but we never write any value
+	// that we read in the same shader pass (we call discard for red values when we compute
+	// black, and vice versa), and we have barriers between the passes, so we're fine
+	// as per the spec.
+	glViewport(0, 0, level_width, level_height);
+	glDisable(GL_BLEND);
+	fbos.render_to(diff_flow_tex);
+
+	for (int i = 0; i < num_iterations; ++i) {
+		{
+			ScopedTimer timer("Red pass", sor_timer);
+			if (zero_diff_flow && i == 0) {
+				glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 0);
+			}
+			glProgramUniform1i(sor_program, uniform_phase, 0);
+			glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+			glTextureBarrier();
+		}
+		{
+			ScopedTimer timer("Black pass", sor_timer);
+			if (zero_diff_flow && i == 0) {
+				glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 1);
+			}
+			glProgramUniform1i(sor_program, uniform_phase, 1);
+			glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+			if (zero_diff_flow && i == 0) {
+				glProgramUniform1i(sor_program, uniform_num_nonzero_phases, 2);
+			}
+			// No barrier after the very last pass; the caller's next use of
+			// diff_flow_tex is through a new framebuffer/draw, not a same-pass read.
+			if (i != num_iterations - 1) {
+				glTextureBarrier();
+			}
+		}
+	}
+}
+
+// Compile and link the pass that adds the refined differential flow back
+// onto the base flow (shared vs.vert plus add_base_flow.frag).
+AddBaseFlow::AddBaseFlow()
+{
+	add_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	add_flow_fs_obj = compile_shader(read_file("add_base_flow.frag", _binary_add_base_flow_frag_data, _binary_add_base_flow_frag_size), GL_FRAGMENT_SHADER);
+	add_flow_program = link_program(add_flow_vs_obj, add_flow_fs_obj);
+
+	uniform_diff_flow_tex = glGetUniformLocation(add_flow_program, "diff_flow_tex");
+}
+
+// Accumulate <diff_flow_tex> into <base_flow_tex> using additive blending
+// (GL_ONE, GL_ONE), so the existing base flow contents are kept and the
+// differential flow is simply added on top.
+void AddBaseFlow::exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers)
+{
+	glUseProgram(add_flow_program);
+
+	bind_sampler(add_flow_program, uniform_diff_flow_tex, 0, diff_flow_tex, nearest_sampler);
+
+	glViewport(0, 0, level_width, level_height);
+	glEnable(GL_BLEND);
+	glBlendFunc(GL_ONE, GL_ONE);
+	fbos.render_to(base_flow_tex);
+
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Compile and link the flow-upscaling pass (shared vs.vert plus
+// resize_flow.frag).
+ResizeFlow::ResizeFlow()
+{
+	resize_flow_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	resize_flow_fs_obj = compile_shader(read_file("resize_flow.frag", _binary_resize_flow_frag_data, _binary_resize_flow_frag_size), GL_FRAGMENT_SHADER);
+	resize_flow_program = link_program(resize_flow_vs_obj, resize_flow_fs_obj);
+
+	uniform_flow_tex = glGetUniformLocation(resize_flow_program, "flow_tex");
+	uniform_scale_factor = glGetUniformLocation(resize_flow_program, "scale_factor");
+}
+
+// Resize <flow_tex> from input to output dimensions. scale_factor rescales
+// the flow *vectors* themselves, since flow is measured in pixels and the
+// pixel grid changes size.
+void ResizeFlow::exec(GLuint flow_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers)
+{
+	glUseProgram(resize_flow_program);
+
+	bind_sampler(resize_flow_program, uniform_flow_tex, 0, flow_tex, nearest_sampler);
+
+	glProgramUniform2f(resize_flow_program, uniform_scale_factor, float(output_width) / input_width, float(output_height) / input_height);
+
+	glViewport(0, 0, output_width, output_height);
+	glDisable(GL_BLEND);
+	fbos.render_to(out_tex);
+
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, num_layers);
+}
+
+// Set up all shared GL state for DIS flow computation: the three samplers
+// (nearest, linear, and linear-with-zero-border), the 1x1 all-zero initial
+// flow texture, and the full-screen-quad VBO/VAO shared by every pass.
+DISComputeFlow::DISComputeFlow(int width, int height, const OperatingPoint &op)
+	: width(width), height(height), op(op), motion_search(op), densify(op)
+{
+	// Make some samplers.
+	glCreateSamplers(1, &nearest_sampler);
+	glSamplerParameteri(nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	glSamplerParameteri(nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	glSamplerParameteri(nearest_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+	glCreateSamplers(1, &linear_sampler);
+	glSamplerParameteri(linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	glSamplerParameteri(linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+	glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	glSamplerParameteri(linear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+	// The smoothness is sampled so that once we get to a smoothness involving
+	// a value outside the border, the diffusivity between the two becomes zero.
+	// Similarly, gradients are zero outside the border, since the edge is taken
+	// to be constant.
+	glCreateSamplers(1, &zero_border_sampler);
+	glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	glSamplerParameteri(zero_border_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+	glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+	glSamplerParameteri(zero_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+	float zero[] = { 0.0f, 0.0f, 0.0f, 0.0f };  // Note that zero alpha means we can also see whether we sampled outside the border or not.
+	glSamplerParameterfv(zero_border_sampler, GL_TEXTURE_BORDER_COLOR, zero);
+
+	// Initial flow is zero, 1x1.
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &initial_flow_tex);
+	glTextureStorage3D(initial_flow_tex, 1, GL_RG16F, 1, 1, 1);
+	// data == nullptr clears to all zeros.
+	glClearTexImage(initial_flow_tex, 0, GL_RG, GL_FLOAT, nullptr);
+
+	// Set up the vertex data that will be shared between all passes.
+	float vertices[] = {
+		0.0f, 1.0f,
+		0.0f, 0.0f,
+		1.0f, 1.0f,
+		1.0f, 0.0f,
+	};
+	glCreateBuffers(1, &vertex_vbo);
+	glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+	glCreateVertexArrays(1, &vao);
+	glBindVertexArray(vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	GLint position_attrib = 0;  // Hard-coded in every vertex shader.
+	glEnableVertexArrayAttrib(vao, position_attrib);
+	glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Compute dense optical flow for <tex> (a mipmapped two-layer texture array;
+// two layers are used when computing forward and backward flow at once).
+// Works coarse-to-fine over the mip pyramid: at each level, run Sobel ->
+// motion search -> densify, then (optionally) variational refinement, and
+// feed the result into the next finer level. Returns a pool texture the
+// caller must eventually release.
+GLuint DISComputeFlow::exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy)
+{
+	int num_layers = (flow_direction == FORWARD_AND_BACKWARD) ? 2 : 1;
+	int prev_level_width = 1, prev_level_height = 1;
+	GLuint prev_level_flow_tex = initial_flow_tex;
+
+	GPUTimers timers;
+
+	glBindVertexArray(vao);
+	glDisable(GL_DITHER);
+
+	ScopedTimer total_timer("Compute flow", &timers);
+	for (int level = op.coarsest_level; level >= int(op.finest_level); --level) {
+		char timer_name[256];
+		snprintf(timer_name, sizeof(timer_name), "Level %d (%d x %d)", level, width >> level, height >> level);
+		ScopedTimer level_timer(timer_name, &total_timer);
+
+		int level_width = width >> level;
+		int level_height = height >> level;
+		float patch_spacing_pixels = op.patch_size_pixels * (1.0f - op.patch_overlap_ratio);
+
+		// Make sure we have patches at least every Nth pixel, e.g. for width=9
+		// and patch_spacing=3 (the default), we put out patch centers in
+		// x=0, x=3, x=6, x=9, which is four patches. The fragment shader will
+		// lock all the centers to integer coordinates if needed.
+		int width_patches = 1 + ceil(level_width / patch_spacing_pixels);
+		int height_patches = 1 + ceil(level_height / patch_spacing_pixels);
+
+		// Make sure we always read from the correct level; the chosen
+		// mipmapping could otherwise be rather unpredictable, especially
+		// during motion search.
+		GLuint tex_view;
+		glGenTextures(1, &tex_view);
+		// View of this level only (and both layers); deleted at the end of the iteration.
+		glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, tex, GL_R8, level, 1, 0, 2);
+
+		// Create a new texture to hold the gradients.
+		GLuint grad_tex = pool.get_texture(GL_R32UI, level_width, level_height, num_layers);
+
+		// Find the derivative.
+		{
+			ScopedTimer timer("Sobel", &level_timer);
+			sobel.exec(tex_view, grad_tex, level_width, level_height, num_layers);
+		}
+
+		// Motion search to find the initial flow. We use the flow from the previous
+		// level (sampled bilinearly; no fancy tricks) as a guide, then search from there.
+
+		// Create an output flow texture.
+		GLuint flow_out_tex = pool.get_texture(GL_RGB16F, width_patches, height_patches, num_layers);
+
+		// And draw.
+		{
+			ScopedTimer timer("Motion search", &level_timer);
+			motion_search.exec(tex_view, grad_tex, prev_level_flow_tex, flow_out_tex, level_width, level_height, prev_level_width, prev_level_height, width_patches, height_patches, num_layers);
+		}
+		pool.release_texture(grad_tex);
+
+		// Densification.
+
+		// Set up an output texture (cleared in Densify).
+		GLuint dense_flow_tex = pool.get_texture(GL_RGB16F, level_width, level_height, num_layers);
+
+		// And draw.
+		{
+			ScopedTimer timer("Densification", &level_timer);
+			densify.exec(tex_view, flow_out_tex, dense_flow_tex, level_width, level_height, width_patches, height_patches, num_layers);
+		}
+		pool.release_texture(flow_out_tex);
+
+		// Everything below here in the loop belongs to variational refinement.
+		ScopedTimer varref_timer("Variational refinement", &level_timer);
+
+		// Prewarping; create I and I_t, and a normalized base flow (so we don't
+		// have to normalize it over and over again, and also save some bandwidth).
+		//
+		// During the entire rest of the variational refinement, flow will be measured
+		// in pixels, not 0..1 normalized OpenGL texture coordinates.
+		// This is because variational refinement depends so heavily on derivatives,
+		// which are measured in intensity levels per pixel.
+		GLuint I_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+		GLuint I_t_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+		GLuint base_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+		{
+			ScopedTimer timer("Prewarping", &varref_timer);
+			prewarp.exec(tex_view, dense_flow_tex, I_tex, I_t_tex, base_flow_tex, level_width, level_height, num_layers);
+		}
+		pool.release_texture(dense_flow_tex);
+		glDeleteTextures(1, &tex_view);
+
+		// TODO: If we don't have variational refinement, we don't need I and I_t,
+		// so computing them is a waste.
+		if (op.variational_refinement) {
+			// Calculate I_x and I_y. We're only calculating first derivatives;
+			// the others will be taken on-the-fly in order to sample from fewer
+			// textures overall, since sampling from the L1 cache is cheap.
+			// (TODO: Verify that this is indeed faster than making separate
+			// double-derivative textures.)
+			GLuint I_x_y_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+			GLuint beta_0_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+			{
+				ScopedTimer timer("First derivatives", &varref_timer);
+				derivatives.exec(I_tex, I_x_y_tex, beta_0_tex, level_width, level_height, num_layers);
+			}
+			pool.release_texture(I_tex);
+
+			// We need somewhere to store du and dv (the flow increment, relative
+			// to the non-refined base flow u0 and v0). It's initially garbage,
+			// but not read until we've written something sane to it.
+			GLuint diff_flow_tex = pool.get_texture(GL_RG16F, level_width, level_height, num_layers);
+
+			// And for diffusivity.
+			GLuint diffusivity_tex = pool.get_texture(GL_R16F, level_width, level_height, num_layers);
+
+			// And finally for the equation set. See SetupEquations for
+			// the storage format.
+			GLuint equation_red_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
+			GLuint equation_black_tex = pool.get_texture(GL_RGBA32UI, (level_width + 1) / 2, level_height, num_layers);
+
+			// More outer iterations on coarser levels (level + 1 of them);
+			// outer_idx == 0 flags that diff_flow_tex is still uninitialized.
+			for (int outer_idx = 0; outer_idx < level + 1; ++outer_idx) {
+				// Calculate the diffusivity term for each pixel.
+				{
+					ScopedTimer timer("Compute diffusivity", &varref_timer);
+					compute_diffusivity.exec(base_flow_tex, diff_flow_tex, diffusivity_tex, level_width, level_height, outer_idx == 0, num_layers);
+				}
+
+				// Set up the 2x2 equation system for each pixel.
+				{
+					ScopedTimer timer("Set up equations", &varref_timer);
+					setup_equations.exec(I_x_y_tex, I_t_tex, diff_flow_tex, base_flow_tex, beta_0_tex, diffusivity_tex, equation_red_tex, equation_black_tex, level_width, level_height, outer_idx == 0, num_layers);
+				}
+
+				// Run a few SOR iterations. Note that these are to/from the same texture.
+				{
+					ScopedTimer timer("SOR", &varref_timer);
+					sor.exec(diff_flow_tex, equation_red_tex, equation_black_tex, diffusivity_tex, level_width, level_height, 5, outer_idx == 0, num_layers, &timer);
+				}
+			}
+
+			pool.release_texture(I_t_tex);
+			pool.release_texture(I_x_y_tex);
+			pool.release_texture(beta_0_tex);
+			pool.release_texture(diffusivity_tex);
+			pool.release_texture(equation_red_tex);
+			pool.release_texture(equation_black_tex);
+
+			// Add the differential flow found by the variational refinement to the base flow,
+			// giving the final flow estimate for this level.
+			// The output is in base_flow_tex; we don't need to make a new texture.
+			{
+				ScopedTimer timer("Add differential flow", &varref_timer);
+				add_base_flow.exec(base_flow_tex, diff_flow_tex, level_width, level_height, num_layers);
+			}
+			pool.release_texture(diff_flow_tex);
+		}
+
+		// initial_flow_tex is owned by us, not the pool; never release it there.
+		if (prev_level_flow_tex != initial_flow_tex) {
+			pool.release_texture(prev_level_flow_tex);
+		}
+		prev_level_flow_tex = base_flow_tex;
+		prev_level_width = level_width;
+		prev_level_height = level_height;
+	}
+	total_timer.end();
+
+	if (!in_warmup) {
+		timers.print();
+	}
+
+	// Scale up the flow to the final size (if needed).
+	if (op.finest_level == 0 || resize_strategy == DO_NOT_RESIZE_FLOW) {
+		return prev_level_flow_tex;
+	} else {
+		GLuint final_tex = pool.get_texture(GL_RG16F, width, height, num_layers);
+		resize_flow.exec(prev_level_flow_tex, final_tex, prev_level_width, prev_level_height, width, height, num_layers);
+		pool.release_texture(prev_level_flow_tex);
+		return final_tex;
+	}
+}
+
+// Compile and link the splat pass (forward-warping flow into the
+// interpolated frame's coordinate system). <op> supplies splat_size.
+Splat::Splat(const OperatingPoint &op)
+	: op(op)
+{
+	splat_vs_obj = compile_shader(read_file("splat.vert", _binary_splat_vert_data, _binary_splat_vert_size), GL_VERTEX_SHADER);
+	splat_fs_obj = compile_shader(read_file("splat.frag", _binary_splat_frag_data, _binary_splat_frag_size), GL_FRAGMENT_SHADER);
+	splat_program = link_program(splat_vs_obj, splat_fs_obj);
+
+	uniform_splat_size = glGetUniformLocation(splat_program, "splat_size");
+	uniform_alpha = glGetUniformLocation(splat_program, "alpha");
+	uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex");
+	uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex");
+	uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size");
+}
+
+// Splat the bidirectional flow into <flow_tex> at interpolation time <alpha>.
+// One small quad is drawn per pixel per direction (width * height * 2
+// instances); competing splats are resolved with the depth test, where depth
+// encodes the I_0/I_1 mismatch and GL_LESS keeps the best-matching candidate.
+void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
+{
+	glUseProgram(splat_program);
+
+	bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler);
+	bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
+
+	glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
+	glProgramUniform1f(splat_program, uniform_alpha, alpha);
+	glProgramUniform2f(splat_program, uniform_inv_flow_size, 1.0f / width, 1.0f / height);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+	glEnable(GL_DEPTH_TEST);
+	glDepthMask(GL_TRUE);
+	glDepthFunc(GL_LESS);  // We store the difference between I_0 and I_1, where less difference is good. (Default 1.0 is effectively +inf, which always loses.)
+
+	fbos.render_to(depth_rb, flow_tex);
+
+	// Evidently NVIDIA doesn't use fast clears for glClearTexImage, so clear now that
+	// we've got it bound.
+	glClearColor(1000.0f, 1000.0f, 0.0f, 1.0f);  // Invalid flow.
+	glClearDepth(1.0f);  // Effectively infinity.
+	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+	glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, width * height * 2);
+
+	glDisable(GL_DEPTH_TEST);
+}
+
+// Compile and link the hole-filling pass, which fills in pixels the splat
+// left untouched (still at the "invalid flow" clear value).
+HoleFill::HoleFill()
+{
+	fill_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);
+	fill_fs_obj = compile_shader(read_file("hole_fill.frag", _binary_hole_fill_frag_data, _binary_hole_fill_frag_size), GL_FRAGMENT_SHADER);
+	fill_program = link_program(fill_vs_obj, fill_fs_obj);
+
+	uniform_tex = glGetUniformLocation(fill_program, "tex");
+	uniform_z = glGetUniformLocation(fill_program, "z");
+	uniform_sample_offset = glGetUniformLocation(fill_program, "sample_offset");
+}
+
+// Fill holes in <flow_tex> from four directions in turn (left, right, up,
+// down), each as a series of doubling shifts. The left/right/up results are
+// snapshotted into temp_tex[0..2]; the final "down" result stays in flow_tex.
+// HoleBlend later combines the four candidates. Each direction writes at a
+// slightly lower z so it can overwrite the previous direction's fills, while
+// the GL_LESS depth test still protects pixels that were never holes.
+void HoleFill::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
+{
+	glUseProgram(fill_program);
+
+	bind_sampler(fill_program, uniform_tex, 0, flow_tex, nearest_sampler);
+
+	glProgramUniform1f(fill_program, uniform_z, 1.0f - 1.0f / 1024.0f);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+	glEnable(GL_DEPTH_TEST);
+	glDepthFunc(GL_LESS);  // Only update the values > 0.999f (ie., only invalid pixels).
+
+	fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
+
+	// Fill holes from the left, by shifting 1, 2, 4, 8, etc. pixels to the right.
+	for (int offs = 1; offs < width; offs *= 2) {
+		glProgramUniform2f(fill_program, uniform_sample_offset, -offs / float(width), 0.0f);
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		glTextureBarrier();
+	}
+	glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[0], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+	// Similar to the right; adjust Z a bit down, so that we re-fill the pixels that
+	// were overwritten in the last algorithm.
+	glProgramUniform1f(fill_program, uniform_z, 1.0f - 2.0f / 1024.0f);
+	for (int offs = 1; offs < width; offs *= 2) {
+		glProgramUniform2f(fill_program, uniform_sample_offset, offs / float(width), 0.0f);
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		glTextureBarrier();
+	}
+	glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[1], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+	// Up.
+	glProgramUniform1f(fill_program, uniform_z, 1.0f - 3.0f / 1024.0f);
+	for (int offs = 1; offs < height; offs *= 2) {
+		glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, -offs / float(height));
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		glTextureBarrier();
+	}
+	glCopyImageSubData(flow_tex, GL_TEXTURE_2D, 0, 0, 0, 0, temp_tex[2], GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
+
+	// Down.
+	glProgramUniform1f(fill_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+	for (int offs = 1; offs < height; offs *= 2) {
+		glProgramUniform2f(fill_program, uniform_sample_offset, 0.0f, offs / float(height));
+		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+		glTextureBarrier();
+	}
+
+	glDisable(GL_DEPTH_TEST);
+}
+
+// Compile and link the hole-blend pass, which merges the four directional
+// fill results produced by HoleFill.
+HoleBlend::HoleBlend()
+{
+	blend_vs_obj = compile_shader(read_file("hole_fill.vert", _binary_hole_fill_vert_data, _binary_hole_fill_vert_size), GL_VERTEX_SHADER);  // Reuse the vertex shader from the fill.
+	blend_fs_obj = compile_shader(read_file("hole_blend.frag", _binary_hole_blend_frag_data, _binary_hole_blend_frag_size), GL_FRAGMENT_SHADER);
+	blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+	uniform_left_tex = glGetUniformLocation(blend_program, "left_tex");
+	uniform_right_tex = glGetUniformLocation(blend_program, "right_tex");
+	uniform_up_tex = glGetUniformLocation(blend_program, "up_tex");
+	uniform_down_tex = glGetUniformLocation(blend_program, "down_tex");
+	uniform_z = glGetUniformLocation(blend_program, "z");
+	uniform_sample_offset = glGetUniformLocation(blend_program, "sample_offset");
+}
+
+// Blend the four directional fill candidates back into <flow_tex>:
+// temp_tex[0..2] hold the left/right/up fills and flow_tex itself holds the
+// down fill (see HoleFill::exec). z matches the last fill pass's depth, and
+// GL_LEQUAL makes sure only the filled (former-hole) pixels are rewritten.
+void HoleBlend::exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height)
+{
+	glUseProgram(blend_program);
+
+	bind_sampler(blend_program, uniform_left_tex, 0, temp_tex[0], nearest_sampler);
+	bind_sampler(blend_program, uniform_right_tex, 1, temp_tex[1], nearest_sampler);
+	bind_sampler(blend_program, uniform_up_tex, 2, temp_tex[2], nearest_sampler);
+	bind_sampler(blend_program, uniform_down_tex, 3, flow_tex, nearest_sampler);
+
+	glProgramUniform1f(blend_program, uniform_z, 1.0f - 4.0f / 1024.0f);
+	glProgramUniform2f(blend_program, uniform_sample_offset, 0.0f, 0.0f);
+
+	glViewport(0, 0, width, height);
+	glDisable(GL_BLEND);
+	glEnable(GL_DEPTH_TEST);
+	glDepthFunc(GL_LEQUAL);  // Skip over all of the pixels that were never holes to begin with.
+
+	fbos.render_to(depth_rb, flow_tex);  // NOTE: Reading and writing to the same texture.
+
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+	glDisable(GL_DEPTH_TEST);
+}
+
+// Compile and link the final blend pass. When <split_ycbcr_output> is set,
+// SPLIT_YCBCR_OUTPUT is #define'd into the fragment shader (just after the
+// #version line, which must come first in GLSL) so it emits separate Y and
+// CbCr outputs instead of a single RGBA one.
+Blend::Blend(bool split_ycbcr_output)
+	: split_ycbcr_output(split_ycbcr_output)
+{
+	string frag_shader = read_file("blend.frag", _binary_blend_frag_data, _binary_blend_frag_size);
+	if (split_ycbcr_output) {
+		// Insert after the first #version line.
+		size_t offset = frag_shader.find('\n');
+		assert(offset != string::npos);
+		frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1);
+	}
+
+	blend_vs_obj = compile_shader(read_file("vs.vert", _binary_vs_vert_data, _binary_vs_vert_size), GL_VERTEX_SHADER);
+	blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
+	blend_program = link_program(blend_vs_obj, blend_fs_obj);
+
+	uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
+	uniform_flow_tex = glGetUniformLocation(blend_program, "flow_tex");
+	uniform_alpha = glGetUniformLocation(blend_program, "alpha");
+	uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
+}
+
+// Blend the two input frames at interpolation point <alpha> using the
+// (possibly upsampled, hence linear-sampled) flow. In split-Y'CbCr mode the
+// two outputs go through fbos_split; otherwise output2_tex is unused.
+void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
+{
+	glUseProgram(blend_program);
+	bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
+	bind_sampler(blend_program, uniform_flow_tex, 1, flow_tex, linear_sampler);  // May be upsampled.
+	glProgramUniform1f(blend_program, uniform_alpha, alpha);
+
+	glViewport(0, 0, level_width, level_height);
+	if (split_ycbcr_output) {
+		fbos_split.render_to(output_tex, output2_tex);
+	} else {
+		fbos.render_to(output_tex);
+	}
+	glDisable(GL_BLEND);  // A bit ironic, perhaps.
+	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
+
+// Set up state shared between all interpolation passes: a VBO/VAO holding
+// a single full-screen quad (drawn as a two-triangle strip), plus the
+// per-pass member objects (splat, hole fill/blend, blend).
+Interpolate::Interpolate(const OperatingPoint &op, bool split_ycbcr_output)
+	: flow_level(op.finest_level),
+	  split_ycbcr_output(split_ycbcr_output),
+	  splat(op),
+	  blend(split_ycbcr_output) {
+	// Set up the vertex data that will be shared between all passes.
+	float vertices[] = {
+		0.0f, 1.0f,
+		0.0f, 0.0f,
+		1.0f, 1.0f,
+		1.0f, 0.0f,
+	};
+	glCreateBuffers(1, &vertex_vbo);
+	glNamedBufferData(vertex_vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+	glCreateVertexArrays(1, &vao);
+	glBindVertexArray(vao);
+	glBindBuffer(GL_ARRAY_BUFFER, vertex_vbo);
+
+	GLint position_attrib = 0; // Hard-coded in every vertex shader.
+	glEnableVertexArrayAttrib(vao, position_attrib);
+	// Non-DSA call; relies on the VAO and the GL_ARRAY_BUFFER binding set up just above.
+	glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+}
+
+// Interpolate a frame at time alpha (0..1) between the two layers of
+// image_tex, given the bidirectional flow in bidirectional_flow_tex.
+// Pipeline: splat the flow forward to time alpha -> fill/blend holes ->
+// blend the two warped frames. Returns one texture (RGBA) or two
+// (Y, CbCr) depending on split_ycbcr_output; the caller must free the
+// returned texture(s) with release_texture().
+pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
+{
+	GPUTimers timers;
+
+	ScopedTimer total_timer("Interpolate", &timers);
+
+	glBindVertexArray(vao);
+	glDisable(GL_DITHER);
+
+	// Pick out the right level to test splatting results on.
+	GLuint tex_view;
+	glGenTextures(1, &tex_view);
+	glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
+
+	// The flow is computed (and splatted) at a coarser level than the full image.
+	int flow_width = width >> flow_level;
+	int flow_height = height >> flow_level;
+
+	GLuint flow_tex = pool.get_texture(GL_RG16F, flow_width, flow_height);
+	GLuint depth_rb = pool.get_renderbuffer(GL_DEPTH_COMPONENT16, flow_width, flow_height); // Used for ranking flows.
+
+	{
+		ScopedTimer timer("Splat", &total_timer);
+		splat.exec(tex_view, bidirectional_flow_tex, flow_tex, depth_rb, flow_width, flow_height, alpha);
+	}
+	glDeleteTextures(1, &tex_view);
+
+	// Scratch textures for the four-direction hole fill (see HoleFill).
+	GLuint temp_tex[3];
+	temp_tex[0] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+	temp_tex[1] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+	temp_tex[2] = pool.get_texture(GL_RG16F, flow_width, flow_height);
+
+	{
+		ScopedTimer timer("Fill holes", &total_timer);
+		hole_fill.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+		hole_blend.exec(flow_tex, depth_rb, temp_tex, flow_width, flow_height);
+	}
+
+	pool.release_texture(temp_tex[0]);
+	pool.release_texture(temp_tex[1]);
+	pool.release_texture(temp_tex[2]);
+	pool.release_renderbuffer(depth_rb);
+
+	// The final blend happens at full resolution.
+	GLuint output_tex, output2_tex = 0;
+	if (split_ycbcr_output) {
+		output_tex = pool.get_texture(GL_R8, width, height);
+		output2_tex = pool.get_texture(GL_RG8, width, height);
+		{
+			ScopedTimer timer("Blend", &total_timer);
+			blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
+		}
+	} else {
+		output_tex = pool.get_texture(GL_RGBA8, width, height);
+		{
+			ScopedTimer timer("Blend", &total_timer);
+			blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha);
+		}
+	}
+	pool.release_texture(flow_tex);
+	total_timer.end();
+	if (!in_warmup) {
+		timers.print();
+	}
+
+	return make_pair(output_tex, output2_tex);
+}
+
+// Hand out a texture with the given format/size from the pool, creating a
+// new one only if no free match exists. num_layers == 0 means a plain 2D
+// texture; any other value means a 2D array texture with that many layers.
+// Thread-safe; the caller must return the texture with release_texture().
+GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
+{
+	{
+		lock_guard<mutex> lock(mu);
+		for (Texture &tex : textures) {
+			if (!tex.in_use && !tex.is_renderbuffer && tex.format == format &&
+			    tex.width == width && tex.height == height && tex.num_layers == num_layers) {
+				tex.in_use = true;
+				return tex.tex_num;
+			}
+		}
+	}
+
+	// No match; create a new texture. This happens outside the lock, so two
+	// threads may race and both create one — harmless, the pool just grows.
+	Texture tex;
+	if (num_layers == 0) {
+		glCreateTextures(GL_TEXTURE_2D, 1, &tex.tex_num);
+		glTextureStorage2D(tex.tex_num, 1, format, width, height);
+	} else {
+		glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex.tex_num);
+		glTextureStorage3D(tex.tex_num, 1, format, width, height, num_layers);
+	}
+	tex.format = format;
+	tex.width = width;
+	tex.height = height;
+	tex.num_layers = num_layers;
+	tex.in_use = true;
+	tex.is_renderbuffer = false;
+	{
+		lock_guard<mutex> lock(mu);
+		textures.push_back(tex);
+	}
+	return tex.tex_num;
+}
+
+// Same as get_texture(), but for renderbuffers (used e.g. for depth).
+// Thread-safe; return the renderbuffer with release_renderbuffer().
+GLuint TexturePool::get_renderbuffer(GLenum format, GLuint width, GLuint height)
+{
+	{
+		lock_guard<mutex> lock(mu);
+		for (Texture &tex : textures) {
+			if (!tex.in_use && tex.is_renderbuffer && tex.format == format &&
+			    tex.width == width && tex.height == height) {
+				tex.in_use = true;
+				return tex.tex_num;
+			}
+		}
+	}
+
+	// No match; create a new renderbuffer (outside the lock, like get_texture()).
+	Texture tex;
+	glCreateRenderbuffers(1, &tex.tex_num);
+	glNamedRenderbufferStorage(tex.tex_num, format, width, height);
+
+	tex.format = format;
+	tex.width = width;
+	tex.height = height;
+	tex.in_use = true;
+	tex.is_renderbuffer = true;
+	{
+		lock_guard<mutex> lock(mu);
+		textures.push_back(tex);
+	}
+	return tex.tex_num;
+}
+
+// Mark a texture handed out by get_texture() as free again.
+// Asserts if the handle is unknown or was not in use.
+void TexturePool::release_texture(GLuint tex_num)
+{
+	lock_guard<mutex> lock(mu);
+	for (Texture &tex : textures) {
+		if (!tex.is_renderbuffer && tex.tex_num == tex_num) {
+			assert(tex.in_use);
+			tex.in_use = false;
+			return;
+		}
+	}
+	assert(false);
+}
+
+// Mark a renderbuffer handed out by get_renderbuffer() as free again.
+// NOTE(review): unlike release_texture(), an unknown handle is silently
+// tolerated here (the assert is commented out) — presumably deliberate,
+// but worth confirming why the two are inconsistent.
+void TexturePool::release_renderbuffer(GLuint tex_num)
+{
+	lock_guard<mutex> lock(mu);
+	for (Texture &tex : textures) {
+		if (tex.is_renderbuffer && tex.tex_num == tex_num) {
+			assert(tex.in_use);
+			tex.in_use = false;
+			return;
+		}
+	}
+	//assert(false);
+}
--- /dev/null
+#ifndef _FLOW_H
+#define _FLOW_H 1
+
+// Code for computing optical flow between two images, and using it to interpolate
+// in-between frames. The main user interface is the DISComputeFlow and Interpolate
+// classes (also GrayscaleConversion can be useful).
+
+#include <array>
+#include <epoxy/gl.h>
+#include <map>
+#include <mutex>
+#include <stdint.h>
+#include <utility>
+#include <vector>
+
+class ScopedTimer;
+
+// Predefined operating points from the paper.
+struct OperatingPoint {
+	unsigned coarsest_level; // TODO: Adjust dynamically based on the resolution?
+	unsigned finest_level;
+	unsigned search_iterations; // Halved from the paper.
+	unsigned patch_size_pixels;
+	float patch_overlap_ratio;
+	bool variational_refinement;
+
+	// Not part of the original paper; used for interpolation.
+	// NOTE: Values much larger than 1.0 seems to trigger Haswell's “PMA stall”;
+	// the problem is not present on Broadwell and higher (there's a mitigation
+	// in the hardware, but Mesa doesn't enable it at the time of writing).
+	// Since we have hole filling, the holes from 1.0 are not critical,
+	// but larger values seem to do better than hole filling for large
+	// motion, blurs etc. since we have more candidates.
+	float splat_size;
+};
+
+// Operating point 1 (600 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point1 = {
+	5, // Coarsest level.
+	3, // Finest level.
+	8, // Search iterations.
+	8, // Patch size (pixels).
+	0.30f, // Overlap ratio.
+	false, // Variational refinement.
+	1.0f // Splat size (pixels).
+};
+
+// Operating point 2 (300 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point2 = {
+	5, // Coarsest level.
+	3, // Finest level.
+	6, // Search iterations.
+	8, // Patch size (pixels).
+	0.40f, // Overlap ratio.
+	true, // Variational refinement.
+	1.0f // Splat size (pixels).
+};
+
+// Operating point 3 (10 Hz on CPU, excluding preprocessing).
+// This is the only one that has been thoroughly tested.
+static constexpr OperatingPoint operating_point3 = {
+	5, // Coarsest level.
+	1, // Finest level.
+	8, // Search iterations.
+	12, // Patch size (pixels).
+	0.75f, // Overlap ratio.
+	true, // Variational refinement.
+	4.0f // Splat size (pixels).
+};
+
+// Operating point 4 (0.5 Hz on CPU, excluding preprocessing).
+static constexpr OperatingPoint operating_point4 = {
+	5, // Coarsest level.
+	0, // Finest level.
+	128, // Search iterations.
+	12, // Patch size (pixels).
+	0.75f, // Overlap ratio.
+	true, // Variational refinement.
+	8.0f // Splat size (pixels).
+};
+
+// Number of pyramid levels for a texture of the given size.
+int find_num_levels(int width, int height);
+
+// A class that caches FBOs that render to a given set of textures.
+// It never frees anything, so it is only suitable for rendering to
+// the same (small) set of textures over and over again.
+template<size_t num_elements>
+class PersistentFBOSet {
+public:
+	// Bind (creating and caching on first use) an FBO whose color
+	// attachments are exactly the given textures, in order.
+	void render_to(const std::array<GLuint, num_elements> &textures);
+
+	// Convenience wrappers.
+	void render_to(GLuint texture0) {
+		render_to({{texture0}});
+	}
+
+	void render_to(GLuint texture0, GLuint texture1) {
+		render_to({{texture0, texture1}});
+	}
+
+	void render_to(GLuint texture0, GLuint texture1, GLuint texture2) {
+		render_to({{texture0, texture1, texture2}});
+	}
+
+	void render_to(GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
+		render_to({{texture0, texture1, texture2, texture3}});
+	}
+
+private:
+	// TODO: Delete these on destruction.
+	// Keyed on the exact set of color attachments.
+	std::map<std::array<GLuint, num_elements>, GLuint> fbos;
+};
+
+// Same, but with a depth texture.
+template<size_t num_elements>
+class PersistentFBOSetWithDepth {
+public:
+	// As PersistentFBOSet::render_to(), but also attaches depth_rb as
+	// the depth renderbuffer.
+	void render_to(GLuint depth_rb, const std::array<GLuint, num_elements> &textures);
+
+	// Convenience wrappers.
+	void render_to(GLuint depth_rb, GLuint texture0) {
+		render_to(depth_rb, {{texture0}});
+	}
+
+	void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1) {
+		render_to(depth_rb, {{texture0, texture1}});
+	}
+
+	void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2) {
+		render_to(depth_rb, {{texture0, texture1, texture2}});
+	}
+
+	void render_to(GLuint depth_rb, GLuint texture0, GLuint texture1, GLuint texture2, GLuint texture3) {
+		render_to(depth_rb, {{texture0, texture1, texture2, texture3}});
+	}
+
+private:
+	// TODO: Delete these on destruction.
+	// Keyed on (depth renderbuffer, color attachments).
+	std::map<std::pair<GLuint, std::array<GLuint, num_elements>>, GLuint> fbos;
+};
+
+// Convert RGB to grayscale, using Rec. 709 coefficients.
+// Operates on all num_layers layers of the input array texture in one call.
+class GrayscaleConversion {
+public:
+	GrayscaleConversion();
+	void exec(GLint tex, GLint gray_tex, int width, int height, int num_layers);
+
+private:
+	PersistentFBOSet<1> fbos;
+	GLuint gray_vs_obj;
+	GLuint gray_fs_obj;
+	GLuint gray_program;
+	GLuint gray_vao;
+
+	GLuint uniform_tex;
+};
+
+// Compute gradients in every point, used for the motion search.
+// The DIS paper doesn't actually mention how these are computed,
+// but seemingly, a 3x3 Sobel operator is used here (at least in
+// later versions of the code), while a [1 -8 0 8 -1] kernel is
+// used for all the derivatives in the variational refinement part
+// (which borrows code from DeepFlow). This is inconsistent,
+// but I guess we're better off with staying with the original
+// decisions until we actually know having different ones would be better.
+class Sobel {
+public:
+	Sobel();
+	void exec(GLint tex_view, GLint grad_tex, int level_width, int level_height, int num_layers);
+
+private:
+	PersistentFBOSet<1> fbos;
+	GLuint sobel_vs_obj;
+	GLuint sobel_fs_obj;
+	GLuint sobel_program;
+
+	GLuint uniform_tex;
+};
+
+// Motion search to find the initial flow. See motion_search.frag for documentation.
+class MotionSearch {
+public:
+	MotionSearch(const OperatingPoint &op);
+	void exec(GLuint tex_view, GLuint grad_tex, GLuint flow_tex, GLuint flow_out_tex, int level_width, int level_height, int prev_level_width, int prev_level_height, int width_patches, int height_patches, int num_layers);
+
+private:
+	const OperatingPoint op;
+	PersistentFBOSet<1> fbos;
+
+	GLuint motion_vs_obj;
+	GLuint motion_fs_obj;
+	GLuint motion_search_program;
+
+	GLuint uniform_inv_image_size, uniform_inv_prev_level_size, uniform_out_flow_size;
+	GLuint uniform_image_tex, uniform_grad_tex, uniform_flow_tex;
+	GLuint uniform_patch_size, uniform_num_iterations;
+};
+
+// Do “densification”, ie., upsampling of the flow patches to the flow field
+// (the same size as the image at this level). We draw one quad per patch
+// over its entire covered area (using instancing in the vertex shader),
+// and then weight the contributions in the pixel shader by post-warp difference.
+// This is equation (3) in the paper.
+//
+// We accumulate the flow vectors in the R/G channels (for u/v) and the total
+// weight in the B channel. Dividing R and G by B gives the normalized values.
+class Densify {
+public:
+	Densify(const OperatingPoint &op);
+	void exec(GLuint tex_view, GLuint flow_tex, GLuint dense_flow_tex, int level_width, int level_height, int width_patches, int height_patches, int num_layers);
+
+private:
+	OperatingPoint op;
+	PersistentFBOSet<1> fbos;
+
+	GLuint densify_vs_obj;
+	GLuint densify_fs_obj;
+	GLuint densify_program;
+
+	GLuint uniform_patch_size;
+	GLuint uniform_image_tex, uniform_flow_tex;
+};
+
+// Warp I_1 to I_w, and then compute the mean (I) and difference (I_t) of
+// I_0 and I_w. The prewarping is what enables us to solve the variational
+// flow for du,dv instead of u,v.
+//
+// Also calculates the normalized flow, ie. divides by z (this is needed because
+// Densify works by additive blending) and multiplies by the image size.
+//
+// See variational_refinement.txt for more information.
+class Prewarp {
+public:
+	Prewarp();
+	void exec(GLuint tex_view, GLuint flow_tex, GLuint normalized_flow_tex, GLuint I_tex, GLuint I_t_tex, int level_width, int level_height, int num_layers);
+
+private:
+	PersistentFBOSet<3> fbos;
+
+	GLuint prewarp_vs_obj;
+	GLuint prewarp_fs_obj;
+	GLuint prewarp_program;
+
+	GLuint uniform_image_tex, uniform_flow_tex;
+};
+
+// From I, calculate the partial derivatives I_x and I_y. We use a four-tap
+// central difference filter, since apparently, that's tradition (I haven't
+// measured quality versus a more normal 0.5 (I[x+1] - I[x-1]).)
+// The coefficients come from
+//
+//   https://en.wikipedia.org/wiki/Finite_difference_coefficient
+//
+// Also computes β_0, since it depends only on I_x and I_y.
+class Derivatives {
+public:
+	Derivatives();
+	void exec(GLuint input_tex, GLuint I_x_y_tex, GLuint beta_0_tex, int level_width, int level_height, int num_layers);
+
+private:
+	PersistentFBOSet<2> fbos;
+
+	GLuint derivatives_vs_obj;
+	GLuint derivatives_fs_obj;
+	GLuint derivatives_program;
+
+	GLuint uniform_tex;
+};
+
+// Calculate the diffusivity for each pixels, g(x,y). Smoothness (s) will
+// be calculated in the shaders on-the-fly by sampling in-between two
+// neighboring g(x,y) pixels, plus a border tweak to make sure we get
+// zero smoothness at the border.
+//
+// See variational_refinement.txt for more information.
+class ComputeDiffusivity {
+public:
+	ComputeDiffusivity();
+	void exec(GLuint flow_tex, GLuint diff_flow_tex, GLuint diffusivity_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers);
+
+private:
+	PersistentFBOSet<1> fbos;
+
+	GLuint diffusivity_vs_obj;
+	GLuint diffusivity_fs_obj;
+	GLuint diffusivity_program;
+
+	GLuint uniform_flow_tex, uniform_diff_flow_tex;
+	GLuint uniform_alpha, uniform_zero_diff_flow;
+};
+
+// Set up the equations set (two equations in two unknowns, per pixel).
+// We store five floats; the three non-redundant elements of the 2x2 matrix (A)
+// as 32-bit floats, and the two elements on the right-hand side (b) as 16-bit
+// floats. (Actually, we store the inverse of the diagonal elements, because
+// we only ever need to divide by them.) This fits into four u32 values;
+// R, G, B for the matrix (the last element is symmetric) and A for the two b values.
+// All the values of the energy term (E_I, E_G, E_S), except the smoothness
+// terms that depend on other pixels, are calculated in one pass.
+//
+// The equation set is split in two; one contains only the pixels needed for
+// the red pass, and one only for the black pass (see sor.frag). This reduces
+// the amount of data the SOR shader has to pull in, at the cost of some
+// complexity when the equation texture ends up with half the size and we need
+// to adjust texture coordinates. The contraction is done along the horizontal
+// axis, so that on even rows (0, 2, 4, ...), the “red” texture will contain
+// pixels 0, 2, 4, 6, etc., and on odd rows 1, 3, 5, etc..
+//
+// See variational_refinement.txt for more information about the actual
+// equations in use.
+class SetupEquations {
+public:
+	SetupEquations();
+	void exec(GLuint I_x_y_tex, GLuint I_t_tex, GLuint diff_flow_tex, GLuint flow_tex, GLuint beta_0_tex, GLuint diffusivity_tex, GLuint equation_red_tex, GLuint equation_black_tex, int level_width, int level_height, bool zero_diff_flow, int num_layers);
+
+private:
+	PersistentFBOSet<2> fbos;
+
+	GLuint equations_vs_obj;
+	GLuint equations_fs_obj;
+	GLuint equations_program;
+
+	GLuint uniform_I_x_y_tex, uniform_I_t_tex;
+	GLuint uniform_diff_flow_tex, uniform_base_flow_tex;
+	GLuint uniform_beta_0_tex;
+	GLuint uniform_diffusivity_tex;
+	GLuint uniform_gamma, uniform_delta, uniform_zero_diff_flow;
+};
+
+// Actually solve the equation sets made by SetupEquations, by means of
+// successive over-relaxation (SOR).
+//
+// See variational_refinement.txt for more information.
+class SOR {
+public:
+	SOR();
+	void exec(GLuint diff_flow_tex, GLuint equation_red_tex, GLuint equation_black_tex, GLuint diffusivity_tex, int level_width, int level_height, int num_iterations, bool zero_diff_flow, int num_layers, ScopedTimer *sor_timer);
+
+private:
+	PersistentFBOSet<1> fbos;
+
+	GLuint sor_vs_obj;
+	GLuint sor_fs_obj;
+	GLuint sor_program;
+
+	GLuint uniform_diff_flow_tex;
+	GLuint uniform_equation_red_tex, uniform_equation_black_tex;
+	GLuint uniform_diffusivity_tex;
+	GLuint uniform_phase, uniform_num_nonzero_phases;
+};
+
+// Simply add the differential flow found by the variational refinement to the base flow.
+// The output is in base_flow_tex; we don't need to make a new texture.
+class AddBaseFlow {
+public:
+	AddBaseFlow();
+	void exec(GLuint base_flow_tex, GLuint diff_flow_tex, int level_width, int level_height, int num_layers);
+
+private:
+	PersistentFBOSet<1> fbos;
+
+	GLuint add_flow_vs_obj;
+	GLuint add_flow_fs_obj;
+	GLuint add_flow_program;
+
+	GLuint uniform_diff_flow_tex;
+};
+
+// Take a copy of the flow, bilinearly interpolated and scaled up.
+class ResizeFlow {
+public:
+	ResizeFlow();
+	void exec(GLuint in_tex, GLuint out_tex, int input_width, int input_height, int output_width, int output_height, int num_layers);
+
+private:
+	PersistentFBOSet<1> fbos;
+
+	GLuint resize_flow_vs_obj;
+	GLuint resize_flow_fs_obj;
+	GLuint resize_flow_program;
+
+	GLuint uniform_flow_tex;
+	GLuint uniform_scale_factor;
+};
+
+// A pool of reusable textures and renderbuffers, keyed on format and size.
+// All operations, except construction and destruction, are thread-safe.
+class TexturePool {
+public:
+	GLuint get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers = 0);
+	void release_texture(GLuint tex_num);
+	GLuint get_renderbuffer(GLenum format, GLuint width, GLuint height);
+	void release_renderbuffer(GLuint tex_num);
+
+private:
+	struct Texture {
+		GLuint tex_num;
+		GLenum format;
+		GLuint width, height, num_layers;
+		bool in_use = false;
+		bool is_renderbuffer = false;
+	};
+	std::mutex mu;
+	std::vector<Texture> textures; // Under mu.
+};
+
+// Computes optical flow between the two layers of a grayscale array texture,
+// using the DIS (Dense Inverse Search) algorithm; ties all of the per-pass
+// classes above together into one pyramid walk.
+class DISComputeFlow {
+public:
+	DISComputeFlow(int width, int height, const OperatingPoint &op);
+
+	enum FlowDirection {
+		FORWARD,
+		FORWARD_AND_BACKWARD
+	};
+	enum ResizeStrategy {
+		DO_NOT_RESIZE_FLOW,
+		RESIZE_FLOW_TO_FULL_SIZE
+	};
+
+	// The texture must have two layers (first and second frame).
+	// Returns a texture that must be released with release_texture()
+	// after use.
+	GLuint exec(GLuint tex, FlowDirection flow_direction, ResizeStrategy resize_strategy);
+
+	void release_texture(GLuint tex)
+	{
+		pool.release_texture(tex);
+	}
+
+private:
+	int width, height;
+	GLuint initial_flow_tex;
+	GLuint vertex_vbo, vao;
+	TexturePool pool;
+	const OperatingPoint op;
+
+	// The various passes.
+	Sobel sobel;
+	MotionSearch motion_search;
+	Densify densify;
+	Prewarp prewarp;
+	Derivatives derivatives;
+	ComputeDiffusivity compute_diffusivity;
+	SetupEquations setup_equations;
+	SOR sor;
+	AddBaseFlow add_base_flow;
+	ResizeFlow resize_flow;
+};
+
+// Forward-warp the flow half-way (or rather, by alpha). A non-zero “splatting”
+// radius fills most of the holes.
+class Splat {
+public:
+	Splat(const OperatingPoint &op);
+
+	// alpha is the time of the interpolated frame (0..1).
+	void exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
+
+private:
+	const OperatingPoint op;
+	PersistentFBOSetWithDepth<1> fbos;
+
+	GLuint splat_vs_obj;
+	GLuint splat_fs_obj;
+	GLuint splat_program;
+
+	GLuint uniform_splat_size, uniform_alpha;
+	GLuint uniform_gray_tex, uniform_flow_tex;
+	GLuint uniform_inv_flow_size;
+};
+
+// Doing good and fast hole-filling on a GPU is nontrivial. We choose an option
+// that's fairly simple (given that most holes are really small) and also hopefully
+// cheap should the holes not be so small. Conceptually, we look for the first
+// non-hole to the left of us (ie., shoot a ray until we hit something), then
+// the first non-hole to the right of us, then up and down, and then average them
+// all together. It's going to create “stars” if the holes are big, but OK, that's
+// a tradeoff.
+//
+// Our implementation here is efficient assuming that the hierarchical Z-buffer is
+// on even for shaders that do discard (this typically kills early Z, but hopefully
+// not hierarchical Z); we set up Z so that only holes are written to, which means
+// that as soon as a hole is filled, the rasterizer should just skip it. Most of the
+// fullscreen quads should just be discarded outright, really.
+class HoleFill {
+public:
+	HoleFill();
+
+	// Output will be in flow_tex, temp_tex[0, 1, 2], representing the filling
+	// from the down, left, right and up, respectively. Use HoleBlend to merge
+	// them into one.
+	void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height);
+
+private:
+	PersistentFBOSetWithDepth<1> fbos;
+
+	GLuint fill_vs_obj;
+	GLuint fill_fs_obj;
+	GLuint fill_program;
+
+	GLuint uniform_tex;
+	GLuint uniform_z, uniform_sample_offset;
+};
+
+// Blend the four directions from HoleFill into one pixel, so that single-pixel
+// holes become the average of their four neighbors.
+class HoleBlend {
+public:
+	HoleBlend();
+
+	void exec(GLuint flow_tex, GLuint depth_rb, GLuint temp_tex[3], int width, int height);
+
+private:
+	PersistentFBOSetWithDepth<1> fbos;
+
+	GLuint blend_vs_obj;
+	GLuint blend_fs_obj;
+	GLuint blend_program;
+
+	GLuint uniform_left_tex, uniform_right_tex, uniform_up_tex, uniform_down_tex;
+	GLuint uniform_z, uniform_sample_offset;
+};
+
+// The final pass: blend the two input frames along the interpolated flow,
+// with occlusion reasoning (see blend.frag).
+class Blend {
+public:
+	Blend(bool split_ycbcr_output);
+
+	// output2_tex is only used if split_ycbcr_output was true.
+	void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int width, int height, float alpha);
+
+private:
+	bool split_ycbcr_output;
+	PersistentFBOSet<1> fbos;
+	PersistentFBOSet<2> fbos_split;
+	GLuint blend_vs_obj;
+	GLuint blend_fs_obj;
+	GLuint blend_program;
+
+	GLuint uniform_image_tex, uniform_flow_tex;
+	GLuint uniform_alpha, uniform_flow_consistency_tolerance;
+};
+
+// Interpolate a frame between two existing ones, given their bidirectional
+// optical flow (as computed by DISComputeFlow).
+class Interpolate {
+public:
+	Interpolate(const OperatingPoint &op, bool split_ycbcr_output);
+
+	// Returns a texture (or two, if split_ycbcr_output is true) that must
+	// be released with release_texture() after use. image_tex must be a
+	// two-layer RGBA8 texture with mipmaps (unless flow_level == 0).
+	std::pair<GLuint, GLuint> exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
+
+	void release_texture(GLuint tex)
+	{
+		pool.release_texture(tex);
+	}
+
+private:
+	int flow_level;
+	GLuint vertex_vbo, vao;
+	TexturePool pool;
+	const bool split_ycbcr_output;
+
+	Splat splat;
+	HoleFill hole_fill;
+	HoleBlend hole_blend;
+	Blend blend;
+};
+
+#endif // !defined(_FLOW_H)
--- /dev/null
+#define NO_SDL_GLEXT 1
+
+#include "flow.h"
+#include "gpu_timers.h"
+#include "util.h"
+
+#include <SDL2/SDL.h>
+#include <SDL2/SDL_error.h>
+#include <SDL2/SDL_events.h>
+#include <SDL2/SDL_image.h>
+#include <SDL2/SDL_keyboard.h>
+#include <SDL2/SDL_mouse.h>
+#include <SDL2/SDL_video.h>
+#include <algorithm>
+#include <assert.h>
+#include <deque>
+#include <epoxy/gl.h>
+#include <getopt.h>
+#include <map>
+#include <memory>
+#include <stack>
+#include <stdio.h>
+#include <unistd.h>
+#include <vector>
+
+#define BUFFER_OFFSET(i) ((char *)nullptr + (i))
+
+using namespace std;
+
+SDL_Window *window;
+
+bool enable_warmup = false;
+bool enable_variational_refinement = true; // Just for debugging.
+bool enable_interpolation = false;
+
+extern float vr_alpha, vr_delta, vr_gamma;
+
+// Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
+struct ReadInProgress {
+	GLuint pbo;
+	string filename0, filename1;
+	string flow_filename, ppm_filename; // Either may be empty for no write.
+};
+// PBOs currently not used by any readback; see schedule_read()/finish_one_read().
+stack<GLuint> spare_pbos;
+// Outstanding readbacks, oldest first.
+deque<ReadInProgress> reads_in_progress;
+
+enum MipmapPolicy {
+	WITHOUT_MIPMAPS,
+	WITH_MIPMAPS
+};
+
+// Load an image file with SDL_image, convert it to RGBA8 and upload it
+// into a newly created GL_TEXTURE_2D (bottom-left origin, to match
+// OpenGL's convention). With WITH_MIPMAPS, a full mipmap chain is
+// allocated and generated. Exits the program on load failure.
+GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_ret, MipmapPolicy mipmaps)
+{
+	SDL_Surface *surf = IMG_Load(filename);
+	if (surf == nullptr) {
+		fprintf(stderr, "IMG_Load(%s): %s\n", filename, IMG_GetError());
+		exit(1);
+	}
+
+	// For whatever reason, SDL doesn't support converting to YUV surfaces
+	// nor grayscale, so we'll do it ourselves.
+	SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
+	if (rgb_surf == nullptr) {
+		fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
+		exit(1);
+	}
+
+	SDL_FreeSurface(surf);
+
+	unsigned width = rgb_surf->w, height = rgb_surf->h;
+	const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
+	unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
+
+	// Copy the RGBA rows, flipping vertically to convert to bottom-left origin.
+	// (All four channels are kept; no color conversion happens here.)
+	for (unsigned y = 0; y < height; ++y) {
+		unsigned y2 = height - 1 - y;
+		memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
+	}
+	SDL_FreeSurface(rgb_surf);
+
+	int num_levels = (mipmaps == WITH_MIPMAPS) ? find_num_levels(width, height) : 1;
+
+	GLuint tex;
+	glCreateTextures(GL_TEXTURE_2D, 1, &tex);
+	glTextureStorage2D(tex, num_levels, GL_RGBA8, width, height);
+	glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
+
+	if (mipmaps == WITH_MIPMAPS) {
+		glGenerateTextureMipmap(tex);
+	}
+
+	*width_ret = width;
+	*height_ret = height;
+
+	return tex;
+}
+
+// OpenGL uses a bottom-left coordinate system, .flo files use a top-left coordinate system.
+// Negates the v (vertical) component of every flow vector in-place.
+void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
+{
+	for (unsigned i = 0; i < width * height; ++i) {
+		dense_flow[i * 2 + 1] = -dense_flow[i * 2 + 1];
+	}
+}
+
+// Not relevant for RGB. (No-op overload so that the templated readback
+// code can call flip_coordinate_system() unconditionally.)
+void flip_coordinate_system(uint8_t *dense_flow, unsigned width, unsigned height)
+{
+}
+
+// Write a dense flow field: a 4-byte magic, width and height as 32-bit
+// integers, then rows of (du, dv) float pairs, flipped back to top-left
+// (row 0 first) order.
+// NOTE(review): the standard Middlebury .flo magic is "PIEH" (the float
+// 202021.25 little-endian); this writes "FEIH" — confirm the intended
+// consumers of these files accept that tag. (fopen() is also unchecked.)
+void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+	FILE *flowfp = fopen(filename, "wb");
+	fprintf(flowfp, "FEIH");
+	fwrite(&width, 4, 1, flowfp);
+	fwrite(&height, 4, 1, flowfp);
+	for (unsigned y = 0; y < height; ++y) {
+		int yy = height - y - 1;
+		fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
+	}
+	fclose(flowfp);
+}
+
+// Not relevant for RGB. (Must never be called; exists only to satisfy the
+// templated readback code.)
+void write_flow(const char *filename, const uint8_t *dense_flow, unsigned width, unsigned height)
+{
+	assert(false);
+}
+
+// Visualize a dense flow field as a binary PPM image, mapping each (du, dv)
+// vector to a color via flow2rgb(). Rows are flipped back to top-left order.
+void write_ppm(const char *filename, const float *dense_flow, unsigned width, unsigned height)
+{
+	FILE *fp = fopen(filename, "wb");
+	fprintf(fp, "P6\n%d %d\n255\n", width, height);
+	for (unsigned y = 0; y < unsigned(height); ++y) {
+		int yy = height - y - 1;
+		for (unsigned x = 0; x < unsigned(width); ++x) {
+			float du = dense_flow[(yy * width + x) * 2 + 0];
+			float dv = dense_flow[(yy * width + x) * 2 + 1];
+
+			uint8_t r, g, b;
+			flow2rgb(du, dv, &r, &g, &b);
+			putc(r, fp);
+			putc(g, fp);
+			putc(b, fp);
+		}
+	}
+	fclose(fp);
+}
+
+// Write an RGBA8 buffer (bottom-left origin) as a binary PPM, dropping alpha.
+void write_ppm(const char *filename, const uint8_t *rgba, unsigned width, unsigned height)
+{
+	// One extra byte of slack: each memcpy below copies all four RGBA bytes
+	// at a 3-byte stride, so the last pixel spills one byte past width * 3.
+	unique_ptr<uint8_t[]> rgb_line(new uint8_t[width * 3 + 1]);
+
+	FILE *fp = fopen(filename, "wb");
+	fprintf(fp, "P6\n%d %d\n255\n", width, height);
+	for (unsigned y = 0; y < height; ++y) {
+		unsigned y2 = height - 1 - y;
+		for (size_t x = 0; x < width; ++x) {
+			memcpy(&rgb_line[x * 3], &rgba[(y2 * width + x) * 4], 4);
+		}
+		fwrite(rgb_line.get(), width * 3, 1, fp);
+	}
+	fclose(fp);
+}
+
+// Traits describing the pixel format of a readback; used to template
+// finish_one_read()/schedule_read() over flow data (RG float) ...
+struct FlowType {
+	using type = float;
+	static constexpr GLenum gl_format = GL_RG;
+	static constexpr GLenum gl_type = GL_FLOAT;
+	static constexpr int num_channels = 2;
+};
+
+// ... and interpolated RGBA frames.
+struct RGBAType {
+	using type = uint8_t;
+	static constexpr GLenum gl_format = GL_RGBA;
+	static constexpr GLenum gl_type = GL_UNSIGNED_BYTE;
+	static constexpr int num_channels = 4;
+};
+
+// Wait for the oldest pending PBO readback to complete, write its result
+// to disk (as .flo and/or .ppm, whichever filenames are non-empty), and
+// return the PBO to the spare pool. Mapping the PBO blocks until the GPU
+// transfer is done.
+template <class Type>
+void finish_one_read(GLuint width, GLuint height)
+{
+	using T = typename Type::type;
+	constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
+
+	assert(!reads_in_progress.empty());
+	ReadInProgress read = reads_in_progress.front();
+	reads_in_progress.pop_front();
+
+	unique_ptr<T[]> flow(new typename Type::type[width * height * Type::num_channels]);
+	void *buf = glMapNamedBufferRange(read.pbo, 0, width * height * bytes_per_pixel, GL_MAP_READ_BIT); // Blocks if the read isn't done yet.
+	memcpy(flow.get(), buf, width * height * bytes_per_pixel); // TODO: Unneeded for RGBAType, since flip_coordinate_system() does nothing.
+	glUnmapNamedBuffer(read.pbo);
+	spare_pbos.push(read.pbo);
+
+	flip_coordinate_system(flow.get(), width, height);
+	if (!read.flow_filename.empty()) {
+		write_flow(read.flow_filename.c_str(), flow.get(), width, height);
+		fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
+	}
+	if (!read.ppm_filename.empty()) {
+		write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
+	}
+}
+
+// Start an asynchronous readback of tex into a spare PBO and queue it on
+// reads_in_progress. If no spare PBO is available, first blocks on
+// completing (and writing out) the oldest outstanding read.
+template <class Type>
+void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
+{
+	using T = typename Type::type;
+	constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
+
+	if (spare_pbos.empty()) {
+		finish_one_read<Type>(width, height);
+	}
+	assert(!spare_pbos.empty());
+	reads_in_progress.emplace_back(ReadInProgress{ spare_pbos.top(), filename0, filename1, flow_filename, ppm_filename });
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, spare_pbos.top());
+	spare_pbos.pop();
+	// With a PIXEL_PACK buffer bound, the nullptr data argument means
+	// “write into the bound PBO at offset 0”.
+	glGetTextureImage(tex, 0, Type::gl_format, Type::gl_type, width * height * bytes_per_pixel, nullptr);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
+// Stand-alone flow mode: compute DIS optical flow for each image pair given
+// on the command line (arguments come in triplets of <image0> <image1>
+// <out.flo>), writing each result as a .flo file; the first pair also gets
+// a flow.ppm. Readback goes through a small pool of PBOs so the GPU can
+// keep computing while earlier results are written to disk.
+void compute_flow_only(int argc, char **argv, int optind)
+{
+	const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+	const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+	const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
+
+	// Load pictures.
+	unsigned width1, height1, width2, height2;
+	GLuint tex0 = load_texture(filename0, &width1, &height1, WITHOUT_MIPMAPS);
+	GLuint tex1 = load_texture(filename1, &width2, &height2, WITHOUT_MIPMAPS);
+
+	if (width1 != width2 || height1 != height2) {
+		fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
+			width1, height1, width2, height2);
+		exit(1);
+	}
+
+	// Move them into an array texture, since that's how the rest of the code
+	// would like them.
+	GLuint image_tex;
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
+	glTextureStorage3D(image_tex, 1, GL_RGBA8, width1, height1, 2);
+	glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+	glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+	glDeleteTextures(1, &tex0);
+	glDeleteTextures(1, &tex1);
+
+	// Set up some PBOs to do asynchronous readback.
+	GLuint pbos[5];
+	glCreateBuffers(5, pbos);
+	for (int i = 0; i < 5; ++i) {
+		// Sized for the largest readback we do (see schedule_read<>).
+		glNamedBufferData(pbos[i], width1 * height1 * 2 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
+		spare_pbos.push(pbos[i]);
+	}
+
+	int levels = find_num_levels(width1, height1);
+
+	GLuint tex_gray;
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
+	glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
+
+	OperatingPoint op = operating_point3;
+	if (!enable_variational_refinement) {
+		op.variational_refinement = false;
+	}
+
+	DISComputeFlow compute_flow(width1, height1, op);  // Must be initialized before gray.
+	GrayscaleConversion gray;
+	gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+	glGenerateTextureMipmap(tex_gray);
+
+	// Optionally run the flow a few times first so that timings below are
+	// not dominated by shader compilation and driver warmup.
+	if (enable_warmup) {
+		in_warmup = true;
+		for (int i = 0; i < 10; ++i) {
+			GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+			compute_flow.release_texture(final_tex);
+		}
+		in_warmup = false;
+	}
+
+	GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+	//GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+
+	schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
+	compute_flow.release_texture(final_tex);
+
+	// See if there are more flows on the command line (ie., more than three arguments),
+	// and if so, process them.
+	int num_flows = (argc - optind) / 3;
+	for (int i = 1; i < num_flows; ++i) {
+		const char *filename0 = argv[optind + i * 3 + 0];
+		const char *filename1 = argv[optind + i * 3 + 1];
+		const char *flow_filename = argv[optind + i * 3 + 2];
+		GLuint width, height;
+		GLuint tex0 = load_texture(filename0, &width, &height, WITHOUT_MIPMAPS);
+		if (width != width1 || height != height1) {
+			fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
+				filename0, width, height, width1, height1);
+			exit(1);
+		}
+		// Overwrite the two layers of the array texture in place; all
+		// pairs must have the same dimensions as the first one.
+		glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+		glDeleteTextures(1, &tex0);
+
+		GLuint tex1 = load_texture(filename1, &width, &height, WITHOUT_MIPMAPS);
+		if (width != width1 || height != height1) {
+			fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
+				filename1, width, height, width1, height1);
+			exit(1);
+		}
+		glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+		glDeleteTextures(1, &tex1);
+
+		gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+		glGenerateTextureMipmap(tex_gray);
+
+		GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
+
+		schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "");
+		compute_flow.release_texture(final_tex);
+	}
+	glDeleteTextures(1, &tex_gray);
+
+	// Drain all remaining asynchronous readbacks before returning.
+	while (!reads_in_progress.empty()) {
+		finish_one_read<FlowType>(width1, height1);
+	}
+}
+
+// Interpolate images based on
+//
+// Herbst, Seitz, Baker: “Occlusion Reasoning for Temporal Interpolation
+// Using Optical Flow”
+//
+// or at least a reasonable subset thereof. Unfinished.
+//
+// The bidirectional flow is computed once for the input pair, then reused
+// to synthesize 59 in-between frames (alpha = 1/60 .. 59/60), each written
+// as interpNNNN.ppm through the asynchronous PBO readback path.
+void interpolate_image(int argc, char **argv, int optind)
+{
+	const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
+	const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
+	//const char *out_filename = argc >= (optind + 3) ? argv[optind + 2] : "interpolated.png";
+
+	// Load pictures.
+	unsigned width1, height1, width2, height2;
+	GLuint tex0 = load_texture(filename0, &width1, &height1, WITH_MIPMAPS);
+	GLuint tex1 = load_texture(filename1, &width2, &height2, WITH_MIPMAPS);
+
+	if (width1 != width2 || height1 != height2) {
+		fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
+			width1, height1, width2, height2);
+		exit(1);
+	}
+
+	// Move them into an array texture, since that's how the rest of the code
+	// would like them.
+	int levels = find_num_levels(width1, height1);
+	GLuint image_tex;
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
+	glTextureStorage3D(image_tex, levels, GL_RGBA8, width1, height1, 2);
+	glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
+	glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
+	glDeleteTextures(1, &tex0);
+	glDeleteTextures(1, &tex1);
+	glGenerateTextureMipmap(image_tex);
+
+	// Set up some PBOs to do asynchronous readback.
+	GLuint pbos[5];
+	glCreateBuffers(5, pbos);
+	for (int i = 0; i < 5; ++i) {
+		// RGBA8 output frames, one pixel = 4 bytes.
+		glNamedBufferData(pbos[i], width1 * height1 * 4 * sizeof(uint8_t), nullptr, GL_STREAM_READ);
+		spare_pbos.push(pbos[i]);
+	}
+
+	OperatingPoint op = operating_point3;
+	if (!enable_variational_refinement) {
+		op.variational_refinement = false;
+	}
+	DISComputeFlow compute_flow(width1, height1, op);
+	GrayscaleConversion gray;
+	Interpolate interpolate(op, /*split_ycbcr_output=*/false);
+
+	GLuint tex_gray;
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
+	glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
+	gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
+	glGenerateTextureMipmap(tex_gray);
+
+	// Optional warmup so later timings are not dominated by shader
+	// compilation and driver warmup.
+	if (enable_warmup) {
+		in_warmup = true;
+		for (int i = 0; i < 10; ++i) {
+			GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+			GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f).first;
+			compute_flow.release_texture(bidirectional_flow_tex);
+			interpolate.release_texture(interpolated_tex);
+		}
+		in_warmup = false;
+	}
+
+	// Computed once, reused for every alpha below. (Not explicitly released;
+	// the process ends right after this function returns.)
+	GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+
+	for (int frameno = 1; frameno < 60; ++frameno) {
+		char ppm_filename[256];
+		snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
+
+		float alpha = frameno / 60.0f;
+		GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha).first;
+
+		schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
+		interpolate.release_texture(interpolated_tex);
+	}
+
+	// Drain all remaining asynchronous readbacks before returning.
+	while (!reads_in_progress.empty()) {
+		finish_one_read<RGBAType>(width1, height1);
+	}
+}
+
+// Parse the command line, create a hidden SDL window with an OpenGL 4.5
+// core context, and dispatch to either interpolation or flow-only mode.
+int main(int argc, char **argv)
+{
+	static const option long_options[] = {
+		{ "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
+		{ "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
+		{ "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
+		{ "disable-timing", no_argument, 0, 1000 },
+		{ "detailed-timing", no_argument, 0, 1003 },
+		{ "disable-variational-refinement", no_argument, 0, 1001 },
+		{ "interpolate", no_argument, 0, 1002 },
+		{ "warmup", no_argument, 0, 1004 },
+		{ 0, 0, 0, 0 }  // getopt_long() requires a zero-filled terminator element.
+	};
+
+	enable_timing = true;
+
+	for ( ;; ) {
+		int option_index = 0;
+		int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
+
+		if (c == -1) {
+			break;
+		}
+		switch (c) {
+		case 's':
+			vr_alpha = atof(optarg);
+			break;
+		case 'i':
+			vr_delta = atof(optarg);
+			break;
+		case 'g':
+			vr_gamma = atof(optarg);
+			break;
+		case 1000:
+			enable_timing = false;
+			break;
+		case 1001:
+			enable_variational_refinement = false;
+			break;
+		case 1002:
+			enable_interpolation = true;
+			break;
+		case 1003:
+			detailed_timing = true;
+			break;
+		case 1004:
+			enable_warmup = true;
+			break;
+		default:
+			// Note: option_index indexes long_options, not argv, so it cannot
+			// be used here; optind has already advanced past the bad argument.
+			fprintf(stderr, "Unknown option '%s'\n", argv[optind - 1]);
+			exit(1);
+		};
+	}
+
+	if (SDL_Init(SDL_INIT_EVERYTHING) == -1) {
+		fprintf(stderr, "SDL_Init failed: %s\n", SDL_GetError());
+		exit(1);
+	}
+	SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
+	SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
+	SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 0);
+	SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
+
+	// We need a 4.5 core context for DSA, SSBOs etc. used by the flow code.
+	SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+	SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
+	SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
+	// SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
+	window = SDL_CreateWindow("OpenGL window",
+		SDL_WINDOWPOS_UNDEFINED,
+		SDL_WINDOWPOS_UNDEFINED,
+		64, 64,
+		SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
+	SDL_GLContext context = SDL_GL_CreateContext(window);
+	assert(context != nullptr);
+
+	if (enable_interpolation) {
+		interpolate_image(argc, argv, optind);
+	} else {
+		compute_flow_only(argc, argv, optind);
+	}
+}
--- /dev/null
+syntax = "proto3";
+
+// Used as header before each frame in a .frames file:
+//
+// 1. "Ftbifrm0" (8 bytes, ASCII -- note that no byte repeats)
+// 2. Length of upcoming FrameHeaderProto (uint32, binary, big endian)
+// 3. The FrameHeaderProto itself
+// 4. The actual frame
+
+message FrameHeaderProto {
+	int32 stream_idx = 1;
+	int64 pts = 2;
+	int64 file_size = 3;  // In bytes of compressed frame. TODO: rename to size.
+}
+
+// Columnar (struct-of-arrays) form of the per-frame headers for one stream;
+// the i-th elements of pts/file_size/offset describe the i-th frame.
+message StreamContentsProto {
+	int32 stream_idx = 1;
+	repeated int64 pts = 2 [packed=true];
+	repeated int64 file_size = 3 [packed=true];
+	repeated int64 offset = 4 [packed=true];
+}
+
+// Index over an entire .frames file.
+message FileContentsProto {
+	repeated StreamContentsProto stream = 1;  // Typically only one.
+}
--- /dev/null
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "frame_on_disk.h"
+
+using namespace std;
+
+// Close the cached file descriptor, if we ever opened one.
+FrameReader::~FrameReader()
+{
+	if (fd != -1) {
+		close(fd);
+	}
+}
+
+// Read the compressed bytes of the given frame from disk and return them.
+// The file descriptor is cached across calls, so consecutive reads from the
+// same .frames file reuse it (and benefit from the fadvise readahead hint).
+// Exits the process on I/O error.
+string FrameReader::read_frame(FrameOnDisk frame)
+{
+	// Switch files if this frame lives in a different file than the last one.
+	if (int(frame.filename_idx) != last_filename_idx) {
+		if (fd != -1) {
+			close(fd);  // Ignore errors.
+		}
+
+		string filename;
+		{
+			lock_guard<mutex> lock(frame_mu);
+			filename = frame_filenames[frame.filename_idx];
+		}
+
+		fd = open(filename.c_str(), O_RDONLY);
+		if (fd == -1) {
+			perror(filename.c_str());
+			exit(1);
+		}
+
+		// We want readahead. (Ignore errors.)
+		posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+
+		last_filename_idx = frame.filename_idx;
+	}
+
+	string str;
+	str.resize(frame.size);
+	off_t offset = 0;
+	// pread() may return short reads, so loop until the whole frame is in.
+	while (offset < frame.size) {
+		int ret = pread(fd, &str[offset], frame.size - offset, frame.offset + offset);
+		if (ret <= 0) {
+			perror("pread");
+			exit(1);
+		}
+
+		offset += ret;
+	}
+	return str;
+}
--- /dev/null
+#ifndef _FRAME_ON_DISK_H
+#define _FRAME_ON_DISK_H 1
+
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include <stdint.h>
+
+#include "defs.h"
+
+extern std::mutex frame_mu;
+
+// Metadata for one frame stored in a .frames file on disk.
+struct FrameOnDisk {
+	int64_t pts = -1;  // -1 means empty.
+	off_t offset;  // Byte offset of the compressed frame within its file.
+	unsigned filename_idx;  // Index into frame_filenames.
+	uint32_t size;  // Not using size_t saves a few bytes; we can have so many frames.
+};
+extern std::vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
+extern std::vector<std::string> frame_filenames;  // Under frame_mu.
+
+// A helper class to read frames from disk. It caches the file descriptor
+// so that the kernel has a better chance of doing readahead when it sees
+// the sequential reads. (For this reason, each display has a private
+// FrameReader. Thus, we can easily keep multiple open file descriptors around
+// for a single .frames file.)
+class FrameReader {
+public:
+	~FrameReader();
+	std::string read_frame(FrameOnDisk frame);
+
+private:
+	int fd = -1;  // Cached descriptor for the current file, or -1.
+	int last_filename_idx = -1;  // Which file fd refers to; -1 = none yet.
+};
+
+#endif  // !defined(_FRAME_ON_DISK_H)
--- /dev/null
+#include "gpu_timers.h"
+
+#include <epoxy/gl.h>
+
+using namespace std;
+
+// Global timing switches, set from the command line (see main()).
+bool enable_timing = false;
+bool detailed_timing = false;
+bool in_warmup = false;
+
+// Start a named GPU timer at the given nesting level. Creates two timestamp
+// queries; the first is stamped here, the second is stamped later by the
+// caller (see ScopedTimer::end()) to mark the end of the timed region.
+// Returns the query pair, or (0, 0) if timing is disabled.
+pair<GLuint, GLuint> GPUTimers::begin_timer(const string &name, int level)
+{
+	if (!enable_timing) {
+		return make_pair(0, 0);
+	}
+
+	GLuint queries[2];
+	glGenQueries(2, queries);
+	glQueryCounter(queries[0], GL_TIMESTAMP);
+
+	Timer timer;
+	timer.name = name;
+	timer.level = level;
+	timer.query.first = queries[0];
+	timer.query.second = queries[1];
+	timers.push_back(timer);
+	return timer.query;
+}
+
+// Return the elapsed GPU time in nanoseconds between the two timestamp queries.
+GLint64 find_elapsed(pair<GLuint, GLuint> queries)
+{
+	// NOTE: This makes the CPU wait for the GPU.
+	GLuint64 time_start, time_end;
+	glGetQueryObjectui64v(queries.first, GL_QUERY_RESULT, &time_start);
+	glGetQueryObjectui64v(queries.second, GL_QUERY_RESULT, &time_end);
+	return time_end - time_start;
+}
+
+// Print all collected timers to stderr, indented by nesting level.
+// With detailed_timing, also reports how much of each timer is not
+// accounted for by its immediate sub-timers.
+void GPUTimers::print()
+{
+	for (size_t i = 0; i < timers.size(); ++i) {
+		if (timers[i].level >= 4 && !detailed_timing) {
+			// In practice, only affects the SOR sub-timers.
+			continue;
+		}
+
+		GLint64 time_elapsed = find_elapsed(timers[i].query);
+		for (int j = 0; j < timers[i].level * 2; ++j) {
+			fprintf(stderr, " ");
+		}
+
+		if (detailed_timing) {
+			// Look for any immediate subtimers, and see if they sum to the large one.
+			size_t num_subtimers = 0;
+			GLint64 sum_subtimers = 0;
+			// Sub-timers are the following entries with a strictly deeper level;
+			// only those exactly one level down count as immediate children.
+			for (size_t j = i + 1; j < timers.size() && timers[j].level > timers[i].level; ++j) {
+				if (timers[j].level != timers[i].level + 1)
+					continue;
+				++num_subtimers;
+				sum_subtimers += find_elapsed(timers[j].query);
+			}
+
+			if (num_subtimers > 0 && (time_elapsed - sum_subtimers) / 1e6 >= 0.01) {
+				fprintf(stderr, "%-30s %4.3f ms [%4.3f ms unaccounted for]\n", timers[i].name.c_str(), time_elapsed / 1e6, (time_elapsed - sum_subtimers) / 1e6);
+			} else {
+				fprintf(stderr, "%-30s %4.3f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
+			}
+		} else {
+			fprintf(stderr, "%-30s %4.1f ms\n", timers[i].name.c_str(), time_elapsed / 1e6);
+		}
+	}
+}
--- /dev/null
+#ifndef _GPU_TIMERS_H
+#define _GPU_TIMERS_H 1
+
+#include <epoxy/gl.h>
+#include <string>
+#include <utility>
+#include <vector>
+
+extern bool enable_timing;
+extern bool detailed_timing;
+extern bool in_warmup;
+
+// Collects named GPU timestamp-query pairs for later printing.
+// begin_timer() stamps the start; the caller (typically ScopedTimer)
+// stamps the end query when the timed region finishes.
+class GPUTimers {
+public:
+	void print();
+	std::pair<GLuint, GLuint> begin_timer(const std::string &name, int level);
+
+private:
+	struct Timer {
+		std::string name;
+		int level;  // Nesting depth, used for indentation and sub-timer sums.
+		std::pair<GLuint, GLuint> query;  // Start and end timestamp queries.
+	};
+	std::vector<Timer> timers;
+};
+
+// A simple RAII class for timing until the end of the scope.
+class ScopedTimer {
+public:
+	// Top-level timer (level 0).
+	ScopedTimer(const std::string &name, GPUTimers *timers)
+		: timers(timers), level(0)
+	{
+		query = timers->begin_timer(name, level);
+	}
+
+	// Nested timer, one level below its parent.
+	ScopedTimer(const std::string &name, ScopedTimer *parent_timer)
+		: timers(parent_timer->timers),
+		  level(parent_timer->level + 1)
+	{
+		query = timers->begin_timer(name, level);
+	}
+
+	~ScopedTimer()
+	{
+		end();
+	}
+
+	// Stamp the end query; idempotent, so it is safe to call before the
+	// destructor if you want to end the region early.
+	void end()
+	{
+		if (enable_timing && !ended) {
+			glQueryCounter(query.second, GL_TIMESTAMP);
+			ended = true;
+		}
+	}
+
+private:
+	GPUTimers *timers;
+	int level;
+	std::pair<GLuint, GLuint> query;
+	bool ended = false;
+};
+
+#endif  // !defined(_GPU_TIMERS_H)
--- /dev/null
+#version 450 core
+
+// Convert an RGBA layer of the input array texture to grayscale,
+// using Rec. 709 luma weights; alpha is passed through unchanged.
+in vec3 tc;
+out vec4 gray;
+
+uniform sampler2DArray tex;
+
+void main()
+{
+	vec4 color = texture(tex, tc);
+	gray.rgb = vec3(dot(color.rgb, vec3(0.2126f, 0.7152f, 0.0722f)));  // Rec. 709.
+	gray.a = color.a;
+}
--- /dev/null
+#version 450 core
+
+// Hole-filling blend pass: average the flow values produced by the four
+// directional fill passes (left/right/up/down), skipping any that still
+// hold the “invalid” sentinel.
+in vec2 tc;
+out vec2 out_flow;
+
+uniform sampler2D left_tex, right_tex, up_tex, down_tex;
+
+void main()
+{
+	// Some of these may contain “junk”, in the sense that they were
+	// not written in the given pass, if they came from an edge.
+	// Most of the time, this is benign, since it means we'll get
+	// the previous value (left/right/up) again. However, if it were
+	// bogus on the very first pass, we need to exclude it.
+	// Thus the test for 100.0f (invalid flows are initialized to 1000,
+	// all valid ones are less than 1).
+	vec2 left = texture(left_tex, tc).xy;
+	vec2 right = texture(right_tex, tc).xy;
+	vec2 up = texture(up_tex, tc).xy;
+	vec2 down = texture(down_tex, tc).xy;
+
+	vec2 sum = vec2(0.0f);
+	float num = 0.0f;
+	if (left.x < 100.0f) {
+		sum = left;
+		num = 1.0f;
+	}
+	if (right.x < 100.0f) {
+		sum += right;
+		num += 1.0f;
+	}
+	if (up.x < 100.0f) {
+		sum += up;
+		num += 1.0f;
+	}
+	if (down.x < 100.0f) {
+		sum += down;
+		num += 1.0f;
+	}
+
+	// If _all_ of them were 0, this would mean the entire row _and_ column
+	// would be devoid of flow. If so, the zero flow is fine for our purposes.
+	if (num == 0.0f) {
+		out_flow = vec2(0.0f);
+	} else {
+		out_flow = sum / num;
+	}
+}
--- /dev/null
+#version 450 core
+
+// Hole-filling copy pass: propagate existing flow values, discarding
+// fragments that still hold the “invalid” sentinel (initialized to 1000;
+// valid flows are less than 1) so they are not spread around.
+in vec2 tc;
+out vec2 out_flow;
+
+uniform sampler2D tex;
+
+void main()
+{
+	vec2 flow = texture(tex, tc).xy;
+	if (flow.x > 100.0f) {
+		// Don't copy unset flows around.
+		discard;
+	}
+	out_flow = flow;
+}
--- /dev/null
+#version 450 core
+
+// Fullscreen-quad vertex shader. The quad position (in [0,1]²) is shifted
+// by sample_offset and mapped to clip space; z selects the output layer
+// depth. The texture coordinate stays at the unshifted position.
+layout(location=0) in vec2 position;
+out vec2 tc;
+
+uniform float z;
+uniform vec2 sample_offset;
+
+void main()
+{
+	// Moving the position is equivalent to moving the texture coordinate,
+	// but cheaper -- as it means some of the fullscreen quad can be clipped away.
+	vec2 adjusted_pos = position - sample_offset;
+
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * adjusted_pos.x - 1.0, 2.0 * adjusted_pos.y - 1.0, 2.0f * (z - 0.5f), 1.0);
+
+	tc = position;
+}
--- /dev/null
+#ifndef _JPEG_DESTROYER_H
+#define _JPEG_DESTROYER_H 1
+
+#include <jpeglib.h>
+
+// RAII guard that calls jpeg_destroy_decompress() on the given decompress
+// struct when it goes out of scope, so early returns and exits through
+// error paths do not leak libjpeg state.
+class JPEGDestroyer {
+public:
+	JPEGDestroyer(jpeg_decompress_struct *dinfo)
+		: dinfo(dinfo) {}
+
+	~JPEGDestroyer() {
+		jpeg_destroy_decompress(dinfo);
+	}
+
+private:
+	jpeg_decompress_struct *dinfo;
+};
+
+#endif  // !defined(_JPEG_DESTROYER_H)
--- /dev/null
+#ifndef _JPEG_FRAME_H
+#define _JPEG_FRAME_H 1
+
+#include <memory>
+
+// A decoded frame, stored as planar or semiplanar Y'CbCr.
+struct Frame {
+	bool is_semiplanar = false;  // Selects cbcr (true) vs. cb/cr (false).
+	std::unique_ptr<uint8_t[]> y;
+	std::unique_ptr<uint8_t[]> cb, cr;  // For planar.
+	std::unique_ptr<uint8_t[]> cbcr;  // For semiplanar.
+	unsigned width, height;
+	unsigned chroma_subsampling_x, chroma_subsampling_y;
+	unsigned pitch_y, pitch_chroma;  // Row strides in bytes.
+};
+
+#endif  // !defined(_JPEG_FRAME_H)
--- /dev/null
+#include "jpeg_frame_view.h"
+
+#include "defs.h"
+#include "jpeg_destroyer.h"
+#include "shared/post_to_main_thread.h"
+#include "video_stream.h"
+#include "ycbcr_converter.h"
+
+#include <QMouseEvent>
+#include <QScreen>
+#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <jpeglib.h>
+#include <movit/init.h>
+#include <movit/resource_pool.h>
+#include <movit/util.h>
+#include <mutex>
+#include <stdint.h>
+#include <thread>
+#include <unistd.h>
+#include <utility>
+
+// Must come after the Qt stuff.
+#include "vaapi_jpeg_decoder.h"
+
+using namespace movit;
+using namespace std;
+
+namespace {
+
+// Just an arbitrary order for std::map.
+struct FrameOnDiskLexicalOrder
+{
+	bool operator() (const FrameOnDisk &a, const FrameOnDisk &b) const
+	{
+		if (a.pts != b.pts)
+			return a.pts < b.pts;
+		if (a.offset != b.offset)
+			return a.offset < b.offset;
+		if (a.filename_idx != b.filename_idx)
+			return a.filename_idx < b.filename_idx;
+		assert(a.size == b.size);
+		return false;
+	}
+};
+
+// Approximate memory footprint of a decoded frame, in bytes
+// (luma plane plus two subsampled chroma planes).
+inline size_t frame_size(const Frame &frame)
+{
+	size_t y_size = frame.width * frame.height;
+	size_t cbcr_size = y_size / frame.chroma_subsampling_x / frame.chroma_subsampling_y;
+	return y_size + cbcr_size * 2;
+}
+
+// Cache entry: the decoded frame plus an LRU timestamp (event_counter value).
+struct LRUFrame {
+	shared_ptr<Frame> frame;
+	size_t last_used;
+};
+
+struct PendingDecode {
+	JPEGFrameView *destination;
+
+	// For actual decodes (only if frame below is nullptr).
+	FrameOnDisk primary, secondary;
+	float fade_alpha;  // Irrelevant if secondary.stream_idx == -1.
+
+	// Already-decoded frames are also sent through PendingDecode,
+	// so that they get drawn in the right order. If frame is nullptr,
+	// it's a real decode.
+	shared_ptr<Frame> frame;
+};
+
+}  // namespace
+
+// Shared decoder state. The cache, its byte count and the pending-decode
+// queue are all protected by cache_mu; event_counter provides LRU ordering.
+thread JPEGFrameView::jpeg_decoder_thread;
+mutex cache_mu;
+map<FrameOnDisk, LRUFrame, FrameOnDiskLexicalOrder> cache;  // Under cache_mu.
+size_t cache_bytes_used = 0;  // Under cache_mu.
+condition_variable any_pending_decodes;
+deque<PendingDecode> pending_decodes;  // Under cache_mu.
+atomic<size_t> event_counter{0};
+extern QGLWidget *global_share_widget;
+extern atomic<bool> should_quit;
+
+// Decode a JPEG into a planar Y'CbCr Frame. Tries VA-API hardware decoding
+// first if available, falling back to libjpeg software decoding.
+// Only 3-component images with “sane” chroma subsampling are accepted;
+// anything else exits the process.
+shared_ptr<Frame> decode_jpeg(const string &jpeg)
+{
+	shared_ptr<Frame> frame;
+	if (vaapi_jpeg_decoding_usable) {
+		frame = decode_jpeg_vaapi(jpeg);
+		if (frame != nullptr) {
+			return frame;
+		}
+		fprintf(stderr, "VA-API hardware decoding failed; falling back to software.\n");
+	}
+
+	frame.reset(new Frame);
+
+	jpeg_decompress_struct dinfo;
+	jpeg_error_mgr jerr;
+	dinfo.err = jpeg_std_error(&jerr);
+	jpeg_create_decompress(&dinfo);
+	JPEGDestroyer destroy_dinfo(&dinfo);
+
+	jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
+	jpeg_read_header(&dinfo, true);
+
+	if (dinfo.num_components != 3) {
+		fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+			dinfo.num_components,
+			dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+			dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+			dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+		exit(1);
+	}
+	if (dinfo.comp_info[0].h_samp_factor != dinfo.max_h_samp_factor ||
+	    dinfo.comp_info[0].v_samp_factor != dinfo.max_v_samp_factor ||  // Y' must not be subsampled.
+	    dinfo.comp_info[1].h_samp_factor != dinfo.comp_info[2].h_samp_factor ||
+	    dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[2].v_samp_factor ||  // Cb and Cr must be identically subsampled.
+	    (dinfo.max_h_samp_factor % dinfo.comp_info[1].h_samp_factor) != 0 ||
+	    (dinfo.max_v_samp_factor % dinfo.comp_info[1].v_samp_factor) != 0) {  // No 2:3 subsampling or other weirdness.
+		fprintf(stderr, "Unsupported subsampling scheme. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+			dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+			dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+			dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+		exit(1);
+	}
+	// Get raw (subsampled, non-color-converted) Y'CbCr data straight out
+	// of libjpeg, avoiding an RGB round-trip.
+	dinfo.raw_data_out = true;
+
+	jpeg_start_decompress(&dinfo);
+
+	frame->width = dinfo.output_width;
+	frame->height = dinfo.output_height;
+	frame->chroma_subsampling_x = dinfo.max_h_samp_factor / dinfo.comp_info[1].h_samp_factor;
+	frame->chroma_subsampling_y = dinfo.max_v_samp_factor / dinfo.comp_info[1].v_samp_factor;
+
+	// Buffers are rounded up to whole MCUs, as required by jpeg_read_raw_data().
+	unsigned h_mcu_size = DCTSIZE * dinfo.max_h_samp_factor;
+	unsigned v_mcu_size = DCTSIZE * dinfo.max_v_samp_factor;
+	unsigned mcu_width_blocks = (dinfo.output_width + h_mcu_size - 1) / h_mcu_size;
+	unsigned mcu_height_blocks = (dinfo.output_height + v_mcu_size - 1) / v_mcu_size;
+
+	unsigned luma_width_blocks = mcu_width_blocks * dinfo.comp_info[0].h_samp_factor;
+	unsigned chroma_width_blocks = mcu_width_blocks * dinfo.comp_info[1].h_samp_factor;
+	unsigned luma_height_blocks = mcu_height_blocks * dinfo.comp_info[0].v_samp_factor;
+	unsigned chroma_height_blocks = mcu_height_blocks * dinfo.comp_info[1].v_samp_factor;
+
+	// TODO: Decode into a PBO.
+	frame->y.reset(new uint8_t[luma_width_blocks * luma_height_blocks * DCTSIZE2]);
+	frame->cb.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
+	frame->cr.reset(new uint8_t[chroma_width_blocks * chroma_height_blocks * DCTSIZE2]);
+	frame->pitch_y = luma_width_blocks * DCTSIZE;
+	frame->pitch_chroma = chroma_width_blocks * DCTSIZE;
+
+	// Read one MCU row at a time, pointing libjpeg's row pointers directly
+	// into our destination planes.
+	JSAMPROW yptr[v_mcu_size], cbptr[v_mcu_size], crptr[v_mcu_size];
+	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+	for (unsigned y = 0; y < mcu_height_blocks; ++y) {
+		// NOTE: The last elements of cbptr/crptr will be unused for vertically subsampled chroma.
+		for (unsigned yy = 0; yy < v_mcu_size; ++yy) {
+			yptr[yy] = frame->y.get() + (y * DCTSIZE * dinfo.max_v_samp_factor + yy) * frame->pitch_y;
+			cbptr[yy] = frame->cb.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+			crptr[yy] = frame->cr.get() + (y * DCTSIZE * dinfo.comp_info[1].v_samp_factor + yy) * frame->pitch_chroma;
+		}
+
+		jpeg_read_raw_data(&dinfo, data, v_mcu_size);
+	}
+
+	(void)jpeg_finish_decompress(&dinfo);
+
+	return frame;
+}
+
+// Evict least-recently-used frames until the cache is back below 90% of
+// CACHE_SIZE_MB. Assumes cache_mu is held.
+void prune_cache()
+{
+	// Assumes cache_mu is held.
+	int64_t bytes_still_to_remove = cache_bytes_used - (size_t(CACHE_SIZE_MB) * 1024 * 1024) * 9 / 10;
+	if (bytes_still_to_remove <= 0) return;
+
+	// Collect (timestamp, size) for all entries, oldest first after sorting.
+	vector<pair<size_t, size_t>> lru_timestamps_and_size;
+	for (const auto &key_and_value : cache) {
+		lru_timestamps_and_size.emplace_back(
+			key_and_value.second.last_used,
+			frame_size(*key_and_value.second.frame));
+	}
+	sort(lru_timestamps_and_size.begin(), lru_timestamps_and_size.end());
+
+	// Remove the oldest ones until we are below 90% of the cache used.
+	size_t lru_cutoff_point = 0;
+	for (const pair<size_t, size_t> &it : lru_timestamps_and_size) {
+		lru_cutoff_point = it.first;
+		bytes_still_to_remove -= it.second;
+		if (bytes_still_to_remove <= 0) break;
+	}
+
+	// Second pass: actually erase everything at or below the cutoff timestamp.
+	for (auto it = cache.begin(); it != cache.end(); ) {
+		if (it->second.last_used <= lru_cutoff_point) {
+			cache_bytes_used -= frame_size(*it->second.frame);
+			it = cache.erase(it);
+		} else {
+			++it;
+		}
+	}
+}
+
+// Look up a frame in the LRU cache, decoding it (and inserting it) on a miss.
+// *did_decode is set to true only when an actual decode happened; with
+// RETURN_NULLPTR_IF_NOT_IN_CACHE, a miss returns nullptr instead of decoding.
+shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavior cache_miss_behavior, FrameReader *frame_reader, bool *did_decode)
+{
+	*did_decode = false;
+	{
+		unique_lock<mutex> lock(cache_mu);
+		auto it = cache.find(frame_spec);
+		if (it != cache.end()) {
+			it->second.last_used = event_counter++;
+			return it->second.frame;
+		}
+	}
+
+	if (cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE) {
+		return nullptr;
+	}
+
+	// Decode outside the lock, so other lookups are not blocked on I/O.
+	*did_decode = true;
+	shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec));
+
+	unique_lock<mutex> lock(cache_mu);
+	cache_bytes_used += frame_size(*frame);
+	cache[frame_spec] = LRUFrame{ frame, event_counter++ };
+
+	if (cache_bytes_used > size_t(CACHE_SIZE_MB) * 1024 * 1024) {
+		prune_cache();
+	}
+	return frame;
+}
+
+// Worker thread: pops pending decode requests, decodes (or fetches from
+// cache) the primary and optional secondary frame, and hands them to the
+// destination view. If a view has more than three decodes queued up, new
+// decodes for it are dropped on cache misses to keep latency bounded.
+void JPEGFrameView::jpeg_decoder_thread_func()
+{
+	size_t num_decoded = 0, num_dropped = 0;
+
+	pthread_setname_np(pthread_self(), "JPEGDecoder");
+	while (!should_quit.load()) {
+		PendingDecode decode;
+		CacheMissBehavior cache_miss_behavior = DECODE_IF_NOT_IN_CACHE;
+		{
+			unique_lock<mutex> lock(cache_mu);  // TODO: Perhaps under another lock?
+			any_pending_decodes.wait(lock, [] {
+				return !pending_decodes.empty() || should_quit.load();
+			});
+			if (should_quit.load())
+				break;
+			decode = pending_decodes.front();
+			pending_decodes.pop_front();
+
+			size_t num_pending = 0;
+			for (const PendingDecode &other_decode : pending_decodes) {
+				if (other_decode.destination == decode.destination) {
+					++num_pending;
+				}
+			}
+			if (num_pending > 3) {
+				cache_miss_behavior = RETURN_NULLPTR_IF_NOT_IN_CACHE;
+			}
+		}
+
+		if (decode.frame != nullptr) {
+			// Already decoded, so just show it.
+			decode.destination->setDecodedFrame(decode.frame, nullptr, 1.0f);
+			continue;
+		}
+
+		shared_ptr<Frame> primary_frame, secondary_frame;
+		bool drop = false;
+		// Subframe 0 is the primary frame, subframe 1 the (optional) fade target.
+		for (int subframe_idx = 0; subframe_idx < 2; ++subframe_idx) {
+			const FrameOnDisk &frame_spec = (subframe_idx == 0 ? decode.primary : decode.secondary);
+			if (frame_spec.pts == -1) {
+				// No secondary frame.
+				continue;
+			}
+
+			// NOTE(review): the out-parameter of decode_jpeg_with_cache() is
+			// did_decode (true when a fresh decode happened, i.e. _not_ found
+			// in the cache), so this variable name — and the !found_in_cache
+			// test below — read inverted; verify the decoded/dropped statistics
+			// are counted as intended.
+			bool found_in_cache;
+			shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, cache_miss_behavior, &decode.destination->frame_reader, &found_in_cache);
+
+			if (frame == nullptr) {
+				assert(cache_miss_behavior == RETURN_NULLPTR_IF_NOT_IN_CACHE);
+				drop = true;
+				break;
+			}
+
+			if (!found_in_cache) {
+				++num_decoded;
+				if (num_decoded % 1000 == 0) {
+					fprintf(stderr, "Decoded %zu images, dropped %zu (%.2f%% dropped)\n",
+						num_decoded, num_dropped, (100.0 * num_dropped) / (num_decoded + num_dropped));
+				}
+			}
+			if (subframe_idx == 0) {
+				primary_frame = std::move(frame);
+			} else {
+				secondary_frame = std::move(frame);
+			}
+		}
+		if (drop) {
+			++num_dropped;
+			continue;
+		}
+
+		// TODO: Could we get jitter between non-interpolated and interpolated frames here?
+		decode.destination->setDecodedFrame(primary_frame, secondary_frame, decode.fade_alpha);
+	}
+}
+
+// Wake up and join the decoder thread. NOTE(review): this only notifies; it
+// assumes the caller has already set should_quit, or the wait predicate in
+// jpeg_decoder_thread_func() will never let the thread exit — confirm.
+void JPEGFrameView::shutdown()
+{
+	any_pending_decodes.notify_all();
+	jpeg_decoder_thread.join();
+}
+
+// Share the GL context with the global share widget, so textures and
+// chains can be used across views.
+JPEGFrameView::JPEGFrameView(QWidget *parent)
+	: QGLWidget(parent, global_share_widget)
+{
+}
+
+// Queue an asynchronous decode-and-display of a frame from disk,
+// optionally faded against a secondary frame.
+void JPEGFrameView::setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk secondary_frame, float fade_alpha)
+{
+	current_stream_idx = stream_idx;  // TODO: Does this interact with fades?
+
+	unique_lock<mutex> lock(cache_mu);
+	PendingDecode decode;
+	decode.primary = frame;
+	decode.secondary = secondary_frame;
+	decode.fade_alpha = fade_alpha;
+	decode.destination = this;
+	pending_decodes.push_back(decode);
+	any_pending_decodes.notify_all();
+}
+
+// Queue an already-decoded frame for display. It still goes through the
+// decoder queue so it is drawn in order with pending decodes.
+void JPEGFrameView::setFrame(shared_ptr<Frame> frame)
+{
+	unique_lock<mutex> lock(cache_mu);
+	PendingDecode decode;
+	decode.frame = std::move(frame);
+	decode.destination = this;
+	pending_decodes.push_back(decode);
+	any_pending_decodes.notify_all();
+}
+
+// Movit resource pool shared between all JPEGFrameView instances.
+ResourcePool *resource_pool = nullptr;
+
+// One-time global setup (resource pool, decoder thread) plus per-view
+// setup of the Y'CbCr conversion chain and the text-overlay chain.
+void JPEGFrameView::initializeGL()
+{
+	glDisable(GL_BLEND);
+	glDisable(GL_DEPTH_TEST);
+	check_error();
+
+	// Only the first view to initialize creates the shared state.
+	static once_flag once;
+	call_once(once, [] {
+		resource_pool = new ResourcePool;
+		jpeg_decoder_thread = std::thread(jpeg_decoder_thread_func);
+	});
+
+	ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_RGBA, resource_pool));
+
+	ImageFormat inout_format;
+	inout_format.color_space = COLORSPACE_sRGB;
+	inout_format.gamma_curve = GAMMA_sRGB;
+
+	overlay_chain.reset(new EffectChain(overlay_base_width, overlay_base_height, resource_pool));
+	overlay_input = (movit::FlatInput *)overlay_chain->add_input(new FlatInput(inout_format, FORMAT_GRAYSCALE, GL_UNSIGNED_BYTE, overlay_base_width, overlay_base_height));
+
+	overlay_chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+	overlay_chain->finalize();
+}
+
+// Track the actual framebuffer size for use in paintGL().
+void JPEGFrameView::resizeGL(int width, int height)
+{
+	check_error();
+	glViewport(0, 0, width, height);
+	check_error();
+
+	// Save these, as width() and height() will lie with DPI scaling.
+	gl_width = width;
+	gl_height = height;
+}
+
+// Render the current frame via its prepared effect chain, then draw the
+// text overlay (if any) in the bottom-right corner. Clears to black if no
+// frame has been set yet.
+void JPEGFrameView::paintGL()
+{
+	glViewport(0, 0, gl_width, gl_height);
+	if (current_frame == nullptr) {
+		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+		glClear(GL_COLOR_BUFFER_BIT);
+		return;
+	}
+
+	check_error();
+	current_chain->render_to_screen();
+
+	if (overlay_image != nullptr) {
+		// Re-upload the overlay pixels lazily; set_overlay() cannot do it,
+		// as it may run without a current OpenGL context.
+		if (overlay_input_needs_refresh) {
+			overlay_input->set_width(overlay_width);
+			overlay_input->set_height(overlay_height);
+			overlay_input->set_pixel_data(overlay_image->bits());
+		}
+		glViewport(gl_width - overlay_width, 0, overlay_width, overlay_height);
+		overlay_chain->render_to_screen();
+	}
+}
+
+namespace {
+
+// (Intentionally empty; left over from earlier code.)
+}  // namespace
+
+// Called from the decoder thread: store the decoded frame(s), build the
+// matching conversion (or fade) chain, and request a repaint — all marshalled
+// onto the main/GUI thread.
+void JPEGFrameView::setDecodedFrame(shared_ptr<Frame> frame, shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+	post_to_main_thread([this, frame, secondary_frame, fade_alpha] {
+		current_frame = frame;
+		current_secondary_frame = secondary_frame;
+
+		if (secondary_frame != nullptr) {
+			current_chain = ycbcr_converter->prepare_chain_for_fade(frame, secondary_frame, fade_alpha);
+		} else {
+			current_chain = ycbcr_converter->prepare_chain_for_conversion(frame);
+		}
+		update();
+	});
+}
+
+// Emit clicked() on left-button press, so the owner can react to selection.
+void JPEGFrameView::mousePressEvent(QMouseEvent *event)
+{
+	if (event->type() == QEvent::MouseButtonPress && event->button() == Qt::LeftButton) {
+		emit clicked();
+	}
+}
+
+// Render the given text into a small grayscale QImage (scaled for the
+// screen's device pixel ratio) to be composited in paintGL(). An empty
+// string removes the overlay.
+void JPEGFrameView::set_overlay(const string &text)
+{
+	if (text.empty()) {
+		overlay_image.reset();
+		return;
+	}
+
+	float dpr = QGuiApplication::primaryScreen()->devicePixelRatio();
+	overlay_width = lrint(overlay_base_width * dpr);
+	overlay_height = lrint(overlay_base_height * dpr);
+
+	overlay_image.reset(new QImage(overlay_width, overlay_height, QImage::Format_Grayscale8));
+	overlay_image->setDevicePixelRatio(dpr);
+	overlay_image->fill(0);
+	QPainter painter(overlay_image.get());
+
+	painter.setPen(Qt::white);
+	QFont font = painter.font();
+	font.setPointSize(12);
+	painter.setFont(font);
+
+	painter.drawText(QRectF(0, 0, overlay_base_width, overlay_base_height), Qt::AlignCenter, QString::fromStdString(text));
+
+	// Don't refresh immediately; we might not have an OpenGL context here.
+	overlay_input_needs_refresh = true;
+}
--- /dev/null
+// Renamed guard: identifiers starting with an underscore followed by an
+// uppercase letter are reserved to the implementation in C++.
+#ifndef JPEG_FRAME_VIEW_H
+#define JPEG_FRAME_VIEW_H 1
+
+#include "frame_on_disk.h"
+#include "jpeg_frame.h"
+#include "ycbcr_converter.h"
+
+#include <QGLWidget>
+#include <epoxy/gl.h>
+#include <memory>
+#include <movit/effect_chain.h>
+#include <movit/flat_input.h>
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+#include <stdint.h>
+#include <thread>
+
+// What decode_jpeg_with_cache() should do if the frame is not already decoded.
+enum CacheMissBehavior {
+	DECODE_IF_NOT_IN_CACHE,
+	RETURN_NULLPTR_IF_NOT_IN_CACHE
+};
+
+std::shared_ptr<Frame> decode_jpeg(const std::string &jpeg);
+std::shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk id, CacheMissBehavior cache_miss_behavior, FrameReader *frame_reader, bool *did_decode);
+
+// A Qt widget showing one JPEG video frame (a camera angle), decoded
+// asynchronously on a shared decoder thread, optionally faded against a
+// secondary frame, with an optional text overlay in the corner.
+class JPEGFrameView : public QGLWidget {
+	Q_OBJECT
+
+public:
+	JPEGFrameView(QWidget *parent);
+
+	void setFrame(unsigned stream_idx, FrameOnDisk frame, FrameOnDisk secondary_frame = {}, float fade_alpha = 0.0f);
+	void setFrame(std::shared_ptr<Frame> frame);
+
+	void mousePressEvent(QMouseEvent *event) override;
+
+	unsigned get_stream_idx() const { return current_stream_idx; }
+
+	void setDecodedFrame(std::shared_ptr<Frame> frame, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+	void set_overlay(const std::string &text); // Blank for none.
+
+	// Stops the shared JPEG decoder thread; call once at program exit.
+	static void shutdown();
+
+signals:
+	void clicked();
+
+protected:
+	void initializeGL() override;
+	void resizeGL(int width, int height) override;
+	void paintGL() override;
+
+private:
+	static void jpeg_decoder_thread_func();
+
+	FrameReader frame_reader;
+
+	// The stream index of the latest frame we displayed.
+	unsigned current_stream_idx = 0;
+
+	std::unique_ptr<YCbCrConverter> ycbcr_converter;
+	movit::EffectChain *current_chain = nullptr;  // Owned by ycbcr_converter.
+
+	std::shared_ptr<Frame> current_frame;  // So that we hold on to the pixels.
+	std::shared_ptr<Frame> current_secondary_frame;  // Same.
+
+	// Overlay geometry, in logical pixels; the actual image is scaled by the DPR.
+	static constexpr int overlay_base_width = 16, overlay_base_height = 16;
+	int overlay_width = overlay_base_width, overlay_height = overlay_base_height;
+	std::unique_ptr<QImage> overlay_image;  // If nullptr, no overlay.
+	std::unique_ptr<movit::EffectChain> overlay_chain;  // Just to get the overlay on screen in the easiest way possible.
+	movit::FlatInput *overlay_input;
+	bool overlay_input_needs_refresh = false;
+
+	int gl_width, gl_height;
+
+	static std::thread jpeg_decoder_thread;
+};
+
+#endif // !defined(JPEG_FRAME_VIEW_H)
--- /dev/null
+#include <assert.h>
+#include <arpa/inet.h>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <dirent.h>
+#include <getopt.h>
+#include <memory>
+#include <mutex>
+#include <stdint.h>
+#include <stdio.h>
+#include <string>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <thread>
+#include <vector>
+
+extern "C" {
+#include <libavformat/avformat.h>
+}
+
+#include "clip_list.h"
+#include "shared/context.h"
+#include "defs.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
+#include "flags.h"
+#include "frame_on_disk.h"
+#include "frame.pb.h"
+#include "shared/httpd.h"
+#include "mainwindow.h"
+#include "player.h"
+#include "shared/post_to_main_thread.h"
+#include "shared/ref_counted_gl_sync.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+#include "vaapi_jpeg_decoder.h"
+
+#include <QApplication>
+#include <QGLFormat>
+#include <QSurfaceFormat>
+#include <QProgressDialog>
+#include <movit/init.h>
+#include <movit/util.h>
+
+using namespace std;
+using namespace std::chrono;
+
+// Magic marker written before every frame in a .frames file; used to
+// resynchronize when scanning possibly-truncated files on startup.
+constexpr char frame_magic[] = "Ftbifrm0";
+constexpr size_t frame_magic_len = 8;
+
+mutex RefCountedGLsync::fence_lock;
+atomic<bool> should_quit{false};
+
+// pts at which newly recorded frames start; set after scanning old frames.
+int64_t start_pts = -1;
+
+// TODO: Replace by some sort of GUI control, I guess.
+int64_t current_pts = 0;
+
+// One open output file per input stream; rotated after 1000 frames.
+struct FrameFile {
+	FILE *fp = nullptr;
+	unsigned filename_idx;
+	size_t frames_written_so_far = 0;
+};
+std::map<int, FrameFile> open_frame_files;
+
+mutex frame_mu;
+vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
+vector<string> frame_filenames;  // Under frame_mu.
+
+namespace {
+
+// Appends one JPEG frame to the current .frames file for the given stream
+// (opening a new file if none is open), registers it in the in-memory frame
+// index, and returns its on-disk location. After 1000 frames, the file is
+// closed and its frame list is committed to SQLite.
+FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t size, DB *db)
+{
+	if (open_frame_files.count(stream_idx) == 0) {
+		char filename[256];
+		snprintf(filename, sizeof(filename), "%s/frames/cam%d-pts%09ld.frames",
+			global_flags.working_directory.c_str(), stream_idx, pts);
+		FILE *fp = fopen(filename, "wb");
+		if (fp == nullptr) {
+			perror(filename);
+			exit(1);
+		}
+
+		lock_guard<mutex> lock(frame_mu);
+		unsigned filename_idx = frame_filenames.size();
+		frame_filenames.push_back(filename);
+		open_frame_files[stream_idx] = FrameFile{ fp, filename_idx, 0 };
+	}
+
+	FrameFile &file = open_frame_files[stream_idx];
+	unsigned filename_idx = file.filename_idx;
+	string filename;
+	{
+		lock_guard<mutex> lock(frame_mu);
+		filename = frame_filenames[filename_idx];
+	}
+
+	FrameHeaderProto hdr;
+	hdr.set_stream_idx(stream_idx);
+	hdr.set_pts(pts);
+	hdr.set_file_size(size);
+
+	string serialized;
+	if (!hdr.SerializeToString(&serialized)) {
+		fprintf(stderr, "Frame header serialization failed.\n");
+		exit(1);
+	}
+	uint32_t len = htonl(serialized.size());
+
+	// On-disk record layout: magic, header length (network order), header, JPEG data.
+	if (fwrite(frame_magic, frame_magic_len, 1, file.fp) != 1) {
+		perror("fwrite");
+		exit(1);
+	}
+	if (fwrite(&len, sizeof(len), 1, file.fp) != 1) {
+		perror("fwrite");
+		exit(1);
+	}
+	if (fwrite(serialized.data(), serialized.size(), 1, file.fp) != 1) {
+		perror("fwrite");
+		exit(1);
+	}
+	off_t offset = ftell(file.fp);
+	if (fwrite(data, size, 1, file.fp) != 1) {
+		perror("fwrite");
+		exit(1);
+	}
+	fflush(file.fp); // No fsync(), though. We can accept losing a few frames.
+
+	// Use frame_magic_len instead of the previous hardcoded 8, so the
+	// estimate stays correct if the magic ever changes length.
+	global_disk_space_estimator->report_write(filename, frame_magic_len + sizeof(len) + serialized.size() + size, pts);
+
+	FrameOnDisk frame;
+	frame.pts = pts;
+	frame.filename_idx = filename_idx;
+	frame.offset = offset;
+	frame.size = size;
+
+	{
+		lock_guard<mutex> lock(frame_mu);
+		assert(stream_idx < MAX_STREAMS);
+		frames[stream_idx].push_back(frame);
+	}
+
+	if (++file.frames_written_so_far >= 1000) {
+		// Renamed from "size" to avoid shadowing the parameter above.
+		size_t file_size = ftell(file.fp);
+
+		// Start a new file next time.
+		if (fclose(file.fp) != 0) {
+			perror("fclose");
+			exit(1);
+		}
+		open_frame_files.erase(stream_idx);
+
+		// Write information about all frames in the finished file to SQLite.
+		// (If we crash before getting to do this, we'll be scanning through
+		// the file on next startup, and adding it to the database then.)
+		// NOTE: Since we don't fsync(), we could in theory get broken data
+		// but with the right size, but it would seem unlikely.
+		vector<DB::FrameOnDiskAndStreamIdx> frames_this_file;
+		{
+			lock_guard<mutex> lock(frame_mu);
+			for (size_t stream_idx = 0; stream_idx < MAX_STREAMS; ++stream_idx) {
+				for (const FrameOnDisk &frame : frames[stream_idx]) {
+					if (frame.filename_idx == filename_idx) {
+						frames_this_file.emplace_back(DB::FrameOnDiskAndStreamIdx{ frame, unsigned(stream_idx) });
+					}
+				}
+			}
+		}
+
+		// Strip the directory part to get the basename. BUGFIX: the previous
+		// code set basename to the position of the slash itself, so strchr()
+		// would find the same slash again forever (infinite loop the first
+		// time a file rolled over); we must skip past the slash.
+		const char *basename = filename.c_str();
+		while (strchr(basename, '/') != nullptr) {
+			basename = strchr(basename, '/') + 1;
+		}
+		db->store_frame_file(basename, file_size, frames_this_file);
+	}
+
+	return frame;
+}
+
+} // namespace
+
+// Global HTTP server instance; endpoints are registered in main().
+HTTPD *global_httpd;
+
+void load_existing_frames();
+int record_thread_func();
+
+int main(int argc, char **argv)
+{
+	parse_flags(argc, argv);
+	// No positional argument: fall back to a demo file and pace input to
+	// realtime. Exactly one: use it as the source. More is a usage error.
+	if (optind == argc) {
+		global_flags.stream_source = "multiangle.mp4";
+		global_flags.slow_down_input = true;
+	} else if (optind + 1 == argc) {
+		global_flags.stream_source = argv[optind];
+	} else {
+		usage();
+		exit(1);
+	}
+
+	// Make sure the frame output directory exists before recording starts.
+	string frame_dir = global_flags.working_directory + "/frames";
+
+	struct stat st;
+	if (stat(frame_dir.c_str(), &st) == -1) {
+		fprintf(stderr, "%s does not exist, creating it.\n", frame_dir.c_str());
+		if (mkdir(frame_dir.c_str(), 0777) == -1) {
+			perror(global_flags.working_directory.c_str());
+			exit(1);
+		}
+	}
+
+	avformat_network_init();
+	global_httpd = new HTTPD;
+
+	QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts, true);
+
+	// Request an OpenGL 4.5 core context with no depth/stencil buffer;
+	// this must be set before any QGLWidget is created.
+	QSurfaceFormat fmt;
+	fmt.setDepthBufferSize(0);
+	fmt.setStencilBufferSize(0);
+	fmt.setProfile(QSurfaceFormat::CoreProfile);
+	fmt.setMajorVersion(4);
+	fmt.setMinorVersion(5);
+
+	// Turn off vsync, since Qt generally gives us at most frame rate
+	// (display frequency) / (number of QGLWidgets active).
+	fmt.setSwapInterval(0);
+
+	QSurfaceFormat::setDefaultFormat(fmt);
+
+	QGLFormat::setDefaultFormat(QGLFormat::fromSurfaceFormat(fmt));
+
+	QApplication app(argc, argv);
+	global_share_widget = new QGLWidget();
+	if (!global_share_widget->isValid()) {
+		fprintf(stderr, "Failed to initialize OpenGL. Futatabi needs at least OpenGL 4.5 to function properly.\n");
+		exit(1);
+	}
+
+	// Initialize Movit.
+	{
+		QSurface *surface = create_surface();
+		QOpenGLContext *context = create_context(surface);
+		make_current(context, surface);
+		CHECK(movit::init_movit(MOVIT_SHADER_DIR, movit::MOVIT_DEBUG_OFF));
+		delete_context(context);
+		// TODO: Delete the surface, too.
+	}
+
+	// Populate the frames[] index (and start_pts) from any earlier session.
+	load_existing_frames();
+
+	MainWindow main_window;
+	main_window.show();
+
+	global_httpd->add_endpoint("/queue_status", bind(&MainWindow::get_queue_status, &main_window), HTTPD::NO_CORS_POLICY);
+	global_httpd->start(global_flags.http_port);
+
+	init_jpeg_vaapi();
+
+	// The recorder runs in the background for the app's entire lifetime.
+	thread record_thread(record_thread_func);
+
+	int ret = app.exec();
+
+	// Qt event loop is done; wind down the recorder and decoder threads.
+	should_quit = true;
+	record_thread.join();
+	JPEGFrameView::shutdown();
+
+	return ret;
+}
+
+// Indexes all frames in one .frames file into the global frames[] arrays.
+// Uses the cached frame list from SQLite when the file size matches;
+// otherwise scans the file for frame magics and rebuilds (and re-stores)
+// the list, tolerating garbage/truncated data in between frames.
+void load_frame_file(const char *filename, const string &basename, unsigned filename_idx, DB *db)
+{
+	struct stat st;
+	if (stat(filename, &st) == -1) {
+		perror(filename);
+		exit(1);
+	}
+
+	vector<DB::FrameOnDiskAndStreamIdx> all_frames = db->load_frame_file(basename, st.st_size, filename_idx);
+	if (!all_frames.empty()) {
+		// We already had this cached in the database, so no need to look in the file.
+		for (const DB::FrameOnDiskAndStreamIdx &frame : all_frames) {
+			if (frame.stream_idx >= 0 && frame.stream_idx < MAX_STREAMS) {
+				frames[frame.stream_idx].push_back(frame.frame);
+				start_pts = max(start_pts, frame.frame.pts);
+			}
+		}
+		return;
+	}
+
+	FILE *fp = fopen(filename, "rb");
+	if (fp == nullptr) {
+		perror(filename);
+		exit(1);
+	}
+
+	// Scan byte-by-byte for the 8-byte frame magic so we can resynchronize
+	// after garbage. magic_offset counts how much of the magic has matched.
+	size_t magic_offset = 0;
+	size_t skipped_bytes = 0;
+	while (!feof(fp) && !ferror(fp)) {
+		int ch = getc(fp);
+		if (ch == -1) {
+			break;
+		}
+		// NOTE(review): on a mismatch, the current byte is not re-examined as
+		// a potential start of a new magic, so a frame immediately following
+		// a partial match could presumably be missed — confirm whether this
+		// can happen with real data.
+		if (ch != frame_magic[magic_offset++]) {
+			skipped_bytes += magic_offset;
+			magic_offset = 0;
+			continue;
+		}
+		if (magic_offset < frame_magic_len) {
+			// Still reading the magic (hopefully).
+			continue;
+		}
+
+		// OK, found the magic. Try to parse the frame header.
+		magic_offset = 0;
+
+		if (skipped_bytes > 0) {
+			fprintf(stderr, "WARNING: %s: Skipped %zu garbage bytes in the middle.\n",
+				filename, skipped_bytes);
+			skipped_bytes = 0;
+		}
+
+		// Header length is stored in network byte order (see write_frame()).
+		uint32_t len;
+		if (fread(&len, sizeof(len), 1, fp) != 1) {
+			fprintf(stderr, "WARNING: %s: Short read when getting length.\n", filename);
+			break;
+		}
+
+		string serialized;
+		serialized.resize(ntohl(len));
+		if (fread(&serialized[0], serialized.size(), 1, fp) != 1) {
+			fprintf(stderr, "WARNING: %s: Short read when reading frame header (%zu bytes).\n", filename, serialized.size());
+			break;
+		}
+
+		FrameHeaderProto hdr;
+		if (!hdr.ParseFromString(serialized)) {
+			fprintf(stderr, "WARNING: %s: Corrupted frame header.\n", filename);
+			continue;
+		}
+
+		FrameOnDisk frame;
+		frame.pts = hdr.pts();
+		frame.offset = ftell(fp);
+		frame.filename_idx = filename_idx;
+		frame.size = hdr.file_size();
+
+		// Skip past the JPEG payload; if the file ends early, resume scanning.
+		if (fseek(fp, frame.offset + frame.size, SEEK_SET) == -1) {
+			fprintf(stderr, "WARNING: %s: Could not seek past frame (probably truncated).\n", filename);
+			continue;
+		}
+
+		if (hdr.stream_idx() >= 0 && hdr.stream_idx() < MAX_STREAMS) {
+			frames[hdr.stream_idx()].push_back(frame);
+			start_pts = max(start_pts, hdr.pts());
+		}
+		all_frames.emplace_back(DB::FrameOnDiskAndStreamIdx{ frame, unsigned(hdr.stream_idx()) });
+	}
+
+	if (skipped_bytes > 0) {
+		fprintf(stderr, "WARNING: %s: Skipped %zu garbage bytes at the end.\n",
+			filename, skipped_bytes);
+	}
+
+	// Cache the scan result in SQLite so the next startup takes the fast path.
+	size_t size = ftell(fp);
+	fclose(fp);
+
+	db->store_frame_file(basename, size, all_frames);
+}
+
+// Scans the frames/ directory at startup, loads every frame file (with a
+// cancellable progress dialog), sets start_pts past all existing frames,
+// and prunes database entries for files that no longer exist.
+void load_existing_frames()
+{
+	QProgressDialog progress("Scanning frame directory...", "Abort", 0, 1);
+	progress.setWindowTitle("Futatabi");
+	progress.setWindowModality(Qt::WindowModal);
+	progress.setMinimumDuration(1000);
+	progress.setMaximum(1);
+	progress.setValue(0);
+
+	string frame_dir = global_flags.working_directory + "/frames";
+	DIR *dir = opendir(frame_dir.c_str());
+	if (dir == nullptr) {
+		// No frame directory: start fresh rather than aborting.
+		perror("frames/");
+		start_pts = 0;
+		return;
+	}
+
+	vector<string> frame_basenames;
+	for ( ;; ) {
+		// readdir() signals errors via errno; reset it first to distinguish
+		// "end of directory" from an actual failure.
+		errno = 0;
+		dirent *de = readdir(dir);
+		if (de == nullptr) {
+			if (errno != 0) {
+				perror("readdir");
+				exit(1);
+			}
+			break;
+		}
+
+		if (de->d_type == DT_REG || de->d_type == DT_LNK) {
+			string filename = frame_dir + "/" + de->d_name;
+			frame_filenames.push_back(filename);
+			frame_basenames.push_back(de->d_name);
+		}
+
+		if (progress.wasCanceled()) {
+			exit(1);
+		}
+	}
+	closedir(dir);
+
+	progress.setMaximum(frame_filenames.size() + 2);
+	progress.setValue(1);
+
+	progress.setLabelText("Opening database...");
+	DB db(global_flags.working_directory + "/futatabi.db");
+
+	progress.setLabelText("Reading frame files...");
+	progress.setValue(2);
+
+	for (size_t i = 0; i < frame_filenames.size(); ++i) {
+		load_frame_file(frame_filenames[i].c_str(), frame_basenames[i], i, &db);
+		progress.setValue(i + 3);
+		if (progress.wasCanceled()) {
+			exit(1);
+		}
+	}
+
+	if (start_pts == -1) {
+		start_pts = 0;
+	} else {
+		// Add a gap of one second from the old frames to the new ones.
+		start_pts += TIMEBASE;
+	}
+
+	// Files may have been loaded out of pts order; fix that per stream.
+	for (int stream_idx = 0; stream_idx < MAX_STREAMS; ++stream_idx) {
+		sort(frames[stream_idx].begin(), frames[stream_idx].end(),
+			[](const auto &a, const auto &b) { return a.pts < b.pts; });
+	}
+
+	db.clean_unused_frame_files(frame_basenames);
+}
+
+// Recorder thread: reads packets (one JPEG frame each) from the configured
+// source, rewrites their pts into our own timebase starting at start_pts,
+// persists them via write_frame(), and pushes previews to the UI.
+// Runs until should_quit is set or the source is exhausted.
+int record_thread_func()
+{
+	auto format_ctx = avformat_open_input_unique(global_flags.stream_source.c_str(), nullptr, nullptr);
+	if (format_ctx == nullptr) {
+		fprintf(stderr, "%s: Error opening file\n", global_flags.stream_source.c_str());
+		return 1;
+	}
+
+	int64_t last_pts = -1;
+	int64_t pts_offset;  // Only valid once last_pts != -1 (set on the first packet).
+	DB db(global_flags.working_directory + "/futatabi.db");
+
+	while (!should_quit.load()) {
+		AVPacket pkt;
+		// Unrefs the packet's buffers at the end of each loop iteration.
+		unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
+			&pkt, av_packet_unref);
+		av_init_packet(&pkt);
+		pkt.data = nullptr;
+		pkt.size = 0;
+
+		// TODO: Make it possible to abort av_read_frame() (use an interrupt callback);
+		// right now, should_quit will be ignored if it's hung on I/O.
+		if (av_read_frame(format_ctx.get(), &pkt) != 0) {
+			break;
+		}
+
+		// Convert pts to our own timebase.
+		AVRational stream_timebase = format_ctx->streams[pkt.stream_index]->time_base;
+		int64_t pts = av_rescale_q(pkt.pts, stream_timebase, AVRational{ 1, TIMEBASE });
+
+		// Translate offset into our stream.
+		if (last_pts == -1) {
+			pts_offset = start_pts - pts;
+		}
+		pts = std::max(pts + pts_offset, start_pts);
+
+		//fprintf(stderr, "Got a frame from camera %d, pts = %ld, size = %d\n",
+		//	pkt.stream_index, pts, pkt.size);
+		FrameOnDisk frame = write_frame(pkt.stream_index, pts, pkt.data, pkt.size, &db);
+
+		// NOTE(review): pkt is captured by value, and its data buffer may
+		// already be unreffed when the lambda runs; only stream_index is
+		// read, which is safe, but do not dereference pkt.data here.
+		post_to_main_thread([pkt, frame] {
+			if (pkt.stream_index == 0) {
+				global_mainwindow->ui->input1_display->setFrame(pkt.stream_index, frame);
+			} else if (pkt.stream_index == 1) {
+				global_mainwindow->ui->input2_display->setFrame(pkt.stream_index, frame);
+			} else if (pkt.stream_index == 2) {
+				global_mainwindow->ui->input3_display->setFrame(pkt.stream_index, frame);
+			} else if (pkt.stream_index == 3) {
+				global_mainwindow->ui->input4_display->setFrame(pkt.stream_index, frame);
+			}
+		});
+
+		// Optionally pace input to (approximately) realtime for demo playback.
+		if (last_pts != -1 && global_flags.slow_down_input) {
+			this_thread::sleep_for(microseconds((pts - last_pts) * 1000000 / TIMEBASE));
+		}
+		last_pts = pts;
+		current_pts = pts;
+	}
+
+	return 0;
+}
--- /dev/null
+#include "mainwindow.h"
+
+#include "clip_list.h"
+#include "shared/disk_space_estimator.h"
+#include "flags.h"
+#include "frame_on_disk.h"
+#include "player.h"
+#include "shared/post_to_main_thread.h"
+#include "shared/timebase.h"
+#include "ui_mainwindow.h"
+
+#include <QMouseEvent>
+#include <QShortcut>
+#include <QTimer>
+#include <QWheelEvent>
+#include <future>
+#include <sqlite3.h>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace std::placeholders;
+
+// There is only ever one MainWindow, so a global is fine; it is also reached
+// from other threads via post_to_main_thread().
+MainWindow *global_mainwindow = nullptr;
+static ClipList *cliplist_clips;
+static PlayList *playlist_clips;
+
+// Updated by the recorder thread in main.cpp; used for cueing in/out points.
+extern int64_t current_pts;
+
+// Builds the whole UI: restores clip list/playlist state from SQLite, wires
+// up keyboard shortcuts and buttons, installs event filters for pts
+// scrubbing, and creates the preview and live players.
+MainWindow::MainWindow()
+	: ui(new Ui::MainWindow),
+	  db(global_flags.working_directory + "/futatabi.db")
+{
+	global_mainwindow = this;
+	ui->setupUi(this);
+
+	// The menus.
+	connect(ui->exit_action, &QAction::triggered, this, &MainWindow::exit_triggered);
+
+	// Disk space display in the menu bar's corner.
+	global_disk_space_estimator = new DiskSpaceEstimator(bind(&MainWindow::report_disk_space, this, _1, _2));
+	disk_free_label = new QLabel(this);
+	disk_free_label->setStyleSheet("QLabel {padding-right: 5px;}");
+	ui->menuBar->setCornerWidget(disk_free_label);
+
+	// Restore the previous session's clip list and playlist.
+	StateProto state = db.get_state();
+
+	cliplist_clips = new ClipList(state.clip_list());
+	ui->clip_list->setModel(cliplist_clips);
+	connect(cliplist_clips, &ClipList::any_content_changed, this, &MainWindow::content_changed);
+
+	playlist_clips = new PlayList(state.play_list());
+	ui->playlist->setModel(playlist_clips);
+	connect(playlist_clips, &PlayList::any_content_changed, this, &MainWindow::content_changed);
+
+	// For un-highlighting when we lose focus.
+	ui->clip_list->installEventFilter(this);
+
+	// For scrubbing in the pts columns.
+	ui->clip_list->viewport()->installEventFilter(this);
+	ui->playlist->viewport()->installEventFilter(this);
+
+	// Keyboard shortcuts click the corresponding buttons, so the two paths
+	// share the same slot.
+	QShortcut *cue_in = new QShortcut(QKeySequence(Qt::Key_A), this);
+	connect(cue_in, &QShortcut::activated, ui->cue_in_btn, &QPushButton::click);
+	connect(ui->cue_in_btn, &QPushButton::clicked, this, &MainWindow::cue_in_clicked);
+
+	QShortcut *cue_out = new QShortcut(QKeySequence(Qt::Key_S), this);
+	connect(cue_out, &QShortcut::activated, ui->cue_out_btn, &QPushButton::click);
+	connect(ui->cue_out_btn, &QPushButton::clicked, this, &MainWindow::cue_out_clicked);
+
+	QShortcut *queue = new QShortcut(QKeySequence(Qt::Key_Q), this);
+	connect(queue, &QShortcut::activated, ui->queue_btn, &QPushButton::click);
+	connect(ui->queue_btn, &QPushButton::clicked, this, &MainWindow::queue_clicked);
+
+	QShortcut *preview = new QShortcut(QKeySequence(Qt::Key_W), this);
+	connect(preview, &QShortcut::activated, ui->preview_btn, &QPushButton::click);
+	connect(ui->preview_btn, &QPushButton::clicked, this, &MainWindow::preview_clicked);
+
+	QShortcut *play = new QShortcut(QKeySequence(Qt::Key_Space), this);
+	connect(play, &QShortcut::activated, ui->play_btn, &QPushButton::click);
+	connect(ui->play_btn, &QPushButton::clicked, this, &MainWindow::play_clicked);
+
+	// Keys 1-4 (or clicking a display) preview the corresponding camera angle.
+	QShortcut *preview_1 = new QShortcut(QKeySequence(Qt::Key_1), this);
+	connect(preview_1, &QShortcut::activated, ui->preview_1_btn, &QPushButton::click);
+	connect(ui->input1_display, &JPEGFrameView::clicked, ui->preview_1_btn, &QPushButton::click);
+	connect(ui->preview_1_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(0); });
+	ui->input1_display->set_overlay("1");
+
+	QShortcut *preview_2 = new QShortcut(QKeySequence(Qt::Key_2), this);
+	connect(preview_2, &QShortcut::activated, ui->preview_2_btn, &QPushButton::click);
+	connect(ui->input2_display, &JPEGFrameView::clicked, ui->preview_2_btn, &QPushButton::click);
+	connect(ui->preview_2_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(1); });
+	ui->input2_display->set_overlay("2");
+
+	QShortcut *preview_3 = new QShortcut(QKeySequence(Qt::Key_3), this);
+	connect(preview_3, &QShortcut::activated, ui->preview_3_btn, &QPushButton::click);
+	connect(ui->input3_display, &JPEGFrameView::clicked, ui->preview_3_btn, &QPushButton::click);
+	connect(ui->preview_3_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(2); });
+	ui->input3_display->set_overlay("3");
+
+	QShortcut *preview_4 = new QShortcut(QKeySequence(Qt::Key_4), this);
+	connect(preview_4, &QShortcut::activated, ui->preview_4_btn, &QPushButton::click);
+	connect(ui->input4_display, &JPEGFrameView::clicked, ui->preview_4_btn, &QPushButton::click);
+	connect(ui->preview_4_btn, &QPushButton::clicked, [this]{ preview_angle_clicked(3); });
+	ui->input4_display->set_overlay("4");
+
+	connect(ui->playlist_duplicate_btn, &QPushButton::clicked, this, &MainWindow::playlist_duplicate);
+
+	connect(ui->playlist_remove_btn, &QPushButton::clicked, this, &MainWindow::playlist_remove);
+	QShortcut *delete_key = new QShortcut(QKeySequence(Qt::Key_Delete), ui->playlist);
+	connect(delete_key, &QShortcut::activated, [this] {
+		if (ui->playlist->hasFocus()) {
+			playlist_remove();
+		}
+	});
+
+	// TODO: support drag-and-drop.
+	connect(ui->playlist_move_up_btn, &QPushButton::clicked, [this]{ playlist_move(-1); });
+	connect(ui->playlist_move_down_btn, &QPushButton::clicked, [this]{ playlist_move(1); });
+
+	connect(ui->playlist->selectionModel(), &QItemSelectionModel::selectionChanged,
+		this, &MainWindow::playlist_selection_changed);
+	playlist_selection_changed();  // First time set-up.
+
+	preview_player = new Player(ui->preview_display, /*also_output_to_stream=*/false);
+	live_player = new Player(ui->live_display, /*also_output_to_stream=*/true);
+	live_player->set_done_callback([this]{
+		post_to_main_thread([this]{
+			live_player_clip_done();
+		});
+	});
+	live_player->set_next_clip_callback(bind(&MainWindow::live_player_get_next_clip, this));
+	live_player->set_progress_callback([this](const map<size_t, double> &progress) {
+		post_to_main_thread([this, progress] {
+			live_player_clip_progress(progress);
+		});
+	});
+	set_output_status("paused");
+
+	// Debounce timer for persisting rapid model changes (e.g. scrubbing).
+	defer_timeout = new QTimer(this);
+	defer_timeout->setSingleShot(true);
+	connect(defer_timeout, &QTimer::timeout, this, &MainWindow::defer_timer_expired);
+
+	connect(ui->clip_list->selectionModel(), &QItemSelectionModel::currentChanged,
+		this, &MainWindow::clip_list_selection_changed);
+}
+
+// Sets the in point of the current clip to "now". If the most recent clip is
+// still open (no out point yet), its in point is simply moved; otherwise a
+// fresh clip is started.
+void MainWindow::cue_in_clicked()
+{
+	const bool reuse_open_clip =
+		!cliplist_clips->empty() && cliplist_clips->back()->pts_out < 0;
+	if (reuse_open_clip) {
+		cliplist_clips->mutable_back()->pts_in = current_pts;
+		return;
+	}
+
+	Clip new_clip;
+	new_clip.pts_in = current_pts;
+	cliplist_clips->add_clip(new_clip);
+	playlist_selection_changed();
+}
+
+// Sets the out point of the most recent clip to "now"; a no-op when the
+// clip list is empty.
+void MainWindow::cue_out_clicked()
+{
+	if (cliplist_clips->empty()) {
+		return;
+	}
+	cliplist_clips->mutable_back()->pts_out = current_pts;
+	// TODO: select the row in the clip list?
+}
+
+// Appends the selected clip (or, with no selection, the last clip on camera 1)
+// to the playlist. Only fully-cued clips (pts_out set) are accepted.
+void MainWindow::queue_clicked()
+{
+	if (cliplist_clips->empty()) {
+		return;
+	}
+
+	QItemSelectionModel *selected = ui->clip_list->selectionModel();
+	if (!selected->hasSelection()) {
+		Clip clip = *cliplist_clips->back();
+		clip.stream_idx = 0;
+		if (clip.pts_out != -1) {
+			playlist_clips->add_clip(clip);
+			playlist_selection_changed();
+		}
+		return;
+	}
+
+	// If a camera column is selected, that column decides the angle;
+	// otherwise, take whatever angle the preview display is showing.
+	QModelIndex index = selected->currentIndex();
+	Clip clip = *cliplist_clips->clip(index.row());
+	if (index.column() >= int(ClipList::Column::CAMERA_1) &&
+	    index.column() <= int(ClipList::Column::CAMERA_4)) {
+		clip.stream_idx = index.column() - int(ClipList::Column::CAMERA_1);
+	} else {
+		clip.stream_idx = ui->preview_display->get_stream_idx();
+	}
+
+	if (clip.pts_out != -1) {
+		playlist_clips->add_clip(clip);
+		playlist_selection_changed();
+	}
+}
+
+// Plays a clip in the preview display. A focused playlist selection wins;
+// otherwise the clip-list selection (or, lacking one, the last clip) is used,
+// with the camera angle taken from the selected column when applicable.
+void MainWindow::preview_clicked()
+{
+	if (ui->playlist->hasFocus()) {
+		// Allow the playlist as preview iff it has focus and something is selected.
+		QItemSelectionModel *selected = ui->playlist->selectionModel();
+		if (selected->hasSelection()) {
+			QModelIndex index = selected->currentIndex();
+			const Clip &clip = *playlist_clips->clip(index.row());
+			preview_player->play_clip(clip, index.row(), clip.stream_idx);
+			return;
+		}
+	}
+
+	if (cliplist_clips->empty())
+		return;
+
+	QItemSelectionModel *selected = ui->clip_list->selectionModel();
+	if (!selected->hasSelection()) {
+		// No selection: preview the newest clip, on camera 1.
+		preview_player->play_clip(*cliplist_clips->back(), cliplist_clips->size() - 1, 0);
+		return;
+	}
+
+	QModelIndex index = selected->currentIndex();
+	unsigned stream_idx;
+	if (index.column() >= int(ClipList::Column::CAMERA_1) &&
+	    index.column() <= int(ClipList::Column::CAMERA_4)) {
+		stream_idx = index.column() - int(ClipList::Column::CAMERA_1);
+	} else {
+		stream_idx = ui->preview_display->get_stream_idx();
+	}
+	preview_player->play_clip(*cliplist_clips->clip(index.row()), index.row(), stream_idx);
+}
+
+// Switches the preview player to the given camera angle, and moves the
+// clip-list selection to the matching camera column.
+void MainWindow::preview_angle_clicked(unsigned stream_idx)
+{
+	preview_player->override_angle(stream_idx);
+
+	// Change the selection if we were previewing a clip from the clip list.
+	// (The only other thing we could be showing is a pts scrub, and if so,
+	// that would be selected.)
+	QItemSelectionModel *selection = ui->clip_list->selectionModel();
+	if (!selection->hasSelection()) {
+		return;
+	}
+	const QModelIndex cell = selection->selectedIndexes().front();
+	const int camera_column = int(ClipList::Column::CAMERA_1) + stream_idx;
+	selection->setCurrentIndex(cell.sibling(cell.row(), camera_column), QItemSelectionModel::ClearAndSelect);
+}
+
+// Duplicates the selected (contiguous) range of playlist rows in place.
+void MainWindow::playlist_duplicate()
+{
+	QItemSelectionModel *selected = ui->playlist->selectionModel();
+	if (!selected->hasSelection()) {
+		// Should have been grayed out, but OK.
+		return;
+	}
+	// NOTE(review): front()/back() are assumed to be the topmost/bottommost
+	// selected rows — confirm, since Qt does not document an ordering for
+	// selectedRows().
+	QModelIndexList rows = selected->selectedRows();
+	int first = rows.front().row(), last = rows.back().row();
+	playlist_clips->duplicate_clips(first, last);
+	playlist_selection_changed();
+}
+
+// Removes the selected (contiguous) range of rows from the playlist.
+void MainWindow::playlist_remove()
+{
+	QItemSelectionModel *selected = ui->playlist->selectionModel();
+	if (!selected->hasSelection()) {
+		// Should have been grayed out, but OK.
+		return;
+	}
+	// NOTE(review): assumes selectedRows() is ordered top-to-bottom — confirm.
+	QModelIndexList rows = selected->selectedRows();
+	int first = rows.front().row(), last = rows.back().row();
+	playlist_clips->erase_clips(first, last);
+
+	// TODO: select the next one in the list?
+
+	playlist_selection_changed();
+}
+
+// Moves the selected (contiguous) range of playlist rows one step up
+// (delta = -1) or down (delta = 1), unless already at the edge.
+void MainWindow::playlist_move(int delta)
+{
+	QItemSelectionModel *selected = ui->playlist->selectionModel();
+	if (!selected->hasSelection()) {
+		// Should have been grayed out, but OK.
+		return;
+	}
+
+	// NOTE(review): assumes selectedRows() is ordered top-to-bottom — confirm.
+	QModelIndexList rows = selected->selectedRows();
+	int first = rows.front().row(), last = rows.back().row();
+	if ((delta == -1 && first == 0) ||
+	    (delta == 1 && size_t(last) == playlist_clips->size() - 1)) {
+		// Should have been grayed out, but OK.
+		return;
+	}
+
+	playlist_clips->move_clips(first, last, delta);
+	playlist_selection_changed();
+}
+
+// The debounce window closed with no further edits; persist the state that
+// content_changed() queued up (see deferred_state).
+void MainWindow::defer_timer_expired()
+{
+	state_changed(deferred_state);
+}
+
+// Called on any clip list / playlist model change. Normally persists the
+// full state immediately, but during a rapid burst of related edits
+// (scrubbing sets currently_deferring_model_changes), writes are debounced
+// behind defer_timeout, keyed by current_change_id.
+void MainWindow::content_changed()
+{
+	if (defer_timeout->isActive() &&
+	    (!currently_deferring_model_changes || deferred_change_id != current_change_id)) {
+		// There's some deferred event waiting, but this event is unrelated.
+		// So it's time to short-circuit that timer and do the work it wanted to do.
+		defer_timeout->stop();
+		state_changed(deferred_state);
+	}
+	StateProto state;
+	*state.mutable_clip_list() = cliplist_clips->serialize();
+	*state.mutable_play_list() = playlist_clips->serialize();
+	if (currently_deferring_model_changes) {
+		deferred_change_id = current_change_id;
+		deferred_state = std::move(state);
+		defer_timeout->start(200);
+		return;
+	}
+	state_changed(state);
+}
+
+// Persists the complete clip list + playlist snapshot to SQLite.
+void MainWindow::state_changed(const StateProto &state)
+{
+	db.store_state(state);
+}
+
+// Starts live playout from the selected playlist row (or the first row if
+// nothing is selected), resetting the progress/currently-playing markers.
+void MainWindow::play_clicked()
+{
+	if (playlist_clips->empty())
+		return;
+
+	QItemSelectionModel *selected = ui->playlist->selectionModel();
+	int row;
+	if (!selected->hasSelection()) {
+		row = 0;
+	} else {
+		row = selected->selectedRows(0)[0].row();
+	}
+
+	const Clip &clip = *playlist_clips->clip(row);
+	live_player->play_clip(clip, row, clip.stream_idx);
+	playlist_clips->set_progress({{ row, 0.0f }});
+	playlist_clips->set_currently_playing(row, 0.0f);
+	playlist_selection_changed();
+}
+
+// Called (on the main thread) when the live player finishes a clip: either
+// advance the playing/progress markers to the next playlist row, or — if that
+// was the last row (or nothing was playing) — mark playout as paused.
+void MainWindow::live_player_clip_done()
+{
+	const int row = playlist_clips->get_currently_playing();
+	const bool finished_playlist =
+		(row == -1) || (row == int(playlist_clips->size()) - 1);
+	if (finished_playlist) {
+		set_output_status("paused");
+		playlist_clips->set_progress({});
+		playlist_clips->set_currently_playing(-1, 0.0f);
+	} else {
+		playlist_clips->set_progress({{ row + 1, 0.0f }});
+		playlist_clips->set_currently_playing(row + 1, 0.0f);
+	}
+}
+
+// Called from the player thread to fetch the clip after the one currently
+// playing. Blocks the caller on a promise serviced by the main thread;
+// returns a default-constructed Clip (and row 0) when there is no next clip.
+pair<Clip, size_t> MainWindow::live_player_get_next_clip()
+{
+	// playlist_clips can only be accessed on the main thread.
+	// Hopefully, we won't have to wait too long for this to come back.
+	promise<pair<Clip, size_t>> clip_promise;
+	future<pair<Clip, size_t>> clip = clip_promise.get_future();
+	post_to_main_thread([this, &clip_promise] {
+		int row = playlist_clips->get_currently_playing();
+		if (row != -1 && row < int(playlist_clips->size()) - 1) {
+			clip_promise.set_value(make_pair(*playlist_clips->clip(row + 1), row + 1));
+		} else {
+			clip_promise.set_value(make_pair(Clip(), 0));
+		}
+	});
+	// NOTE(review): this blocks until the main thread runs the closure; if the
+	// main thread is ever waiting on the player, this could deadlock — confirm.
+	return clip.get();
+}
+
+// Formats a duration in seconds as "M:SS.mmm" (minutes are not wrapped into
+// hours; durations shown here are short).
+static string format_duration(double t)
+{
+	const int total_ms = lrint(t * 1e3);
+	const int ms = total_ms % 1000;
+	const int total_sec = total_ms / 1000;
+	const int s = total_sec % 60;
+	const int m = total_sec / 60;
+
+	char buf[256];
+	snprintf(buf, sizeof(buf), "%d:%02d.%03d", m, s, ms);
+	return buf;
+}
+
+// Updates per-row progress bars and the "time left" status. progress maps
+// playlist row -> fraction played; the time remaining is summed from the
+// last-playing row to the end, compensating for fade overlap between clips.
+void MainWindow::live_player_clip_progress(const map<size_t, double> &progress)
+{
+	playlist_clips->set_progress(progress);
+
+	// Look at the last clip and then start counting from there.
+	assert(!progress.empty());
+	auto last_it = progress.end();
+	--last_it;
+	double remaining = 0.0;
+	double last_fade_time_seconds = 0.0;
+	for (size_t row = last_it->first; row < playlist_clips->size(); ++row) {
+		const Clip clip = *playlist_clips->clip(row);
+		double clip_length = double(clip.pts_out - clip.pts_in) / TIMEBASE / 0.5;  // FIXME: stop hardcoding speed.
+		if (row == last_it->first) {
+			// A clip we're playing: Subtract the part we've already played.
+			remaining = clip_length * (1.0 - last_it->second);
+		} else {
+			// A clip we haven't played yet: Subtract the part that's overlapping
+			// with a previous clip (due to fade).
+			remaining += max(clip_length - last_fade_time_seconds, 0.0);
+		}
+		last_fade_time_seconds = min(clip_length, clip.fade_time_seconds);
+	}
+	set_output_status(format_duration(remaining) + " left");
+}
+
+// Standard Qt resize handling, plus a deferred relayout of the 16:9 displays.
+void MainWindow::resizeEvent(QResizeEvent *event)
+{
+	QMainWindow::resizeEvent(event);
+
+	// Ask for a relayout, but only after the event loop is done doing relayout
+	// on everything else.
+	QMetaObject::invokeMethod(this, "relayout", Qt::QueuedConnection);
+}
+
+// Keeps the live and preview displays at a 16:9 aspect ratio by deriving
+// their minimum width from their current height.
+void MainWindow::relayout()
+{
+	const int live_min_width = ui->live_display->height() * 16 / 9;
+	ui->live_display->setMinimumWidth(live_min_width);
+
+	const int preview_min_width = ui->preview_display->height() * 16 / 9;
+	ui->preview_display->setMinimumWidth(preview_min_width);
+}
+
+// Clamps a scrubbed in point into its valid range and stores it on the clip:
+// never below 0, and never past the clip's out point (or, for a still-open
+// clip, past the current live position).
+void set_pts_in(int64_t pts, int64_t current_pts, ClipProxy &clip)
+{
+	const int64_t upper = (clip->pts_out == -1) ? current_pts : clip->pts_out;
+	clip->pts_in = std::min(std::max<int64_t>(pts, 0), upper);
+}
+
+// Event filter installed on the clip list, the playlist and their viewports.
+// Implements two direct-manipulation features:
+//
+//  - Scrubbing: pressing and dragging horizontally on an IN/OUT cell adjusts
+//    that pts (with a small dead zone so plain clicks are unaffected), while
+//    previewing the corresponding frame.
+//  - Mouse wheel: scrolling on an IN/OUT cell nudges the pts; scrolling on
+//    the camera column (playlist only) steps through the cameras.
+//
+// Returns true when the event has been consumed, so that Qt does not also
+// use it for selection or scrolling.
+bool MainWindow::eventFilter(QObject *watched, QEvent *event)
+{
+	constexpr int dead_zone_pixels = 3;  // To avoid that simple clicks get misinterpreted.
+	constexpr int scrub_sensitivity = 100;  // pts units per pixel.
+	constexpr int wheel_sensitivity = 100;  // pts units per degree.
+	constexpr int camera_degrees_per_pixel = 15;  // One click of most mice.
+
+	unsigned stream_idx = ui->preview_display->get_stream_idx();
+
+	// When the clip list loses focus, no camera column is current anymore.
+	if (watched == ui->clip_list) {
+		if (event->type() == QEvent::FocusOut) {
+			highlight_camera_input(-1);
+		}
+		return false;
+	}
+
+	// Any non-wheel event ends the accumulation of small wheel motions
+	// on the camera column (see leftover_angle_degrees).
+	if (event->type() != QEvent::Wheel) {
+		last_mousewheel_camera_row = -1;
+	}
+
+	if (event->type() == QEvent::MouseButtonPress) {
+		QMouseEvent *mouse = (QMouseEvent *)event;
+
+		QTableView *destination;
+		ScrubType type;
+
+		if (watched == ui->clip_list->viewport()) {
+			destination = ui->clip_list;
+			type = SCRUBBING_CLIP_LIST;
+		} else if (watched == ui->playlist->viewport()) {
+			destination = ui->playlist;
+			type = SCRUBBING_PLAYLIST;
+		} else {
+			return false;
+		}
+		int column = destination->columnAt(mouse->x());
+		int row = destination->rowAt(mouse->y());
+		if (column == -1 || row == -1)
+			return false;
+
+		// Start a scrub if the press was on an IN or OUT cell; remember the
+		// pts we started from, and preview the frame it corresponds to.
+		if (type == SCRUBBING_CLIP_LIST) {
+			if (ClipList::Column(column) == ClipList::Column::IN) {
+				scrub_pts_origin = cliplist_clips->clip(row)->pts_in;
+				preview_single_frame(scrub_pts_origin, stream_idx, FIRST_AT_OR_AFTER);
+			} else if (ClipList::Column(column) == ClipList::Column::OUT) {
+				scrub_pts_origin = cliplist_clips->clip(row)->pts_out;
+				preview_single_frame(scrub_pts_origin, stream_idx, LAST_BEFORE);
+			} else {
+				return false;
+			}
+		} else {
+			if (PlayList::Column(column) == PlayList::Column::IN) {
+				scrub_pts_origin = playlist_clips->clip(row)->pts_in;
+				preview_single_frame(scrub_pts_origin, stream_idx, FIRST_AT_OR_AFTER);
+			} else if (PlayList::Column(column) == PlayList::Column::OUT) {
+				scrub_pts_origin = playlist_clips->clip(row)->pts_out;
+				preview_single_frame(scrub_pts_origin, stream_idx, LAST_BEFORE);
+			} else {
+				return false;
+			}
+		}
+
+		scrubbing = true;
+		scrub_row = row;
+		scrub_column = column;
+		scrub_x_origin = mouse->x();
+		scrub_type = type;
+	} else if (event->type() == QEvent::MouseMove) {
+		if (scrubbing) {
+			QMouseEvent *mouse = (QMouseEvent *)event;
+			int offset = mouse->x() - scrub_x_origin;
+
+			// Apply the dead zone, so that the first few pixels of motion
+			// (or a plain click) do not change the value.
+			int adjusted_offset;
+			if (offset >= dead_zone_pixels) {
+				adjusted_offset = offset - dead_zone_pixels;
+			} else if (offset < -dead_zone_pixels) {
+				adjusted_offset = offset + dead_zone_pixels;
+			} else {
+				adjusted_offset = 0;
+			}
+
+			int64_t pts = scrub_pts_origin + adjusted_offset * scrub_sensitivity;
+			currently_deferring_model_changes = true;
+			if (scrub_type == SCRUBBING_CLIP_LIST) {
+				ClipProxy clip = cliplist_clips->mutable_clip(scrub_row);
+				if (scrub_column == int(ClipList::Column::IN)) {
+					current_change_id = "cliplist:in:" + to_string(scrub_row);
+					set_pts_in(pts, current_pts, clip);
+					preview_single_frame(pts, stream_idx, FIRST_AT_OR_AFTER);
+				} else {
+					// Fixed: this change id was "cliplist:out" (missing the trailing
+					// colon), inconsistent with every other id built here.
+					current_change_id = "cliplist:out:" + to_string(scrub_row);
+					pts = std::max(pts, clip->pts_in);
+					pts = std::min(pts, current_pts);
+					clip->pts_out = pts;
+					preview_single_frame(pts, stream_idx, LAST_BEFORE);
+				}
+			} else {
+				ClipProxy clip = playlist_clips->mutable_clip(scrub_row);
+				if (scrub_column == int(PlayList::Column::IN)) {
+					current_change_id = "playlist:in:" + to_string(scrub_row);
+					set_pts_in(pts, current_pts, clip);
+					preview_single_frame(pts, clip->stream_idx, FIRST_AT_OR_AFTER);
+				} else {
+					current_change_id = "playlist:out:" + to_string(scrub_row);
+					pts = std::max(pts, clip->pts_in);
+					pts = std::min(pts, current_pts);
+					clip->pts_out = pts;
+					preview_single_frame(pts, clip->stream_idx, LAST_BEFORE);
+				}
+			}
+			currently_deferring_model_changes = false;
+
+			return true;  // Don't use this mouse movement for selecting things.
+		}
+	} else if (event->type() == QEvent::Wheel) {
+		QWheelEvent *wheel = (QWheelEvent *)event;
+
+		QTableView *destination;
+		int in_column, out_column, camera_column;
+		if (watched == ui->clip_list->viewport()) {
+			destination = ui->clip_list;
+			in_column = int(ClipList::Column::IN);
+			out_column = int(ClipList::Column::OUT);
+			camera_column = -1;  // The clip list has no single camera column.
+			last_mousewheel_camera_row = -1;
+		} else if (watched == ui->playlist->viewport()) {
+			destination = ui->playlist;
+			in_column = int(PlayList::Column::IN);
+			out_column = int(PlayList::Column::OUT);
+			camera_column = int(PlayList::Column::CAMERA);
+		} else {
+			last_mousewheel_camera_row = -1;
+			return false;
+		}
+		int column = destination->columnAt(wheel->x());
+		int row = destination->rowAt(wheel->y());
+		if (column == -1 || row == -1) return false;
+
+		// Only adjust pts with the wheel if the given row is selected.
+		if (!destination->hasFocus() ||
+		    row != destination->selectionModel()->currentIndex().row()) {
+			return false;
+		}
+
+		currently_deferring_model_changes = true;
+		{
+			current_change_id = (watched == ui->clip_list->viewport()) ? "cliplist:" : "playlist:";
+			ClipProxy clip = (watched == ui->clip_list->viewport()) ?
+				cliplist_clips->mutable_clip(row) : playlist_clips->mutable_clip(row);
+			if (watched == ui->playlist->viewport()) {
+				stream_idx = clip->stream_idx;
+			}
+
+			if (column != camera_column) {
+				last_mousewheel_camera_row = -1;
+			}
+			if (column == in_column) {
+				current_change_id += "in:" + to_string(row);
+				int64_t pts = clip->pts_in + wheel->angleDelta().y() * wheel_sensitivity;
+				set_pts_in(pts, current_pts, clip);
+				preview_single_frame(pts, stream_idx, FIRST_AT_OR_AFTER);
+			} else if (column == out_column) {
+				current_change_id += "out:" + to_string(row);
+				int64_t pts = clip->pts_out + wheel->angleDelta().y() * wheel_sensitivity;
+				pts = std::max(pts, clip->pts_in);
+				pts = std::min(pts, current_pts);
+				clip->pts_out = pts;
+				preview_single_frame(pts, stream_idx, LAST_BEFORE);
+			} else if (column == camera_column) {
+				current_change_id += "camera:" + to_string(row);
+
+				// Accumulate partial clicks on the same row, so that mice with
+				// finer-grained wheels can also step the camera.
+				int angle_degrees = wheel->angleDelta().y();
+				if (last_mousewheel_camera_row == row) {
+					angle_degrees += leftover_angle_degrees;
+				}
+
+				// Note: Shadows the outer stream_idx; this one is the clip's
+				// new camera index, clamped to the valid range.
+				int stream_idx = clip->stream_idx + angle_degrees / camera_degrees_per_pixel;
+				stream_idx = std::max(stream_idx, 0);
+				stream_idx = std::min(stream_idx, NUM_CAMERAS - 1);
+				clip->stream_idx = stream_idx;
+
+				last_mousewheel_camera_row = row;
+				leftover_angle_degrees = angle_degrees % camera_degrees_per_pixel;
+
+				// Don't update the live view, that's rarely what the operator wants.
+			}
+		}
+		currently_deferring_model_changes = false;
+		return true;  // Don't scroll.
+	} else if (event->type() == QEvent::MouseButtonRelease) {
+		scrubbing = false;
+	}
+	return false;
+}
+
+// Show a single frame from the given stream in the preview display, first
+// snapping <pts> to an actual stored frame according to <rounding>.
+void MainWindow::preview_single_frame(int64_t pts, unsigned stream_idx, MainWindow::Rounding rounding)
+{
+	if (rounding == LAST_BEFORE) {
+		lock_guard<mutex> lock(frame_mu);
+		if (frames[stream_idx].empty())
+			return;
+		// NOTE(review): lower_bound() here yields the _first frame at or
+		// after_ pts — the same result as the FIRST_AT_OR_AFTER branch below.
+		// If LAST_BEFORE is really meant to snap backwards, the iterator
+		// would need to step back when it->pts > pts. Verify intended
+		// semantics before changing.
+		auto it = lower_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts,
+			[](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+		if (it != frames[stream_idx].end()) {
+			pts = it->pts;
+		}
+	} else {
+		assert(rounding == FIRST_AT_OR_AFTER);
+		lock_guard<mutex> lock(frame_mu);
+		if (frames[stream_idx].empty())
+			return;
+		// First frame with frame.pts >= pts (upper_bound on pts - 1).
+		auto it = upper_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts - 1,
+			[](int64_t pts, const FrameOnDisk &frame) { return pts < frame.pts; });
+		if (it != frames[stream_idx].end()) {
+			pts = it->pts;
+		}
+	}
+
+	// Show the frame by playing a one-pts-long fake clip on the preview player.
+	Clip fake_clip;
+	fake_clip.pts_in = pts;
+	fake_clip.pts_out = pts + 1;
+	preview_player->play_clip(fake_clip, 0, stream_idx);
+}
+
+// Called when the playlist selection changes (and also when the playlist
+// itself changes). Updates the enabled state of the playlist buttons, and
+// shows how much material is ready to play from the selected clip onwards.
+void MainWindow::playlist_selection_changed()
+{
+	QItemSelectionModel *selected = ui->playlist->selectionModel();
+	bool any_selected = selected->hasSelection();
+	ui->playlist_duplicate_btn->setEnabled(any_selected);
+	ui->playlist_remove_btn->setEnabled(any_selected);
+	// Can only move up if the topmost selected row isn't already first,
+	// and down if the bottom one isn't already last.
+	ui->playlist_move_up_btn->setEnabled(
+		any_selected && selected->selectedRows().front().row() > 0);
+	ui->playlist_move_down_btn->setEnabled(
+		any_selected && selected->selectedRows().back().row() < int(playlist_clips->size()) - 1);
+	ui->play_btn->setEnabled(!playlist_clips->empty());
+
+	if (!any_selected) {
+		set_output_status("paused");
+	} else {
+		// Sum the duration of the first selected clip and everything after it.
+		double remaining = 0.0;
+		for (int row = selected->selectedRows().front().row(); row < int(playlist_clips->size()); ++row) {
+			const Clip clip = *playlist_clips->clip(row);
+			remaining += double(clip.pts_out - clip.pts_in) / TIMEBASE / 0.5;  // FIXME: stop hardcoding speed.
+		}
+		set_output_status(format_duration(remaining) + " ready");
+	}
+}
+
+// When the current cell in the clip list moves onto one of the camera
+// columns, highlight the matching camera input frame; otherwise clear
+// the highlight.
+void MainWindow::clip_list_selection_changed(const QModelIndex &current, const QModelIndex &)
+{
+	int camera_selected = -1;
+	if (current.column() >= int(ClipList::Column::CAMERA_1) &&
+	    current.column() <= int(ClipList::Column::CAMERA_4)) {
+		camera_selected = current.column() - int(ClipList::Column::CAMERA_1);
+	}
+	highlight_camera_input(camera_selected);
+}
+
+// Callback with the current amount of free disk space and an estimate of
+// recording time left. May be called from a non-UI thread; the actual
+// widget update is marshalled via post_to_main_thread(). The time estimate
+// is formatted with coarser granularity (and without the red warning color)
+// the further away it is.
+void MainWindow::report_disk_space(off_t free_bytes, double estimated_seconds_left)
+{
+	char time_str[256];
+	if (estimated_seconds_left < 60.0) {
+		strcpy(time_str, "<font color=\"red\">Less than a minute</font>");
+	} else if (estimated_seconds_left < 1800.0) {  // Less than half an hour: Xm Ys (red).
+		int s = lrintf(estimated_seconds_left);
+		int m = s / 60;
+		s %= 60;
+		snprintf(time_str, sizeof(time_str), "<font color=\"red\">%dm %ds</font>", m, s);
+	} else if (estimated_seconds_left < 3600.0) {  // Less than an hour: Xm.
+		int m = lrintf(estimated_seconds_left / 60.0);
+		snprintf(time_str, sizeof(time_str), "%dm", m);
+	} else if (estimated_seconds_left < 36000.0) {  // Less than ten hours: Xh Ym.
+		int m = lrintf(estimated_seconds_left / 60.0);
+		int h = m / 60;
+		m %= 60;
+		snprintf(time_str, sizeof(time_str), "%dh %dm", h, m);
+	} else {  // More than ten hours: Xh.
+		int h = lrintf(estimated_seconds_left / 3600.0);
+		snprintf(time_str, sizeof(time_str), "%dh", h);
+	}
+	char buf[256];
+	// %' asks for thousands grouping; this needs a locale that defines it
+	// (presumably set up with setlocale() elsewhere — TODO confirm).
+	snprintf(buf, sizeof(buf), "Disk free: %'.0f MB (approx. %s)", free_bytes / 1048576.0, time_str);
+
+	std::string label = buf;
+
+	post_to_main_thread([this, label] {
+		disk_free_label->setText(QString::fromStdString(label));
+		ui->menuBar->setCornerWidget(disk_free_label);  // Need to set this again for the sizing to get right.
+	});
+}
+
+// File → Exit menu action: just close the main window.
+void MainWindow::exit_triggered()
+{
+	close();
+}
+
+// Give the frame around the given camera input (0–3) a green background;
+// any other value (e.g. -1) clears the highlight from all four.
+void MainWindow::highlight_camera_input(int stream_idx)
+{
+	if (stream_idx == 0) {
+		ui->input1_frame->setStyleSheet("background: rgb(0,255,0)");
+	} else {
+		ui->input1_frame->setStyleSheet("");
+	}
+	if (stream_idx == 1) {
+		ui->input2_frame->setStyleSheet("background: rgb(0,255,0)");
+	} else {
+		ui->input2_frame->setStyleSheet("");
+	}
+	if (stream_idx == 2) {
+		ui->input3_frame->setStyleSheet("background: rgb(0,255,0)");
+	} else {
+		ui->input3_frame->setStyleSheet("");
+	}
+	if (stream_idx == 3) {
+		ui->input4_frame->setStyleSheet("background: rgb(0,255,0)");
+	} else {
+		ui->input4_frame->setStyleSheet("");
+	}
+}
+
+// Update the “Current output (...)” label, and also store the status string
+// so that get_queue_status() can serve it to other threads.
+void MainWindow::set_output_status(const string &status)
+{
+	ui->live_label->setText(QString::fromStdString("Current output (" + status + ")"));
+
+	lock_guard<mutex> lock(queue_status_mu);
+	queue_status = status;
+}
+
+// Returns the current output status and its MIME type; thread-safe
+// (takes queue_status_mu), so callable from e.g. an HTTP serving thread.
+pair<string, string> MainWindow::get_queue_status() const {
+	lock_guard<mutex> lock(queue_status_mu);
+	return {queue_status, "text/plain"};
+}
--- /dev/null
+#ifndef MAINWINDOW_H
+#define MAINWINDOW_H
+
+#include "clip_list.h"
+#include "db.h"
+#include "state.pb.h"
+
+#include <mutex>
+#include <QLabel>
+#include <QMainWindow>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <string>
+#include <utility>
+
+namespace Ui {
+class MainWindow;
+}  // namespace Ui
+
+class Player;
+
+// The main Futatabi window: clip list and playlist on one side, preview/live
+// displays and the four camera inputs on the other. Also coordinates the
+// deferred (grouped) persisting of rapid model changes, and exposes the
+// current output status for external (HTTP) consumers.
+class MainWindow : public QMainWindow {
+	Q_OBJECT
+
+public:
+	MainWindow();
+
+	// HTTP callback. TODO: Does perhaps not belong to MainWindow?
+	std::pair<std::string, std::string> get_queue_status() const;
+
+//private:
+	// Left public (note the commented-out private: above), presumably so
+	// other code can reach the widgets directly — TODO confirm intent.
+	Ui::MainWindow *ui;
+
+private:
+	QLabel *disk_free_label;  // Lives in the menu bar corner; see report_disk_space().
+	Player *preview_player, *live_player;
+	DB db;
+
+	// State when doing a scrub operation on a timestamp with the mouse.
+	bool scrubbing = false;
+	int scrub_x_origin;  // In pixels on the viewport.
+	int64_t scrub_pts_origin;
+
+	// Which element (e.g. pts_in on clip 4) we are scrubbing.
+	enum ScrubType { SCRUBBING_CLIP_LIST, SCRUBBING_PLAYLIST } scrub_type;
+	int scrub_row;
+	int scrub_column;
+
+	// Used to keep track of small mouse wheel motions on the camera index in the playlist.
+	int last_mousewheel_camera_row = -1;
+	int leftover_angle_degrees = 0;
+
+	// Some operations, notably scrubbing and scrolling, happen in so large increments
+	// that we want to group them instead of saving to disk every single time.
+	// If they happen (ie., we get a callback from the model that it's changed) while
+	// currently_deferring_model_changes, we fire off this timer. If it manages to
+	// elapse before some other event happens, the deferred change is committed
+	// (see defer_timer_expired()). (If the other event is of the same kind, we just
+	// fire off the timer anew instead of taking any action.)
+	QTimer *defer_timeout;
+	std::string deferred_change_id;
+	StateProto deferred_state;
+
+	// Before a change that should be deferred (see above), currently_deferring_model_changes
+	// must be set to true, and current_change_id must be given contents describing what's
+	// changed to avoid accidental grouping.
+	bool currently_deferring_model_changes = false;
+	std::string current_change_id;
+
+	mutable std::mutex queue_status_mu;
+	std::string queue_status;  // Under queue_status_mu.
+
+	// UI button/action handlers.
+	void cue_in_clicked();
+	void cue_out_clicked();
+	void queue_clicked();
+	void preview_clicked();
+	void preview_angle_clicked(unsigned stream_idx);
+	void play_clicked();
+	// Callbacks from the live player.
+	void live_player_clip_done();
+	std::pair<Clip, size_t> live_player_get_next_clip();
+	void live_player_clip_progress(const std::map<size_t, double> &progress);
+	void set_output_status(const std::string &status);
+	void playlist_duplicate();
+	void playlist_remove();
+	void playlist_move(int delta);
+
+	void defer_timer_expired();
+	void content_changed();  // In clip_list or play_list.
+	void state_changed(const StateProto &state);  // Called post-filtering.
+
+	// How preview_single_frame() snaps a pts to an actual stored frame.
+	enum Rounding { FIRST_AT_OR_AFTER, LAST_BEFORE };
+	void preview_single_frame(int64_t pts, unsigned stream_idx, Rounding rounding);
+
+	// Also covers when the playlist itself changes.
+	void playlist_selection_changed();
+
+	void clip_list_selection_changed(const QModelIndex &current, const QModelIndex &previous);
+
+	void resizeEvent(QResizeEvent *event) override;
+	bool eventFilter(QObject *watched, QEvent *event) override;
+
+	void report_disk_space(off_t free_bytes, double estimated_seconds_left);
+	void exit_triggered();
+
+	// Highlight the given camera input (0–3) in the UI; -1 clears.
+	void highlight_camera_input(int stream_idx);
+
+private slots:
+	void relayout();
+};
+
+extern MainWindow *global_mainwindow;
+
+#endif
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>MainWindow</class>
+ <widget class="QMainWindow" name="MainWindow">
+ <property name="geometry">
+ <rect>
+ <x>0</x>
+ <y>0</y>
+ <width>1038</width>
+ <height>600</height>
+ </rect>
+ </property>
+ <property name="windowTitle">
+ <string>Futatabi</string>
+ </property>
+ <widget class="QWidget" name="centralwidget">
+ <layout class="QGridLayout" name="gridLayout">
+ <item row="0" column="0">
+ <widget class="QSplitter" name="splitter">
+ <property name="orientation">
+ <enum>Qt::Horizontal</enum>
+ </property>
+ <widget class="QWidget" name="horizontalLayoutWidget">
+ <layout class="QVBoxLayout" name="clip_and_play_lists">
+ <item>
+ <widget class="QTableView" name="clip_list"/>
+ </item>
+ <item>
+ <layout class="QHBoxLayout" name="clip_list_buttons">
+ <item>
+ <widget class="QPushButton" name="queue_btn">
+ <property name="text">
+ <string>Queue (&Q)</string>
+ </property>
+ <property name="icon">
+ <iconset theme="list-add">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="preview_btn">
+ <property name="text">
+ <string>Preview (&W)</string>
+ </property>
+ <property name="icon">
+ <iconset theme="media-playback-start">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="cue_in_btn">
+ <property name="text">
+ <string>Cue in (&A)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="cue_out_btn">
+ <property name="text">
+ <string>Cue out (&S)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <spacer name="horizontalSpacer_2">
+ <property name="orientation">
+ <enum>Qt::Horizontal</enum>
+ </property>
+ <property name="sizeHint" stdset="0">
+ <size>
+ <width>40</width>
+ <height>20</height>
+ </size>
+ </property>
+ </spacer>
+ </item>
+ </layout>
+ </item>
+ <item>
+ <widget class="QTableView" name="playlist">
+ <property name="selectionMode">
+ <enum>QAbstractItemView::ContiguousSelection</enum>
+ </property>
+ <property name="selectionBehavior">
+ <enum>QAbstractItemView::SelectRows</enum>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <layout class="QHBoxLayout" name="playlist_buttons">
+ <item>
+ <widget class="QPushButton" name="playlist_duplicate_btn">
+ <property name="text">
+ <string>Duplicate</string>
+ </property>
+ <property name="icon">
+ <iconset theme="list-add">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="playlist_remove_btn">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Fixed" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="text">
+ <string>Remove</string>
+ </property>
+ <property name="icon">
+ <iconset theme="list-remove">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="playlist_move_up_btn">
+ <property name="text">
+ <string>Move up</string>
+ </property>
+ <property name="icon">
+ <iconset theme="go-up">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="playlist_move_down_btn">
+ <property name="text">
+ <string>Move down</string>
+ </property>
+ <property name="icon">
+ <iconset theme="go-down">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <spacer name="horizontalSpacer">
+ <property name="orientation">
+ <enum>Qt::Horizontal</enum>
+ </property>
+ <property name="sizeHint" stdset="0">
+ <size>
+ <width>40</width>
+ <height>20</height>
+ </size>
+ </property>
+ </spacer>
+ </item>
+ <item>
+ <widget class="QPushButton" name="play_btn">
+ <property name="text">
+ <string>Play (space)</string>
+ </property>
+ <property name="icon">
+ <iconset theme="media-playback-start">
+ <normaloff>.</normaloff>.</iconset>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ </layout>
+ </widget>
+ <widget class="QWidget" name="verticalLayoutWidget_4">
+ <layout class="QVBoxLayout" name="video_displays" stretch="1,2">
+ <item>
+ <layout class="QHBoxLayout" name="preview_and_live_panes">
+ <item>
+ <layout class="QVBoxLayout" name="preview_pane" stretch="1,0">
+ <item>
+ <widget class="JPEGFrameView" name="preview_display" native="true"/>
+ </item>
+ <item>
+ <layout class="QHBoxLayout" name="horizontalLayout_3">
+ <property name="spacing">
+ <number>0</number>
+ </property>
+ <item>
+ <widget class="QLabel" name="label_2">
+ <property name="text">
+ <string>Preview output</string>
+ </property>
+ <property name="alignment">
+ <set>Qt::AlignCenter</set>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="preview_1_btn">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Maximum" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="maximumSize">
+ <size>
+ <width>20</width>
+ <height>17</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>1</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="preview_2_btn">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Maximum" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <property name="maximumSize">
+ <size>
+ <width>20</width>
+ <height>17</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>2</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="preview_3_btn">
+ <property name="maximumSize">
+ <size>
+ <width>20</width>
+ <height>17</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>3</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QPushButton" name="preview_4_btn">
+ <property name="maximumSize">
+ <size>
+ <width>20</width>
+ <height>17</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>4</string>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ </layout>
+ </item>
+ <item>
+ <layout class="QVBoxLayout" name="live_pane" stretch="1,0">
+ <item>
+ <widget class="JPEGFrameView" name="live_display" native="true"/>
+ </item>
+ <item>
+ <widget class="QLabel" name="live_label">
+ <property name="text">
+ <string>Current output (paused)</string>
+ </property>
+ <property name="alignment">
+ <set>Qt::AlignCenter</set>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ </layout>
+ </item>
+ <item>
+ <layout class="QVBoxLayout" name="input_pane" stretch="1,0">
+ <item>
+ <layout class="QGridLayout" name="input_displays">
+ <property name="spacing">
+ <number>0</number>
+ </property>
+ <item row="0" column="0">
+ <widget class="QFrame" name="input1_frame">
+ <property name="frameShape">
+ <enum>QFrame::Box</enum>
+ </property>
+ <property name="frameShadow">
+ <enum>QFrame::Plain</enum>
+ </property>
+ <property name="lineWidth">
+ <number>0</number>
+ </property>
+ <layout class="QGridLayout" name="gridLayout_2">
+ <property name="leftMargin">
+ <number>3</number>
+ </property>
+ <property name="topMargin">
+ <number>3</number>
+ </property>
+ <property name="rightMargin">
+ <number>3</number>
+ </property>
+ <property name="bottomMargin">
+ <number>3</number>
+ </property>
+ <item row="0" column="0">
+ <widget class="JPEGFrameView" name="input1_display" native="true">
+ <property name="autoFillBackground">
+ <bool>true</bool>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ </item>
+ <item row="1" column="0">
+ <widget class="QFrame" name="input3_frame">
+ <property name="frameShape">
+ <enum>QFrame::Box</enum>
+ </property>
+ <property name="frameShadow">
+ <enum>QFrame::Plain</enum>
+ </property>
+ <property name="lineWidth">
+ <number>0</number>
+ </property>
+ <layout class="QGridLayout" name="gridLayout_4">
+ <property name="leftMargin">
+ <number>3</number>
+ </property>
+ <property name="topMargin">
+ <number>3</number>
+ </property>
+ <property name="rightMargin">
+ <number>3</number>
+ </property>
+ <property name="bottomMargin">
+ <number>3</number>
+ </property>
+ <item row="0" column="0">
+ <widget class="JPEGFrameView" name="input3_display" native="true"/>
+ </item>
+ </layout>
+ </widget>
+ </item>
+ <item row="0" column="1">
+ <widget class="QFrame" name="input2_frame">
+ <property name="frameShape">
+ <enum>QFrame::Box</enum>
+ </property>
+ <property name="frameShadow">
+ <enum>QFrame::Plain</enum>
+ </property>
+ <property name="lineWidth">
+ <number>0</number>
+ </property>
+ <layout class="QGridLayout" name="gridLayout_3">
+ <property name="leftMargin">
+ <number>3</number>
+ </property>
+ <property name="topMargin">
+ <number>3</number>
+ </property>
+ <property name="rightMargin">
+ <number>3</number>
+ </property>
+ <property name="bottomMargin">
+ <number>3</number>
+ </property>
+ <item row="0" column="0">
+ <widget class="JPEGFrameView" name="input2_display" native="true">
+ <property name="autoFillBackground">
+ <bool>true</bool>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ </item>
+ <item row="1" column="1">
+ <widget class="QFrame" name="input4_frame">
+ <property name="autoFillBackground">
+ <bool>true</bool>
+ </property>
+ <property name="frameShape">
+ <enum>QFrame::Box</enum>
+ </property>
+ <property name="frameShadow">
+ <enum>QFrame::Plain</enum>
+ </property>
+ <property name="lineWidth">
+ <number>0</number>
+ </property>
+ <layout class="QGridLayout" name="gridLayout_5">
+ <property name="leftMargin">
+ <number>3</number>
+ </property>
+ <property name="topMargin">
+ <number>3</number>
+ </property>
+ <property name="rightMargin">
+ <number>3</number>
+ </property>
+ <property name="bottomMargin">
+ <number>3</number>
+ </property>
+ <item row="0" column="0">
+ <widget class="JPEGFrameView" name="input4_display" native="true"/>
+ </item>
+ </layout>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ <item>
+ <widget class="QLabel" name="label">
+ <property name="text">
+ <string>Current inputs</string>
+ </property>
+ <property name="alignment">
+ <set>Qt::AlignCenter</set>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ </layout>
+ </widget>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ <widget class="QMenuBar" name="menuBar">
+ <property name="geometry">
+ <rect>
+ <x>0</x>
+ <y>0</y>
+ <width>1038</width>
+ <height>22</height>
+ </rect>
+ </property>
+ <widget class="QMenu" name="menuFile">
+ <property name="title">
+ <string>&File</string>
+ </property>
+ <addaction name="exit_action"/>
+ </widget>
+ <addaction name="menuFile"/>
+ </widget>
+ <action name="exit_action">
+ <property name="text">
+ <string>E&xit</string>
+ </property>
+ </action>
+ </widget>
+ <customwidgets>
+ <customwidget>
+ <class>JPEGFrameView</class>
+ <extends>QWidget</extends>
+ <header>jpeg_frame_view.h</header>
+ </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
--- /dev/null
+youtube-dl 'https://www.youtube.com/watch?v=Wa2fBiCEzTc'
+FILE='MULTI ANGLE _ George Evans pops up with a 92nd-minute winner in Blackburn!-Wa2fBiCEzTc.mp4'
+ffmpeg -y -ss 0:03.290 -t 0:37 -i "$FILE" -c:v mjpeg -an angle1.mp4
+ffmpeg -y -ss 0:40 -t 0:40 -i "$FILE" -c:v mjpeg -an angle2.mp4
+ffmpeg -y -ss 1:12.880 -i "$FILE" -c:v mjpeg -an angle3.mp4
+ffmpeg -y -ss 0:07 -i ./angle3.mp4 -c:v copy -copyts -start_at_zero angle3-cut.mp4
+ffmpeg -y -copyts -i angle1.mp4 -i angle2.mp4 -i angle3-cut.mp4 -map 0:0 -map 1:0 -map 2:0 -c:v copy multiangle.mp4
+
--- /dev/null
+# Build file for Futatabi. Note: shareddep, libmicrohttpddep, the ‘shared’
+# library and bin2h_gen used below are presumably defined in the parent
+# meson.build — TODO confirm.
+qt5 = import('qt5')
+protoc = find_program('protoc')
+
+# External dependencies.
+epoxydep = dependency('epoxy')
+libavcodecdep = dependency('libavcodec')
+libavformatdep = dependency('libavformat')
+libavutildep = dependency('libavutil')
+libjpegdep = dependency('libjpeg')
+libswscaledep = dependency('libswscale')
+movitdep = dependency('movit')
+protobufdep = dependency('protobuf')
+qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
+sdl2_imagedep = dependency('SDL2_image')
+sdl2dep = dependency('sdl2')
+sqlite3dep = dependency('sqlite3')
+vadrmdep = dependency('libva-drm')
+vax11dep = dependency('libva-x11')
+x11dep = dependency('x11')
+
+# Protobuf compilation.
+gen = generator(protoc, \
+  output    : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
+  arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+proto_generated = gen.process('state.proto', 'frame.proto')
+
+# Preprocess Qt as needed.
+moc_files = qt5.preprocess(
+  moc_headers: ['mainwindow.h', 'jpeg_frame_view.h', 'clip_list.h'],
+  ui_files: ['mainwindow.ui'],
+  dependencies: qt5deps)
+
+# Flow objects.
+srcs = ['flow.cpp', 'gpu_timers.cpp']
+
+# All the other files.
+srcs += ['main.cpp', 'player.cpp', 'video_stream.cpp', 'chroma_subsampler.cpp']
+srcs += ['vaapi_jpeg_decoder.cpp', 'db.cpp', 'ycbcr_converter.cpp', 'flags.cpp']
+srcs += ['mainwindow.cpp', 'jpeg_frame_view.cpp', 'clip_list.cpp', 'frame_on_disk.cpp']
+srcs += moc_files
+srcs += proto_generated
+
+# Shaders needed at runtime.
+shaders = ['chroma_subsample.vert', 'densify.vert', 'equations.vert', 'hole_fill.vert', 'motion_search.vert', 'sor.vert', 'splat.vert', 'vs.vert']
+shaders += ['add_base_flow.frag', 'blend.frag', 'chroma_subsample.frag', 'densify.frag', 'derivatives.frag', 'diffusivity.frag',
+            'equations.frag', 'gray.frag', 'hole_blend.frag', 'hole_fill.frag', 'motion_search.frag', 'prewarp.frag', 'resize_flow.frag',
+            'sobel.frag', 'sor.frag', 'splat.frag']
+
+# Symlink the shaders into the build directory, so the binaries can find them
+# when run from there. (run_command() without check: does not abort on
+# failure, e.g. if the link already exists from a previous configure.)
+foreach shader : shaders
+  run_command('ln', '-s', join_paths(meson.current_source_dir(), shader), meson.current_build_dir())
+endforeach
+
+# Also embed the shaders into the binaries (via bin2h).
+shader_srcs = bin2h_gen.process(shaders)
+srcs += shader_srcs
+
+# The main binary, plus standalone tools for the optical flow code.
+executable('futatabi', srcs, dependencies: [shareddep, qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep, sqlite3dep, vax11dep, vadrmdep, x11dep, libavformatdep, libavcodecdep, libavutildep, libswscaledep], link_with: shared)
+executable('flow', 'flow_main.cpp', 'flow.cpp', 'gpu_timers.cpp', shader_srcs, dependencies: [shareddep, epoxydep, sdl2dep, sdl2_imagedep])
+executable('eval', 'eval.cpp', 'util.cpp')
+executable('vis', 'vis.cpp', 'util.cpp')
--- /dev/null
+#version 450 core
+
+/*
+ The motion search is one of the two major components of DIS. It works more or less
+ like you'd expect; there's a bunch of overlapping patches (8x8 or 12x12 pixels) in
+ a grid, and for each patch, there's a search to try to find the most similar patch
+ in the other frame.
+
+ Unlike in a typical video codec, the DIS patch search is based on gradient descent;
+ conceptually, you start with an initial guess (the value from the previous level,
+ or the zero flow for the very first level), subtract the reference (“template”)
+ patch from the candidate, look at the gradient to see in what direction there is
+ a lower difference, and then inch a bit toward that direction. (There is seemingly
+ nothing like AdaM, Momentum or similar, but the searched value is only in two
+ dimensions, so perhaps it doesn't matter as much then.)
+
+ DIS does a tweak to this concept. Since the procedure as outlined above requires
+ computing the gradient of the candidate patch, it uses the reference patch as
+ candidate (thus the “inverse” name), and thus uses _its_ gradient to understand
+ in which direction to move. (This is a bit dodgy, but not _that_ dodgy; after
+ all, the two patches are supposed to be quite similar, so their surroundings and
+ thus also gradients should also be quite similar.) It's not entirely clear whether
+ this is still a win on GPU, where calculations are much cheaper, especially
+ the way we parallelize the search, but we've kept it around for now.
+
+ The inverse search is explained and derived in the supplementary material of the
+ paper, section A. Do note that there's a typo; the text under equation 9 claims
+ that the matrix H is n x n (where presumably n is the patch size), while in reality,
+ it's 2x2.
+
+   Our GPU parallelization is fairly dumb right now; we do one patch per fragment
+   (ie., parallelize only over patches, not within each patch), which may not
+ be optimal. In particular, in the initial level, we only have 40 patches,
+ which is on the low side for a GPU, and the memory access patterns may also not
+ be ideal.
+ */
+
+in vec3 flow_tc;
+in vec2 patch_center;
+flat in int ref_layer, search_layer;
+out vec3 out_flow;
+
+uniform sampler2DArray flow_tex, image_tex;
+uniform usampler2DArray grad_tex; // Also contains the corresponding reference image.
+uniform vec2 inv_image_size, inv_prev_level_size;
+uniform uint patch_size;
+uniform uint num_iterations;
+
+// Unpack one gradient texel (presumably packed by an earlier pass — TODO
+// confirm the producer): bits 0–7 hold the pixel value (mapped to [0, 1]),
+// bits 8–19 the x gradient and bits 20–31 the y gradient, each 12-bit
+// fixed point mapped to [-0.5, 0.5]. Returns (x gradient, y gradient, value).
+vec3 unpack_gradients(uint v)
+{
+	uint vi = v & 0xffu;
+	uint xi = (v >> 8) & 0xfffu;
+	uint yi = v >> 20;
+	vec3 r = vec3(xi * (1.0f / 4095.0f) - 0.5f, yi * (1.0f / 4095.0f) - 0.5f, vi * (1.0f / 255.0f));
+	return r;
+}
+
+// Fetch and unpack the gradient at <tc>, forcing the gradient components to
+// zero outside the [0, 1] texture range.
+// Note: The third variable is the actual pixel value.
+vec3 get_gradients(vec3 tc)
+{
+	vec3 grad = unpack_gradients(texture(grad_tex, tc).x);
+
+	// Zero gradients outside the image. (We'd do this with a sampler,
+	// but we want the repeat behavior for the actual texels, in the
+	// z channel.)
+	if (any(lessThan(tc.xy, vec2(0.0f))) || any(greaterThan(tc.xy, vec2(1.0f)))) {
+		grad.xy = vec2(0.0f);
+	}
+
+	return grad;
+}
+
+// One invocation per patch and per direction (forward/backward, selected
+// via ref_layer/search_layer). Implements the inverse-search step of DIS:
+// the pseudo-Hessian is computed once for the template patch, then
+// Gauss-Newton iterations refine the flow against the warped search image.
+void main()
+{
+	vec2 image_size = textureSize(grad_tex, 0).xy;
+
+	// Lock the patch center to an integer, so that we never get
+	// any bilinear artifacts for the gradient. (NOTE: This assumes an
+	// even patch size.) Then calculate the bottom-left texel of the patch.
+	vec2 base = (round(patch_center * image_size) - (0.5f * patch_size - 0.5f))
+		* inv_image_size;
+
+	// First, precompute the pseudo-Hessian for the template patch.
+	// This is the part where we really save by the inverse search
+	// (ie., we can compute it up-front instead of anew for each
+	// patch).
+	//
+	//  H = sum(S^T S)
+	//
+	// where S is the gradient at each point in the patch. Note that
+	// this is an outer product, so we get a (symmetric) 2x2 matrix,
+	// not a scalar.
+	mat2 H = mat2(0.0f);
+	vec2 grad_sum = vec2(0.0f);  // Used for patch normalization.
+	float template_sum = 0.0f;
+	for (uint y = 0; y < patch_size; ++y) {
+		for (uint x = 0; x < patch_size; ++x) {
+			vec2 tc = base + uvec2(x, y) * inv_image_size;
+			vec3 grad = get_gradients(vec3(tc, ref_layer));
+			H[0][0] += grad.x * grad.x;
+			H[1][1] += grad.y * grad.y;
+			H[0][1] += grad.x * grad.y;
+
+			template_sum += grad.z;  // The actual template pixel value.
+			grad_sum += grad.xy;
+		}
+	}
+	H[1][0] = H[0][1];
+
+	// Make sure we don't get a singular matrix even if e.g. the picture is
+	// all black. (The paper doesn't mention this, but the reference code
+	// does it, and it seems like a reasonable hack to avoid NaNs. With such
+	// a H, we'll go out-of-bounds pretty soon, though.)
+	if (determinant(H) < 1e-6) {
+		H[0][0] += 1e-6;
+		H[1][1] += 1e-6;
+	}
+
+	mat2 H_inv = inverse(H);
+
+	// Fetch the initial guess for the flow, and convert from the previous size to this one.
+	vec2 initial_u = texture(flow_tex, flow_tc).xy * (image_size * inv_prev_level_size);
+	vec2 u = initial_u;
+	float mean_diff, first_mean_diff;
+
+	for (uint i = 0; i < num_iterations; ++i) {
+		vec2 du = vec2(0.0, 0.0);
+		float warped_sum = 0.0f;
+		vec2 u_norm = u * inv_image_size;  // In [0..1] coordinates instead of pixels.
+		for (uint y = 0; y < patch_size; ++y) {
+			for (uint x = 0; x < patch_size; ++x) {
+				vec2 tc = base + uvec2(x, y) * inv_image_size;
+				vec3 grad = get_gradients(vec3(tc, ref_layer));
+				float t = grad.z;
+				float warped = texture(image_tex, vec3(tc + u_norm, search_layer)).x;
+				du += grad.xy * (warped - t);
+				warped_sum += warped;
+			}
+		}
+
+		// Subtract the mean for patch normalization. We've done our
+		// sums without subtracting the means (because we didn't know them
+		// beforehand), ie.:
+		//
+		//   sum(S^T * ((x + µ1) - (y + µ2))) = sum(S^T * (x - y)) + (µ1 – µ2) sum(S^T)
+		//
+		// which gives trivially
+		//
+		//   sum(S^T * (x - y)) = [what we calculated] - (µ1 - µ2) sum(S^T)
+		//
+		// so we can just subtract away the mean difference here.
+		mean_diff = (warped_sum - template_sum) * (1.0 / float(patch_size * patch_size));
+		du -= grad_sum * mean_diff;
+
+		if (i == 0) {
+			first_mean_diff = mean_diff;
+		}
+
+		// Do the actual update.
+		u -= H_inv * du;
+	}
+
+	// Reject if we moved too far. Note that the paper says “too far” is the
+	// patch size, but the DIS code uses half of a patch size. The latter seems
+	// to give much better overall results.
+	//
+	// Also reject if the patch goes out-of-bounds (the paper does not mention this,
+	// but the code does, and it seems to be critical to avoid really bad behavior
+	// at the edges).
+	//
+	// (This local declaration shadows the patch_center input variable above.)
+	vec2 patch_center = (base * image_size - 0.5f) + patch_size * 0.5f + u;
+	if (length(u - initial_u) > (patch_size * 0.5f) ||
+	    patch_center.x < -(patch_size * 0.5f) ||
+	    image_size.x - patch_center.x < -(patch_size * 0.5f) ||
+	    patch_center.y < -(patch_size * 0.5f) ||
+	    image_size.y - patch_center.y < -(patch_size * 0.5f)) {
+		u = initial_u;
+		mean_diff = first_mean_diff;
+	}
+
+	// NOTE: The mean patch diff will be for the second-to-last patch,
+	// not the true position of du. But hopefully, it will be very close.
+	u *= inv_image_size;
+	out_flow = vec3(u.x, u.y, mean_diff);
+}
--- /dev/null
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 flow_tc;
+out vec2 patch_center;
+flat out int ref_layer, search_layer;
+
+uniform sampler2DArray flow_tex;
+uniform vec2 out_flow_size;
+
+void main()
+{
+	// Patch placement: We want the outermost patches to have centers exactly in the
+	// image corners, so that the bottom-left patch has centre (0,0) and the
+	// upper-right patch has center (1,1). The position we get in is _almost_ there;
+	// since the quad's corners are in (0,0) and (1,1), the fragment shader will get
+	// centers in x=0.5/w, x=1.5/w and so on (and similar for y).
+	//
+	// In other words, find some f(x) = ax + b so that
+	//
+	//   a 0.5 / w + b = 0
+	//   a (1.0 - 0.5 / w) + b = 1
+	//
+	// which gives
+	//
+	//   a = w / (w - 1)
+	//   b = -1 / (2 (w - 1))
+	vec2 a = out_flow_size / (out_flow_size - 1);
+	vec2 b = -1.0 / (2 * (out_flow_size - 1.0));
+	patch_center = a * position + b;
+
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+	flow_tc = vec3(position, gl_InstanceID);
+
+	// Render into the array layer matching this instance.
+	gl_Layer = gl_InstanceID;
+
+	// Forward flow (0) goes from 0 to 1. Backward flow (1) goes from 1 to 0.
+	ref_layer = gl_InstanceID;
+	search_layer = 1 - gl_InstanceID;
+}
--- /dev/null
+#include "player.h"
+
+#include "clip_list.h"
+#include "shared/context.h"
+#include "defs.h"
+#include "shared/ffmpeg_raii.h"
+#include "frame_on_disk.h"
+#include "shared/httpd.h"
+#include "jpeg_frame_view.h"
+#include "shared/mux.h"
+#include "shared/timebase.h"
+#include "video_stream.h"
+
+#include <algorithm>
+#include <chrono>
+#include <condition_variable>
+#include <movit/util.h>
+#include <mutex>
+#include <stdio.h>
+#include <thread>
+#include <vector>
+
+using namespace std;
+using namespace std::chrono;
+
+extern HTTPD *global_httpd;
+
+// Main playback loop (runs detached for the lifetime of the process).
+// Waits for clips, then plays them frame by frame — scheduled through
+// VideoStream when streaming, or shown directly on the JPEGFrameView for
+// previews — handling fades between consecutive clips, snapping to input
+// frames, and dropping frames when we fall behind.
+void Player::thread_func(bool also_output_to_stream)
+{
+	pthread_setname_np(pthread_self(), "Player");
+
+	QSurface *surface = create_surface();
+	QOpenGLContext *context = create_context(surface);
+	if (!make_current(context, surface)) {
+		printf("oops\n");
+		exit(1);
+	}
+
+	check_error();
+
+	// Create the VideoStream object, now that we have an OpenGL context.
+	if (also_output_to_stream) {
+		video_stream.reset(new VideoStream);
+		video_stream->start();
+	}
+
+	check_error();
+
+	constexpr double output_framerate = 60000.0 / 1001.0;  // FIXME: make configurable
+	int64_t pts = 0;
+	Clip next_clip;
+	size_t next_clip_idx = size_t(-1);
+	bool got_next_clip = false;
+	double next_clip_fade_time = -1.0;
+
+	for ( ;; ) {
+wait_for_clip:
+		bool clip_ready;
+		steady_clock::time_point before_sleep = steady_clock::now();
+
+		// Wait until we're supposed to play something.
+		{
+			unique_lock<mutex> lock(queue_state_mu);
+			clip_ready = new_clip_changed.wait_for(lock, milliseconds(100), [this] {
+				return new_clip_ready && current_clip.pts_in != -1;
+			});
+			new_clip_ready = false;
+			playing = true;
+		}
+
+		// Advance the output pts by however long we slept, so that the
+		// output timestamps keep marching forward even while idle.
+		steady_clock::duration time_slept = steady_clock::now() - before_sleep;
+		pts += duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+
+		if (!clip_ready) {
+			if (video_stream != nullptr) {
+				video_stream->schedule_refresh_frame(steady_clock::now(), pts, /*display_func=*/nullptr, QueueSpotHolder());
+			}
+			continue;
+		}
+
+		Clip clip;
+		size_t clip_idx;
+		unsigned stream_idx;
+		{
+			lock_guard<mutex> lock(mu);
+			clip = current_clip;
+			clip_idx = current_clip_idx;
+			stream_idx = current_stream_idx;
+		}
+		steady_clock::time_point origin = steady_clock::now();  // TODO: Add a 100 ms buffer for ramp-up?
+		int64_t in_pts_origin = clip.pts_in;
+got_clip:
+		int64_t out_pts_origin = pts;
+
+		// Start playing exactly at a frame.
+		// TODO: Snap secondary (fade-to) clips in the same fashion
+		// so that we don't get jank here.
+		{
+			lock_guard<mutex> lock(frame_mu);
+
+			// Find the first frame such that frame.pts >= in_pts_origin.
+			auto it = lower_bound(frames[stream_idx].begin(),
+				frames[stream_idx].end(),
+				in_pts_origin,
+				[](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+			if (it != frames[stream_idx].end()) {
+				in_pts_origin = it->pts;
+			}
+		}
+
+		// TODO: Lock to a rational multiple of the frame rate if possible.
+		double speed = 0.5;  // Hard-coded 50% slow motion for now.
+
+		int64_t in_pts_start_next_clip = -1;
+		for (int frameno = 0; ; ++frameno) {  // Ends when the clip ends.
+			double out_pts = out_pts_origin + TIMEBASE * frameno / output_framerate;
+			steady_clock::time_point next_frame_start =
+				origin + microseconds(lrint((out_pts - out_pts_origin) * 1e6 / TIMEBASE));
+			int64_t in_pts = lrint(in_pts_origin + TIMEBASE * frameno * speed / output_framerate);
+			pts = lrint(out_pts);
+
+			if (in_pts >= clip.pts_out) {
+				break;
+			}
+
+			// If we are seriously behind schedule, drop the frame outright
+			// rather than trying to catch up.
+			steady_clock::duration time_behind = steady_clock::now() - next_frame_start;
+			if (time_behind >= milliseconds(200)) {
+				fprintf(stderr, "WARNING: %ld ms behind, dropping a frame (no matter the type).\n",
+					lrint(1e3 * duration<double>(time_behind).count()));
+				continue;
+			}
+
+			double time_left_this_clip = double(clip.pts_out - in_pts) / TIMEBASE / speed;
+			if (!got_next_clip && next_clip_callback != nullptr && time_left_this_clip <= clip.fade_time_seconds) {
+				// Find the next clip so that we can begin a fade.
+				tie(next_clip, next_clip_idx) = next_clip_callback();
+				if (next_clip.pts_in != -1) {
+					got_next_clip = true;
+
+					double duration_next_clip = (next_clip.pts_out - next_clip.pts_in) / TIMEBASE / speed;
+					next_clip_fade_time = std::min(time_left_this_clip, duration_next_clip);
+					in_pts_start_next_clip = next_clip.pts_in + lrint(next_clip_fade_time * TIMEBASE * speed);
+				}
+			}
+
+			// pts not affected by the swapping below.
+			int64_t in_pts_for_progress = in_pts, in_pts_secondary_for_progress = -1;
+
+			int primary_stream_idx = stream_idx;
+			FrameOnDisk secondary_frame;
+			int secondary_stream_idx = -1;
+			float fade_alpha = 0.0f;
+			if (got_next_clip && time_left_this_clip <= next_clip_fade_time) {
+				secondary_stream_idx = next_clip.stream_idx;
+				int64_t in_pts_secondary = lrint(next_clip.pts_in + (next_clip_fade_time - time_left_this_clip) * TIMEBASE * speed);
+				in_pts_secondary_for_progress = in_pts_secondary;
+				fade_alpha = 1.0f - time_left_this_clip / next_clip_fade_time;
+
+				// If more than half-way through the fade, interpolate the next clip
+				// instead of the current one, since it's more visible.
+				if (fade_alpha >= 0.5f) {
+					swap(primary_stream_idx, secondary_stream_idx);
+					swap(in_pts, in_pts_secondary);
+					fade_alpha = 1.0f - fade_alpha;
+				}
+
+				FrameOnDisk frame_lower, frame_upper;
+				bool ok = find_surrounding_frames(in_pts_secondary, secondary_stream_idx, &frame_lower, &frame_upper);
+				if (ok) {
+					secondary_frame = frame_lower;
+				}
+			}
+
+			if (progress_callback != nullptr) {
+				// NOTE: None of this will take into account any snapping done below.
+				double played_this_clip = double(in_pts_for_progress - clip.pts_in) / TIMEBASE / speed;
+				double total_length = double(clip.pts_out - clip.pts_in) / TIMEBASE / speed;
+				map<size_t, double> progress{{ clip_idx, played_this_clip / total_length }};
+
+				if (got_next_clip && time_left_this_clip <= next_clip_fade_time) {
+					double played_next_clip = double(in_pts_secondary_for_progress - next_clip.pts_in) / TIMEBASE / speed;
+					double total_next_length = double(next_clip.pts_out - next_clip.pts_in) / TIMEBASE / speed;
+					progress[next_clip_idx] = played_next_clip / total_next_length;
+				}
+				progress_callback(progress);
+			}
+
+			FrameOnDisk frame_lower, frame_upper;
+			bool ok = find_surrounding_frames(in_pts, primary_stream_idx, &frame_lower, &frame_upper);
+			if (!ok) {
+				break;
+			}
+
+			{
+				unique_lock<mutex> lock(queue_state_mu);
+				if (video_stream == nullptr) {
+					// No queue, just wait until the right time and then show the frame.
+					new_clip_changed.wait_until(lock, next_frame_start, [this]{
+						return new_clip_ready || override_stream_idx != -1;
+					});
+				} else {
+					// If the queue is full (which is really the state we'd like to be in),
+					// wait until there's room for one more frame (ie., one was output from
+					// VideoStream), or until there's a new clip we're supposed to play.
+					//
+					// In this case, we don't sleep until next_frame_start; the displaying is
+					// done by the queue.
+					new_clip_changed.wait(lock, [this]{
+						if (num_queued_frames < max_queued_frames) {
+							return true;
+						}
+						return new_clip_ready || override_stream_idx != -1;
+					});
+				}
+				if (new_clip_ready) {
+					if (video_stream != nullptr) {
+						lock.unlock();  // Urg.
+						video_stream->clear_queue();
+						lock.lock();
+					}
+					got_next_clip = false;
+					goto wait_for_clip;
+				}
+				if (override_stream_idx != -1) {
+					stream_idx = override_stream_idx;
+					override_stream_idx = -1;
+					continue;
+				}
+			}
+
+			// The two surrounding input frames coincide, so no interpolation
+			// is needed; show (or schedule) the frame as-is.
+			if (frame_lower.pts == frame_upper.pts) {
+				auto display_func = [this, primary_stream_idx, frame_lower, secondary_frame, fade_alpha]{
+					destination->setFrame(primary_stream_idx, frame_lower, secondary_frame, fade_alpha);
+				};
+				if (video_stream == nullptr) {
+					display_func();
+				} else {
+					if (secondary_stream_idx == -1) {
+						video_stream->schedule_original_frame(
+							next_frame_start, pts, display_func, QueueSpotHolder(this),
+							frame_lower);
+					} else {
+						assert(secondary_frame.pts != -1);
+						video_stream->schedule_faded_frame(next_frame_start, pts, display_func,
+							QueueSpotHolder(this), frame_lower,
+							secondary_frame, fade_alpha);
+					}
+				}
+				continue;
+			}
+
+			// Snap to input frame: If we can do so with less than 1% jitter
+			// (ie., move less than 1% of an _output_ frame), do so.
+			// TODO: Snap secondary (fade-to) clips in the same fashion.
+			bool snapped = false;
+			for (int64_t snap_pts : { frame_lower.pts, frame_upper.pts }) {
+				double snap_pts_as_frameno = (snap_pts - in_pts_origin) * output_framerate / TIMEBASE / speed;
+				if (fabs(snap_pts_as_frameno - frameno) < 0.01) {
+					FrameOnDisk snap_frame = frame_lower;
+					snap_frame.pts = snap_pts;
+					auto display_func = [this, primary_stream_idx, snap_frame, secondary_frame, fade_alpha]{
+						destination->setFrame(primary_stream_idx, snap_frame, secondary_frame, fade_alpha);
+					};
+					if (video_stream == nullptr) {
+						display_func();
+					} else {
+						if (secondary_stream_idx == -1) {
+							video_stream->schedule_original_frame(
+								next_frame_start, pts, display_func,
+								QueueSpotHolder(this), snap_frame);
+						} else {
+							assert(secondary_frame.pts != -1);
+							video_stream->schedule_faded_frame(
+								next_frame_start, pts, display_func, QueueSpotHolder(this),
+								snap_frame, secondary_frame, fade_alpha);
+						}
+					}
+					in_pts_origin += snap_pts - in_pts;
+					snapped = true;
+					break;
+				}
+			}
+			if (snapped) {
+				continue;
+			}
+
+			// Interpolation is the most expensive path, so drop it already
+			// at 100 ms behind (instead of the general 200 ms above).
+			if (time_behind >= milliseconds(100)) {
+				fprintf(stderr, "WARNING: %ld ms behind, dropping an interpolated frame.\n",
+					lrint(1e3 * duration<double>(time_behind).count()));
+				continue;
+			}
+
+			double alpha = double(in_pts - frame_lower.pts) / (frame_upper.pts - frame_lower.pts);
+
+			if (video_stream == nullptr) {
+				// Previews don't do any interpolation.
+				assert(secondary_stream_idx == -1);
+				destination->setFrame(primary_stream_idx, frame_lower);
+			} else {
+				auto display_func = [this](shared_ptr<Frame> frame) {
+					destination->setFrame(frame);
+				};
+				video_stream->schedule_interpolated_frame(
+					next_frame_start, pts, display_func, QueueSpotHolder(this),
+					frame_lower, frame_upper, alpha,
+					secondary_frame, fade_alpha);
+			}
+		}
+
+		// The clip ended.
+
+		// Last-ditch effort to get the next clip (if e.g. the fade time was zero seconds).
+		if (!got_next_clip && next_clip_callback != nullptr) {
+			tie(next_clip, next_clip_idx) = next_clip_callback();
+			if (next_clip.pts_in != -1) {
+				got_next_clip = true;
+				in_pts_start_next_clip = next_clip.pts_in;
+			}
+		}
+
+		// Switch to next clip if we got it.
+		if (got_next_clip) {
+			clip = next_clip;
+			clip_idx = next_clip_idx;
+			stream_idx = next_clip.stream_idx;  // Override is used for previews only, and next_clip is used for live only.
+			if (done_callback != nullptr) {
+				done_callback();
+			}
+			got_next_clip = false;
+
+			// Start the next clip from the point where the fade went out.
+			origin = steady_clock::now();
+			in_pts_origin = in_pts_start_next_clip;
+			goto got_clip;
+		}
+
+		{
+			unique_lock<mutex> lock(queue_state_mu);
+			playing = false;
+		}
+		if (done_callback != nullptr) {
+			done_callback();
+		}
+	}
+}
+
+// Find the frame immediately before and after this point.
+// Returns false if pts is after the last frame.
+bool Player::find_surrounding_frames(int64_t pts, int stream_idx, FrameOnDisk *frame_lower, FrameOnDisk *frame_upper)
+{
+	lock_guard<mutex> lock(frame_mu);
+
+	// Find the first frame such that frame.pts >= pts.
+	auto it = lower_bound(frames[stream_idx].begin(),
+		frames[stream_idx].end(),
+		pts,
+		[](const FrameOnDisk &frame, int64_t pts) { return frame.pts < pts; });
+	if (it == frames[stream_idx].end()) {
+		return false;
+	}
+	*frame_upper = *it;
+
+	// Find the last frame such that frame.pts <= pts (if any);
+	// if there is none, frame_lower == frame_upper.
+	if (it == frames[stream_idx].begin()) {
+		*frame_lower = *it;
+	} else {
+		*frame_lower = *(it - 1);
+	}
+	// NOTE(review): These asserts assume pts is never strictly before the
+	// first frame; if it were, the first assert would fire (frame_lower->pts
+	// would be > pts). Confirm that all callers guarantee this.
+	assert(pts >= frame_lower->pts);
+	assert(pts <= frame_upper->pts);
+	return true;
+}
+
+// Spawns the playback thread; it runs detached for the lifetime of the process.
+Player::Player(JPEGFrameView *destination, bool also_output_to_stream)
+	: destination(destination)
+{
+	thread(&Player::thread_func, this, also_output_to_stream).detach();
+}
+
+// Ask the player thread to start playing the given clip, replacing whatever
+// is currently playing. The two mutexes are taken one after the other,
+// never nested, so there is no lock-ordering hazard with the player thread.
+void Player::play_clip(const Clip &clip, size_t clip_idx, unsigned stream_idx)
+{
+	{
+		lock_guard<mutex> lock(mu);
+		current_clip = clip;
+		current_stream_idx = stream_idx;
+		current_clip_idx = clip_idx;
+	}
+
+	{
+		lock_guard<mutex> lock(queue_state_mu);
+		new_clip_ready = true;
+		override_stream_idx = -1;  // Cancel any pending angle override.
+		new_clip_changed.notify_all();
+	}
+}
+
+// Change the camera angle (stream) used for the current clip only.
+void Player::override_angle(unsigned stream_idx)
+{
+	// Corner case: If a new clip is waiting to be played, change its stream and then we're done.
+	{
+		unique_lock<mutex> lock(queue_state_mu);
+		if (new_clip_ready) {
+			lock_guard<mutex> lock2(mu);
+			current_stream_idx = stream_idx;
+			return;
+		}
+	}
+
+	// If we are playing a clip, set override_stream_idx, and the player thread will
+	// pick it up and change its internal index.
+	// NOTE(review): queue_state_mu is dropped and retaken between the section
+	// above and this one, so the state can change in between. Also, when
+	// playing we fall through and additionally show the frame at pts_out
+	// below — it looks like this branch should return; confirm intent.
+	{
+		unique_lock<mutex> lock(queue_state_mu);
+		if (playing) {
+			override_stream_idx = stream_idx;
+			new_clip_changed.notify_all();
+		}
+	}
+
+	// OK, so we're standing still, presumably at the end of a clip.
+	// Look at the current pts_out (if it exists), and show the closest
+	// thing we've got.
+	int64_t pts_out;
+	{
+		lock_guard<mutex> lock(mu);
+		if (current_clip.pts_out < 0) {
+			return;
+		}
+		pts_out = current_clip.pts_out;
+	}
+
+	// Show the first frame of the new angle strictly after pts_out.
+	lock_guard<mutex> lock(frame_mu);
+	auto it = upper_bound(frames[stream_idx].begin(), frames[stream_idx].end(), pts_out,
+		[](int64_t pts, const FrameOnDisk &frame) { return pts < frame.pts; });
+	if (it == frames[stream_idx].end()) {
+		return;
+	}
+	destination->setFrame(stream_idx, *it);
+}
+
+// QueueInterface implementation: called (via QueueSpotHolder) when a frame
+// is put into the VideoStream queue.
+void Player::take_queue_spot()
+{
+	lock_guard<mutex> lock(queue_state_mu);
+	++num_queued_frames;
+}
+
+// QueueInterface implementation: called when a queued frame has been output.
+// Wakes the player thread, which may be sleeping until there is queue room.
+void Player::release_queue_spot()
+{
+	lock_guard<mutex> lock(queue_state_mu);
+	assert(num_queued_frames > 0);
+	--num_queued_frames;
+	new_clip_changed.notify_all();
+}
--- /dev/null
+#ifndef _PLAYER_H
+#define _PLAYER_H 1
+
+#include "clip_list.h"
+#include "frame_on_disk.h"
+#include "queue_spot_holder.h"
+
+extern "C" {
+#include <libavformat/avio.h>
+}
+
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+
+class JPEGFrameView;
+class VideoStream;
+class QSurface;
+class QSurfaceFormat;
+
+// Plays back clips of recorded frames, either into a preview widget or
+// (when also_output_to_stream is set) through a VideoStream.
+class Player : public QueueInterface {
+public:
+	Player(JPEGFrameView *destination, bool also_output_to_stream);
+
+	// Replaces whatever is currently playing with the given clip.
+	void play_clip(const Clip &clip, size_t clip_idx, unsigned stream_idx);
+	void override_angle(unsigned stream_idx);  // For the current clip only.
+
+	// Not thread-safe to set concurrently with playing.
+	// Will be called back from the player thread.
+	using done_callback_func = std::function<void()>;
+	void set_done_callback(done_callback_func cb) { done_callback = cb; }
+
+	// Not thread-safe to set concurrently with playing.
+	// Will be called back from the player thread.
+	// The second parameter is the clip's position in the play list.
+	using next_clip_callback_func = std::function<std::pair<Clip, size_t>()>;
+	void set_next_clip_callback(next_clip_callback_func cb) { next_clip_callback = cb; }
+
+	// Not thread-safe to set concurrently with playing.
+	// Will be called back from the player thread.
+	using progress_callback_func = std::function<void(const std::map<size_t, double> &progress)>;
+	void set_progress_callback(progress_callback_func cb) { progress_callback = cb; }
+
+	// QueueInterface. Tracks frames in flight in the VideoStream queue,
+	// so that the player thread can sleep when the queue is full.
+	void take_queue_spot() override;
+	void release_queue_spot() override;
+
+private:
+	void thread_func(bool also_output_to_stream);
+	// NOTE(review): No definitions for the next three declarations are
+	// visible in player.cpp — confirm they are still in use.
+	void open_output_stream();
+	static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+	int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+	// Find the frame immediately before and after this point.
+	// Returns false if pts is after the last frame.
+	bool find_surrounding_frames(int64_t pts, int stream_idx, FrameOnDisk *frame_lower, FrameOnDisk *frame_upper);
+
+	JPEGFrameView *destination;
+	done_callback_func done_callback;
+	next_clip_callback_func next_clip_callback;
+	progress_callback_func progress_callback;
+
+	std::mutex mu;
+	Clip current_clip;  // Under mu. Can have pts_in = -1 for no clip.
+	size_t current_clip_idx;  // Under mu.
+	unsigned current_stream_idx;  // Under mu.
+
+	std::mutex queue_state_mu;
+	std::condition_variable new_clip_changed;
+	bool new_clip_ready = false;  // Under queue_state_mu.
+	bool playing = false;  // Under queue_state_mu.
+	int override_stream_idx = -1;  // Under queue_state_mu.
+
+	std::unique_ptr<VideoStream> video_stream;  // Can be nullptr.
+
+	// Under queue_state_mu. Part of this instead of VideoStream so that we own
+	// its lock and can sleep on it.
+	size_t num_queued_frames = 0;
+	static constexpr size_t max_queued_frames = 10;
+};
+
+#endif // !defined(_PLAYER_H)
--- /dev/null
+#version 450 core
+
+// Warps I_1 according to the flow, then computes the mean and difference to I_0.
+
+in vec3 tc;
+out float I, I_t;
+out vec2 normalized_flow;
+
+uniform sampler2DArray image_tex, flow_tex;
+
+void main()
+{
+	// The densification pass accumulates weighted flow vectors, with the
+	// total weight in .z; dividing recovers the weighted mean flow.
+	vec3 dense_flow = texture(flow_tex, tc).xyz;
+	vec2 mean_flow = dense_flow.xy / dense_flow.z;
+
+	float I_0 = texture(image_tex, tc).x;
+	// Sample the other layer displaced by the flow. Since texture() is a
+	// gather operation and flow is conceptually scatter, this is
+	// effectively a reverse warp.
+	float I_w = texture(image_tex, vec3(tc.xy + mean_flow, 1.0f - tc.z)).x;
+
+	I = 0.5f * (I_0 + I_w);
+	I_t = I_w - I_0;
+	normalized_flow = mean_flow * textureSize(image_tex, 0).xy;
+}
--- /dev/null
+#ifndef _QUEUE_SPOT_HOLDER
+#define _QUEUE_SPOT_HOLDER 1
+
+// A RAII class to hold a shared resource, in our case an (unordered!) spot in a queue,
+// for as long as a frame is under computation.
+
+// Abstract queue whose spots QueueSpotHolder manages; implementations
+// (e.g. Player) use it to count frames currently in flight.
+class QueueInterface {
+public:
+	virtual ~QueueInterface() {}
+	virtual void take_queue_spot() = 0;
+	virtual void release_queue_spot() = 0;
+};
+
+// RAII holder for one (unordered) spot in a QueueInterface; the spot is
+// released when the holder is destroyed. Movable only.
+class QueueSpotHolder {
+public:
+	QueueSpotHolder() : queue(nullptr) {}
+
+	// Takes a spot in the given queue (must be non-null).
+	explicit QueueSpotHolder(QueueInterface *queue) : queue(queue) {
+		queue->take_queue_spot();
+	}
+
+	QueueSpotHolder(QueueSpotHolder &&other) : queue(other.queue) {
+		other.queue = nullptr;
+	}
+
+	QueueSpotHolder &operator=(QueueSpotHolder &&other) {
+		// Release any spot we already hold (the previous version leaked it),
+		// and guard against self-move, which would otherwise null out
+		// queue without ever releasing the spot.
+		if (this != &other) {
+			if (queue != nullptr) {
+				queue->release_queue_spot();
+			}
+			queue = other.queue;
+			other.queue = nullptr;
+		}
+		return *this;
+	}
+
+	~QueueSpotHolder() {
+		if (queue != nullptr) {
+			queue->release_queue_spot();
+		}
+	}
+
+	// Movable only.
+	QueueSpotHolder(const QueueSpotHolder &) = delete;
+	QueueSpotHolder &operator=(const QueueSpotHolder &) = delete;
+
+private:
+	QueueInterface *queue;
+};
+
+#endif // !defined(_QUEUE_SPOT_HOLDER)
--- /dev/null
+#version 450 core
+
+in vec3 tc;
+out vec2 flow;
+
+uniform sampler2DArray flow_tex;
+uniform vec2 scale_factor;
+
+void main()
+{
+	// Rescale the flow vectors for the new pyramid level's resolution.
+	vec2 raw_flow = texture(flow_tex, tc).xy;
+	flow = scale_factor * raw_flow;
+}
--- /dev/null
+#version 450 core
+
+in vec3 tc;
+out uint packed_gradients;
+
+uniform sampler2DArray tex;
+
+// Pack the pixel value (8 bits) plus the two gradients (12 bits each,
+// biased by +0.5 and clamped to [-0.5, 0.5]) into a single uint;
+// the inverse of unpack_gradients() in motion_search.frag.
+uint pack_gradients(float x, float y, float v)
+{
+	uint value_bits = uint(round(v * 255.0f));
+	uint x_bits = uint(round((clamp(x, -0.5f, 0.5f) + 0.5f) * 4095.0f));
+	uint y_bits = uint(round((clamp(y, -0.5f, 0.5f) + 0.5f) * 4095.0f));
+	return value_bits | (x_bits << 8) | (y_bits << 20);
+}
+
+void main()
+{
+	// There are two common Sobel filters, horizontal and vertical
+	// (see e.g. Wikipedia, or the OpenCV documentation):
+	//
+	//  [1 0 -1]   [-1 -2 -1]
+	//  [2 0 -2]   [ 0  0  0]
+	//  [1 0 -1]   [ 1  2  1]
+	// Horizontal  Vertical
+	//
+	// Note that Wikipedia and OpenCV give entirely opposite definitions
+	// with regards to sign! This appears to be an error in the OpenCV
+	// documentation, forgetting that for convolution, the filters must be
+	// flipped. We have to flip the vertical matrix again comparing to
+	// Wikipedia, though, since we have bottom-left origin (y = up)
+	// and they define y as pointing downwards.
+	//
+	// Computing both directions at once allows us to get away with eight
+	// texture samples instead of twelve.
+
+	float top_left = textureOffset(tex, tc, ivec2(-1, 1)).x;  // Note the bottom-left coordinate system.
+	float left = textureOffset(tex, tc, ivec2(-1, 0)).x;
+	float bottom_left = textureOffset(tex, tc, ivec2(-1, -1)).x;
+
+	float top = textureOffset(tex, tc, ivec2( 0, 1)).x;
+	float bottom = textureOffset(tex, tc, ivec2( 0, -1)).x;
+
+	float top_right = textureOffset(tex, tc, ivec2( 1, 1)).x;
+	float right = textureOffset(tex, tc, ivec2( 1, 0)).x;
+	float bottom_right = textureOffset(tex, tc, ivec2( 1, -1)).x;
+
+	vec2 gradients;
+	gradients.x = (top_right + 2.0f * right + bottom_right) - (top_left + 2.0f * left + bottom_left);
+	gradients.y = (top_left + 2.0 * top + top_right) - (bottom_left + 2.0f * bottom + bottom_right);
+
+	// Normalize so that we have a normalized unit of intensity levels per pixel.
+	gradients.x *= 0.125;
+	gradients.y *= 0.125;
+
+	// Also store the actual pixel value, so that we get it “for free”
+	// when we sample the gradients in motion_search.frag later.
+	float center = texture(tex, tc).x;
+
+	// Pack everything into a single 32-bit value, using simple fixed-point.
+	packed_gradients = pack_gradients(gradients.x, gradients.y, center);
+}
--- /dev/null
+#version 450 core
+
+in vec3 tc, tc_left, tc_down;
+in vec3 equation_tc_assuming_left, equation_tc_assuming_right;
+in float element_x_idx, element_sum_idx;
+out vec2 diff_flow;
+
+uniform sampler2DArray diff_flow_tex, diffusivity_tex;
+uniform usampler2DArray equation_red_tex, equation_black_tex;
+uniform int phase;
+
+uniform int num_nonzero_phases;
+
+// See pack_floats_shared() in equations.frag.
+// Unpacks two 12-bit signed mantissas (bits 0-11 and 12-23 of c) that
+// share a single exponent stored in the top bits.
+vec2 unpack_floats_shared(uint c)
+{
+	// Recover the exponent, and multiply it in. Add one because
+	// we have denormalized mantissas, then another one because we
+	// already reduced the exponent by one. Then subtract 20, because
+	// we are going to shift up the number by 20 below to recover the sign bits.
+	float normalizer = uintBitsToFloat(((c >> 1) & 0x7f800000u) - (18 << 23));
+	normalizer *= (1.0 / 2047.0);
+
+	// Shift the values up so that we recover the sign bit, then normalize.
+	// (The int() casts make the subsequent int-to-float conversion signed.)
+	float a = int(uint(c & 0x000fffu) << 20) * normalizer;
+	float b = int(uint(c & 0xfff000u) << 8) * normalizer;
+
+	return vec2(a, b);
+}
+
+// Diffusivity samples with w < 1.0 come from (at least half-way to) the
+// border; such samples contribute zero smoothness.
+float zero_if_outside_border(vec4 val)
+{
+	return (val.w < 1.0f) ? 0.0f : val.x;
+}
+
+void main()
+{
+	// Red-black SOR: Every other pass, we update every other element in a
+	// checkerboard pattern. This is rather suboptimal for the GPU, as it
+	// just immediately throws away half of the warp, but it helps convergence
+	// a _lot_ (rough testing indicates that five iterations of SOR is as good
+	// as ~50 iterations of Jacobi). We could probably do better by reorganizing
+	// the data into two-values-per-pixel, so-called “twinned buffering”;
+	// seemingly, it helps Haswell by ~15% on the SOR code, but GTX 950 not at all
+	// (at least not on 720p). Presumably the latter is already bandwidth bound.
+	int color = int(round(element_sum_idx)) & 1;
+	if (color != phase) discard;
+
+	// The equation textures are packed to half width, so which texel to
+	// fetch depends on whether this element sits at an even or odd x
+	// (see the corresponding setup in sor.vert).
+	uvec4 equation;
+	vec3 equation_tc;
+	if ((int(round(element_x_idx)) & 1) == 0) {
+		equation_tc = equation_tc_assuming_left;
+	} else {
+		equation_tc = equation_tc_assuming_right;
+	}
+	if (phase == 0) {
+		equation = texture(equation_red_tex, equation_tc);
+	} else {
+		equation = texture(equation_black_tex, equation_tc);
+	}
+	float inv_A11 = uintBitsToFloat(equation.x);
+	float A12 = uintBitsToFloat(equation.y);
+	float inv_A22 = uintBitsToFloat(equation.z);
+	vec2 b = unpack_floats_shared(equation.w);
+
+	const float omega = 1.8;  // Marginally better than 1.6, it seems.
+
+	if (num_nonzero_phases == 0) {
+		// Simplified version of the code below, assuming diff_flow == 0.0f everywhere.
+		diff_flow.x = omega * b.x * inv_A11;
+		diff_flow.y = omega * b.y * inv_A22;
+	} else {
+		// Subtract the missing terms from the right-hand side
+		// (it couldn't be done earlier, because we didn't know
+		// the values of the neighboring pixels; they change for
+		// each SOR iteration).
+		float smooth_l = zero_if_outside_border(texture(diffusivity_tex, tc_left));
+		float smooth_r = zero_if_outside_border(textureOffset(diffusivity_tex, tc_left, ivec2(1, 0)));
+		float smooth_d = zero_if_outside_border(texture(diffusivity_tex, tc_down));
+		float smooth_u = zero_if_outside_border(textureOffset(diffusivity_tex, tc_down, ivec2(0, 1)));
+		b += smooth_l * textureOffset(diff_flow_tex, tc, ivec2(-1,  0)).xy;
+		b += smooth_r * textureOffset(diff_flow_tex, tc, ivec2( 1,  0)).xy;
+		b += smooth_d * textureOffset(diff_flow_tex, tc, ivec2( 0, -1)).xy;
+		b += smooth_u * textureOffset(diff_flow_tex, tc, ivec2( 0,  1)).xy;
+
+		if (num_nonzero_phases == 1) {
+			// Presumably the diff flow texture has not been written yet
+			// in this case, so its contents are taken as zero — confirm.
+			diff_flow = vec2(0.0f);
+		} else {
+			diff_flow = texture(diff_flow_tex, tc).xy;
+		}
+
+		// From https://en.wikipedia.org/wiki/Successive_over-relaxation.
+		float sigma_u = A12 * diff_flow.y;
+		diff_flow.x += omega * ((b.x - sigma_u) * inv_A11 - diff_flow.x);
+		float sigma_v = A12 * diff_flow.x;
+		diff_flow.y += omega * ((b.y - sigma_v) * inv_A22 - diff_flow.y);
+	}
+}
--- /dev/null
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 tc, tc_left, tc_down;
+out vec3 equation_tc_assuming_left, equation_tc_assuming_right;
+out float element_x_idx;
+out float element_sum_idx;
+
+uniform sampler2DArray diff_flow_tex, diffusivity_tex;
+uniform usampler2DArray equation_red_tex;
+
+void main()
+{
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+	gl_Layer = gl_InstanceID;
+
+	tc = vec3(position, gl_InstanceID);
+	// Half a texel to the left/below (presumably so the diffusivity fetches
+	// in sor.frag land between this texel and its neighbor — confirm).
+	tc_left = vec3(tc.x - 0.5f / textureSize(diffusivity_tex, 0).x, tc.y, gl_InstanceID);
+	tc_down = vec3(tc.x, tc.y - 0.5f / textureSize(diffusivity_tex, 0).y, gl_InstanceID);
+
+	// The equation textures have half the horizontal width, so we need to adjust the texel centers.
+	// It becomes extra tricky since the SOR texture might be of odd size, and then
+	// the equation texture is not exactly half the size.
+	vec2 element_idx = position * textureSize(diff_flow_tex, 0).xy - 0.5f;
+	float equation_texel_number_assuming_left = element_idx.x / 2.0f;
+	float equation_texel_number_assuming_right = (element_idx.x - 1.0f) / 2.0f;
+	equation_tc_assuming_left.x = (equation_texel_number_assuming_left + 0.5f) / textureSize(equation_red_tex, 0).x;
+	equation_tc_assuming_right.x = (equation_texel_number_assuming_right + 0.5f) / textureSize(equation_red_tex, 0).x;
+	equation_tc_assuming_left.y = tc.y;
+	equation_tc_assuming_right.y = tc.y;
+	equation_tc_assuming_left.z = gl_InstanceID;
+	equation_tc_assuming_right.z = gl_InstanceID;
+
+	element_x_idx = element_idx.x;
+	element_sum_idx = element_idx.x + element_idx.y;
+}
--- /dev/null
+#version 450 core
+
+in vec2 image_pos;
+flat in vec2 flow, I_0_check_offset, I_1_check_offset;
+out vec2 out_flow;
+
+uniform sampler2DArray gray_tex;
+
+void main()
+{
+	out_flow = flow;
+
+	// The fragment depth is a mismatch penalty between the two images at
+	// the splat target (presumably resolved by the depth test, keeping the
+	// best-matching splat per pixel — confirm against the C++ setup).
+	// TODO: Check if we are sampling out-of-image.
+	float I_0 = texture(gray_tex, vec3(image_pos + I_0_check_offset, 0)).r;
+	float I_1 = texture(gray_tex, vec3(image_pos + I_1_check_offset, 1)).r;
+	float diff = abs(I_1 - I_0);
+	// Fixed: diff is a scalar, so the previous “diff.x” swizzle was invalid GLSL.
+	gl_FragDepth = 0.125 * diff;  // Make sure we stay well under the 1.0 maximum.
+}
--- /dev/null
+#version 450 core
+
+layout(location=0) in vec2 position;
+out vec2 image_pos;
+flat out vec2 flow, I_0_check_offset, I_1_check_offset;
+
+uniform vec2 splat_size; // In 0..1 coordinates.
+uniform vec2 inv_flow_size;
+uniform float alpha;
+uniform sampler2DArray flow_tex; // 0 = forward flow, 1 = backward flow.
+
+// Vertex shader for forward splatting: one instance per flow-field texel
+// (all of layer 0 first, then all of layer 1), positioning a small quad of
+// size splat_size where that texel's flow lands at interpolation time alpha.
+void main()
+{
+ // Decode the flat instance number into (x, y, layer) within flow_tex.
+ int instance = gl_InstanceID;
+ int num_pixels_per_layer = textureSize(flow_tex, 0).x * textureSize(flow_tex, 0).y;
+ int src_layer;
+ if (instance >= num_pixels_per_layer) {
+ instance -= num_pixels_per_layer;
+ src_layer = 1;
+ } else {
+ src_layer = 0;
+ }
+ int x = instance % textureSize(flow_tex, 0).x;
+ int y = instance / textureSize(flow_tex, 0).x;
+
+ // Find out where to splat this to.
+ // The backward flow is negated so both layers express the same direction.
+ vec2 full_flow = texelFetch(flow_tex, ivec3(x, y, src_layer), 0).xy;
+ float splat_alpha;
+ if (src_layer == 1) { // Reverse flow.
+ full_flow = -full_flow;
+ splat_alpha = 1.0f - alpha;
+ } else {
+ splat_alpha = alpha;
+ }
+ full_flow *= inv_flow_size;
+
+ vec2 patch_center = (ivec2(x, y) + 0.5) * inv_flow_size + full_flow * splat_alpha;
+ image_pos = patch_center + splat_size * (position - 0.5);
+
+ // Offsets the fragment shader uses to sample I_0 and I_1 for its
+ // photoconsistency check (which drives the depth value).
+ flow = full_flow;
+ I_0_check_offset = full_flow * -alpha;
+ I_1_check_offset = full_flow * (1.0f - alpha);
+
+ // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+ //
+ // 2.000 0.000 0.000 -1.000
+ // 0.000 2.000 0.000 -1.000
+ // 0.000 0.000 -2.000 -1.000
+ // 0.000 0.000 0.000 1.000
+ gl_Position = vec4(2.0 * image_pos.x - 1.0, 2.0 * image_pos.y - 1.0, -1.0, 1.0);
+}
--- /dev/null
+syntax = "proto3";
+
+// Corresponds to struct Clip.
+message ClipProto {
+ // In and out points, as stream PTS values.
+ int64 pts_in = 1;
+ int64 pts_out = 2;
+ // Presumably one description per stream/camera — confirm against struct Clip.
+ repeated string description = 3;
+ int64 stream_idx = 4;
+ double fade_time_seconds = 5;
+}
+
+// An ordered list of clips.
+message ClipListProto {
+ repeated ClipProto clip = 1;
+}
+
+// Top-level persisted state: the clip list and the play list.
+message StateProto {
+ ClipListProto clip_list = 1;
+ ClipListProto play_list = 2;
+}
--- /dev/null
+#include "util.h"
+
+#include <assert.h>
+#include <memory>
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+// Reads a .flo-style flow file: a uint32 magic/header, uint32 width and
+// height, then width*height Vec2 (du, dv) samples. Exits with an error
+// message on I/O failure (the previous version would read garbage or crash
+// on a missing/truncated file, and leaked the FILE handle).
+Flow read_flow(const char *filename)
+{
+ FILE *flowfp = fopen(filename, "rb");
+ if (flowfp == nullptr) {
+ perror(filename);
+ exit(1);
+ }
+ uint32_t hdr, width, height;
+ if (fread(&hdr, sizeof(hdr), 1, flowfp) != 1 ||
+     fread(&width, sizeof(width), 1, flowfp) != 1 ||
+     fread(&height, sizeof(height), 1, flowfp) != 1) {
+ fprintf(stderr, "%s: Short read on flow header\n", filename);
+ exit(1);
+ }
+
+ unique_ptr<Vec2[]> flow(new Vec2[width * height]);
+ // Guard the zero-area case; fread() with size 0 would report 0 “elements”.
+ if (width * height != 0 &&
+     fread(flow.get(), width * height * sizeof(Vec2), 1, flowfp) != 1) {
+ fprintf(stderr, "%s: Short read on flow data\n", filename);
+ exit(1);
+ }
+ fclose(flowfp);  // Was previously leaked.
+
+ Flow ret;
+ ret.width = width;
+ ret.height = height;
+ ret.flow = move(flow);
+ return ret;
+}
--- /dev/null
+#ifndef _UTIL_H
+#define _UTIL_H 1
+
+#include <algorithm>
+#include <math.h>
+#include <memory>
+#include <stdint.h>
+
+// A single flow vector; du and dv are in pixels (see flow2rgb below).
+struct Vec2 {
+ float du, dv;
+};
+
+// A dense flow field of width x height Vec2 samples, row-major.
+struct Flow {
+ uint32_t width, height;
+ std::unique_ptr<Vec2[]> flow;
+};
+
+// Reads a flow field from the given file; see the implementation for the format.
+Flow read_flow(const char *filename);
+
+// du and dv are in pixels.
+// Visualizes a flow vector as a color: direction maps to hue, magnitude to
+// brightness (saturating at 20 pixels). Output channels are written to
+// *rr, *gg, *bb as 0..255.
+inline void flow2rgb(float du, float dv, uint8_t *rr, uint8_t *gg, uint8_t *bb)
+{
+ float angle = atan2(dv, du);
+ float magnitude = std::min(hypot(du, dv) / 20.0, 1.0);
+
+ // HSV to RGB (from Wikipedia). Saturation is 1.
+ float c = magnitude;
+ // Hue in [0, 6): atan2 returns (-pi, pi], so shift by pi before scaling.
+ float h = (angle + M_PI) * 6.0 / (2.0 * M_PI);
+ float X = c * (1.0 - fabs(fmod(h, 2.0) - 1.0));
+ float r = 0.0f, g = 0.0f, b = 0.0f;
+ if (h <= 1.0f) {
+ r = c;
+ g = X;
+ } else if (h <= 2.0f) {
+ r = X;
+ g = c;
+ } else if (h <= 3.0f) {
+ g = c;
+ b = X;
+ } else if (h <= 4.0f) {
+ g = X;
+ b = c;
+ } else if (h <= 5.0f) {
+ r = X;
+ b = c;
+ } else if (h <= 6.0f) {
+ r = c;
+ b = X;
+ } else {
+ // h is NaN, so black is fine.
+ }
+ // m = V - C from the standard HSV formula; zero here since saturation is 1,
+ // but kept for symmetry with the general conversion.
+ float m = magnitude - c;
+ r += m;
+ g += m;
+ b += m;
+ r = std::max(std::min(r, 1.0f), 0.0f);
+ g = std::max(std::min(g, 1.0f), 0.0f);
+ b = std::max(std::min(b, 1.0f), 0.0f);
+ *rr = lrintf(r * 255.0f);
+ *gg = lrintf(g * 255.0f);
+ *bb = lrintf(b * 255.0f);
+}
+
+#endif // !defined(_UTIL_H)
--- /dev/null
+#include "vaapi_jpeg_decoder.h"
+
+#include "jpeg_destroyer.h"
+#include "jpeg_frame.h"
+#include "shared/memcpy_interleaved.h"
+
+#include <X11/Xlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <jpeglib.h>
+#include <list>
+#include <mutex>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <unistd.h>
+#include <va/va.h>
+#include <va/va_drm.h>
+#include <va/va_x11.h>
+
+using namespace std;
+
+// Global VA-API state, set up once by init_jpeg_vaapi().
+static unique_ptr<VADisplayWithCleanup> va_dpy;
+static VAConfigID config_id;
+static VAImageFormat uyvy_format;
+// True only after init_jpeg_vaapi() found a working VLD JPEG decoder.
+bool vaapi_jpeg_decoding_usable = false;
+
+// A surface/context/image set for decoding one frame at a given resolution.
+struct VAResources {
+ unsigned width, height;
+ VASurfaceID surface;
+ VAContextID context;
+ VAImage image;
+};
+// Recycled VAResources, so we do not recreate surfaces for every frame.
+// Guarded by va_resources_mutex.
+static list<VAResources> va_resources_freelist;
+static mutex va_resources_mutex;
+
+#define CHECK_VASTATUS(va_status, func) \
+ if (va_status != VA_STATUS_SUCCESS) { \
+ fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+ exit(1); \
+ }
+
+#define CHECK_VASTATUS_RET(va_status, func) \
+ if (va_status != VA_STATUS_SUCCESS) { \
+ fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
+ return nullptr; \
+ }
+
+// From libjpeg (although it's of course identical between implementations).
+// Maps zigzag (bitstream) coefficient index to natural (row-major) index;
+// used below to reorder quantization tables for VA-API.
+static const int jpeg_natural_order[DCTSIZE2] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+// Returns a surface/context/image set for the given resolution, reusing an
+// entry from the freelist if one matches exactly, otherwise creating new
+// VA objects (aborting on failure). Pair with release_va_resources().
+VAResources get_va_resources(unsigned width, unsigned height)
+{
+ {
+ lock_guard<mutex> lock(va_resources_mutex);
+ for (auto it = va_resources_freelist.begin(); it != va_resources_freelist.end(); ++it) {
+ if (it->width == width && it->height == height) {
+ VAResources ret = *it;
+ va_resources_freelist.erase(it);
+ return ret;
+ }
+ }
+ }
+
+ VAResources ret;
+
+ ret.width = width;
+ ret.height = height;
+
+ VAStatus va_status = vaCreateSurfaces(va_dpy->va_dpy, VA_RT_FORMAT_YUV422,
+ width, height,
+ &ret.surface, 1, nullptr, 0);
+ CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+ va_status = vaCreateContext(va_dpy->va_dpy, config_id, width, height, 0, &ret.surface, 1, &ret.context);
+ CHECK_VASTATUS(va_status, "vaCreateContext");
+
+ va_status = vaCreateImage(va_dpy->va_dpy, &uyvy_format, width, height, &ret.image);
+ CHECK_VASTATUS(va_status, "vaCreateImage");
+
+ return ret;
+}
+
+// Returns a resource set to the freelist for reuse. The freelist is capped;
+// when over the cap, the least recently returned entry (at the back, since
+// we push to the front) is destroyed before adding the new one.
+void release_va_resources(VAResources resources)
+{
+ lock_guard<mutex> lock(va_resources_mutex);
+ if (va_resources_freelist.size() > 10) {
+ auto it = va_resources_freelist.end();
+ --it;
+
+ VAStatus va_status = vaDestroyImage(va_dpy->va_dpy, it->image.image_id);
+ CHECK_VASTATUS(va_status, "vaDestroyImage");
+
+ va_status = vaDestroyContext(va_dpy->va_dpy, it->context);
+ CHECK_VASTATUS(va_status, "vaDestroyContext");
+
+ va_status = vaDestroySurfaces(va_dpy->va_dpy, &it->surface, 1);
+ CHECK_VASTATUS(va_status, "vaDestroySurfaces");
+
+ va_resources_freelist.erase(it);
+ }
+
+ va_resources_freelist.push_front(resources);
+}
+
+// RAII wrapper to release VAResources on return (even on error).
+// Noncopyable, since two copies would both release the same resources.
+// Call commit() if ownership is transferred elsewhere and the release
+// should be skipped.
+class ReleaseVAResources {
+public:
+ ReleaseVAResources(const VAResources &resources)
+ : resources(resources) {}
+
+ ReleaseVAResources(const ReleaseVAResources &) = delete;
+ ReleaseVAResources &operator=(const ReleaseVAResources &) = delete;
+
+ ~ReleaseVAResources()
+ {
+ if (!committed) {
+ release_va_resources(resources);
+ }
+ }
+
+ void commit() { committed = true; }
+
+private:
+ const VAResources &resources;
+ bool committed = false;
+};
+
+// Tears down in reverse order of acquisition: the VA display first, then
+// the X11 connection or DRM fd that backed it (only one of the two is set,
+// depending on how the display was opened).
+VADisplayWithCleanup::~VADisplayWithCleanup()
+{
+ if (va_dpy != nullptr) {
+ vaTerminate(va_dpy);
+ }
+ if (x11_display != nullptr) {
+ XCloseDisplay(x11_display);
+ }
+ if (drm_fd != -1) {
+ close(drm_fd);
+ }
+}
+
+// Opens a VA display, either via X11 (empty string or an X display name) or
+// via a DRM render node (an absolute path such as /dev/dri/renderD128).
+// Returns nullptr on failure. Note that vaInitialize() is _not_ called here;
+// see try_open_va() for that.
+unique_ptr<VADisplayWithCleanup> va_open_display(const string &va_display)
+{
+ if (va_display.empty() || va_display[0] != '/') { // An X display.
+ Display *x11_display = XOpenDisplay(va_display.empty() ? nullptr : va_display.c_str());
+ if (x11_display == nullptr) {
+ fprintf(stderr, "error: can't connect to X server!\n");
+ return nullptr;
+ }
+
+ unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
+ ret->x11_display = x11_display;
+ ret->va_dpy = vaGetDisplay(x11_display);
+ if (ret->va_dpy == nullptr) {
+ return nullptr;
+ }
+ return ret;
+ } else { // A DRM node on the filesystem (e.g. /dev/dri/renderD128).
+ int drm_fd = open(va_display.c_str(), O_RDWR);
+ if (drm_fd == -1) {
+ perror(va_display.c_str());
+ return nullptr;
+ }
+ unique_ptr<VADisplayWithCleanup> ret(new VADisplayWithCleanup);
+ ret->drm_fd = drm_fd;
+ ret->va_dpy = vaGetDisplayDRM(drm_fd);
+ if (ret->va_dpy == nullptr) {
+ return nullptr;
+ }
+ return ret;
+ }
+}
+
+// Opens and initializes the given VA display, and checks that it exposes a
+// VLD (full slice-level decode) entry point for baseline JPEG. Returns
+// nullptr on failure, optionally storing a message in *error.
+unique_ptr<VADisplayWithCleanup> try_open_va(const string &va_display, string *error)
+{
+ unique_ptr<VADisplayWithCleanup> va_dpy = va_open_display(va_display);
+ if (va_dpy == nullptr) {
+ if (error != nullptr)
+ *error = "Opening VA display failed";
+ return nullptr;
+ }
+ int major_ver, minor_ver;
+ VAStatus va_status = vaInitialize(va_dpy->va_dpy, &major_ver, &minor_ver);
+ if (va_status != VA_STATUS_SUCCESS) {
+ char buf[256];
+ // No trailing newline; this message is stored in a string like the
+ // other error messages here, not printed directly.
+ snprintf(buf, sizeof(buf), "vaInitialize() failed with status %d", va_status);
+ if (error != nullptr)
+ *error = buf;
+ return nullptr;
+ }
+
+ int num_entrypoints = vaMaxNumEntrypoints(va_dpy->va_dpy);
+ unique_ptr<VAEntrypoint[]> entrypoints(new VAEntrypoint[num_entrypoints]);
+ if (entrypoints == nullptr) {
+ if (error != nullptr)
+ *error = "Failed to allocate memory for VA entry points";
+ return nullptr;
+ }
+
+ vaQueryConfigEntrypoints(va_dpy->va_dpy, VAProfileJPEGBaseline, entrypoints.get(), &num_entrypoints);
+ for (int slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
+ if (entrypoints[slice_entrypoint] != VAEntrypointVLD) {
+ continue;
+ }
+
+ // We found a usable decode, so return it.
+ return va_dpy;
+ }
+
+ if (error != nullptr)
+ *error = "Can't find VAEntrypointVLD for the JPEG profile";
+ return nullptr;
+}
+
+// Probes for a VA display exposing a JPEG decoder: first the default X11
+// display, then each /dev/dri/renderD* node in turn. Returns "" for the
+// default display, a device path for a DRM node, or "none" if nothing works.
+string get_usable_va_display()
+{
+ // Reduce the amount of chatter while probing,
+ // unless the user has specified otherwise.
+ bool need_env_reset = false;
+ if (getenv("LIBVA_MESSAGING_LEVEL") == nullptr) {
+ setenv("LIBVA_MESSAGING_LEVEL", "0", true);
+ need_env_reset = true;
+ }
+
+ string found_display = "none";  // Sentinel understood by init_jpeg_vaapi().
+
+ // First try the default (ie., whatever $DISPLAY is set to).
+ unique_ptr<VADisplayWithCleanup> va_dpy = try_open_va("", nullptr);
+ if (va_dpy != nullptr) {
+ found_display = "";
+ } else {
+ fprintf(stderr, "The X11 display did not expose a VA-API JPEG decoder.\n");
+
+ // Try all /dev/dri/render* in turn. TODO: Accept /dev/dri/card*, too?
+ glob_t g;
+ int err = glob("/dev/dri/renderD*", 0, nullptr, &g);
+ if (err != 0) {
+ fprintf(stderr, "Couldn't list render nodes (%s) when trying to autodetect a replacement.\n", strerror(errno));
+ } else {
+ for (size_t i = 0; i < g.gl_pathc; ++i) {
+ string path = g.gl_pathv[i];
+ va_dpy = try_open_va(path, nullptr);
+ if (va_dpy != nullptr) {
+ fprintf(stderr, "Autodetected %s as a suitable replacement; using it.\n",
+ path.c_str());
+ found_display = path;
+ break;
+ }
+ }
+ globfree(&g);  // Was previously leaked when no node was usable.
+ }
+ }
+
+ if (found_display == "none") {
+ fprintf(stderr, "No suitable VA-API JPEG decoders were found in /dev/dri; giving up.\n");
+ fprintf(stderr, "Note that if you are using an Intel CPU with an external GPU,\n");
+ fprintf(stderr, "you may need to enable the integrated Intel GPU in your BIOS\n");
+ fprintf(stderr, "to expose Quick Sync.\n");
+ }
+
+ // Restore the environment on every path (the failure path previously
+ // left LIBVA_MESSAGING_LEVEL set).
+ if (need_env_reset) {
+ unsetenv("LIBVA_MESSAGING_LEVEL");
+ }
+ return found_display;
+}
+
+// One-time setup: finds a usable VA display, creates the JPEG decode config,
+// and picks the UYVY image format for readback. Sets
+// vaapi_jpeg_decoding_usable on success; on any failure, it simply returns,
+// leaving VA-API decoding disabled.
+void init_jpeg_vaapi()
+{
+ string dpy = get_usable_va_display();
+ if (dpy == "none") {
+ return;
+ }
+
+ va_dpy = try_open_va(dpy, nullptr);
+ if (va_dpy == nullptr) {
+ return;
+ }
+
+ VAConfigAttrib attr = { VAConfigAttribRTFormat, VA_RT_FORMAT_YUV422 };
+
+ VAStatus va_status = vaCreateConfig(va_dpy->va_dpy, VAProfileJPEGBaseline, VAEntrypointVLD,
+ &attr, 1, &config_id);
+ CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+ int num_formats = vaMaxNumImageFormats(va_dpy->va_dpy);
+ assert(num_formats > 0);
+
+ unique_ptr<VAImageFormat[]> formats(new VAImageFormat[num_formats]);
+ va_status = vaQueryImageFormats(va_dpy->va_dpy, formats.get(), &num_formats);
+ CHECK_VASTATUS(va_status, "vaQueryImageFormats");
+
+ bool found = false;
+ for (int i = 0; i < num_formats; ++i) {
+ // Seemingly VA_FOURCC_422H is no good for vaGetImage(). :-/
+ if (formats[i].fourcc == VA_FOURCC_UYVY) {
+ memcpy(&uyvy_format, &formats[i], sizeof(VAImageFormat));
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ // NOTE(review): config_id is not destroyed on this path; a harmless
+ // one-time leak at startup, but vaDestroyConfig() would be cleaner.
+ return;
+ }
+
+ fprintf(stderr, "VA-API JPEG decoding initialized.\n");
+ vaapi_jpeg_decoding_usable = true;
+}
+
+// RAII wrapper that destroys a VA buffer when going out of scope.
+// Noncopyable, since copying would lead to a double vaDestroyBuffer().
+class VABufferDestroyer {
+public:
+ VABufferDestroyer(VADisplay dpy, VABufferID buf)
+ : dpy(dpy), buf(buf) {}
+
+ VABufferDestroyer(const VABufferDestroyer &) = delete;
+ VABufferDestroyer &operator=(const VABufferDestroyer &) = delete;
+
+ ~VABufferDestroyer() {
+ VAStatus va_status = vaDestroyBuffer(dpy, buf);
+ CHECK_VASTATUS(va_status, "vaDestroyBuffer");
+ }
+
+private:
+ VADisplay dpy;
+ VABufferID buf;
+};
+
+// Decodes a baseline 4:2:2 JPEG via VA-API: parses the headers with libjpeg,
+// translates them into VA-API picture/IQ/Huffman/slice buffers, runs the
+// hardware decode, and reads the result back as semiplanar Y' + CbCr.
+// Returns nullptr on any unsupported input or VA error.
+shared_ptr<Frame> decode_jpeg_vaapi(const string &jpeg)
+{
+ jpeg_decompress_struct dinfo;
+ jpeg_error_mgr jerr;
+ dinfo.err = jpeg_std_error(&jerr);
+ jpeg_create_decompress(&dinfo);
+ JPEGDestroyer destroy_dinfo(&dinfo);
+
+ jpeg_mem_src(&dinfo, reinterpret_cast<const unsigned char *>(jpeg.data()), jpeg.size());
+ jpeg_read_header(&dinfo, true);
+
+ if (dinfo.num_components != 3) {
+ fprintf(stderr, "Not a color JPEG. (%d components, Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+ dinfo.num_components,
+ dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+ dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+ dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+ return nullptr;
+ }
+ if (dinfo.comp_info[0].h_samp_factor != 2 ||
+ dinfo.comp_info[1].h_samp_factor != 1 ||
+ dinfo.comp_info[1].v_samp_factor != dinfo.comp_info[0].v_samp_factor ||
+ dinfo.comp_info[2].h_samp_factor != 1 ||
+ dinfo.comp_info[2].v_samp_factor != dinfo.comp_info[0].v_samp_factor) {
+ fprintf(stderr, "Not 4:2:2. (Y=%dx%d, Cb=%dx%d, Cr=%dx%d)\n",
+ dinfo.comp_info[0].h_samp_factor, dinfo.comp_info[0].v_samp_factor,
+ dinfo.comp_info[1].h_samp_factor, dinfo.comp_info[1].v_samp_factor,
+ dinfo.comp_info[2].h_samp_factor, dinfo.comp_info[2].v_samp_factor);
+ return nullptr;
+ }
+
+ // Picture parameters.
+ VAPictureParameterBufferJPEGBaseline pic_param;
+ memset(&pic_param, 0, sizeof(pic_param));
+ pic_param.picture_width = dinfo.image_width;
+ pic_param.picture_height = dinfo.image_height;
+ for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+ const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
+ pic_param.components[component_idx].component_id = comp->component_id;
+ pic_param.components[component_idx].h_sampling_factor = comp->h_samp_factor;
+ pic_param.components[component_idx].v_sampling_factor = comp->v_samp_factor;
+ pic_param.components[component_idx].quantiser_table_selector = comp->quant_tbl_no;
+ }
+ pic_param.num_components = dinfo.num_components;
+ pic_param.color_space = 0; // YUV.
+ pic_param.rotation = VA_ROTATION_NONE;
+
+ VABufferID pic_param_buffer;
+ VAStatus va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAPictureParameterBufferType, sizeof(pic_param), 1, &pic_param, &pic_param_buffer);
+ CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+ VABufferDestroyer destroy_pic_param(va_dpy->va_dpy, pic_param_buffer);
+
+ // Quantization matrices.
+ VAIQMatrixBufferJPEGBaseline iq;
+ memset(&iq, 0, sizeof(iq));
+
+ for (int quant_tbl_idx = 0; quant_tbl_idx < min(4, NUM_QUANT_TBLS); ++quant_tbl_idx) {
+ const JQUANT_TBL *qtbl = dinfo.quant_tbl_ptrs[quant_tbl_idx];
+ if (qtbl == nullptr) {
+ iq.load_quantiser_table[quant_tbl_idx] = 0;
+ } else {
+ iq.load_quantiser_table[quant_tbl_idx] = 1;
+ for (int i = 0; i < 64; ++i) {
+ if (qtbl->quantval[i] > 255) {
+ fprintf(stderr, "Baseline JPEG only!\n");
+ return nullptr;
+ }
+ // Reorder from libjpeg's natural order back to zigzag for VA-API.
+ iq.quantiser_table[quant_tbl_idx][i] = qtbl->quantval[jpeg_natural_order[i]];
+ }
+ }
+ }
+
+ VABufferID iq_buffer;
+ va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAIQMatrixBufferType, sizeof(iq), 1, &iq, &iq_buffer);
+ CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+ VABufferDestroyer destroy_iq(va_dpy->va_dpy, iq_buffer);
+
+ // Huffman tables (arithmetic is not supported).
+ VAHuffmanTableBufferJPEGBaseline huff;
+ memset(&huff, 0, sizeof(huff));
+
+ for (int huff_tbl_idx = 0; huff_tbl_idx < min(2, NUM_HUFF_TBLS); ++huff_tbl_idx) {
+ const JHUFF_TBL *ac_hufftbl = dinfo.ac_huff_tbl_ptrs[huff_tbl_idx];
+ const JHUFF_TBL *dc_hufftbl = dinfo.dc_huff_tbl_ptrs[huff_tbl_idx];
+ if (ac_hufftbl == nullptr) {
+ assert(dc_hufftbl == nullptr);
+ huff.load_huffman_table[huff_tbl_idx] = 0;
+ } else {
+ assert(dc_hufftbl != nullptr);
+ huff.load_huffman_table[huff_tbl_idx] = 1;
+
+ // libjpeg's bits[] array is 1-indexed, hence the i + 1.
+ for (int i = 0; i < 16; ++i) {
+ huff.huffman_table[huff_tbl_idx].num_dc_codes[i] = dc_hufftbl->bits[i + 1];
+ }
+ for (int i = 0; i < 12; ++i) {
+ huff.huffman_table[huff_tbl_idx].dc_values[i] = dc_hufftbl->huffval[i];
+ }
+ for (int i = 0; i < 16; ++i) {
+ huff.huffman_table[huff_tbl_idx].num_ac_codes[i] = ac_hufftbl->bits[i + 1];
+ }
+ for (int i = 0; i < 162; ++i) {
+ huff.huffman_table[huff_tbl_idx].ac_values[i] = ac_hufftbl->huffval[i];
+ }
+ }
+ }
+
+ VABufferID huff_buffer;
+ va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VAHuffmanTableBufferType, sizeof(huff), 1, &huff, &huff_buffer);
+ CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+ VABufferDestroyer destroy_huff(va_dpy->va_dpy, huff_buffer);
+
+ // Slice parameters (metadata about the slice).
+ VASliceParameterBufferJPEGBaseline parms;
+ memset(&parms, 0, sizeof(parms));
+ parms.slice_data_size = dinfo.src->bytes_in_buffer;
+ parms.slice_data_offset = 0;
+ parms.slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
+ parms.slice_horizontal_position = 0;
+ parms.slice_vertical_position = 0;
+ for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+ const jpeg_component_info *comp = &dinfo.comp_info[component_idx];
+ parms.components[component_idx].component_selector = comp->component_id;
+ parms.components[component_idx].dc_table_selector = comp->dc_tbl_no;
+ parms.components[component_idx].ac_table_selector = comp->ac_tbl_no;
+ if (parms.components[component_idx].dc_table_selector > 1 ||
+ parms.components[component_idx].ac_table_selector > 1) {
+ fprintf(stderr, "Uses too many Huffman tables\n");
+ return nullptr;
+ }
+ }
+ parms.num_components = dinfo.num_components;
+ parms.restart_interval = dinfo.restart_interval;
+ // NOTE(review): assumes an MCU of 16x8 pixels, which holds only when
+ // v_samp_factor == 1; the checks above also admit v_samp_factor == 2
+ // (with a 16x16 MCU) — confirm whether such files can reach this point.
+ int horiz_mcus = (dinfo.image_width + (DCTSIZE * 2) - 1) / (DCTSIZE * 2);
+ int vert_mcus = (dinfo.image_height + DCTSIZE - 1) / DCTSIZE;
+ parms.num_mcus = horiz_mcus * vert_mcus;
+
+ VABufferID slice_param_buffer;
+ va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceParameterBufferType, sizeof(parms), 1, &parms, &slice_param_buffer);
+ CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+ VABufferDestroyer destroy_slice_param(va_dpy->va_dpy, slice_param_buffer);
+
+ // The actual data. VA-API will destuff and all for us.
+ VABufferID data_buffer;
+ va_status = vaCreateBuffer(va_dpy->va_dpy, config_id, VASliceDataBufferType, dinfo.src->bytes_in_buffer, 1, const_cast<unsigned char *>(dinfo.src->next_input_byte), &data_buffer);
+ CHECK_VASTATUS_RET(va_status, "vaCreateBuffer");
+ VABufferDestroyer destroy_data(va_dpy->va_dpy, data_buffer);
+
+ // Returned to the freelist when we go out of scope, on every path;
+ // the decoded pixels are copied out below before we return.
+ VAResources resources = get_va_resources(dinfo.image_width, dinfo.image_height);
+ ReleaseVAResources release(resources);
+
+ va_status = vaBeginPicture(va_dpy->va_dpy, resources.context, resources.surface);
+ CHECK_VASTATUS_RET(va_status, "vaBeginPicture");
+ va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &pic_param_buffer, 1);
+ CHECK_VASTATUS_RET(va_status, "vaRenderPicture(pic_param)");
+ va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &iq_buffer, 1);
+ CHECK_VASTATUS_RET(va_status, "vaRenderPicture(iq)");
+ va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &huff_buffer, 1);
+ CHECK_VASTATUS_RET(va_status, "vaRenderPicture(huff)");
+ va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &slice_param_buffer, 1);
+ CHECK_VASTATUS_RET(va_status, "vaRenderPicture(slice_param)");
+ va_status = vaRenderPicture(va_dpy->va_dpy, resources.context, &data_buffer, 1);
+ CHECK_VASTATUS_RET(va_status, "vaRenderPicture(data)");
+ va_status = vaEndPicture(va_dpy->va_dpy, resources.context);
+ CHECK_VASTATUS_RET(va_status, "vaEndPicture");
+
+ // vaDeriveImage() works, but the resulting image seems to live in
+ // uncached memory, which makes copying data out from it very, very slow.
+ // Thanks to FFmpeg for the observation that you can vaGetImage() the
+ // surface onto your own image (although then, it can't be planar, which
+ // is unfortunate for us).
+#if 0
+ VAImage image;
+ va_status = vaDeriveImage(va_dpy->va_dpy, surf, &image);
+ CHECK_VASTATUS_RET(va_status, "vaDeriveImage");
+#else
+ va_status = vaSyncSurface(va_dpy->va_dpy, resources.surface);
+ CHECK_VASTATUS_RET(va_status, "vaSyncSurface");
+
+ va_status = vaGetImage(va_dpy->va_dpy, resources.surface, 0, 0, dinfo.image_width, dinfo.image_height, resources.image.image_id);
+ CHECK_VASTATUS_RET(va_status, "vaGetImage");
+#endif
+
+ void *mapped;
+ va_status = vaMapBuffer(va_dpy->va_dpy, resources.image.buf, &mapped);
+ CHECK_VASTATUS_RET(va_status, "vaMapBuffer");
+
+ shared_ptr<Frame> frame(new Frame);
+#if 0
+ // 4:2:2 planar (for vaDeriveImage).
+ frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+ frame->cb.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
+ frame->cr.reset(new uint8_t[(dinfo.image_width / 2) * dinfo.image_height]);
+ for (int component_idx = 0; component_idx < dinfo.num_components; ++component_idx) {
+ uint8_t *dptr;
+ size_t width;
+ if (component_idx == 0) {
+ dptr = frame->y.get();
+ width = dinfo.image_width;
+ } else if (component_idx == 1) {
+ dptr = frame->cb.get();
+ width = dinfo.image_width / 2;
+ } else if (component_idx == 2) {
+ dptr = frame->cr.get();
+ width = dinfo.image_width / 2;
+ } else {
+ assert(false);
+ }
+ const uint8_t *sptr = (const uint8_t *)mapped + image.offsets[component_idx];
+ size_t spitch = image.pitches[component_idx];
+ for (size_t y = 0; y < dinfo.image_height; ++y) {
+ memcpy(dptr + y * width, sptr + y * spitch, width);
+ }
+ }
+#else
+ // Convert Y'CbCr to separate Y' and CbCr.
+ frame->is_semiplanar = true;
+ frame->y.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+ frame->cbcr.reset(new uint8_t[dinfo.image_width * dinfo.image_height]);
+ const uint8_t *src = (const uint8_t *)mapped + resources.image.offsets[0];
+ // Fast path: tightly packed UYVY rows can be deinterleaved in one call.
+ if (resources.image.pitches[0] == dinfo.image_width * 2) {
+ memcpy_interleaved(frame->cbcr.get(), frame->y.get(), src, dinfo.image_width * dinfo.image_height * 2);
+ } else {
+ for (unsigned y = 0; y < dinfo.image_height; ++y) {
+ memcpy_interleaved(frame->cbcr.get() + y * dinfo.image_width, frame->y.get() + y * dinfo.image_width,
+ src + y * resources.image.pitches[0], dinfo.image_width * 2);
+ }
+ }
+#endif
+ frame->width = dinfo.image_width;
+ frame->height = dinfo.image_height;
+ frame->chroma_subsampling_x = 2;
+ frame->chroma_subsampling_y = 1;
+ frame->pitch_y = dinfo.image_width;
+ frame->pitch_chroma = dinfo.image_width / 2;
+
+ va_status = vaUnmapBuffer(va_dpy->va_dpy, resources.image.buf);
+ CHECK_VASTATUS_RET(va_status, "vaUnmapBuffer");
+
+ return frame;
+}
--- /dev/null
+#ifndef _VAAPI_JPEG_DECODER_H
+#define _VAAPI_JPEG_DECODER_H 1
+
+#include <X11/Xlib.h>
+#include <memory>
+#include <string>
+#include <va/va.h>
+
+struct Frame;
+
+// RAII holder for a VADisplay, plus whichever backing resource (X11
+// connection or DRM fd) was opened to get it; the destructor terminates
+// the display and closes the backing resource.
+struct VADisplayWithCleanup {
+ ~VADisplayWithCleanup();
+
+ VADisplay va_dpy;
+ Display *x11_display = nullptr;
+ int drm_fd = -1;
+};
+std::unique_ptr<VADisplayWithCleanup> va_open_display(const std::string &va_display); // Can return nullptr on failure.
+std::string get_usable_va_display();
+
+// Call once at startup; sets vaapi_jpeg_decoding_usable on success.
+void init_jpeg_vaapi();
+// Returns nullptr on failure (unsupported JPEG or VA error).
+std::shared_ptr<Frame> decode_jpeg_vaapi(const std::string &jpeg);
+
+// Whether decode_jpeg_vaapi() can be used.
+extern bool vaapi_jpeg_decoding_usable;
+
+#endif // !defined(_VAAPI_JPEG_DECODER_H)
--- /dev/null
+Variational refinement -- an introduction and derivation
+
+The variational refinement is probably the most difficult part of the
+algorithm to understand, in part because the description in most papers
+are very heavy on notation and rather light on exposition. I've tried
+to give a somewhat friendlier introduction to this specific algorithm
+below.
+
+The general idea is fairly simple; we try to optimize the flow field
+as a whole, by minimizing some mathematical notion of badness expressed
+as an energy function. The one used in the dense inverse search paper
+[Kroeger16; see references below] has this form:
+
+ E(U) = int( σ Ψ(E_I) + γ Ψ(E_G) + α Ψ(E_S) ) dx
+
+where Ψ(a²) = sqrt(a² + ε²) for some small constant ε = 0.001, and
+σ, γ, α are empirically set weighting constants. (We'll get to what the
+different energy terms are in a minute.) The integral is, for all practical
+purposes, just a sum over all the pixels in the flow.
+
+In general, such formulas are nonconvex and highly nonlinear, so we
+cannot hope to find a global minimum -- but if we start from the flow
+generated by the motion search, we can at least hope to make it somehow
+better by walking towards a local minimum. (In fact, there are many
+methods for optical flow that work _only_ by such minimization,
+so the word “refinement” is maybe not doing the method justice.
+One could just as well say that the motion search is a way of
+finding a reasonable starting point for the optimization.)
+
+The dense inverse search paper [Kroeger16; see references below] sets
+up the energy terms as described by some motion tensors and normalizations,
+then says simply that it is optimized by “θ_vo fixed point iterations
+and θ_vi iterations of Successive Over Relaxation (SOR) for the linear
+system”. It's not immediately obvious what this means, but it gives
+a reference to [Brox04]. However, that paper describes a numerical
+approximation scheme that is _far_ more complicated than what the DIS
+code actually does.
+
+Rather, one must look at the other main reference they are giving,
+which is [Weinzaepfel13], describing a system called DeepFlow.
+DIS borrows most of the exposition and code for its variational
+refinement from DeepFlow, just removing some terms and fixing up
+a few issues here and there. (There are some slight differences in
+the paper, like the use of ∂z instead of ∂t, but that looks mostly
+like an error to me.) Unfortunately, that paper in turn refers to
+[Brox11], which appears no more useful in clearing up the notation
+to me.
+
+However, digging down in the references, finally one finds [Zimmer11],
+which is where the tensor notation appears to come from. This allows
+us to look at the first term in the energy, E_I, which comes from the
+intensity constancy assumption. The basic idea is that optical flow,
+nearly by definition, should preserve intensity after the warp:
+
+ I_0(x + u) = I_1(x)
+
+where I_0 is the first picture, I_1 is the second, x is any 2D
+coordinate and u is the flow at x (which we are optimizing over).
+In general, we'll be optimizing over the entire field of u
+(potentially hundreds of thousands of values), but we'll be looking
+mostly at individual points, so we'll skip the coordinates when we
+can (e.g. we write u instead of u(x, y)). u is of course the 2D
+flow, although often, we'll write its components separately as u and v
+instead of as a vector u.
+
+Before we go further, we need to add some more notation:
+
+ * I_x is the partial derivative of I with respect to x (at some
+ point), and similarly for I_y. These do not depend on u,
+ so they can be precalculated before the optimization.
+ * I_xx is the double partial derivative of I, and similar for
+ I_yy and I_xy (the latter is the same as I_yx).
+ * I_t is the temporal derivative of I, ie. in practice just
+ I_t(x) = I_1(x) - I_0(x).
+
+Returning now to our original assertion:
+
+ I_0(x + u) = I_1(x)
+
+Classically in optical flow, one assumes that the flow is smooth
+and linear around the point x, which allows one to approximate this
+equation by
+
+ I_x u + I_y v + I_t = 0
+
+This is usually simply called “the optical flow constraint”,
+and gives rise to a very natural part of the energy:
+
+ E_I = I_x u + I_y v + I_t
+
+Remember that we send E_I through the function Ψ(a²) = sqrt(a² + ε²),
+so clearly Ψ(E_I) will be minimized if indeed E_I is zero.
+
+At this point, many papers start talking about Euler-Lagrange
+multivariate equations, which is a fairly daunting concept
+(at least the Wikipedia page is suitable for scaring small children).
+However, for the first two terms, we don't need its general form,
+and it reduces to something much simpler; just differentiate the energy
+by u and equate the result to zero (finding some minimum; it can't be
+a maximum, since *wave hands intensely*). Then differentiate the energy
+by v and set that to zero, too; now you have two equations in two
+unknowns (or, since we're optimizing over a field, maybe 500k
+equations in 500k unknowns -- although the equation set will be
+very sparse), which is hopefully solvable using linear methods.
+We'll look at what this gives for E_I in a moment, then try to apply
+the same notions to E_G and E_S later.
+
+First we modify E_I a bit by adding some normalization:
+
+ E_I = β_0 (I_x u + I_y v + I_t)
+
+where β_0 = 1/(abs(∇I)² + 0.01). Note that β_0 depends on I only,
+so for the purposes of optimizing u, it's a constant and can be
+precomputed across I. (β_0 will, of course, depend on x, but so
+do all the other terms in the equation.)
+
+Now we give it to Maple, differentiating first by u and then by v:
+
+> M := (u,v) -> B_0 * (I_x * u + I_y * v + I_t);
+ M := (u, v) -> B_0 (I_x u + I_y v + I_t)
+
+> diff(sqrt(M(u,v)^2 + e), u);
+ 2
+ B_0 (I_x u + I_y v + I_t) I_x
+ ------------------------------------
+ 2 2 1/2
+ (B_0 (I_x u + I_y v + I_t) + e)
+
+> diff(sqrt(M(u,v)^2 + e), v);
+ 2
+ B_0 (I_x u + I_y v + I_t) I_y
+ ------------------------------------
+ 2 2 1/2
+ (B_0 (I_x u + I_y v + I_t) + e)
+
+
+So these are the two expressions to be set to zero (for each
+point). We'll notice immediately that this isn't very linear
+in u and v, so here's where the “fixed point iterations” come in;
+we simply assume that our previous values for u and v are
+approximately good enough for the denominator, and optimize
+them in the numerator only. Then we get new values that are
+hopefully a bit closer, which we can then use for the
+denominator, and so on. (This is seemingly an old technique;
+[Brox05] cites [Ciarlet78]. It is justifiable in the sense
+that the only thing really held constant is the derivative
+of the penalizer.) In other words, if we define the constant
+
+ k1 = β_0² / sqrt(β_0² (I_x u' + I_y v' + I_t)² + ε²)
+
+(where u' and v' are the guesses for u and v from the previous
+iteration)
+
+we have the much more manageable
+
+ k1 I_x² u + k1 I_x I_y v = - k1 I_t I_x
+ k1 I_x I_y u + k1 I_y² v = - k1 I_t I_y
+
+ie., two linear equations in u and v. Now, you will notice two
+immediate problems by this equation set:
+
+ * The factor k1 is completely useless, since it's just multiplied
+ in everywhere.
+ * The set of equations is collinear (the determinant of the matrix
+ is zero), and thus there is an infinite number of possible
+ solutions—this is the so-called “aperture problem”.
+ It shouldn't be surprising, though, as we cannot expect that
+ starting with a single constraint should allow us to solve
+ for two unknowns.
+
+However, both problems will go away as soon as we start adding
+more terms, so let's look at the gradient constancy term E_G next.
+It is fairly similar to the brightness constancy term, except it
+uses the (spatial) gradient instead of intensity:
+
+ ∇I_0(x + u) = ∇I_1(x)
+
+or equivalently (by definition):
+
+ (∂I/∂x)_0(x + u) = (∂I/∂x)_1(x)
+ (∂I/∂y)_0(x + u) = (∂I/∂y)_1(x)
+
+The idea is that this is more robust to changes in lighting.
+It doesn't replace the intensity term, but augments it; the weighting
+constants σ and γ control their relative importance. Also note that
+this actually gives us two independent equations, unlike the brightness
+constancy term.
+
+However, it is not obvious at all how to discretize this. In particular,
+most papers, including [Brox04], appear to want _not_ to make any linear
+assumptions of the flow in this case, and end up with tons of terms.
+(The DIS and DeepFlow papers do, again, use some tensor notation that
+I do not understand, but I'm not convinced it actually contains any
+of the discretization.)
+
+Yet more paper searching eventually turns up [Fahad07], which simply
+states that the discretized versions of these equations are:
+
+ I_xx u + I_xy v + I_xt = 0
+ I_yx u + I_yy v + I_yt = 0.
+
+which seems to match well what the DIS code uses. Note that even though
+this is an equation set equal to zero, we can't just solve for them;
+we need to make (penalized, normalized) energy terms and add them to
+the other terms. This gives
+
+ E_G = β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt)
+
+with normalization terms
+
+ β_x = 1 / (abs(∇(I_x))² + 0.01) (∇(I_x) is the gradient of ∂I/∂x)
+ β_y = 1 / (abs(∇(I_y))² + 0.01)
+
+(The DIS paper writes ∇I_dx and ∇I_dy instead of ∇I_x and ∇I_y, but I believe
+that's a typo; the DeepFlow paper says ∇I_x and ∇I_y.)
+
+The papers both write that Ψ(E_G) is used, which would mean that the penalized
+term is
+
+ E_G = sqrt((β_x (I_xx u + I_xy v + I_xt) + β_y (I_yx u + I_yy v + I_yt))² + ε²)
+
+but that isn't what the code actually does. Instead, it seems that the two
+terms are squared independently:
+
+ E_G = sqrt((β_x (I_xx u + I_xy v + I_xt))² + (β_y (I_yx u + I_yy v + I_yt))² + ε²)
+
+Both are solvable just fine, and it probably does not matter all that much
+which we use in practice (although [Zimmer11] suggests that if we are using
+multichannel images, we should penalize the three channels separately),
+but we follow what the code actually does here.
+
+We can differentiate them and equate them to zero as before:
+
+> M_x := (u,v) -> B_x * (I_xx * u + I_xy * v + I_xt);
+ M_x := (u, v) -> B_x (I_xx u + I_xy v + I_xt)
+
+> M_y := (u,v) -> B_y * (I_xy * u + I_yy * v + I_yt);
+ M_y := (u, v) -> B_y (I_xy u + I_yy v + I_yt)
+
+> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), u);
+ 2 2
+ 2 (I_xx u + I_xy v + I_xt) B_x I_xx + 2 B_y (I_xy u + I_yy v + I_yt) I_xy
+ ---------------------------------------------------------------------------
+ 2 2 2 2 1/2
+ 2 ((I_xx u + I_xy v + I_xt) B_x + B_y (I_xy u + I_yy v + I_yt) + e)
+
+> diff(sqrt(M_x(u,v)^2 + M_y(u,v)^2 + e), v);
+ 2 2
+ 2 (I_xx u + I_xy v + I_xt) B_x I_xy + 2 B_y (I_xy u + I_yy v + I_yt) I_yy
+ ---------------------------------------------------------------------------
+ 2 2 2 2 1/2
+ 2 ((I_xx u + I_xy v + I_xt) B_x + B_y (I_xy u + I_yy v + I_yt) + e)
+
+Using the same fixed-point scheme where we hold the terms in the
+denominator constant and equal to last iteration's values, we get
+a new common constant
+
+  k2 = 1 / sqrt(β_x² (I_xx u' + I_xy v' + I_xt)² + β_y² (I_xy u' + I_yy v' + I_yt)² + ε²)
+
+and for brevity
+
+ k_x = k2 β_x²
+ k_y = k2 β_y²
+
+and thus, collecting terms for u and v, we get the two equations:
+
+ (k_x I_xx² + k_y I_xy²) u + (k_x I_xx I_xy + k_y I_xy I_yy) v = - k_x I_xx I_xt - k_y I_xy I_yt
+ (k_x I_xx I_xy + k_y I_xy I_yy) u + (k_x I_xy² + k_y I_yy²) v = - k_x I_xy I_xt - k_y I_yy I_yt
+
+which is linear in u and v, not colinear (unless we are extremely
+unlucky), and can be easily solved.
+
+Of course, for optimizing the weighted sum σ Ψ(E_I) + γ Ψ(E_G),
+we just add the two equation sets pairwise with appropriate weights.
+
+There's a small discrepancy here: The equations suggest that we should
+be squaring the normalization terms β_0², β_x², β_y²; however, the
+code does not appear to do so. It's possible that they were intended to be
+added outside of the penalization, e.g. Ψ(a²) = sqrt(β a² + ε²), but given
+that these come from [Zimmer11], which mentions nothing of the sort,
+I'll just have to assume that this is an implementation mishap.
+
+The final smoothness term is the one that binds the flow field together as a whole
+so that we don't have WxH completely independent equations (with its positive
+and negative sides, of course). It is the simplest in terms of notation,
+but it requires the full power of the Euler-Lagrange equations to minimize,
+so we'll need to figure that part out.
+
+ E_S = abs(∇u)² + abs(∇v)²
+
+or
+
+ E_S = (u_x² + u_y²) + (v_x² + v_y²)
+
+The penalized form used in the DeepFlow notation, contrary to what you'd expect
+from the paper, is:
+
+ E_S = sqrt(u_x² + u_y² + v_x² + v_y² + ε²)
+
+How would one go about to minimize such an expression by u? (We'll deal with v
+later.) It's perhaps no big surprise that the expression involves double
+derivatives, but the full form involves the Euler-Lagrange equations.
+They allow us to minimize expressions that contain x, y, u(x, y) _and_ the partial
+derivatives u_x(x, y) and u_y(x, y), although the answer becomes a differential
+equation.
+
+The Wikipedia page is, unfortunately, not very beginner-friendly,
+but the general idea is: Differentiate the expression by u_x
+(yes, differentiating by a partial derivative!), negate it, and then
+differentiate the result by x. Then do the same thing by u_y and y,
+add the two results together and equate to zero. Mathematically
+(https://en.wikipedia.org/wiki/Euler%E2%80%93Lagrange_equation#Several_functions_of_several_variables_with_single_derivative):
+
+ ∂E/∂u - ∂/∂x (∂E/∂u_x) - ∂/∂y (∂E/∂u_y) = 0
+
+The first term disappears, since we don't have a non-differentiated
+u(x, y) in E_S. (Previously, the two _other_ terms would disappear,
+because we didn't have u_x or u_y in E_I or E_G.) This means we get
+
+ - ∂/∂x (u_x / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) - ∂/∂y (u_y / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)) = 0
+
+(We don't remove the minus signs since this is supposed to be added to
+all the other terms.)
+
+This is what's called an _anisotropic diffusion_ (or Perona–Malik diffusion)
+equation, and is extensively described in literature. It has the effect of
+smoothing the flow more in some places than others; in particular, it does
+not smooth as strongly near edges, so it is edge-preserving. (It's a bit odd to
+call it anisotropic, since it does smooth equally in all directions;
+[Brox05] calls it vector-valued diffusion.)
+
+We'd love to use our usual trick of keeping the nonlinear terms in the denominator
+constant, but alas, we can't do that yet, since it's under the differentiation
+operator; this factor has to be discretized together with u before we can treat
+it as a constant. So instead, we'll define it as a function (called the
+_diffusivity_ at the given point):
+
+  g(x, y) = 1 / sqrt(u_x² + u_y² + v_x² + v_y² + ε²)
+
+which gives us
+
+ - ∂/∂x ( g(x, y) u_x ) - ∂/∂y ( g(x, y) u_y ) = 0
+
+We'll also have a similar equation for minimizing v, of course:
+
+ - ∂/∂x ( g(x, y) v_x ) - ∂/∂y ( g(x, y) v_y ) = 0
+
+There's no normalization term β here, unlike the other terms; DeepFlow2
+adds one, but we're not including it here.
+
+At this point, we make a tweak. This seemingly goes back to at least
+[Brox04], which also makes the same tweak to all the other terms
+(which we don't, but see below). We split u (and v) into something
+based on the original value plus a differential du (and dv), and then
+solve for du (or dv) instead. (In math-speak, we are moving to an
+implicit method, which is often more numerically stable.) In other words,
+
+ u(x, y) = u0(x, y) + du(x, y)
+
+where u0(x, y) is the initial guess for the flow. (It's not the value
+from previous iteration, for reasons that will be clear later, it's
+the first one. [Brox04] differs here, but it does a number of things
+differently in the numerics anyway.)
+
+This gives us:
+
+ - ∂/∂x ( g(x, y) (u0 + du)_x ) - ∂/∂y ( g(x, y) (u0 + du)_y ) = 0
+
+or
+
+ - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y ) = ∂/∂x ( g(x, y) u0_x ) + ∂/∂y ( g(x, y) u0_y )
+
+where the right-hand side is effectively a constant for these purposes
+(although it still needs to be calculated anew for each iteration,
+since g(x, y) changes).
+
+Of course, now we have a different problem; all the other terms are
+formulated in terms of u and v, not du and dv. DeepFlow solves this
+by not searching for the flow between I_0 and I_1, but between I_0 and
+a pre-warped I_1. In other words, before any of the derivatives involving
+I_t are calculated, we calculate an I_w with bilinear interpolation:
+
+ I_w(x, y) = I_1(x + u0(x, y), y + v0(x, y))
+
+and then redefine I_t (occasionally called I_z) as
+
+ I_t(x, y) = I_w(x, y) - I_0(x, y)
+
+Note that the plus sign effectively means inverting the flow, so if
+the u0 and v0 were already correctly estimated, perfectly smooth and linear
+everywhere, I_w = I_0. (All spatial derivatives are calculated on the mean
+between I_0 and I_w; the paper doesn't mention this.) After this, all the
+equations for E_I and E_G earlier will still hold, they will just be
+calculating du and dv instead. Note that this means we have three values
+for the flow; there's u0 for the initial guess, du for the current guess
+of delta from u0 (which makes u0 + du the current guess of the flow),
+and du' for the previous guess of delta from u0. (The initial values for
+du' and dv' will be zero.)
+
+Now back to our equations, as we look at practical implementation:
+
+ - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y ) = ∂/∂x ( g(x, y) u0_x ) + ∂/∂y ( g(x, y) u0_y )
+ - ∂/∂x ( g(x, y) dv_x ) - ∂/∂y ( g(x, y) dv_y ) = ∂/∂x ( g(x, y) v0_x ) + ∂/∂y ( g(x, y) v0_y )
+
+We can discretize the left-hand and right-hand side identically (they differ
+only in signs and in variable), so let's look only at
+
+ - ∂/∂x ( g(x, y) du_x ) - ∂/∂y ( g(x, y) du_y )
+
+[Brox05] equation (2.14) (which refers to a 1998 book, although I couldn't
+immediately find the equation in question in that book) discretizes this as
+
+ - 1/2 (g(x+1, y) + g(x, y)) (du(x+1, y) - du(x, y))
+ + 1/2 (g(x-1, y) + g(x, y)) (du(x, y) - du(x-1, y))
+ - 1/2 (g(x, y+1) + g(x, y)) (du(x, y+1) - du(x, y))
+ + 1/2 (g(x, y-1) + g(x, y)) (du(x, y) - du(x, y-1))
+
+It also mentions that it would be better to sample g at the half-way points,
+e.g. g(x+0.5, y), but that begs the question exactly how we'd do that, and
+DeepFlow doesn't seem to care, so we stick with their version.
+
+Now we can finally let g use the values of the flow (note that this is the
+actual flow u and v, not du and dv!) from the previous iteration, as before:
+
+ g(x, y) = 1 / sqrt(u'_x² + u'_y² + v'_x² + v'_y² + ε²)
+
+The single derivatives in g(x) are approximated by standard central differences
+(see https://en.wikipedia.org/wiki/Finite_difference_coefficient), e.g.
+
+ u_x(x, y) = 1/2 (u(x + 1, y) - u(x - 1, y))
+
+although the derivatives of I are using the fancier
+
+  I_x(x, y) = 1/12 (-I(x + 2, y) + 8 I(x + 1, y) - 8 I(x - 1, y) + I(x - 2, y))
+
+I assume this is because I_x derivatives are calculated only once, so we can
+afford more accurate derivatives (or possibly simply because of influence
+from earlier papers).
+
+Let's now define a smoothness constant between the neighbors (x,y) and (x1,y1):
+
+ s(x1, y1) = 1/2 (g(x, y) + g(x1, y1))
+
+Collecting all the du(x, y) terms of the discretized equation above,
+ignoring the right-hand side, which is just a constant for us anyway:
+
+ - s(x+1, y) (du(x+1, y) - du(x, y))
+ + s(x-1, y) (du(x, y) - du(x-1, y))
+ - s(x, y+1) (du(x, y+1) - du(x, y))
+ + s(x, y-1) (du(x, y) - du(x, y-1)) = C
+
+ - s(x+1, y) du(x+1, y) + s(x+1, y) du(x, y)
+ + s(x-1, y) du(x, y) - s(x-1, y) du(x-1, y)
+ - s(x, y+1) du(x, y+1) + s(x, y+1) du(x, y)
+ + s(x, y-1) du(x, y) - s(x, y-1) du(x, y-1) = C
+
+ (s(x+1, y) + s(x-1, y) + s(x, y+1) + s(x, y-1)) du(x, y) =
+ s(x+1, y) du(x+1, y) + s(x-1, y) du(x-1, y) + s(x, y+1) du(x, y+1) + s(x, y-1) du(x, y-1) + C
+
+It is interesting to note that if s = 1 uniformly, which would be the case
+without our penalizer Ψ(a²), we would have the familiar discrete Laplacian,
+where du(x, y) would seek to simply become the average of its four immediate
+neighbors.
+
+Now our equation system is finally complete and linear, and the rest is
+fairly pedestrian. The last term connects all the unknowns together,
+but we still solve them mostly as 2x2 matrices. The most basic iterative
+method is Jacobi, where we solve du(x, y) and dv(x,y) using the
+previous iteration's value for all other du/dv values. (That this converges
+at all is beyond this text to prove, but it does. Not that we bother
+iterating until it converges; a few iterations is good enough.)
+Gauss-Seidel iterations improve on this in that they (surprisingly!) use this
+iteration's computed du/dv values if they're ready; this improves convergence,
+but is hard to parallelize.
+
+Successive over-relaxation (SOR) improves further on this, in that it
+assumes that the solution moves towards the right value, so why not
+just go a bit further? That is, if Gauss-Seidel would tell you to increase
+the flow by 1.0 pixel to the right, perhaps go 1.5 pixels to the right
+instead (this value is called ω). Again, the convergence proof is beyond the
+scope here, but SOR converges for any ω between 1 and 2 (1 gives plain
+Gauss-Seidel, and over 2, we risk overshooting and never converging). Optimal
+ω depends on the equation system; DIS uses ω = 1.6, which presumably was
+measured, while we do ω = 1.8 (seems to be marginally better after some
+light testing).
+
+Efficient GPU implementation of SOR is not trivial; like noted before,
+Gauss-Seidel is inherently serial, which is a poor match for the GPU.
+Worse, doing SOR with Jacobi as base instead of Gauss-Seidel makes for
+an algorithm which simply does not converge. We solve this by using a
+method called red-black SOR (not to be confused with red-black binary
+trees). Conceptually, it assigns every unknown a color, with every other
+being red or black (similar to a checkerboard). Since red values now
+only depend on black values and vice versa, one can do all red values
+in parallel, then all black values, and so on. (This is equivalent to
+reordering the equation set; different such orderings can have different
+convergence speeds.)
+
+Our GPU SOR implementation is not overly efficient, so essentially one such
+half-iteration of red-black SOR costs the same as one full iteration of
+Jacobi, but convergence is so much faster that it's worth it. Generally
+speaking, Gauss-Seidel converges twice as fast as Jacobi (ie., if Jacobi
+converges in N iterations, Gauss-Seidel does so in N/2), but SOR converges
+_geometrically_ faster, ie., in O(√N) iterations.
+
+Do note that the DeepFlow code does not fully use SOR or even Gauss-Seidel;
+it solves every 2x2 block (ie., single du/dv pair) using Cramer's rule,
+and then pushes that vector 60% further, SOR-style. This would be clearly
+more accurate if we didn't have SOR in the mix (since du and dv would
+converge immediately relative to each other, bar Cramer's numerical issues),
+but I'm not sure whether it's better given SOR. (DIS changes this to a more
+traditional SOR formulation, which we also use. It doesn't seem to be much
+different in practical testing; perhaps minutely worse, but I haven't done
+a deep analysis here.)
+
+And that's it. References:
+
+[Brox04]: Brox, Bruhn, Papenberg, Weickert: “High Accuracy Optical Flow
+ Estimation Based on a Theory for Warping”, in Proceedings of the European
+ Conference on Computer Vision (ECCV), 2004
+[Brox05]: Brox: “From Pixels to Regions: Partial Differential Equations in
+ Image Analysis”, PhD thesis, 2005
+[Brox11]: Brox, Malik: “Large Displacement Optical Flow: Descriptor Matching in
+ Variational Motion Estimation”, IEEE Transactions on Pattern Analysis and
+ Machine Intelligence, 2011
+[Ciarlet78]: Ciarlet: “The Finite Element Method for Elliptic Problems”, 1978
+[Fahad07]: Fahad, Morris: “Multiple Combined Constraints for Optical Flow
+ Estimation”, in Proceedings of the 3rd International Conference on Advances
+ in Visual Computing (ISVC), 2007
+[Kroeger16]: Kroeger, Timofte, Dai, van Gool: “Fast Optical Flow using Dense
+ Inverse Search”, in Proceedings of the European Conference on Computer Vision
+ (ECCV), 2016
+[Weinzaepfel13]: Weinzaepfel, Revaud, Harchaoui, Schmid: “DeepFlow: Large
+ displacement optical flow with deep matching”, in IEEE International Conference
+ on Computer Vision (ICCV), 2013
+[Zimmer11]: Zimmer, Bruhn, Weickert: “Optic Flow in Harmony”, International
+ Journal of Computer Vision, 2011
--- /dev/null
+#include "video_stream.h"
+
+extern "C" {
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+}
+
+#include "chroma_subsampler.h"
+#include "shared/context.h"
+#include "flags.h"
+#include "flow.h"
+#include "shared/httpd.h"
+#include "jpeg_frame_view.h"
+#include "movit/util.h"
+#include "shared/mux.h"
+#include "player.h"
+#include "util.h"
+#include "ycbcr_converter.h"
+
+#include <cinttypes>
+#include <epoxy/glx.h>
+#include <jpeglib.h>
+#include <unistd.h>
+
+using namespace std;
+using namespace std::chrono;
+
+extern HTTPD *global_httpd;
+
+// libjpeg destination manager that writes the compressed stream directly into
+// a growable std::vector<uint8_t> (`dest`), so callers get the finished JPEG
+// without a fixed-size buffer or a final copy. libjpeg calls back through the
+// function pointers set in `pub`; each thunk casts the jpeg_destination_mgr *
+// back to this struct, which is only valid because `pub` is the first member
+// of a standard-layout type (checked by the static_assert below).
+struct VectorDestinationManager {
+ jpeg_destination_mgr pub;
+ std::vector<uint8_t> dest;
+
+ VectorDestinationManager()
+ {
+ // Hook up the three callbacks libjpeg requires of a destination manager.
+ pub.init_destination = init_destination_thunk;
+ pub.empty_output_buffer = empty_output_buffer_thunk;
+ pub.term_destination = term_destination_thunk;
+ }
+
+ static void init_destination_thunk(j_compress_ptr ptr)
+ {
+ ((VectorDestinationManager *)(ptr->dest))->init_destination();
+ }
+
+ // Called once before any data is written; just sets up the initial buffer.
+ inline void init_destination()
+ {
+ make_room(0);
+ }
+
+ static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
+ {
+ return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
+ }
+
+ // Called when libjpeg has filled the buffer; per the libjpeg contract we
+ // must treat the entire buffer as written (not consult free_in_buffer).
+ inline bool empty_output_buffer()
+ {
+ make_room(dest.size()); // Should ignore pub.free_in_buffer!
+ return true;
+ }
+
+ // Grows `dest` so there are at least 4 kB of free space after `bytes_used`
+ // valid bytes, then points libjpeg's write cursor at the free region.
+ // The double resize() exposes the vector's full allocated capacity to
+ // libjpeg, so no allocated bytes go to waste between callbacks.
+ inline void make_room(size_t bytes_used)
+ {
+ dest.resize(bytes_used + 4096);
+ dest.resize(dest.capacity());
+ pub.next_output_byte = dest.data() + bytes_used;
+ pub.free_in_buffer = dest.size() - bytes_used;
+ }
+
+ static void term_destination_thunk(j_compress_ptr ptr)
+ {
+ ((VectorDestinationManager *)(ptr->dest))->term_destination();
+ }
+
+ // Called once at the end of compression; trims the vector down to the
+ // number of bytes actually written.
+ inline void term_destination()
+ {
+ dest.resize(dest.size() - pub.free_in_buffer);
+ }
+};
+static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
+
+// Encodes one planar Y'CbCr 4:2:2 frame (full-width luma plane, two
+// half-width chroma planes) into a JPEG byte stream at quality 90,
+// using libjpeg's raw-data mode so no color conversion takes place.
+//
+// `width`/`height` describe the luma plane; the Cb/Cr planes must be
+// width/2 bytes per row. Returns the finished JPEG as a byte vector.
+vector<uint8_t> encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height)
+{
+	// Rows are fed to libjpeg in fixed groups of eight, and chroma rows are
+	// width/2 bytes wide; without these invariants, the loop below would read
+	// past the end of the input planes.
+	assert(height % 8 == 0);
+	assert(width % 2 == 0);
+
+	VectorDestinationManager dest;
+
+	jpeg_compress_struct cinfo;
+	jpeg_error_mgr jerr;
+	cinfo.err = jpeg_std_error(&jerr);
+	jpeg_create_compress(&cinfo);
+
+	cinfo.dest = (jpeg_destination_mgr *)&dest;
+	cinfo.input_components = 3;
+	cinfo.in_color_space = JCS_RGB;
+	jpeg_set_defaults(&cinfo);
+	constexpr int quality = 90;
+	jpeg_set_quality(&cinfo, quality, /*force_baseline=*/false);
+
+	cinfo.image_width = width;
+	cinfo.image_height = height;
+	cinfo.raw_data_in = true;
+	jpeg_set_colorspace(&cinfo, JCS_YCbCr);
+	// 4:2:2 (h2v1): chroma is subsampled horizontally only.
+	cinfo.comp_info[0].h_samp_factor = 2;
+	cinfo.comp_info[0].v_samp_factor = 1;
+	cinfo.comp_info[1].h_samp_factor = 1;
+	cinfo.comp_info[1].v_samp_factor = 1;
+	cinfo.comp_info[2].h_samp_factor = 1;
+	cinfo.comp_info[2].v_samp_factor = 1;
+	cinfo.CCIR601_sampling = true; // Seems to be mostly ignored by libjpeg, though.
+	jpeg_start_compress(&cinfo, true);
+
+	// Hand the encoder eight rows at a time, pointing straight into the
+	// caller's planes. libjpeg's API is non-const, hence the const_casts;
+	// it does not write through these pointers.
+	JSAMPROW yptr[8], cbptr[8], crptr[8];
+	JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+	for (unsigned y = 0; y < height; y += 8) {
+		for (unsigned yy = 0; yy < 8; ++yy) {
+			yptr[yy] = const_cast<JSAMPROW>(&y_data[(y + yy) * width]);
+			cbptr[yy] = const_cast<JSAMPROW>(&cb_data[(y + yy) * width / 2]);
+			crptr[yy] = const_cast<JSAMPROW>(&cr_data[(y + yy) * width / 2]);
+		}
+
+		jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
+	}
+
+	jpeg_finish_compress(&cinfo);
+	jpeg_destroy_compress(&cinfo);
+
+	return move(dest.dest);
+}
+
+// Sets up all GPU state shared by the interpolation/fade pipeline: a fixed
+// pool of per-frame texture/FBO/PBO slots, the optical-flow and interpolation
+// engines (quality selected via --interpolation-quality), and an initial
+// all-black "last frame" JPEG.
+VideoStream::VideoStream()
+{
+	ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
+	ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));
+
+	GLuint input_tex[num_interpolate_slots], gray_tex[num_interpolate_slots];
+	GLuint fade_y_output_tex[num_interpolate_slots], fade_cbcr_output_tex[num_interpolate_slots];
+	GLuint cb_tex[num_interpolate_slots], cr_tex[num_interpolate_slots];
+
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, input_tex);
+	glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, gray_tex);
+	glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_y_output_tex);
+	glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_cbcr_output_tex);
+	glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cb_tex);
+	glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cr_tex);
+	check_error();
+
+	constexpr size_t width = 1280, height = 720; // FIXME: adjustable width, height
+	int levels = find_num_levels(width, height);
+	for (size_t i = 0; i < num_interpolate_slots; ++i) {
+		// The 2-layer array textures hold both frames of an interpolation pair.
+		glTextureStorage3D(input_tex[i], levels, GL_RGBA8, width, height, 2);
+		check_error();
+		glTextureStorage3D(gray_tex[i], levels, GL_R8, width, height, 2);
+		check_error();
+		glTextureStorage2D(fade_y_output_tex[i], 1, GL_R8, width, height);
+		check_error();
+		glTextureStorage2D(fade_cbcr_output_tex[i], 1, GL_RG8, width, height);
+		check_error();
+		// Chroma planes are half-width (4:2:2).
+		glTextureStorage2D(cb_tex[i], 1, GL_R8, width / 2, height);
+		check_error();
+		glTextureStorage2D(cr_tex[i], 1, GL_R8, width / 2, height);
+		check_error();
+
+		unique_ptr<InterpolatedFrameResources> resource(new InterpolatedFrameResources);
+		resource->owner = this;
+		resource->input_tex = input_tex[i];
+		resource->gray_tex = gray_tex[i];
+		resource->fade_y_output_tex = fade_y_output_tex[i];
+		resource->fade_cbcr_output_tex = fade_cbcr_output_tex[i];
+		resource->cb_tex = cb_tex[i];
+		resource->cr_tex = cr_tex[i];
+		glCreateFramebuffers(2, resource->input_fbos);
+		check_error();
+		glCreateFramebuffers(1, &resource->fade_fbo);
+		check_error();
+
+		// One FBO per input layer, writing color + grayscale simultaneously.
+		glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
+		check_error();
+		glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
+		check_error();
+		glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
+		check_error();
+		glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
+		check_error();
+		glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT0, fade_y_output_tex[i], 0);
+		check_error();
+		glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT1, fade_cbcr_output_tex[i], 0);
+		check_error();
+
+		GLuint bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+		glNamedFramebufferDrawBuffers(resource->input_fbos[0], 2, bufs);
+		check_error();
+		glNamedFramebufferDrawBuffers(resource->input_fbos[1], 2, bufs);
+		check_error();
+		glNamedFramebufferDrawBuffers(resource->fade_fbo, 2, bufs);
+		check_error();
+
+		// Persistently mapped PBO for asynchronous readback of the finished frame.
+		glCreateBuffers(1, &resource->pbo);
+		check_error();
+		glNamedBufferStorage(resource->pbo, width * height * 4, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+		check_error();
+		resource->pbo_contents = glMapNamedBufferRange(resource->pbo, 0, width * height * 4, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+		interpolate_resources.push_back(move(resource));
+	}
+
+	check_error();
+
+	OperatingPoint op;
+	if (global_flags.interpolation_quality == 1) {
+		op = operating_point1;
+	} else if (global_flags.interpolation_quality == 2) {
+		op = operating_point2;
+	} else if (global_flags.interpolation_quality == 3) {
+		op = operating_point3;
+	} else if (global_flags.interpolation_quality == 4) {
+		op = operating_point4;
+	} else {
+		assert(false);
+	}
+
+	compute_flow.reset(new DISComputeFlow(width, height, op));
+	interpolate.reset(new Interpolate(op, /*split_ycbcr_output=*/true));
+	interpolate_no_split.reset(new Interpolate(op, /*split_ycbcr_output=*/false));
+	chroma_subsampler.reset(new ChromaSubsampler);
+	check_error();
+
+	// The “last frame” is initially black (Y'=16, Cb=Cr=128 is black in
+	// limited-range Y'CbCr). Use the width/height constants from above instead
+	// of repeating the magic numbers.
+	unique_ptr<uint8_t[]> y(new uint8_t[width * height]);
+	unique_ptr<uint8_t[]> cb_or_cr(new uint8_t[(width / 2) * height]);
+	memset(y.get(), 16, width * height);
+	memset(cb_or_cr.get(), 128, (width / 2) * height);
+	last_frame = encode_jpeg(y.get(), cb_or_cr.get(), cb_or_cr.get(), width, height);
+}
+
+// Empty, but defined out-of-line — presumably so the unique_ptr members'
+// deleters are instantiated here, where their pointee types are complete.
+// (NOTE(review): confirm against video_stream.h's forward declarations.)
+VideoStream::~VideoStream() {}
+
+// Builds the output mux — MJPEG video in a NUT container, no audio — on top
+// of a custom AVIO context whose writes go to write_packet2_thunk, and then
+// starts the encoder thread.
+void VideoStream::start()
+{
+ AVFormatContext *avctx = avformat_alloc_context();
+ avctx->oformat = av_guess_format("nut", nullptr, nullptr);
+
+ // Custom I/O: FFmpeg writes into `buf`, and flushes through our thunk
+ // (the last three nullptrs are read/write/seek callbacks; write_data_type
+ // below supersedes the plain write callback).
+ uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+ avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
+ avctx->pb->write_data_type = &VideoStream::write_packet2_thunk;
+ avctx->pb->ignore_boundary_point = 1;
+
+ Mux::Codec video_codec = Mux::CODEC_MJPEG;
+
+ avctx->flags = AVFMT_FLAG_CUSTOM_IO;
+
+ // MJPEG carries no out-of-band codec configuration.
+ string video_extradata;
+
+ constexpr int width = 1280, height = 720; // Doesn't matter for MJPEG.
+ stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, /*audio_codec_parameters=*/nullptr,
+ AVCOL_SPC_BT709, Mux::WITHOUT_AUDIO,
+ COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}));
+
+
+ encode_thread = thread(&VideoStream::encode_thread_func, this);
+}
+
+// Blocks until the encoder thread exits. Note that nothing here signals the
+// thread to stop — presumably encode_thread_func terminates on its own or is
+// told to elsewhere (NOTE(review): confirm), otherwise this join blocks forever.
+void VideoStream::stop()
+{
+ encode_thread.join();
+}
+
+// Empties the pending-frame queue, returning any GPU textures held by the
+// dropped frames to their owners.
+void VideoStream::clear_queue()
+{
+	// Steal the whole queue under the lock, then work on it unlocked.
+	deque<QueuedFrame> stolen;
+
+	{
+		unique_lock<mutex> lock(queue_lock);
+		stolen = move(frame_queue);
+	}
+
+	// The flow/output textures are not RAII-managed, so hand them back
+	// explicitly. release_texture() is thread-safe.
+	for (const QueuedFrame &qf : stolen) {
+		const bool is_interpolated = (qf.type == QueuedFrame::INTERPOLATED);
+		if (is_interpolated || qf.type == QueuedFrame::FADED_INTERPOLATED) {
+			compute_flow->release_texture(qf.flow_tex);
+		}
+		if (is_interpolated) {
+			interpolate->release_texture(qf.output_tex);
+			interpolate->release_texture(qf.cbcr_tex);
+		}
+	}
+
+	// `stolen` is destroyed here, outside the mutex; destroying it while
+	// holding queue_lock could otherwise double-lock.
+}
+
+// Queues a frame that is passed through unchanged (no interpolation or fade),
+// after preloading its JPEG from disk so the encoder thread will not stall on I/O.
+void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
+                                          int64_t output_pts, function<void()> &&display_func,
+                                          QueueSpotHolder &&queue_spot_holder,
+                                          FrameOnDisk frame)
+{
+	// int64_t is not necessarily long (e.g. on 32-bit platforms), so use
+	// PRId64 rather than %ld for portable formatting.
+	fprintf(stderr, "output_pts=%" PRId64 " original input_pts=%" PRId64 "\n", output_pts, frame.pts);
+
+	// Preload the file from disk, so that the encoder thread does not get stalled.
+	// TODO: Consider sending it through the queue instead.
+	(void)frame_reader.read_frame(frame);
+
+	QueuedFrame qf;
+	qf.local_pts = local_pts;
+	qf.type = QueuedFrame::ORIGINAL;
+	qf.output_pts = output_pts;
+	qf.frame1 = frame;
+	qf.display_func = move(display_func);
+	qf.queue_spot_holder = move(queue_spot_holder);
+
+	unique_lock<mutex> lock(queue_lock);
+	frame_queue.push_back(move(qf));
+	queue_changed.notify_all();
+}
+
+// Queues a frame that is a crossfade between two decoded frames (no optical
+// flow involved). The fade is rendered on the GPU into one of the shared
+// resource slots, then read back asynchronously into that slot's PBO.
+void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64_t output_pts,
+                                       function<void()> &&display_func,
+                                       QueueSpotHolder &&queue_spot_holder,
+                                       FrameOnDisk frame1_spec, FrameOnDisk frame2_spec,
+                                       float fade_alpha)
+{
+	// int64_t is not necessarily long (e.g. on 32-bit platforms), so use
+	// PRId64 rather than %ld for portable formatting.
+	fprintf(stderr, "output_pts=%" PRId64 " faded input_pts=%" PRId64 ",%" PRId64 " fade_alpha=%.2f\n", output_pts, frame1_spec.pts, frame2_spec.pts, fade_alpha);
+
+	// Get the temporary OpenGL resources we need for doing the fade.
+	// (We share these with interpolated frames, which is slightly
+	// overkill, but there's no need to waste resources on keeping
+	// separate pools around.)
+	BorrowedInterpolatedFrameResources resources;
+	{
+		unique_lock<mutex> lock(queue_lock);
+		if (interpolate_resources.empty()) {
+			fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
+			return;
+		}
+		resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
+		interpolate_resources.pop_front();
+	}
+
+	bool did_decode;
+
+	shared_ptr<Frame> frame1 = decode_jpeg_with_cache(frame1_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+	shared_ptr<Frame> frame2 = decode_jpeg_with_cache(frame2_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+
+	// Render the fade into the slot's Y'/CbCr output textures.
+	ycbcr_semiplanar_converter->prepare_chain_for_fade(frame1, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, 1280, 720);
+
+	QueuedFrame qf;
+	qf.local_pts = local_pts;
+	qf.type = QueuedFrame::FADED;
+	qf.output_pts = output_pts;
+	qf.frame1 = frame1_spec;
+	qf.display_func = move(display_func);
+	qf.queue_spot_holder = move(queue_spot_holder);
+
+	qf.secondary_frame = frame2_spec;
+
+	// Subsample and split Cb/Cr.
+	chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+
+	// Read it down (asynchronously) to the CPU, packing Y, then Cb, then Cr
+	// at the given offsets in the PBO. (The size arguments to
+	// glGetTextureImage are upper bounds on the remaining buffer space,
+	// not exact plane sizes.)
+	glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
+	check_error();
+	glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+	check_error();
+	glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
+	check_error();
+	glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3 - 640 * 720, BUFFER_OFFSET(1280 * 720 + 640 * 720));
+	check_error();
+	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+	// Set a fence we can wait for to make sure the CPU sees the read.
+	glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+	check_error();
+	qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+	check_error();
+	qf.resources = move(resources);
+	// (The duplicated qf.local_pts assignment from the original was removed;
+	// it was already set above.)
+
+	unique_lock<mutex> lock(queue_lock);
+	frame_queue.push_back(move(qf));
+	queue_changed.notify_all();
+}
+
+void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts,
+ int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
+ QueueSpotHolder &&queue_spot_holder,
+ FrameOnDisk frame1, FrameOnDisk frame2,
+ float alpha, FrameOnDisk secondary_frame, float fade_alpha)
+{
+ if (secondary_frame.pts != -1) {
+ fprintf(stderr, "output_pts=%ld interpolated input_pts1=%ld input_pts2=%ld alpha=%.3f secondary_pts=%ld fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
+ } else {
+ fprintf(stderr, "output_pts=%ld interpolated input_pts1=%ld input_pts2=%ld alpha=%.3f\n", output_pts, frame1.pts, frame2.pts, alpha);
+ }
+
+ // Get the temporary OpenGL resources we need for doing the interpolation.
+ BorrowedInterpolatedFrameResources resources;
+ {
+ unique_lock<mutex> lock(queue_lock);
+ if (interpolate_resources.empty()) {
+ fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
+ return;
+ }
+ resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
+ interpolate_resources.pop_front();
+ }
+
+ QueuedFrame qf;
+ qf.type = (secondary_frame.pts == -1) ? QueuedFrame::INTERPOLATED : QueuedFrame::FADED_INTERPOLATED;
+ qf.output_pts = output_pts;
+ qf.display_decoded_func = move(display_func);
+ qf.queue_spot_holder = move(queue_spot_holder);
+ qf.local_pts = local_pts;
+
+ check_error();
+
+ // Convert frame0 and frame1 to OpenGL textures.
+ for (size_t frame_no = 0; frame_no < 2; ++frame_no) {
+ FrameOnDisk frame_spec = frame_no == 1 ? frame2 : frame1;
+ bool did_decode;
+ shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+ ycbcr_converter->prepare_chain_for_conversion(frame)->render_to_fbo(resources->input_fbos[frame_no], 1280, 720);
+ }
+
+ glGenerateTextureMipmap(resources->input_tex);
+ check_error();
+ glGenerateTextureMipmap(resources->gray_tex);
+ check_error();
+
+ // Compute the interpolated frame.
+ qf.flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+ check_error();
+
+ if (secondary_frame.pts != -1) {
+ // Fade. First kick off the interpolation.
+ tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, 1280, 720, alpha);
+ check_error();
+
+ // Now decode the image we are fading against.
+ bool did_decode;
+ shared_ptr<Frame> frame2 = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
+
+ // Then fade against it, putting it into the fade Y' and CbCr textures.
+ ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, 1280, 720);
+
+ // Subsample and split Cb/Cr.
+ chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+
+ interpolate_no_split->release_texture(qf.output_tex);
+ } else {
+ tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, 1280, 720, alpha);
+ check_error();
+
+ // Subsample and split Cb/Cr.
+ chroma_subsampler->subsample_chroma(qf.cbcr_tex, 1280, 720, resources->cb_tex, resources->cr_tex);
+ }
+
+ // We could have released qf.flow_tex here, but to make sure we don't cause a stall
+ // when trying to reuse it for the next frame, we can just as well hold on to it
+ // and release it only when the readback is done.
+
+ // Read it down (asynchronously) to the CPU.
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
+ check_error();
+ if (secondary_frame.pts != -1) {
+ glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+ } else {
+ glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+ }
+ check_error();
+ glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
+ check_error();
+ glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 3 - 640 * 720, BUFFER_OFFSET(1280 * 720 + 640 * 720));
+ check_error();
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+ // Set a fence we can wait for to make sure the CPU sees the read.
+ glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+ check_error();
+ qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+ check_error();
+ qf.resources = move(resources);
+
+ unique_lock<mutex> lock(queue_lock);
+ frame_queue.push_back(move(qf));
+ queue_changed.notify_all();
+}
+
+// Queue a REFRESH frame: no new image is computed; the encode thread just
+// resends <last_frame> with a new output pts at the given local time.
+void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
+                                         int64_t output_pts, function<void()> &&display_func,
+                                         QueueSpotHolder &&queue_spot_holder)
+{
+	QueuedFrame qf;
+	qf.type = QueuedFrame::REFRESH;
+	qf.output_pts = output_pts;
+	// Bug fix: <local_pts> was never stored, so the encode thread's
+	// wait_until() saw a default-constructed (epoch) time point and played
+	// refresh frames immediately instead of at their scheduled time.
+	// (All the other schedule_*() functions store it; cf. schedule_interpolated_frame.)
+	qf.local_pts = local_pts;
+	qf.display_func = move(display_func);
+	qf.queue_spot_holder = move(queue_spot_holder);
+
+	unique_lock<mutex> lock(queue_lock);
+	frame_queue.push_back(move(qf));
+	queue_changed.notify_all();
+}
+
+namespace {
+
+// Builds a CPU-side planar 4:2:2 Frame from the pixel data read back into a
+// PBO: full-resolution Y first, then Cb and Cr at half horizontal resolution,
+// each plane tightly packed.
+shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
+{
+	size_t chroma_width = width / 2;
+
+	const uint8_t *y = (const uint8_t *)contents;
+	const uint8_t *cb = (const uint8_t *)contents + width * height;
+	const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;
+
+	shared_ptr<Frame> frame(new Frame);
+	frame->y.reset(new uint8_t[width * height]);
+	frame->cb.reset(new uint8_t[chroma_width * height]);
+	frame->cr.reset(new uint8_t[chroma_width * height]);
+	// Source and destination planes are both tightly packed (pitch == width,
+	// as recorded in pitch_y/pitch_chroma below), so each plane can be copied
+	// in one go instead of the previous redundant row-by-row loop.
+	memcpy(frame->y.get(), y, width * height);
+	memcpy(frame->cb.get(), cb, chroma_width * height);
+	memcpy(frame->cr.get(), cr, chroma_width * height);
+	frame->is_semiplanar = false;
+	frame->width = width;
+	frame->height = height;
+	frame->chroma_subsampling_x = 2;
+	frame->chroma_subsampling_y = 1;
+	frame->pitch_y = width;
+	frame->pitch_chroma = chroma_width;
+	return frame;
+}
+
+}  // namespace
+
+// Worker thread: pops scheduled frames off <frame_queue> at their local
+// play time, finalizes them (JPEG-encodes interpolated/faded frames), and
+// hands the resulting packets to the stream mux.
+void VideoStream::encode_thread_func()
+{
+	pthread_setname_np(pthread_self(), "VideoStream");
+	// The frames were scheduled from other threads' GL contexts; this thread
+	// needs its own context to wait on fences and read textures.
+	QSurface *surface = create_surface();
+	QOpenGLContext *context = create_context(surface);
+	bool ok = make_current(context, surface);
+	if (!ok) {
+		fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
+		exit(1);
+	}
+
+	for ( ;; ) {
+		QueuedFrame qf;
+		{
+			unique_lock<mutex> lock(queue_lock);
+
+			// Wait until we have a frame to play.
+			queue_changed.wait(lock, [this]{
+				return !frame_queue.empty();
+			});
+			steady_clock::time_point frame_start = frame_queue.front().local_pts;
+
+			// Now sleep until the frame is supposed to start (the usual case),
+			// _or_ clear_queue() happened.
+			bool aborted = queue_changed.wait_until(lock, frame_start, [this, frame_start]{
+				return frame_queue.empty() || frame_queue.front().local_pts != frame_start;
+			});
+			if (aborted) {
+				// clear_queue() happened, so don't play this frame after all.
+				continue;
+			}
+			qf = move(frame_queue.front());
+			frame_queue.pop_front();
+		}
+
+		if (qf.type == QueuedFrame::ORIGINAL) {
+			// Send the JPEG frame on, unchanged.
+			string jpeg = frame_reader.read_frame(qf.frame1);
+			AVPacket pkt;
+			av_init_packet(&pkt);
+			pkt.stream_index = 0;
+			pkt.data = (uint8_t *)jpeg.data();
+			pkt.size = jpeg.size();
+			stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+
+			// Keep the encoded bytes around so REFRESH frames can resend them.
+			last_frame.assign(&jpeg[0], &jpeg[0] + jpeg.size());
+		} else if (qf.type == QueuedFrame::FADED) {
+			// Block until the GPU->CPU readback into the PBO is visible.
+			glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+
+			shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, 1280, 720);
+
+			// Now JPEG encode it, and send it on to the stream.
+			vector<uint8_t> jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), 1280, 720);
+
+			AVPacket pkt;
+			av_init_packet(&pkt);
+			pkt.stream_index = 0;
+			pkt.data = (uint8_t *)jpeg.data();
+			pkt.size = jpeg.size();
+			stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+			last_frame = move(jpeg);
+		} else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
+			glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
+
+			// Send it on to display.
+			shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, 1280, 720);
+			if (qf.display_decoded_func != nullptr) {
+				qf.display_decoded_func(frame);
+			}
+
+			// Now JPEG encode it, and send it on to the stream.
+			vector<uint8_t> jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), 1280, 720);
+			// The readback is done, so the GPU textures can be handed back for reuse.
+			compute_flow->release_texture(qf.flow_tex);
+			if (qf.type != QueuedFrame::FADED_INTERPOLATED) {
+				// For FADED_INTERPOLATED, output_tex was already released at
+				// schedule time (in schedule_interpolated_frame), and
+				// cbcr_tex was never handed out for that path.
+				interpolate->release_texture(qf.output_tex);
+				interpolate->release_texture(qf.cbcr_tex);
+			}
+
+			AVPacket pkt;
+			av_init_packet(&pkt);
+			pkt.stream_index = 0;
+			pkt.data = (uint8_t *)jpeg.data();
+			pkt.size = jpeg.size();
+			stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+			last_frame = move(jpeg);
+		} else if (qf.type == QueuedFrame::REFRESH) {
+			// Resend the bytes of the last frame we sent, with a new pts.
+			AVPacket pkt;
+			av_init_packet(&pkt);
+			pkt.stream_index = 0;
+			pkt.data = (uint8_t *)last_frame.data();
+			pkt.size = last_frame.size();
+			stream_mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+		} else {
+			assert(false);
+		}
+		if (qf.display_func != nullptr) {
+			qf.display_func();
+		}
+	}
+}
+
+// C-style trampoline for AVIO: <opaque> is really the VideoStream instance,
+// so just forward to the member function.
+int VideoStream::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	return static_cast<VideoStream *>(opaque)->write_packet2(buf, buf_size, type, time);
+}
+
+// Receives muxed bytes from the AVIO layer and forwards them to the HTTP
+// server; header bytes are stashed separately so new clients can get them.
+int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	const bool is_marker = (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT);
+	if (is_marker) {
+		seen_sync_markers = true;
+	} else if (!seen_sync_markers && type == AVIO_DATA_MARKER_UNKNOWN) {
+		// We don't know if this is a keyframe or not (the muxer could
+		// avoid marking it), so we just have to make the best of it.
+		type = AVIO_DATA_MARKER_SYNC_POINT;
+	}
+
+	if (type != AVIO_DATA_MARKER_HEADER) {
+		global_httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
+	} else {
+		stream_mux_header.append((char *)buf, buf_size);
+		global_httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
+	}
+	return buf_size;
+}
--- /dev/null
+#ifndef _VIDEO_STREAM_H
+#define _VIDEO_STREAM_H 1
+
+#include <epoxy/gl.h>
+#include <stdint.h>
+
+extern "C" {
+#include <libavformat/avio.h>
+}
+
+#include "frame_on_disk.h"
+#include "jpeg_frame_view.h"
+#include "shared/ref_counted_gl_sync.h"
+#include "queue_spot_holder.h"
+
+#include <chrono>
+#include <condition_variable>
+#include <deque>
+#include <functional>
+#include <movit/effect_chain.h>
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+#include <mutex>
+#include <string>
+#include <thread>
+
+class ChromaSubsampler;
+class DISComputeFlow;
+class Interpolate;
+class Mux;
+class QSurface;
+class QSurfaceFormat;
+class YCbCrConverter;
+
+// Produces the output video stream: callers schedule frames (original,
+// faded, interpolated, or refresh) and a worker thread encodes and muxes
+// them to HTTP at the right time.
+class VideoStream {
+public:
+	VideoStream();
+	~VideoStream();
+	void start();
+	void stop();
+	void clear_queue();
+
+	// “display_func” is called after the frame has been calculated (if needed)
+	// and has gone out to the stream.
+	void schedule_original_frame(std::chrono::steady_clock::time_point,
+	                             int64_t output_pts, std::function<void()> &&display_func,
+	                             QueueSpotHolder &&queue_spot_holder,
+	                             FrameOnDisk frame);
+	void schedule_faded_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+	                          std::function<void()> &&display_func,
+	                          QueueSpotHolder &&queue_spot_holder,
+	                          FrameOnDisk frame1, FrameOnDisk frame2,
+	                          float fade_alpha);
+	// Interpolates between frame1 and frame2 at the given alpha; if
+	// secondary_frame is set (pts != -1), the result is also faded against it.
+	void schedule_interpolated_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+	                                 std::function<void(std::shared_ptr<Frame>)> &&display_func,
+	                                 QueueSpotHolder &&queue_spot_holder,
+	                                 FrameOnDisk frame1, FrameOnDisk frame2,
+	                                 float alpha, FrameOnDisk secondary_frame = {},  // Empty = no secondary (fade) frame.
+	                                 float fade_alpha = 0.0f);
+	// Resends the last frame that went out, with a new pts.
+	void schedule_refresh_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
+	                            std::function<void()> &&display_func,
+	                            QueueSpotHolder &&queue_spot_holder);
+
+private:
+	FrameReader frame_reader;
+
+	void encode_thread_func();
+	std::thread encode_thread;
+
+	static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+	int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+
+	// Allocated at the very start; if we're empty, we start dropping frames
+	// (so that we don't build up an infinite interpolation backlog).
+	struct InterpolatedFrameResources {
+		VideoStream *owner;  // Used only for IFRReleaser, below.
+
+		GLuint input_tex;  // Layered (contains both input frames), Y'CbCr.
+		GLuint gray_tex;  // Same, but Y only.
+		GLuint input_fbos[2];  // For rendering to the two layers of input_tex.
+
+		// Destination textures and FBO if there is a fade.
+		GLuint fade_y_output_tex, fade_cbcr_output_tex;
+		GLuint fade_fbo;
+
+		GLuint cb_tex, cr_tex;  // Subsampled, final output.
+
+		GLuint pbo;  // For reading the data back.
+		void *pbo_contents;  // Persistently mapped.
+	};
+	std::mutex queue_lock;
+	std::deque<std::unique_ptr<InterpolatedFrameResources>> interpolate_resources;  // Under <queue_lock>.
+	static constexpr size_t num_interpolate_slots = 15;  // Should be larger than Player::max_queued_frames, or we risk mass-dropping frames.
+
+	// Custom deleter: instead of freeing the resources, puts them back on
+	// the owner's free list so they can be reused for the next frame.
+	struct IFRReleaser {
+		void operator() (InterpolatedFrameResources *ifr) const
+		{
+			if (ifr != nullptr) {
+				std::unique_lock<std::mutex> lock(ifr->owner->queue_lock);
+				ifr->owner->interpolate_resources.emplace_back(ifr);
+			}
+		}
+	};
+	using BorrowedInterpolatedFrameResources = std::unique_ptr<InterpolatedFrameResources, IFRReleaser>;
+
+	// One scheduled output frame; which members are valid depends on <type>.
+	struct QueuedFrame {
+		std::chrono::steady_clock::time_point local_pts;
+
+		int64_t output_pts;
+		enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH } type;
+		FrameOnDisk frame1;  // The only frame for original frames.
+
+		// For fades only (including fades against interpolated frames).
+		FrameOnDisk secondary_frame;
+
+		// For interpolated frames only.
+		FrameOnDisk frame2;
+		float alpha;
+		BorrowedInterpolatedFrameResources resources;
+		RefCountedGLsync fence;  // Set when the interpolated image is read back to the CPU.
+		GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else.
+		FrameOnDisk id;  // NOTE(review): appears unused in the visible code — verify before removing.
+
+		std::function<void()> display_func;  // Called when the image is done decoding.
+		std::function<void(std::shared_ptr<Frame>)> display_decoded_func;  // Same, except for INTERPOLATED and FADED_INTERPOLATED.
+
+		QueueSpotHolder queue_spot_holder;
+	};
+	std::deque<QueuedFrame> frame_queue;  // Under <queue_lock>.
+	std::condition_variable queue_changed;
+
+	std::unique_ptr<Mux> stream_mux;  // To HTTP.
+	std::string stream_mux_header;
+	bool seen_sync_markers = false;
+
+	std::unique_ptr<YCbCrConverter> ycbcr_converter;
+	std::unique_ptr<YCbCrConverter> ycbcr_semiplanar_converter;
+
+	// Frame interpolation.
+	std::unique_ptr<DISComputeFlow> compute_flow;
+	std::unique_ptr<Interpolate> interpolate, interpolate_no_split;
+	std::unique_ptr<ChromaSubsampler> chroma_subsampler;
+
+	std::vector<uint8_t> last_frame;  // Encoded bytes of the last frame sent, for REFRESH.
+};
+
+#endif  // !defined(_VIDEO_STREAM_H)
--- /dev/null
+// Visualize a .flo file.
+
+#include "util.h"
+
+#include <assert.h>
+#include <memory>
+#include <stdio.h>
+
+using namespace std;
+
+// Reads a .flo optical-flow file and writes a PPM where each pixel's color
+// encodes its flow vector (via flow2rgb).
+int main(int argc, char **argv)
+{
+	if (argc != 3) {
+		fprintf(stderr, "Usage: ./vis input.flo out.ppm\n");
+		exit(1);
+	}
+
+	Flow flow = read_flow(argv[1]);
+
+	FILE *fp = fopen(argv[2], "wb");
+	// Bug fix: the result of fopen() was used unchecked, so an unwritable
+	// output path would dereference a null FILE *.
+	if (fp == nullptr) {
+		perror(argv[2]);
+		exit(1);
+	}
+	fprintf(fp, "P6\n%d %d\n255\n", flow.width, flow.height);
+	for (unsigned y = 0; y < unsigned(flow.height); ++y) {
+		for (unsigned x = 0; x < unsigned(flow.width); ++x) {
+			float du = flow.flow[y * flow.width + x].du;
+			float dv = flow.flow[y * flow.width + x].dv;
+
+			uint8_t r, g, b;
+			flow2rgb(du, dv, &r, &g, &b);
+			putc(r, fp);
+			putc(g, fp);
+			putc(b, fp);
+		}
+	}
+	fclose(fp);
+}
--- /dev/null
+#version 450 core
+#extension GL_ARB_shader_viewport_layer_array : require
+
+layout(location=0) in vec2 position;
+out vec3 tc;
+
+// Full-screen-quad vertex shader for instanced layered rendering: instance N
+// renders to layer N of the target array texture (gl_Layer), and tc.z carries
+// the same index so fragment shaders can sample matching sampler2DArray layers.
+void main()
+{
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+	tc.xy = position;
+	tc.z = gl_InstanceID;
+
+	gl_Layer = gl_InstanceID;
+}
--- /dev/null
+#include "ycbcr_converter.h"
+
+#include "jpeg_frame.h"
+
+#include <movit/mix_effect.h>
+#include <movit/ycbcr_input.h>
+
+using namespace std;
+using namespace movit;
+
+namespace {
+
+// Attaches the output(s) matching <output_mode> to <chain>, including the
+// right output origin (RGBA output is bottom-left, Y'CbCr outputs top-left).
+void setup_outputs(YCbCrConverter::OutputMode output_mode, const ImageFormat &output_format, const YCbCrFormat &ycbcr_output_format, EffectChain *chain)
+{
+	switch (output_mode) {
+	case YCbCrConverter::OUTPUT_TO_RGBA:
+		chain->add_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+		chain->set_output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT);
+		break;
+	case YCbCrConverter::OUTPUT_TO_SEMIPLANAR:
+		chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR);
+		chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
+		break;
+	default:
+		assert(output_mode == YCbCrConverter::OUTPUT_TO_DUAL_YCBCR);
+
+		// One full Y'CbCr texture (for interpolation), one that's just Y (throwing away the
+		// Cb and Cr channels). The second copy is sort of redundant, but it's the easiest way
+		// of getting the gray data into a layered texture.
+		chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+		chain->add_ycbcr_output(output_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+		chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
+		break;
+	}
+}
+
+}  // namespace
+
+// Builds and finalizes, up front, every movit chain we might need: plain
+// planar/semiplanar decode chains, all four planar/semiplanar fade
+// combinations, and the two fade-from-interleaved-texture chains.
+YCbCrConverter::YCbCrConverter(YCbCrConverter::OutputMode output_mode, ResourcePool *resource_pool)
+{
+	ImageFormat inout_format;
+	inout_format.color_space = COLORSPACE_sRGB;
+	inout_format.gamma_curve = GAMMA_sRGB;
+
+	// Default input format: 4:2:2 (chroma halved horizontally only).
+	ycbcr_format.luma_coefficients = YCBCR_REC_709;
+	ycbcr_format.num_levels = 256;
+	ycbcr_format.chroma_subsampling_x = 2;
+	ycbcr_format.chroma_subsampling_y = 1;
+	ycbcr_format.cb_x_position = 0.0f;  // H.264 -- _not_ JPEG, even though our input is MJPEG-encoded
+	ycbcr_format.cb_y_position = 0.5f;  // Irrelevant.
+	ycbcr_format.cr_x_position = 0.0f;
+	ycbcr_format.cr_y_position = 0.5f;
+
+	// This is a hack. Even though we're sending MJPEG around, which is
+	// full-range, it's mostly transporting signals from limited-range
+	// sources with no conversion, so we ought to have had false here.
+	// However, in the off chance that we're actually getting real MJPEG,
+	// we don't want to crush its blacks (or whites) by clamping. All of
+	// our processing is fades, so if we're in limited-range input, we'll
+	// stay in limited-range output. (Fading between limited-range and
+	// full-range sources will be broken, of course.) There will be some
+	// slight confusion in the parts of the algorithms dealing with RGB,
+	// but they're small and we'll manage.
+	ycbcr_format.full_range = true;
+
+	// Outputs are 4:4:4; any subsampling happens separately afterwards.
+	YCbCrFormat ycbcr_output_format = ycbcr_format;
+	ycbcr_output_format.chroma_subsampling_x = 1;
+
+	// Planar Y'CbCr decoding chain.
+	planar_chain.reset(new EffectChain(1280, 720, resource_pool));
+	ycbcr_planar_input = (YCbCrInput *)planar_chain->add_input(new YCbCrInput(inout_format, ycbcr_format, 1280, 720, YCBCR_INPUT_PLANAR));
+	setup_outputs(output_mode, inout_format, ycbcr_output_format, planar_chain.get());
+	planar_chain->set_dither_bits(8);
+	planar_chain->finalize();
+
+	// Semiplanar Y'CbCr decoding chain (for images coming from VA-API).
+	semiplanar_chain.reset(new EffectChain(1280, 720, resource_pool));
+	ycbcr_semiplanar_input = (YCbCrInput *)semiplanar_chain->add_input(new YCbCrInput(inout_format, ycbcr_format, 1280, 720, YCBCR_INPUT_SPLIT_Y_AND_CBCR));
+	setup_outputs(output_mode, inout_format, ycbcr_output_format, semiplanar_chain.get());
+	semiplanar_chain->set_dither_bits(8);
+	semiplanar_chain->finalize();
+
+	// Fade chains, one per (planar, semiplanar) x (planar, semiplanar) combination.
+	for (bool first_input_is_semiplanar : { false, true }) {
+		for (bool second_input_is_semiplanar : { false, true }) {
+			FadeChain &fade_chain = fade_chains[first_input_is_semiplanar][second_input_is_semiplanar];
+			fade_chain.chain.reset(new EffectChain(1280, 720, resource_pool));
+			fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+				new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+					first_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+			fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+				new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+					second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+			fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
+				new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+			setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
+			fade_chain.chain->set_dither_bits(8);
+			fade_chain.chain->finalize();
+		}
+	}
+
+	// Fade from interleaved chain (ie., first input is interleaved, since it comes
+	// directly from the GPU anyway).
+	for (bool second_input_is_semiplanar : { false, true }) {
+		FadeChain &fade_chain = interleaved_fade_chains[second_input_is_semiplanar];
+		fade_chain.chain.reset(new EffectChain(1280, 720, resource_pool));
+
+		// The member <ycbcr_format> is temporarily switched to 4:4:4 for the
+		// interleaved first input, then back to 4:2:2 for the second input
+		// (so it ends the constructor with its original value).
+		ycbcr_format.chroma_subsampling_x = 1;
+		fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+			new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+				YCBCR_INPUT_INTERLEAVED));
+
+		ycbcr_format.chroma_subsampling_x = 2;
+		fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+			new YCbCrInput(inout_format, ycbcr_format, 1280, 720,
+				second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
+
+		fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
+			new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+		setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
+		fade_chain.chain->set_dither_bits(8);
+		fade_chain.chain->finalize();
+	}
+}
+
+// Points the matching decode chain's input at <frame>'s pixel data and
+// returns that chain, ready for rendering.
+EffectChain *YCbCrConverter::prepare_chain_for_conversion(shared_ptr<Frame> frame)
+{
+	EffectChain *chain;
+	YCbCrInput *input;
+	if (frame->is_semiplanar) {
+		chain = semiplanar_chain.get();
+		input = ycbcr_semiplanar_input;
+	} else {
+		chain = planar_chain.get();
+		input = ycbcr_planar_input;
+	}
+	setup_input_for_frame(frame, ycbcr_format, input);
+	return chain;
+}
+
+// Sets up the fade chain matching the two frames' chroma layouts, with the
+// mix weighted by <fade_alpha> (0.0 = all <frame>, 1.0 = all <secondary_frame>).
+EffectChain *YCbCrConverter::prepare_chain_for_fade(shared_ptr<Frame> frame, shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+	const FadeChain &fade_chain = fade_chains[frame->is_semiplanar][secondary_frame->is_semiplanar];
+	setup_input_for_frame(frame, ycbcr_format, fade_chain.input[0]);
+	setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
+	// Bug fix: this was |=, which would let one successful set_float() mask
+	// the other's failure; the assert is meant to require _both_ to succeed.
+	bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
+	ok &= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+	assert(ok);
+	return fade_chain.chain.get();
+}
+
+// Like prepare_chain_for_fade, but the first input is an interleaved Y'CbCr
+// texture already on the GPU (e.g., the interpolation output) instead of a
+// CPU-side Frame.
+EffectChain *YCbCrConverter::prepare_chain_for_fade_from_texture(GLuint tex, std::shared_ptr<Frame> secondary_frame, float fade_alpha)
+{
+	const FadeChain &fade_chain = interleaved_fade_chains[secondary_frame->is_semiplanar];
+	{
+		// The interleaved texture is 4:4:4, unlike the 4:2:2 default.
+		YCbCrFormat format_copy = ycbcr_format;
+		format_copy.chroma_subsampling_x = 1;
+		format_copy.chroma_subsampling_y = 1;
+		fade_chain.input[0]->change_ycbcr_format(format_copy);
+
+		fade_chain.input[0]->set_width(1280);  // FIXME
+		fade_chain.input[0]->set_height(720);
+		fade_chain.input[0]->set_texture_num(0, tex);
+
+		glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+		glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+		glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+		glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+	}
+	setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
+	// Bug fix: this was |=, which would let one successful set_float() mask
+	// the other's failure; the assert is meant to require _both_ to succeed.
+	bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
+	ok &= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+	assert(ok);
+	return fade_chain.chain.get();
+}
+
+// Points <input> at <frame>'s pixel data, adjusting the chroma subsampling
+// to whatever this particular frame actually uses.
+void setup_input_for_frame(shared_ptr<Frame> frame, const YCbCrFormat &ycbcr_format, YCbCrInput *input)
+{
+	YCbCrFormat frame_format = ycbcr_format;
+	frame_format.chroma_subsampling_x = frame->chroma_subsampling_x;
+	frame_format.chroma_subsampling_y = frame->chroma_subsampling_y;
+	input->change_ycbcr_format(frame_format);
+
+	input->set_width(frame->width);
+	input->set_height(frame->height);
+
+	// Plane 0 is always luma.
+	input->set_pixel_data(0, frame->y.get());
+	input->set_pitch(0, frame->pitch_y);
+
+	if (frame->is_semiplanar) {
+		// Semiplanar: plane 1 holds interleaved Cb/Cr.
+		input->set_pixel_data(1, frame->cbcr.get());
+		input->set_pitch(1, frame->pitch_chroma);
+		return;
+	}
+
+	// Planar: separate Cb and Cr planes.
+	input->set_pixel_data(1, frame->cb.get());
+	input->set_pitch(1, frame->pitch_chroma);
+	input->set_pixel_data(2, frame->cr.get());
+	input->set_pitch(2, frame->pitch_chroma);
+}
--- /dev/null
+#ifndef _YCBCR_CONVERTER_H
+#define _YCBCR_CONVERTER_H 1
+
+#include <epoxy/gl.h>
+#include <memory>
+#include <movit/ycbcr_input.h>
+
+namespace movit {
+
+class EffectChain;
+class MixEffect;
+class ResourcePool;
+struct YCbCrFormat;
+
+}  // namespace movit
+
+struct Frame;
+
+// Owns pre-finalized movit chains for decoding Y'CbCr frames (planar or
+// semiplanar) and for fading between them; the prepare_* methods point the
+// matching chain's inputs at the given data and return it for rendering.
+class YCbCrConverter {
+public:
+	enum OutputMode {
+		OUTPUT_TO_RGBA,        // One texture (bottom-left origin): RGBA
+		OUTPUT_TO_SEMIPLANAR,  // Two textures (top-left origin): Y, CbCr
+		OUTPUT_TO_DUAL_YCBCR   // Two textures (top-left origin): Y'CbCr, Y'CbCr
+	};
+	YCbCrConverter(OutputMode output_mode, movit::ResourcePool *resource_pool);
+
+	// Returns the appropriate chain for rendering.
+	movit::EffectChain *prepare_chain_for_conversion(std::shared_ptr<Frame> frame);
+	movit::EffectChain *prepare_chain_for_fade(std::shared_ptr<Frame> frame, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+
+	// <tex> must be interleaved Y'CbCr.
+	movit::EffectChain *prepare_chain_for_fade_from_texture(GLuint tex, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+
+private:
+	// Base input format; individual frames may override the subsampling.
+	movit::YCbCrFormat ycbcr_format;
+
+	// Effectively only converts from 4:2:2 to 4:4:4.
+	// TODO: Have a separate version with ResampleEffect, for scaling?
+	std::unique_ptr<movit::EffectChain> planar_chain, semiplanar_chain;
+	movit::YCbCrInput *ycbcr_planar_input, *ycbcr_semiplanar_input;
+
+	// These do fades, parametrized on whether the two inputs are planar
+	// or semiplanar.
+	struct FadeChain {
+		std::unique_ptr<movit::EffectChain> chain;
+		movit::YCbCrInput *input[2];
+		movit::MixEffect *mix_effect;
+	};
+	FadeChain fade_chains[2][2];
+
+	// These do fades, where the first input is interleaved and the second is
+	// either planar or semiplanar.
+	FadeChain interleaved_fade_chains[2];
+};
+
+// TODO: make private
+void setup_input_for_frame(std::shared_ptr<Frame> frame, const movit::YCbCrFormat &ycbcr_format, movit::YCbCrInput *input);
+
+#endif  // !defined(_YCBCR_CONVERTER_H)
project('nageru', 'cpp', default_options: ['buildtype=debugoptimized'])
-qt5 = import('qt5')
-protoc = find_program('protoc')
+
cxx = meson.get_compiler('cpp')
# Use lld if we can; it links a lot faster than ld.bfd or gold.
-nageru_link_args = []
code = '''#include <stdio.h>
int main() { printf("Hello, world!\n"); return 0; }
'''
if cxx.links(code, args: '-fuse-ld=lld', name: 'check for LLD')
- nageru_link_args += '-fuse-ld=lld'
+ add_project_link_arguments('-fuse-ld=lld')
endif
-embedded_bmusb = get_option('embedded_bmusb')
-
-alsadep = dependency('alsa')
-bmusbdep = dependency('bmusb', required: not embedded_bmusb)
-dldep = cxx.find_library('dl')
-epoxydep = dependency('epoxy')
-libavcodecdep = dependency('libavcodec')
-libavformatdep = dependency('libavformat')
-libavresampledep = dependency('libavresample')
-libavutildep = dependency('libavutil')
-libjpegdep = dependency('libjpeg')
-libmicrohttpddep = dependency('libmicrohttpd')
-libswscaledep = dependency('libswscale')
-libusbdep = dependency('libusb-1.0')
-luajitdep = dependency('luajit')
-movitdep = dependency('movit')
-protobufdep = dependency('protobuf')
-qcustomplotdep = cxx.find_library('qcustomplot')
-qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
-threaddep = dependency('threads')
-vadrmdep = dependency('libva-drm')
-vax11dep = dependency('libva-x11')
-x11dep = dependency('x11')
-x264dep = dependency('x264')
-zitaresamplerdep = cxx.find_library('zita-resampler')
-
-srcs = []
-nageru_deps = [qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep,
- vax11dep, vadrmdep, x11dep, libavformatdep, libavresampledep, libavcodecdep, libavutildep,
- libswscaledep, libusbdep, luajitdep, dldep, x264dep, alsadep, zitaresamplerdep,
- qcustomplotdep, threaddep]
-nageru_include_dirs = []
-nageru_link_with = []
-nageru_build_rpath = ''
-nageru_install_rpath = ''
-
-kaeru_link_with = []
-kaeru_extra_deps = []
+# Add the right MOVIT_SHADER_DIR definition.
+r = run_command('pkg-config', '--variable=shaderdir', 'movit')
+if r.returncode() != 0
+ error('Movit pkg-config installation is broken.')
+endif
+add_project_arguments('-DMOVIT_SHADER_DIR="' + r.stdout().strip() + '"', language: 'cpp')
# DeckLink has these issues, and we include it from various places.
if cxx.has_argument('-Wno-non-virtual-dtor')
add_project_arguments('-Wno-deprecated-declarations', language: 'cpp')
endif
-# Add the right MOVIT_SHADER_DIR definition.
-r = run_command('pkg-config', '--variable=shaderdir', 'movit')
-if r.returncode() != 0
- error('Movit pkg-config installation is broken.')
-endif
-add_project_arguments('-DMOVIT_SHADER_DIR="' + r.stdout().strip() + '"', language: 'cpp')
-
-# CEF.
-exe_dir = join_paths(get_option('prefix'), 'lib/nageru')
-cef_dir = get_option('cef_dir')
-cef_build_type = get_option('cef_build_type')
-have_cef = (cef_dir != '')
-if have_cef
+# This needs to be done before declaring any build targets.
+if get_option('cef_dir') != ''
add_project_arguments('-DHAVE_CEF=1', language: 'cpp')
-
- system_cef = (cef_build_type == 'system')
- if system_cef
- cef_lib_dir = cef_dir
- cef_resource_dir = '/usr/share/cef/Resources'
- else
- cef_lib_dir = join_paths(cef_dir, cef_build_type)
- cef_resource_dir = join_paths(cef_dir, 'Resources')
-
- nageru_include_dirs += include_directories(cef_dir)
- nageru_include_dirs += include_directories(join_paths(cef_dir, 'include'))
- nageru_build_rpath = cef_lib_dir
- nageru_install_rpath = '$ORIGIN/'
- endif
-
- cefdep = cxx.find_library('cef')
- nageru_deps += cefdep
-
- # CEF wrapper library; not built as part of the CEF binary distribution,
- # but should be if CEF is installed as a system library.
- if system_cef
- cefdlldep = cxx.find_library('cef_dll_wrapper')
- nageru_deps += cefdlldep
- else
- cmake = find_program('cmake')
- cef_compile_script = find_program('scripts/compile_cef_dll_wrapper.sh')
-
- cef_dll_target = custom_target('libcef_dll_wrapper',
- input: join_paths(cef_dir, 'libcef_dll/CMakeLists.txt'),
- output: ['libcef_dll_wrapper.a', 'cef-stamp'],
- command: [cef_compile_script, '@BUILD_DIR@', cef_dir, cmake, '@OUTPUT@'])
-
- # Putting the .a in sources seemingly hits a bug where the .a files get sorted
- # in the wrong order. This is a workaround; see
- # https://github.com/mesonbuild/meson/issues/3613#issuecomment-408276296 .
- cefdlldep = declare_dependency(sources: cef_dll_target[1], link_args: cef_dll_target.full_path())
- nageru_deps += cefdlldep
- endif
-
- cef_libs = ['libEGL.so', 'libGLESv2.so', 'natives_blob.bin', 'snapshot_blob.bin', 'v8_context_snapshot.bin']
- cef_resources = ['cef.pak', 'cef_100_percent.pak', 'cef_200_percent.pak', 'cef_extensions.pak', 'devtools_resources.pak']
- if not get_option('cef_no_icudtl')
- cef_resources += ['icudtl.dat']
- endif
- if cef_build_type != 'system'
- cef_libs += ['libcef.so']
- endif
-
- # Symlink the files into the build directory, so that running nageru without ninja install works.
- run_command('mkdir', join_paths(meson.current_build_dir(), 'locales/'))
- foreach file : cef_libs
- run_command('ln', '-s', join_paths(cef_lib_dir, file), meson.current_build_dir())
- install_data(join_paths(cef_lib_dir, file), install_dir: exe_dir)
- endforeach
- foreach file : cef_resources
- run_command('ln', '-s', join_paths(cef_resource_dir, file), meson.current_build_dir())
- install_data(join_paths(cef_resource_dir, file), install_dir: exe_dir)
- endforeach
- run_command('ln', '-s', join_paths(cef_resource_dir, 'locales/en-US.pak'), join_paths(meson.current_build_dir(), 'locales/'))
- install_data(join_paths(cef_resource_dir, 'locales/en-US.pak'), install_dir: join_paths(exe_dir, 'locales'))
endif
-# bmusb.
-if embedded_bmusb
- bmusb_dir = include_directories('bmusb')
- nageru_include_dirs += bmusb_dir
-
- bmusb = static_library('bmusb', 'bmusb/bmusb.cpp', 'bmusb/fake_capture.cpp',
- dependencies: [libusbdep],
- include_directories: [bmusb_dir])
- nageru_link_with += bmusb
- kaeru_link_with += bmusb
-else
- nageru_deps += bmusbdep
- kaeru_extra_deps += bmusbdep
-endif
-
-# Protobuf compilation.
-gen = generator(protoc, \
- output : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
- arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
-proto_generated = gen.process(['state.proto', 'midi_mapping.proto', 'json.proto'])
-protobuf_lib = static_library('protobufs', proto_generated, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-protobuf_hdrs = declare_dependency(sources: proto_generated)
-nageru_link_with += protobuf_lib
-
-# Preprocess Qt as needed.
-qt_files = qt5.preprocess(
- moc_headers: ['aboutdialog.h', 'analyzer.h', 'clickable_label.h', 'compression_reduction_meter.h', 'correlation_meter.h',
- 'ellipsis_label.h', 'glwidget.h', 'input_mapping_dialog.h', 'lrameter.h', 'mainwindow.h', 'midi_mapping_dialog.h',
- 'nonlinear_fader.h', 'vumeter.h'],
- ui_files: ['aboutdialog.ui', 'analyzer.ui', 'audio_expanded_view.ui', 'audio_miniview.ui', 'display.ui',
- 'input_mapping.ui', 'mainwindow.ui', 'midi_mapping.ui'],
- dependencies: qt5deps)
-
-# Qt objects.
-srcs += ['glwidget.cpp', 'mainwindow.cpp', 'vumeter.cpp', 'lrameter.cpp', 'compression_reduction_meter.cpp',
- 'correlation_meter.cpp', 'aboutdialog.cpp', 'analyzer.cpp', 'input_mapping_dialog.cpp', 'midi_mapping_dialog.cpp',
- 'nonlinear_fader.cpp', 'context_menus.cpp', 'vu_common.cpp', 'piecewise_interpolator.cpp', 'midi_mapper.cpp']
-
-# Auxiliary objects used for nearly everything.
-aux_srcs = ['metrics.cpp', 'flags.cpp']
-aux = static_library('aux', aux_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += aux
-
-# Audio objects.
-audio_mixer_srcs = ['audio_mixer.cpp', 'alsa_input.cpp', 'alsa_pool.cpp', 'ebu_r128_proc.cc', 'stereocompressor.cpp',
- 'resampling_queue.cpp', 'flags.cpp', 'correlation_measurer.cpp', 'filter.cpp', 'input_mapping.cpp']
-audio = static_library('audio', audio_mixer_srcs, dependencies: [nageru_deps, protobuf_hdrs], include_directories: nageru_include_dirs)
-nageru_link_with += audio
-
-# Mixer objects.
-srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
- 'context.cpp', 'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
- 'disk_space_estimator.cpp', 'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
-
-# Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
-stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
- 'metacube2.cpp', 'mux.cpp', 'audio_encoder.cpp', 'ffmpeg_raii.cpp', 'ffmpeg_util.cpp', 'httpd.cpp', 'ffmpeg_capture.cpp',
- 'print_latency.cpp', 'basic_stats.cpp', 'ref_counted_frame.cpp']
-stream = static_library('stream', stream_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += stream
-
-# DeckLink.
-srcs += ['decklink_capture.cpp', 'decklink_util.cpp', 'decklink_output.cpp', 'memcpy_interleaved.cpp',
- 'decklink/DeckLinkAPIDispatch.cpp']
-decklink_dir = include_directories('decklink')
-nageru_include_dirs += decklink_dir
-
-# CEF input.
-if have_cef
- srcs += ['nageru_cef_app.cpp', 'cef_capture.cpp']
-endif
-
-srcs += qt_files
-srcs += proto_generated
-
-# Everything except main.cpp. (We do this because if you specify a .cpp file in
-# both Nageru and Kaeru, it gets compiled twice. In the older Makefiles, Kaeru
-# depended on a smaller set of objects.)
-core = static_library('core', srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
-nageru_link_with += core
-
-# Nageru executable; it goes into /usr/lib/nageru since CEF files go there, too
-# (we can't put them straight into /usr/bin).
-executable('nageru', 'main.cpp',
- dependencies: nageru_deps,
- include_directories: nageru_include_dirs,
- link_with: nageru_link_with,
- link_args: nageru_link_args,
- build_rpath: nageru_build_rpath,
- install_rpath: nageru_install_rpath,
- install: true,
- install_dir: exe_dir
-)
-meson.add_install_script('scripts/setup_nageru_symlink.sh')
-
-# Kaeru executable.
-executable('kaeru', 'kaeru.cpp',
- dependencies: [nageru_deps, kaeru_extra_deps],
- include_directories: nageru_include_dirs,
- link_with: [stream, aux, kaeru_link_with],
- link_args: nageru_link_args,
- install: true)
-
-# Audio mixer microbenchmark.
-executable('benchmark_audio_mixer', 'benchmark_audio_mixer.cpp', dependencies: nageru_deps, include_directories: nageru_include_dirs, link_args: nageru_link_args, link_with: [audio, aux])
+top_include = include_directories('.')
-# These are needed for a default run.
-data_files = ['theme.lua', 'simple.lua', 'bg.jpeg', 'akai_midimix.midimapping']
-install_data(data_files, install_dir: join_paths(get_option('prefix'), 'share/nageru'))
-foreach file : data_files
- run_command('ln', '-s', join_paths(meson.current_source_dir(), file), meson.current_build_dir())
-endforeach
+subdir('shared')
+subdir('nageru')
+subdir('futatabi')
<item>
<widget class="QLabel" name="label">
<property name="text">
- <string><p><b>Nageru 1.7.4</b></p>
+ <string><p><b>Nageru 1.7.5</b></p>
<p>Realtime video mixer</p></string>
</property>
#include "alsa_pool.h"
#include "bmusb/bmusb.h"
-#include "timebase.h"
+#include "shared/timebase.h"
using namespace std;
using namespace std::chrono;
#include <movit/resource_pool.h>
#include <movit/util.h>
-#include "context.h"
+#include "shared/context.h"
#include "flags.h"
#include "mixer.h"
#include "ui_analyzer.h"
#include <vector>
#include "defs.h"
-#include "mux.h"
-#include "timebase.h"
+#include "shared/mux.h"
+#include "shared/timebase.h"
using namespace std;
#include <libavutil/frame.h>
}
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
class Mux;
#include <limits>
#include <utility>
-#include "db.h"
+#include "decibel.h"
#include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include "state.pb.h"
-#include "timebase.h"
+#include "shared/timebase.h"
using namespace bmusb;
using namespace std;
#include "alsa_pool.h"
#include "correlation_measurer.h"
-#include "db.h"
+#include "decibel.h"
#include "defs.h"
#include "ebu_r128_proc.h"
#include "filter.h"
#include "basic_stats.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include <assert.h>
#include <sys/resource.h>
#include <vector>
#include "audio_mixer.h"
-#include "db.h"
+#include "decibel.h"
#include "defs.h"
#include "input_mapping.h"
#include "resampling_queue.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#define NUM_BENCHMARK_CARDS 4
#define NUM_WARMUP_FRAMES 100
--- /dev/null
+Subproject commit 5163d25c65c3028090db1aea6587ec2fb4cb823e
--- /dev/null
+#version 130
+
+in vec2 tc0, tc1;
+uniform sampler2D cbcr_tex;
+out vec4 FragColor, FragColor2;
+void main() {
+ FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1));
+ FragColor2 = FragColor;
+}
--- /dev/null
+#version 130
+
+in vec2 position;
+in vec2 texcoord;
+out vec2 tc0, tc1;
+uniform vec2 foo_chroma_offset_0;
+uniform vec2 foo_chroma_offset_1;
+
+void main()
+{
+ // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+ //
+ // 2.000 0.000 0.000 -1.000
+ // 0.000 2.000 0.000 -1.000
+ // 0.000 0.000 -2.000 -1.000
+ // 0.000 0.000 0.000 1.000
+ gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+ vec2 flipped_tc = texcoord;
+ tc0 = flipped_tc + foo_chroma_offset_0;
+ tc1 = flipped_tc + foo_chroma_offset_1;
+}
#include <movit/resource_pool.h>
#include <movit/util.h>
+#include "embedded_files.h"
+#include "shared/read_file.h"
+
using namespace movit;
using namespace std;
// See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
// Cb/Cr shader.
- string cbcr_vert_shader =
- "#version 130 \n"
- " \n"
- "in vec2 position; \n"
- "in vec2 texcoord; \n"
- "out vec2 tc0, tc1; \n"
- "uniform vec2 foo_chroma_offset_0; \n"
- "uniform vec2 foo_chroma_offset_1; \n"
- " \n"
- "void main() \n"
- "{ \n"
- " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
- " // \n"
- " // 2.000 0.000 0.000 -1.000 \n"
- " // 0.000 2.000 0.000 -1.000 \n"
- " // 0.000 0.000 -2.000 -1.000 \n"
- " // 0.000 0.000 0.000 1.000 \n"
- " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
- " vec2 flipped_tc = texcoord; \n"
- " tc0 = flipped_tc + foo_chroma_offset_0; \n"
- " tc1 = flipped_tc + foo_chroma_offset_1; \n"
- "} \n";
- string cbcr_frag_shader =
- "#version 130 \n"
- "in vec2 tc0, tc1; \n"
- "uniform sampler2D cbcr_tex; \n"
- "out vec4 FragColor, FragColor2; \n"
- "void main() { \n"
- " FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n"
- " FragColor2 = FragColor; \n"
- "} \n";
+ string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
+ string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
check_error();
cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
check_error();
// Same, for UYVY conversion.
- string uyvy_vert_shader =
- "#version 130 \n"
- " \n"
- "in vec2 position; \n"
- "in vec2 texcoord; \n"
- "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
- "uniform vec2 foo_luma_offset_0; \n"
- "uniform vec2 foo_luma_offset_1; \n"
- "uniform vec2 foo_chroma_offset_0; \n"
- "uniform vec2 foo_chroma_offset_1; \n"
- " \n"
- "void main() \n"
- "{ \n"
- " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
- " // \n"
- " // 2.000 0.000 0.000 -1.000 \n"
- " // 0.000 2.000 0.000 -1.000 \n"
- " // 0.000 0.000 -2.000 -1.000 \n"
- " // 0.000 0.000 0.000 1.000 \n"
- " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
- " vec2 flipped_tc = texcoord; \n"
- " y_tc0 = flipped_tc + foo_luma_offset_0; \n"
- " y_tc1 = flipped_tc + foo_luma_offset_1; \n"
- " cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n"
- " cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n"
- "} \n";
- string uyvy_frag_shader =
- "#version 130 \n"
- "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
- "uniform sampler2D y_tex, cbcr_tex; \n"
- "out vec4 FragColor; \n"
- "void main() { \n"
- " float y0 = texture(y_tex, y_tc0).r; \n"
- " float y1 = texture(y_tex, y_tc1).r; \n"
- " vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n"
- " vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n"
- " vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n"
- " FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n"
- "} \n";
+ string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
+ string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
check_error();
// v210 compute shader.
if (v210Converter::has_hardware_support()) {
- string v210_shader_src = R"(#version 150
-#extension GL_ARB_compute_shader : enable
-#extension GL_ARB_shader_image_load_store : enable
-layout(local_size_x=2, local_size_y=16) in;
-layout(r16) uniform restrict readonly image2D in_y;
-uniform sampler2D in_cbcr; // Of type RG16.
-layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
-uniform float inv_width, inv_height;
-
-void main()
-{
- int xb = int(gl_GlobalInvocationID.x); // X block number.
- int y = int(gl_GlobalInvocationID.y); // Y (actual line).
- float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate.
-
- // Load and scale CbCr values, sampling in-between the texels to get
- // to (left/4 + center/2 + right/4).
- vec2 pix_cbcr[3];
- for (int i = 0; i < 3; ++i) {
- vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
- vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
- pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
- }
-
- // Load and scale the Y values. Note that we use integer coordinates here,
- // so we don't need to offset by 0.5.
- float pix_y[6];
- for (int i = 0; i < 6; ++i) {
- pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
- }
-
- imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0));
- imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0));
- imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0));
- imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0));
-}
-)";
+ string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
check_error();
v210_program_num = glCreateProgram();
-#ifndef _DB_H
-#define _DB_H 1
+#ifndef _DECIBEL_H
+#define _DECIBEL_H 1
// Utility routines for working with decibels.
static inline double from_db(double db) { return pow(10.0, db / 20.0); }
static inline double to_db(double val) { return 20.0 * log10(val); }
-#endif // !defined(_DB_H)
+#endif // !defined(_DECIBEL_H)
#include "bmusb/bmusb.h"
#include "decklink_util.h"
#include "flags.h"
-#include "memcpy_interleaved.h"
+#include "shared/memcpy_interleaved.h"
#include "v210_converter.h"
#define FRAME_SIZE (8 << 20) // 8 MB.
#include "decklink_output.h"
#include "decklink_util.h"
#include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include "print_latency.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "v210_converter.h"
using namespace movit;
#include "DeckLinkAPITypes.h"
#include "LinuxCOM.h"
-#include "context.h"
+#include "shared/context.h"
#include "print_latency.h"
#include "quittable_sleeper.h"
#include "ref_counted_frame.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
namespace movit {
#include <libavformat/version.h>
-// This flag is only supported in FFmpeg 3.3 and up, and we only require 3.1.
-#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 71, 100)
-#define MUX_SKIP_TRAILER "+skip_trailer"
-#else
-#define MUX_SKIP_TRAILER ""
-#endif
-
#define OUTPUT_FREQUENCY 48000 // Currently needs to be exactly 48000, since bmusb outputs in that.
#define MAX_FPS 60
#define FAKE_FPS 25 // Must be an integer.
#define LOCAL_DUMP_SUFFIX ".nut"
#define DEFAULT_STREAM_MUX_NAME "nut" // Only for HTTP. Local dump guesses from LOCAL_DUMP_SUFFIX.
#define DEFAULT_HTTPD_PORT 9095
-#define MUX_OPTS { \
- /* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \
- { "movflags", "empty_moov+frag_keyframe+default_base_moof" MUX_SKIP_TRAILER }, \
- \
- /* Make for somewhat less bursty stream output when using .mov. */ \
- { "frag_duration", "125000" }, \
- \
- /* Keep nut muxer from using unlimited amounts of memory. */ \
- { "write_index", "0" } \
-}
-
-// In bytes. Beware, if too small, stream clients will start dropping data.
-// For mov, you want this at 10MB or so (for the reason mentioned above),
-// but for nut, there's no flushing, so such a large mux buffer would cause
-// the output to be very uneven.
-#define MUX_BUFFER_SIZE 10485760
+
+#include "shared/shared_defs.h"
// In number of frames. Comes in addition to any internal queues in x264
// (frame threading, lookahead, etc.).
--- /dev/null
+#ifndef _EMBEDDED_FILES_H
+#define _EMBEDDED_FILES_H 1
+
+// Files that are embedded into the binary as part of the build process.
+// They are used as a backup if the files are not available on disk
+// (which is typically the case if the program is installed, as opposed to
+// being run during development).
+
+#include <stddef.h>
+
+extern const unsigned char *_binary_cbcr_subsample_vert_data;
+extern const size_t _binary_cbcr_subsample_vert_size;
+extern const unsigned char *_binary_cbcr_subsample_frag_data;
+extern const size_t _binary_cbcr_subsample_frag_size;
+extern const unsigned char *_binary_uyvy_subsample_vert_data;
+extern const size_t _binary_uyvy_subsample_vert_size;
+extern const unsigned char *_binary_uyvy_subsample_frag_data;
+extern const size_t _binary_uyvy_subsample_frag_size;
+extern const unsigned char *_binary_v210_subsample_comp_data;
+extern const size_t _binary_v210_subsample_comp_size;
+extern const unsigned char *_binary_timecode_vert_data;
+extern const size_t _binary_timecode_vert_size;
+extern const unsigned char *_binary_timecode_frag_data;
+extern const size_t _binary_timecode_frag_size;
+extern const unsigned char *_binary_timecode_10bit_frag_data;
+extern const size_t _binary_timecode_10bit_frag_size;
+
+#endif // !defined(_EMBEDDED_FILES_H)
#include <vector>
#include "bmusb/bmusb.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "ffmpeg_util.h"
#include "flags.h"
#include "image_input.h"
#include "ref_counted_frame.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#define FRAME_SIZE (8 << 20) // 8 MB.
}
#include "bmusb/bmusb.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "ref_counted_frame.h"
#include "quittable_sleeper.h"
#include <utility>
#include "audio_mixer.h"
-#include "context.h"
+#include "shared/context.h"
#include "context_menus.h"
#include "flags.h"
#include "mainwindow.h"
#include "mixer.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
class QMouseEvent;
#include <utility>
#include <vector>
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "ffmpeg_util.h"
#include "flags.h"
#include "alsa_pool.h"
#include "defs.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
#include "ui_input_mapping.h"
using namespace std;
#include "flags.h"
#include "ffmpeg_capture.h"
#include "mixer.h"
-#include "mux.h"
+#include "shared/mux.h"
#include "quittable_sleeper.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "x264_encoder.h"
#include <assert.h>
string video_extradata = x264_encoder->get_global_headers();
unique_ptr<Mux> mux;
- mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(), COARSE_TIMEBASE,
+ mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(),
+ get_color_space(global_flags.ycbcr_rec709_coefficients), Mux::WITH_AUDIO, COARSE_TIMEBASE,
/*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, { &stream_mux_metrics }));
stream_mux_metrics.init({{ "destination", "http" }});
return mux;
#ifdef HAVE_CEF
#include "nageru_cef_app.h"
#endif
-#include "context.h"
+#include "shared/context.h"
#include "flags.h"
#include "image_input.h"
#include "mainwindow.h"
// We normally use EGL for zerocopy, but if we use VA against DRM
// instead of against X11, we turn it off, and then don't need EGL.
setenv("QT_XCB_GL_INTEGRATION", "xcb_egl", 0);
- using_egl = true;
}
setlinebuf(stdout);
#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
#include "clickable_label.h"
#include "context_menus.h"
#include "correlation_meter.h"
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
#include "ellipsis_label.h"
#include "flags.h"
#include "glwidget.h"
#include "midi_mapping_dialog.h"
#include "mixer.h"
#include "nonlinear_fader.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
#include "ui_audio_expanded_view.h"
#include "ui_audio_miniview.h"
#include "ui_display.h"
--- /dev/null
+qt5 = import('qt5')
+protoc = find_program('protoc')
+cxx = meson.get_compiler('cpp')
+
+embedded_bmusb = get_option('embedded_bmusb')
+
+alsadep = dependency('alsa')
+bmusbdep = dependency('bmusb', required: not embedded_bmusb)
+dldep = cxx.find_library('dl')
+epoxydep = dependency('epoxy')
+libavcodecdep = dependency('libavcodec')
+libavformatdep = dependency('libavformat')
+libavresampledep = dependency('libavresample')
+libavutildep = dependency('libavutil')
+libjpegdep = dependency('libjpeg')
+libswscaledep = dependency('libswscale')
+libusbdep = dependency('libusb-1.0')
+luajitdep = dependency('luajit')
+movitdep = dependency('movit')
+protobufdep = dependency('protobuf')
+qcustomplotdep = cxx.find_library('qcustomplot')
+qt5deps = dependency('qt5', modules: ['Core', 'Gui', 'Widgets', 'OpenGLExtensions', 'OpenGL', 'PrintSupport'])
+threaddep = dependency('threads')
+vadrmdep = dependency('libva-drm')
+vax11dep = dependency('libva-x11')
+x11dep = dependency('x11')
+x264dep = dependency('x264')
+zitaresamplerdep = cxx.find_library('zita-resampler')
+
+srcs = []
+nageru_deps = [shareddep, qt5deps, libjpegdep, movitdep, protobufdep,
+ vax11dep, vadrmdep, x11dep, libavformatdep, libavresampledep, libavcodecdep, libavutildep,
+ libswscaledep, libusbdep, luajitdep, dldep, x264dep, alsadep, zitaresamplerdep,
+ qcustomplotdep, threaddep]
+nageru_include_dirs = []
+nageru_link_with = []
+nageru_build_rpath = ''
+nageru_install_rpath = ''
+
+kaeru_link_with = []
+kaeru_extra_deps = []
+
+# CEF.
+exe_dir = join_paths(get_option('prefix'), 'lib/nageru')
+cef_dir = get_option('cef_dir')
+cef_build_type = get_option('cef_build_type')
+have_cef = (cef_dir != '')
+if have_cef
+ # This is done in the top-level file; just kept here for reference.
+ # add_project_arguments('-DHAVE_CEF=1', language: 'cpp')
+
+ system_cef = (cef_build_type == 'system')
+ if system_cef
+ cef_lib_dir = cef_dir
+ cef_resource_dir = '/usr/share/cef/Resources'
+ else
+ cef_lib_dir = join_paths(cef_dir, cef_build_type)
+ cef_resource_dir = join_paths(cef_dir, 'Resources')
+
+ nageru_include_dirs += include_directories(cef_dir)
+ nageru_include_dirs += include_directories(join_paths(cef_dir, 'include'))
+ nageru_build_rpath = cef_lib_dir
+ nageru_install_rpath = '$ORIGIN/'
+ endif
+
+ cefdep = cxx.find_library('cef')
+ nageru_deps += cefdep
+
+ # CEF wrapper library; not built as part of the CEF binary distribution,
+ # but should be if CEF is installed as a system library.
+ if system_cef
+ cefdlldep = cxx.find_library('cef_dll_wrapper')
+ nageru_deps += cefdlldep
+ else
+ cmake = find_program('cmake')
+ cef_compile_script = find_program('scripts/compile_cef_dll_wrapper.sh')
+
+ cef_dll_target = custom_target('libcef_dll_wrapper',
+ input: join_paths(cef_dir, 'libcef_dll/CMakeLists.txt'),
+ output: ['libcef_dll_wrapper.a', 'cef-stamp'],
+ command: [cef_compile_script, '@BUILD_DIR@', cef_dir, cmake, '@OUTPUT@'])
+
+ # Putting the .a in sources seemingly hits a bug where the .a files get sorted
+ # in the wrong order. This is a workaround; see
+ # https://github.com/mesonbuild/meson/issues/3613#issuecomment-408276296 .
+ cefdlldep = declare_dependency(sources: cef_dll_target[1], link_args: cef_dll_target.full_path())
+ nageru_deps += cefdlldep
+ endif
+
+ cef_libs = ['libEGL.so', 'libGLESv2.so', 'natives_blob.bin', 'snapshot_blob.bin', 'v8_context_snapshot.bin']
+ cef_resources = ['cef.pak', 'cef_100_percent.pak', 'cef_200_percent.pak', 'cef_extensions.pak', 'devtools_resources.pak']
+ if not get_option('cef_no_icudtl')
+ cef_resources += ['icudtl.dat']
+ endif
+ if cef_build_type != 'system'
+ cef_libs += ['libcef.so']
+ endif
+
+ # Symlink the files into the build directory, so that running nageru without ninja install works.
+ run_command('mkdir', join_paths(meson.current_build_dir(), 'locales/'))
+ foreach file : cef_libs
+ run_command('ln', '-s', join_paths(cef_lib_dir, file), meson.current_build_dir())
+ install_data(join_paths(cef_lib_dir, file), install_dir: exe_dir)
+ endforeach
+ foreach file : cef_resources
+ run_command('ln', '-s', join_paths(cef_resource_dir, file), meson.current_build_dir())
+ install_data(join_paths(cef_resource_dir, file), install_dir: exe_dir)
+ endforeach
+ run_command('ln', '-s', join_paths(cef_resource_dir, 'locales/en-US.pak'), join_paths(meson.current_build_dir(), 'locales/'))
+ install_data(join_paths(cef_resource_dir, 'locales/en-US.pak'), install_dir: join_paths(exe_dir, 'locales'))
+endif
+
+# bmusb.
+if embedded_bmusb
+ bmusb_dir = include_directories('bmusb')
+ nageru_include_dirs += bmusb_dir
+
+ bmusb = static_library('bmusb', 'bmusb/bmusb.cpp', 'bmusb/fake_capture.cpp',
+ dependencies: [libusbdep],
+ include_directories: [bmusb_dir])
+ nageru_link_with += bmusb
+ kaeru_link_with += bmusb
+else
+ nageru_deps += bmusbdep
+ kaeru_extra_deps += bmusbdep
+endif
+
+# Protobuf compilation.
+gen = generator(protoc, \
+ output : ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
+ arguments : ['--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+proto_generated = gen.process(['state.proto', 'midi_mapping.proto', 'json.proto'])
+protobuf_lib = static_library('protobufs', proto_generated, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+protobuf_hdrs = declare_dependency(sources: proto_generated)
+nageru_link_with += protobuf_lib
+
+# Preprocess Qt as needed.
+qt_files = qt5.preprocess(
+ moc_headers: ['aboutdialog.h', 'analyzer.h', 'clickable_label.h', 'compression_reduction_meter.h', 'correlation_meter.h',
+ 'ellipsis_label.h', 'glwidget.h', 'input_mapping_dialog.h', 'lrameter.h', 'mainwindow.h', 'midi_mapping_dialog.h',
+ 'nonlinear_fader.h', 'vumeter.h'],
+ ui_files: ['aboutdialog.ui', 'analyzer.ui', 'audio_expanded_view.ui', 'audio_miniview.ui', 'display.ui',
+ 'input_mapping.ui', 'mainwindow.ui', 'midi_mapping.ui'],
+ dependencies: qt5deps)
+
+# Qt objects.
+srcs += ['glwidget.cpp', 'mainwindow.cpp', 'vumeter.cpp', 'lrameter.cpp', 'compression_reduction_meter.cpp',
+ 'correlation_meter.cpp', 'aboutdialog.cpp', 'analyzer.cpp', 'input_mapping_dialog.cpp', 'midi_mapping_dialog.cpp',
+ 'nonlinear_fader.cpp', 'context_menus.cpp', 'vu_common.cpp', 'piecewise_interpolator.cpp', 'midi_mapper.cpp']
+
+# Auxiliary objects used for nearly everything.
+aux_srcs = ['flags.cpp']
+aux = static_library('aux', aux_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += aux
+
+# Audio objects.
+audio_mixer_srcs = ['audio_mixer.cpp', 'alsa_input.cpp', 'alsa_pool.cpp', 'ebu_r128_proc.cc', 'stereocompressor.cpp',
+ 'resampling_queue.cpp', 'flags.cpp', 'correlation_measurer.cpp', 'filter.cpp', 'input_mapping.cpp']
+audio = static_library('audio', audio_mixer_srcs, dependencies: [nageru_deps, protobuf_hdrs], include_directories: nageru_include_dirs)
+nageru_link_with += audio
+
+# Mixer objects.
+srcs += ['chroma_subsampler.cpp', 'v210_converter.cpp', 'mixer.cpp', 'pbo_frame_allocator.cpp',
+ 'theme.cpp', 'image_input.cpp', 'alsa_output.cpp',
+ 'timecode_renderer.cpp', 'tweaked_inputs.cpp', 'mjpeg_encoder.cpp']
+
+# Streaming and encoding objects (largely the set that is shared between Nageru and Kaeru).
+stream_srcs = ['quicksync_encoder.cpp', 'x264_encoder.cpp', 'x264_dynamic.cpp', 'x264_speed_control.cpp', 'video_encoder.cpp',
+ 'audio_encoder.cpp', 'ffmpeg_util.cpp', 'ffmpeg_capture.cpp',
+ 'print_latency.cpp', 'basic_stats.cpp', 'ref_counted_frame.cpp']
+stream = static_library('stream', stream_srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += stream
+
+# DeckLink.
+srcs += ['decklink_capture.cpp', 'decklink_util.cpp', 'decklink_output.cpp',
+ 'decklink/DeckLinkAPIDispatch.cpp']
+decklink_dir = include_directories('decklink')
+nageru_include_dirs += decklink_dir
+
+# CEF input.
+if have_cef
+ srcs += ['nageru_cef_app.cpp', 'cef_capture.cpp']
+endif
+
+srcs += qt_files
+srcs += proto_generated
+
+# Shaders needed at runtime.
+shaders = ['cbcr_subsample.vert', 'cbcr_subsample.frag', 'uyvy_subsample.vert', 'uyvy_subsample.frag', 'v210_subsample.comp', 'timecode.vert', 'timecode.frag', 'timecode_10bit.frag']
+foreach shader : shaders
+ run_command('ln', '-s', join_paths(meson.current_source_dir(), shader), meson.current_build_dir())
+endforeach
+
+shader_srcs = bin2h_gen.process(shaders)
+srcs += shader_srcs
+
+# Everything except main.cpp. (We do this because if you specify a .cpp file in
+# both Nageru and Kaeru, it gets compiled twice. In the older Makefiles, Kaeru
+# depended on a smaller set of objects.)
+core = static_library('core', srcs, dependencies: nageru_deps, include_directories: nageru_include_dirs)
+nageru_link_with += core
+
+# Nageru executable; it goes into /usr/lib/nageru since CEF files go there, too
+# (we can't put them straight into /usr/bin).
+executable('nageru', 'main.cpp',
+ dependencies: nageru_deps,
+ include_directories: nageru_include_dirs,
+ link_with: nageru_link_with,
+ build_rpath: nageru_build_rpath,
+ install_rpath: nageru_install_rpath,
+ install: true,
+ install_dir: exe_dir
+)
+meson.add_install_script('scripts/setup_nageru_symlink.sh')
+
+# Kaeru executable.
+executable('kaeru', 'kaeru.cpp',
+ dependencies: [nageru_deps, kaeru_extra_deps],
+ include_directories: nageru_include_dirs,
+ link_with: [stream, aux, kaeru_link_with],
+ install: true)
+
+# Audio mixer microbenchmark.
+executable('benchmark_audio_mixer', 'benchmark_audio_mixer.cpp', dependencies: nageru_deps, include_directories: nageru_include_dirs, link_with: [audio, aux])
+
+# These are needed for a default run.
+data_files = ['theme.lua', 'simple.lua', 'bg.jpeg', 'akai_midimix.midimapping']
+install_data(data_files, install_dir: join_paths(get_option('prefix'), 'share/nageru'))
+foreach file : data_files
+ run_command('ln', '-s', join_paths(meson.current_source_dir(), file), meson.current_build_dir())
+endforeach
#include "midi_mapper.h"
#include "midi_mapping.pb.h"
-#include "post_to_main_thread.h"
+#include "shared/post_to_main_thread.h"
#include "ui_midi_mapping.h"
class QObject;
#include "cef_capture.h"
#endif
#include "chroma_subsampler.h"
-#include "context.h"
+#include "shared/context.h"
#include "decklink_capture.h"
#include "decklink_output.h"
#include "defs.h"
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
#include "ffmpeg_capture.h"
#include "flags.h"
#include "input_mapping.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include "mjpeg_encoder.h"
#include "pbo_frame_allocator.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
#include "resampling_queue.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "timecode_renderer.h"
#include "v210_converter.h"
#include "va_display_with_cleanup.h"
#include "audio_mixer.h"
#include "bmusb/bmusb.h"
#include "defs.h"
-#include "httpd.h"
+#include "shared/httpd.h"
#include "input_state.h"
#include "libusb.h"
#include "pbo_frame_allocator.h"
#include "ref_counted_frame.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
#include "theme.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "video_encoder.h"
#include "ycbcr_interpretation.h"
}
#include "defs.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "flags.h"
-#include "httpd.h"
-#include "memcpy_interleaved.h"
+#include "shared/httpd.h"
+#include "shared/memcpy_interleaved.h"
#include "pbo_frame_allocator.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "va_display_with_cleanup.h"
#include <va/va.h>
#ifndef _MJPEG_ENCODER_H
#define _MJPEG_ENCODER_H 1
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "ref_counted_frame.h"
extern "C" {
#include "print_latency.h"
#include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include "mixer.h"
#include <stdio.h>
#include <vector>
#include "ref_counted_frame.h"
-#include "metrics.h"
+#include "shared/metrics.h"
// Since every output frame is based on multiple input frames, we need
// more than one start timestamp; one for each input.
} // namespace
#include "audio_encoder.h"
-#include "context.h"
+#include "shared/context.h"
#include "defs.h"
-#include "disk_space_estimator.h"
-#include "ffmpeg_raii.h"
+#include "shared/disk_space_estimator.h"
+#include "shared/ffmpeg_raii.h"
#include "flags.h"
-#include "mux.h"
+#include "shared/mux.h"
#include "print_latency.h"
#include "quicksync_encoder_impl.h"
#include "ref_counted_frame.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "x264_encoder.h"
using namespace movit;
{
lock_guard<mutex> lock(file_audio_encoder_mutex);
AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
- file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
- std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+ file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), get_color_space(global_flags.ycbcr_rec709_coefficients), Mux::WITH_AUDIO, TIMEBASE,
+ std::bind(&DiskSpaceEstimator::report_append, disk_space_estimator, filename, _1),
Mux::WRITE_BACKGROUND,
{ ¤t_file_mux_metrics, &total_mux_metrics }));
}
#include <libavformat/avformat.h>
}
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
class DiskSpaceEstimator;
class Mux;
#include "audio_encoder.h"
#include "defs.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "print_latency.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/ref_counted_gl_sync.h"
#include "va_display_with_cleanup.h"
#define SURFACE_NUM 16 /* 16 surfaces for source YUV */
--- /dev/null
+#version 130
+
+// Renders the timecode overlay: expands a grayscale glyph texture into
+// limited-range Y'CbCr, with neutral (gray) chroma. Outputs both planar
+// (Y + CbCr) and interleaved YCbCr variants so either pipeline can use it.
+in vec2 tc0;
+uniform sampler2D tex;
+out vec4 Y, CbCr, YCbCr;
+
+void main() {
+	vec4 gray = texture(tex, tc0);
+	gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0;  // Limited-range Y'CbCr.
+	CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0);
+	Y = gray.rrra;
+	YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a);
+}
--- /dev/null
+#version 130
+
+// Trivial vertex shader for the timecode overlay: maps the unit square
+// into clip space and passes the texture coordinate through.
+in vec2 position;
+in vec2 texcoord;
+out vec2 tc0;
+
+void main()
+{
+	// Equivalent to multiplying by glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0):
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	vec2 pos = position * 2.0 - 1.0;
+	gl_Position = vec4(pos, -1.0, 1.0);
+	tc0 = texcoord;
+}
--- /dev/null
+#version 130
+
+// 10-bit variant of timecode.frag: same grayscale-to-Y'CbCr expansion,
+// but scaled for 10-bit limited range stored in 16-bit normalized texels.
+in vec2 tc0;
+uniform sampler2D tex;
+out vec4 Y, CbCr, YCbCr;
+
+void main() {
+	vec4 gray = texture(tex, tc0);
+	gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0;  // Limited-range Y'CbCr.
+	CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0);
+	Y = gray.rrra;
+	YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a);
+}
#include <sys/time.h>
#include "flags.h"
+#include "embedded_files.h"
+#include "shared/read_file.h"
using namespace std;
using namespace movit;
TimecodeRenderer::TimecodeRenderer(movit::ResourcePool *resource_pool, unsigned display_width, unsigned display_height)
: resource_pool(resource_pool), display_width(display_width), display_height(display_height), height(28)
{
- string vert_shader =
- "#version 130 \n"
- " \n"
- "in vec2 position; \n"
- "in vec2 texcoord; \n"
- "out vec2 tc0; \n"
- " \n"
- "void main() \n"
- "{ \n"
- " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
- " // \n"
- " // 2.000 0.000 0.000 -1.000 \n"
- " // 0.000 2.000 0.000 -1.000 \n"
- " // 0.000 0.000 -2.000 -1.000 \n"
- " // 0.000 0.000 0.000 1.000 \n"
- " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
- " tc0 = texcoord; \n"
- "} \n";
- string frag_shader =
- "#version 130 \n"
- "in vec2 tc0; \n"
- "uniform sampler2D tex; \n"
- "out vec4 Y, CbCr, YCbCr; \n"
- "void main() { \n"
- " vec4 gray = texture(tex, tc0); \n";
+ string vert_shader = read_file("timecode.vert", _binary_timecode_vert_data, _binary_timecode_vert_size);
+ string frag_shader;
if (global_flags.ten_bit_output) {
- frag_shader +=
- " gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0; \n" // Limited-range Y'CbCr.
- " CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0); \n";
+ frag_shader = read_file("timecode_10bit.frag", _binary_timecode_10bit_frag_data, _binary_timecode_10bit_frag_size);
} else {
- frag_shader +=
- " gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0; \n" // Limited-range Y'CbCr.
- " CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0); \n";
+ frag_shader = read_file("timecode.frag", _binary_timecode_frag_data, _binary_timecode_frag_size);
}
- frag_shader +=
- " Y = gray.rrra; \n"
- " YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a); \n"
- "} \n";
vector<string> frag_shader_outputs;
program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs);
--- /dev/null
+#version 130
+
+// Interleaves two luma samples and the average of two chroma samples into
+// a single packed output texel (UYVY-style ordering: Cr, Y0, Cb, Y1).
+in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1;
+uniform sampler2D y_tex, cbcr_tex;
+out vec4 FragColor;
+void main() {
+	float y0 = texture(y_tex, y_tc0).r;
+	float y1 = texture(y_tex, y_tc1).r;
+	vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg;
+	vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg;
+	vec2 cbcr = 0.5 * (cbcr0 + cbcr1);
+	FragColor = vec4(cbcr.g, y0, cbcr.r, y1);
+}
--- /dev/null
+#version 130
+
+// Vertex shader for chroma subsampling: produces four offset texture
+// coordinates (two luma, two chroma) from the uniform offsets, so the
+// fragment shader can average neighboring samples.
+in vec2 position;
+in vec2 texcoord;
+out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1;
+uniform vec2 foo_luma_offset_0;
+uniform vec2 foo_luma_offset_1;
+uniform vec2 foo_chroma_offset_0;
+uniform vec2 foo_chroma_offset_1;
+
+void main()
+{
+	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
+	//
+	//   2.000  0.000  0.000 -1.000
+	//   0.000  2.000  0.000 -1.000
+	//   0.000  0.000 -2.000 -1.000
+	//   0.000  0.000  0.000  1.000
+	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
+	vec2 flipped_tc = texcoord;
+	y_tc0 = flipped_tc + foo_luma_offset_0;
+	y_tc1 = flipped_tc + foo_luma_offset_1;
+	cbcr_tc0 = flipped_tc + foo_chroma_offset_0;
+	cbcr_tc1 = flipped_tc + foo_chroma_offset_1;
+}
--- /dev/null
+#version 150
+#extension GL_ARB_compute_shader : enable
+#extension GL_ARB_shader_image_load_store : enable
+
+// Packs planar Y (R16) plus interleaved CbCr (RG16) into v210-style
+// 10-bit output: each invocation handles one group of six horizontal
+// pixels on one line, writing four rgb10_a2 words (6 Y + 3 CbCr samples).
+
+layout(local_size_x=2, local_size_y=16) in;
+layout(r16) uniform restrict readonly image2D in_y;
+uniform sampler2D in_cbcr; // Of type RG16.
+layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
+uniform float inv_width, inv_height;
+
+void main()
+{
+	int xb = int(gl_GlobalInvocationID.x); // X block number.
+	int y = int(gl_GlobalInvocationID.y); // Y (actual line).
+	float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate.
+
+	// Load and scale CbCr values, sampling in-between the texels to get
+	// to (left/4 + center/2 + right/4).
+	vec2 pix_cbcr[3];
+	for (int i = 0; i < 3; ++i) {
+		vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
+		vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
+		// 65535.0 / 1023.0 rescales 16-bit normalized storage to 10-bit range.
+		pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
+	}
+
+	// Load and scale the Y values. Note that we use integer coordinates here,
+	// so we don't need to offset by 0.5.
+	float pix_y[6];
+	for (int i = 0; i < 6; ++i) {
+		pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
+	}
+
+	// Interleave into the four-word v210 group layout.
+	imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0));
+	imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0));
+	imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0));
+	imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0));
+}
#include "audio_encoder.h"
#include "defs.h"
-#include "ffmpeg_raii.h"
+#include "shared/ffmpeg_raii.h"
#include "flags.h"
-#include "httpd.h"
-#include "mux.h"
+#include "shared/httpd.h"
+#include "shared/mux.h"
#include "quicksync_encoder.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "x264_encoder.h"
class RefCountedFrame;
video_extradata = x264_encoder->get_global_headers();
}
- stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_codec_parameters().get(), COARSE_TIMEBASE,
+ stream_mux.reset(new Mux(avctx, width, height, video_codec, video_extradata, stream_audio_encoder->get_codec_parameters().get(),
+ get_color_space(global_flags.ycbcr_rec709_coefficients),
+ Mux::WITH_AUDIO, COARSE_TIMEBASE,
/*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, { &stream_mux_metrics }));
stream_mux_metrics.init({{ "destination", "http" }});
}
#include <libavformat/avio.h>
}
-#include "mux.h"
-#include "ref_counted_gl_sync.h"
+#include "shared/mux.h"
+#include "shared/ref_counted_gl_sync.h"
class AudioEncoder;
class DiskSpaceEstimator;
#include "defs.h"
#include "flags.h"
-#include "metrics.h"
-#include "mux.h"
+#include "shared/metrics.h"
+#include "shared/mux.h"
#include "print_latency.h"
-#include "timebase.h"
+#include "shared/timebase.h"
#include "x264_dynamic.h"
#include "x264_speed_control.h"
#include <movit/image_format.h>
#include "defs.h"
-#include "metrics.h"
+#include "shared/metrics.h"
#include "print_latency.h"
#include "x264_dynamic.h"
#include <type_traits>
#include "flags.h"
-#include "metrics.h"
+#include "shared/metrics.h"
using namespace std;
using namespace std::chrono;
#include <x264.h>
}
-#include "metrics.h"
+#include "shared/metrics.h"
#include "x264_dynamic.h"
class X264SpeedControl {
--- /dev/null
+// bin2h: turns an arbitrary file into a C++ source file containing its
+// bytes as an array, plus _binary_<name>_data/_size symbols mimicking
+// what “objcopy -I binary” would produce. Used to embed shaders etc.
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+	if (argc != 4) {
+		fprintf(stderr, "Usage: bin2h INFILE BASENAME OUTFILE\n");
+		return 1;
+	}
+
+	// Mangle the basename into a valid C identifier.
+	string basename = argv[2];
+	for (char &ch : basename) {
+		// Cast to unsigned char: passing a negative char to isalpha()/isdigit() is UB.
+		if (!isalpha((unsigned char)ch) && !isdigit((unsigned char)ch)) {
+			ch = '_';
+		}
+	}
+
+	FILE *infp = fopen(argv[1], "rb");
+	if (infp == nullptr) {
+		perror(argv[1]);
+		exit(1);
+	}
+
+	FILE *outfp = fopen(argv[3], "w");
+	if (outfp == nullptr) {
+		perror(argv[3]);
+		exit(1);
+	}
+
+	fprintf(outfp, "// Generated by bin2h.cpp from %s. Do not edit by hand.\n", argv[1]);
+	fprintf(outfp, "#include <stddef.h>\n");
+	fprintf(outfp, "unsigned char _binary_%s[] = {", basename.c_str());
+
+	// Read until EOF rather than testing feof() before the read,
+	// which would emit a spurious line prefix at end-of-file.
+	size_t num_bytes = 0;
+	int ch;
+	while ((ch = getc(infp)) != EOF) {
+		if (num_bytes++ % 16 == 0) {
+			fprintf(outfp, "\n\t");
+		}
+		fprintf(outfp, "0x%02x, ", ch);
+	}
+	fprintf(outfp, "\n};\n");
+	fprintf(outfp, "unsigned char *_binary_%s_data = _binary_%s;\n", basename.c_str(), basename.c_str());
+	fprintf(outfp, "size_t _binary_%s_size = sizeof(_binary_%s);\n", basename.c_str(), basename.c_str());
+
+	fclose(infp);
+	// Check the close of the output file, so write errors (e.g. full disk)
+	// are not silently ignored.
+	if (fclose(outfp) != 0) {
+		perror(argv[3]);
+		exit(1);
+	}
+	return 0;
+}
-#include <stdio.h>
-
-#include <string>
-
#include <QGL>
#include <QOffscreenSurface>
#include <QOpenGLContext>
#include <QSurface>
#include <QSurfaceFormat>
+#include <stdio.h>
+#include <string>
QGLWidget *global_share_widget = nullptr;
-bool using_egl = false;
using namespace std;
+// Create an offscreen surface with our default format: a bare-bones
+// OpenGL 4.5 core profile with no depth or stencil buffer and no vsync
+// (swap interval 0). Exits the process if surface creation fails.
+QSurface *create_surface()
+{
+	QSurfaceFormat fmt;
+	fmt.setDepthBufferSize(0);
+	fmt.setStencilBufferSize(0);
+	fmt.setProfile(QSurfaceFormat::CoreProfile);
+	fmt.setMajorVersion(4);
+	fmt.setMinorVersion(5);
+	// No vsync; pacing is not driven by the display here.
+	fmt.setSwapInterval(0);
+	QOffscreenSurface *surface = new QOffscreenSurface;
+	surface->setFormat(fmt);
+	surface->create();
+	if (!surface->isValid()) {
+		fprintf(stderr, "ERROR: surface not valid!\n");
+		exit(1);
+	}
+	return surface;
+}
+
QSurface *create_surface(const QSurfaceFormat &format)
{
QOffscreenSurface *surface = new QOffscreenSurface;
--- /dev/null
+
+// Needs to be in its own file because Qt and libepoxy seemingly don't coexist well
+// within the same file.
+
+class QSurface;
+class QOpenGLContext;
+class QSurfaceFormat;
+class QGLWidget;
+
+// NOTE(review): the definition of using_egl appears to be removed from
+// context.cpp in this same change — confirm it is still defined somewhere,
+// or this extern will fail to link.
+extern bool using_egl;
+extern QGLWidget *global_share_widget;
+// Create an offscreen surface with the default format (OpenGL 4.5 core).
+QSurface *create_surface();
+// Create an offscreen surface with an explicitly given format.
+QSurface *create_surface(const QSurfaceFormat &format);
+QSurface *create_surface_with_same_format(const QSurface *surface);
+QOpenGLContext *create_context(const QSurface *surface);
+bool make_current(QOpenGLContext *context, QSurface *surface);
+void delete_context(QOpenGLContext *context);
-#include "disk_space_estimator.h"
+#include "shared/disk_space_estimator.h"
+#include <memory>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/statfs.h>
-#include <memory>
-#include "metrics.h"
-#include "timebase.h"
+#include "shared/metrics.h"
+#include "shared/timebase.h"
+
+using namespace std;
DiskSpaceEstimator::DiskSpaceEstimator(DiskSpaceEstimator::callback_t callback)
: callback(callback)
global_metrics.add("disk_free_bytes", &metric_disk_free_bytes, Metrics::TYPE_GAUGE);
}
-void DiskSpaceEstimator::report_write(const std::string &filename, uint64_t pts)
+// The caller tells us directly how many bytes were written, so just
+// accumulate into the running total and feed the common estimation path
+// (no stat() needed, unlike report_append()).
+void DiskSpaceEstimator::report_write(const string &filename, off_t bytes, uint64_t pts)
+{
+	total_size += bytes;
+	report_write_internal(filename, total_size, pts);
+}
+
+void DiskSpaceEstimator::report_append(const string &filename, uint64_t pts)
{
if (filename != last_filename) {
last_filename = filename;
measure_points.clear();
}
+ struct stat st;
+ if (stat(filename.c_str(), &st) == -1) {
+ perror(filename.c_str());
+ return;
+ }
+
+ report_write_internal(filename, st.st_size, pts);
+}
+
+void DiskSpaceEstimator::report_write_internal(const string &filename, off_t file_size, uint64_t pts)
+{
// Reject points that are out-of-order (happens with B-frames).
- if (!measure_points.empty() && pts < measure_points.back().pts) {
+ if (!measure_points.empty() && pts <= measure_points.back().pts) {
return;
}
measure_points.pop_front();
}
- struct stat st;
- if (stat(filename.c_str(), &st) == -1) {
- perror(filename.c_str());
- return;
- }
-
struct statfs fst;
if (statfs(filename.c_str(), &fst) == -1) {
perror(filename.c_str());
metric_disk_free_bytes = free_bytes;
if (!measure_points.empty()) {
- double bytes_per_second = double(st.st_size - measure_points.front().size) /
+ double bytes_per_second = double(file_size - measure_points.front().size) /
(pts - measure_points.front().pts) * TIMEBASE;
double seconds_left = free_bytes / bytes_per_second;
}
}
- measure_points.push_back({ pts, st.st_size });
+ measure_points.push_back({ pts, file_size });
}
DiskSpaceEstimator *global_disk_space_estimator = nullptr; // Created in MainWindow::MainWindow().
//
// The bitrate is measured over a simple 30-second sliding window.
-#include <stdint.h>
-#include <sys/types.h>
#include <atomic>
#include <deque>
#include <functional>
+#include <stdint.h>
#include <string>
+#include <sys/types.h>
-#include "timebase.h"
+#include "shared/timebase.h"
-class DiskSpaceEstimator
-{
+class DiskSpaceEstimator {
public:
typedef std::function<void(off_t free_bytes, double estimated_seconds_left)> callback_t;
DiskSpaceEstimator(callback_t callback);
+ // Report that a video frame with the given pts and size has just been
+ // written (possibly appended) to the given file.
+ //
+ // <pts> is taken to be in TIMEBASE units (see shared/timebase.h).
+ void report_write(const std::string &filename, off_t bytes, uint64_t pts);
+
// Report that a video frame with the given pts has just been written
// to the given file, so the estimator should stat the file and see
// by how much it grew since last time. Called by the Mux object
// responsible for writing to the stream on disk.
//
// If the filename changed since last time, the estimation is reset.
- // <pts> is taken to be in TIMEBASE units (see timebase.h).
- void report_write(const std::string &filename, uint64_t pts);
+ // <pts> is taken to be in TIMEBASE units (see shared/timebase.h).
+ //
+ // You should probably not mix this and report_write() on the same
+ // object. Really, report_write() matches Futatabi's controlled writes
+ // to a custom format, and report_append() matches Nageru's use of Mux
+ // (where we don't see the bytes flowing past).
+ void report_append(const std::string &filename, uint64_t pts);
private:
static constexpr int64_t window_length = 30 * TIMEBASE;
+ void report_write_internal(const std::string &filename, off_t file_size, uint64_t pts);
+
callback_t callback;
- std::string last_filename;
struct MeasurePoint {
uint64_t pts;
std::deque<MeasurePoint> measure_points;
uint64_t last_pts_reported = 0;
+ off_t total_size = 0; // For report_write().
+ std::string last_filename; // For report_append().
+
// Metrics.
std::atomic<int64_t> metric_disk_free_bytes{-1};
};
-#include "httpd.h"
+#include "shared/httpd.h"
#include <assert.h>
#include <byteswap.h>
#include <endian.h>
+#include <memory>
#include <microhttpd.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
-#include <memory>
extern "C" {
#include <libavutil/avutil.h>
}
-#include "defs.h"
-#include "metacube2.h"
-#include "metrics.h"
+#include "shared/shared_defs.h"
+#include "shared/metacube2.h"
+#include "shared/metrics.h"
struct MHD_Connection;
struct MHD_Response;
int HTTPD::answer_to_connection(MHD_Connection *connection,
const char *url, const char *method,
- const char *version, const char *upload_data,
- size_t *upload_data_size, void **con_cls)
+ const char *version, const char *upload_data,
+ size_t *upload_data_size, void **con_cls)
{
// See if the URL ends in “.metacube”.
HTTPD::Stream::Framing framing;
ssize_t HTTPD::Stream::reader_callback(uint64_t pos, char *buf, size_t max)
{
unique_lock<mutex> lock(buffer_mutex);
- has_buffered_data.wait(lock, [this]{ return should_quit || !buffered_data.empty(); });
+ has_buffered_data.wait(lock, [this] { return should_quit || !buffered_data.empty(); });
if (should_quit) {
return 0;
}
buffered_data.emplace_back((char *)&packet, sizeof(packet));
}
- has_buffered_data.notify_all();
+ has_buffered_data.notify_all();
}
void HTTPD::Stream::stop()
// A class dealing with stream output to HTTP.
-#include <stddef.h>
-#include <stdint.h>
-#include <sys/types.h>
#include <atomic>
#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>
#include <set>
+#include <stddef.h>
+#include <stdint.h>
#include <string>
+#include <sys/types.h>
#include <unordered_map>
#include <utility>
NO_CORS_POLICY,
ALLOW_ALL_ORIGINS
};
- void add_endpoint(const std::string &url, const EndpointCallback &callback, CORSPolicy cors_policy) {
+ void add_endpoint(const std::string &url, const EndpointCallback &callback, CORSPolicy cors_policy)
+ {
endpoints[url] = Endpoint{ callback, cors_policy };
}
void start(int port);
void stop();
void add_data(StreamType stream_type, const char *buf, size_t size, bool keyframe, int64_t time, AVRational timebase);
- int64_t get_num_connected_clients() const {
+ int64_t get_num_connected_clients() const
+ {
return metric_num_connected_clients.load();
}
static void free_stream(void *cls);
-
class Stream {
public:
enum Framing {
std::string header[NUM_STREAM_TYPES];
// Metrics.
- std::atomic<int64_t> metric_num_connected_clients{0};
+ std::atomic<int64_t> metric_num_connected_clients{ 0 };
};
#endif // !defined(_HTTPD_H)
-#include <cstdint>
#include <algorithm>
#include <assert.h>
+#include <cstdint>
#if __SSE2__
#include <immintrin.h>
#endif
assert(((limit - src) % 64) == 0);
#if __AVX2__
- const __m256i * __restrict in = (const __m256i *)src;
- __m256i * __restrict out1 = (__m256i *)dest1;
- __m256i * __restrict out2 = (__m256i *)dest2;
+ const __m256i *__restrict in = (const __m256i *)src;
+ __m256i *__restrict out1 = (__m256i *)dest1;
+ __m256i *__restrict out2 = (__m256i *)dest2;
__m256i shuffle_cw = _mm256_set_epi8(
15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
--- /dev/null
+qt5 = import('qt5')
+qt5deps = dependency('qt5', modules: ['OpenGL'])
+libmicrohttpddep = dependency('libmicrohttpd')
+
+# Code shared between Nageru and friends, built as a static library.
+srcs = ['memcpy_interleaved.cpp', 'metacube2.cpp', 'ffmpeg_raii.cpp', 'mux.cpp', 'metrics.cpp', 'context.cpp', 'httpd.cpp', 'disk_space_estimator.cpp', 'read_file.cpp']
+shared = static_library('shared', srcs, include_directories: top_include, dependencies: [qt5deps, libmicrohttpddep])
+shareddep = declare_dependency(
+  include_directories: top_include,
+  link_with: shared)
+
+# bin2h embeds data files (e.g. shaders) as C++ byte arrays.
+# Note: no backslash continuation needed (or supported) in Meson;
+# newlines inside the parentheses continue the call implicitly.
+bin2h = executable('bin2h', 'bin2h.cpp')
+bin2h_gen = generator(bin2h,
+  output : ['@PLAINNAME@.cpp'],
+  arguments : ['@INPUT@', '@PLAINNAME@', '@OUTPUT@'])
-#include "metrics.h"
+#include "shared/metrics.h"
#include <assert.h>
#include <math.h>
-#include "mux.h"
+#include "shared/mux.h"
+#include <algorithm>
#include <assert.h>
+#include <mutex>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <algorithm>
-#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include <libavutil/rational.h>
}
-#include "defs.h"
-#include "flags.h"
-#include "metrics.h"
-#include "timebase.h"
+#include "shared/metrics.h"
+#include "shared/shared_defs.h"
+#include "shared/timebase.h"
using namespace std;
const AVFormatContext * const ctx;
};
-Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const vector<MuxMetrics *> &metrics)
+Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const string &video_extradata, const AVCodecParameters *audio_codecpar, AVColorSpace color_space, WithAudio with_audio, int time_base, function<void(int64_t)> write_callback, WriteStrategy write_strategy, const vector<MuxMetrics *> &metrics)
: write_strategy(write_strategy), avctx(avctx), write_callback(write_callback), metrics(metrics)
{
avstream_video = avformat_new_stream(avctx, nullptr);
avstream_video->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
if (video_codec == CODEC_H264) {
avstream_video->codecpar->codec_id = AV_CODEC_ID_H264;
- } else {
- assert(video_codec == CODEC_NV12);
+ } else if (video_codec == CODEC_NV12) {
avstream_video->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
avstream_video->codecpar->codec_tag = avcodec_pix_fmt_to_codec_tag(AV_PIX_FMT_NV12);
+ } else {
+ assert(video_codec == CODEC_MJPEG);
+ avstream_video->codecpar->codec_id = AV_CODEC_ID_MJPEG;
}
avstream_video->codecpar->width = width;
avstream_video->codecpar->height = height;
avstream_video->codecpar->color_primaries = AVCOL_PRI_BT709; // RGB colorspace (inout_format.color_space).
avstream_video->codecpar->color_trc = AVCOL_TRC_IEC61966_2_1; // Gamma curve (inout_format.gamma_curve).
// YUV colorspace (output_ycbcr_format.luma_coefficients).
- if (global_flags.ycbcr_rec709_coefficients) {
- avstream_video->codecpar->color_space = AVCOL_SPC_BT709;
- } else {
- avstream_video->codecpar->color_space = AVCOL_SPC_SMPTE170M;
- }
+ avstream_video->codecpar->color_space = color_space;
avstream_video->codecpar->color_range = AVCOL_RANGE_MPEG; // Full vs. limited range (output_ycbcr_format.full_range).
avstream_video->codecpar->chroma_location = AVCHROMA_LOC_LEFT; // Chroma sample location. See chroma_offset_0[] in Mixer::subsample_chroma().
avstream_video->codecpar->field_order = AV_FIELD_PROGRESSIVE;
memcpy(avstream_video->codecpar->extradata, video_extradata.data(), video_extradata.size());
}
- avstream_audio = avformat_new_stream(avctx, nullptr);
- if (avstream_audio == nullptr) {
- fprintf(stderr, "avformat_new_stream() failed\n");
- exit(1);
- }
- avstream_audio->time_base = AVRational{1, time_base};
- if (avcodec_parameters_copy(avstream_audio->codecpar, audio_codecpar) < 0) {
- fprintf(stderr, "avcodec_parameters_copy() failed\n");
- exit(1);
+ if (with_audio == WITH_AUDIO) {
+ avstream_audio = avformat_new_stream(avctx, nullptr);
+ if (avstream_audio == nullptr) {
+ fprintf(stderr, "avformat_new_stream() failed\n");
+ exit(1);
+ }
+ avstream_audio->time_base = AVRational{1, time_base};
+ if (avcodec_parameters_copy(avstream_audio->codecpar, audio_codecpar) < 0) {
+ fprintf(stderr, "avcodec_parameters_copy() failed\n");
+ exit(1);
+ }
+ } else {
+ assert(with_audio == WITHOUT_AUDIO);
+ avstream_audio = nullptr;
}
AVDictionary *options = NULL;
lock_guard<mutex> lock(mu);
if (write_strategy == WriteStrategy::WRITE_BACKGROUND) {
packet_queue.push_back(QueuedPacket{ av_packet_clone(&pkt_copy), pts });
- if (plug_count == 0) packet_queue_ready.notify_all();
+ if (plug_count == 0)
+ packet_queue_ready.notify_all();
} else if (plug_count > 0) {
packet_queue.push_back(QueuedPacket{ av_packet_clone(&pkt_copy), pts });
} else {
int64_t old_pos = avctx->pb->pos;
if (av_interleaved_write_frame(avctx, const_cast<AVPacket *>(&pkt)) < 0) {
fprintf(stderr, "av_interleaved_write_frame() failed\n");
- exit(1);
+ abort();
}
avio_flush(avctx->pb);
for (MuxMetrics *metric : metrics) {
void Mux::thread_func()
{
+ pthread_setname_np(pthread_self(), "Mux");
+
unique_lock<mutex> lock(mu);
for ( ;; ) {
packet_queue_ready.wait(lock, [this]() {
#include <thread>
#include <vector>
-#include "timebase.h"
+#include "shared/timebase.h"
struct MuxMetrics {
// “written” will usually be equal video + audio + mux overhead,
}
};
+// Translate the Rec. 709 flag into the corresponding FFmpeg color-space
+// enum (Rec. 601/SMPTE 170M otherwise).
+inline AVColorSpace get_color_space(bool ycbcr_rec709_coefficients)
+{
+	return ycbcr_rec709_coefficients ? AVCOL_SPC_BT709 : AVCOL_SPC_SMPTE170M;
+}
+
class Mux {
public:
enum Codec {
CODEC_H264,
CODEC_NV12, // Uncompressed 4:2:0.
+ CODEC_MJPEG
+ };
+ enum WithAudio {
+ WITH_AUDIO,
+ WITHOUT_AUDIO
};
enum WriteStrategy {
// add_packet() will write the packet immediately, unless plugged.
// the just-written frame. (write_callback can be nullptr.)
// Does not take ownership of <metrics>; elements in there, if any,
// will be added to.
- Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
+ Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, AVColorSpace color_space, WithAudio with_audio, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
~Mux();
void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase = { 1, TIMEBASE }, int stream_index_override = -1);
--- /dev/null
+#include "shared/read_file.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+// Read the contents of <filename> from disk and return them as a string.
+// If the file cannot be opened, fall back to the memory area
+// [start, start + size) compiled in by bin2h. (We prefer disk if we can,
+// since that makes it possible to work on shaders without recompiling
+// all the time.) Exits the process on any other I/O error.
+string read_file(const string &filename, const unsigned char *start, const size_t size)
+{
+	FILE *fp = fopen(filename.c_str(), "r");
+	if (fp == nullptr) {
+		if (start != nullptr) {
+			return string(reinterpret_cast<const char *>(start),
+			              reinterpret_cast<const char *>(start) + size);
+		}
+
+		perror(filename.c_str());
+		exit(1);
+	}
+
+	if (fseek(fp, 0, SEEK_END) == -1) {
+		perror("fseek(SEEK_END)");
+		exit(1);
+	}
+
+	// ftell() returns long (not int), and -1 on error.
+	long disk_size = ftell(fp);
+	if (disk_size == -1) {
+		perror("ftell");
+		exit(1);
+	}
+
+	if (fseek(fp, 0, SEEK_SET) == -1) {
+		perror("fseek(SEEK_SET)");
+		exit(1);
+	}
+
+	string str;
+	str.resize(disk_size);
+	// fread() returns the number of complete items read (here 0 or 1);
+	// it never returns -1, so check the item count and distinguish a
+	// stream error from a short read with ferror(). An empty file is
+	// fine and must not be reported as a short read.
+	if (disk_size > 0 && fread(&str[0], disk_size, 1, fp) != 1) {
+		if (ferror(fp)) {
+			perror("fread");
+		} else {
+			fprintf(stderr, "Short read when trying to read %ld bytes from %s\n",
+				disk_size, filename.c_str());
+		}
+		exit(1);
+	}
+	fclose(fp);
+
+	return str;
+}
--- /dev/null
+#ifndef _READ_FILE_H
+#define _READ_FILE_H 1
+
+#include <string>
+
+#include <stddef.h>  // For size_t, used in the declaration below.
+#include <stdint.h>
+
+// Read the contents of <filename> and return it as a string.
+// If the file does not exist, which is typical outside of development,
+// return the given memory area instead (presumably created by bin2h).
+
+std::string read_file(const std::string &filename, const unsigned char *start = nullptr, const size_t size = 0);
+
+#endif
public:
RefCountedGLsync() {}
- RefCountedGLsync(GLenum condition, GLbitfield flags)
+ RefCountedGLsync(GLenum condition, GLbitfield flags)
: RefCountedGLsyncBase(locked_glFenceSync(condition, flags), glDeleteSync) {}
private:
--- /dev/null
+#ifndef _SHARED_DEFS_H
+#define _SHARED_DEFS_H 1
+
+// Mux-related constants shared between Nageru and friends.
+//
+// NOTE(review): LIBAVFORMAT_VERSION_INT and AV_VERSION_INT come from
+// libavformat; this header assumes the includer has pulled in the
+// libavformat headers first, or the #if below will not evaluate as
+// intended — confirm all includers do so.
+
+// This flag is only supported in FFmpeg 3.3 and up, and we only require 3.1.
+#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 71, 100)
+#define MUX_SKIP_TRAILER "+skip_trailer"
+#else
+#define MUX_SKIP_TRAILER ""
+#endif
+
+#define MUX_OPTS { \
+	/* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \
+	{ "movflags", "empty_moov+frag_keyframe+default_base_moof" MUX_SKIP_TRAILER }, \
+	\
+	/* Make for somewhat less bursty stream output when using .mov. */ \
+	{ "frag_duration", "125000" }, \
+	\
+	/* Keep nut muxer from using unlimited amounts of memory. */ \
+	{ "write_index", "0" } \
+}
+
+// In bytes. Beware, if too small, stream clients will start dropping data.
+// For mov, you want this at 10MB or so (for the reason mentioned above),
+// but for nut, there's no flushing, so such a large mux buffer would cause
+// the output to be very uneven.
+#define MUX_BUFFER_SIZE 10485760
+
+#endif // !defined(_SHARED_DEFS_H)
#ifndef _TIMEBASE_H
#define _TIMEBASE_H 1
+#include <ratio>
+
// Common timebase that allows us to represent one frame exactly in all the
// relevant frame rates:
//
// but can do at least 50 and 60 precisely, and months of streaming.
#define COARSE_TIMEBASE 300
+using TimebaseRatio = std::ratio<1, TIMEBASE>;
+
#endif // !defined(_TIMEBASE_H)