git.sesse.net Git - nageru/commitdiff
Cache computed flow between textures.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 24 Dec 2018 13:26:44 +0000 (14:26 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 24 Dec 2018 13:26:44 +0000 (14:26 +0100)
Flow computation is over half the computational burden for making a frame,
and when slowing down by more than 2x (so that there are several interpolated
frames between each pair of originals), we'll reuse the same flow many times.

futatabi/frame_on_disk.h
futatabi/video_stream.cpp
futatabi/video_stream.h

index 2ac86fca697291f2d7d22ff2c92985aef79a618d..7807c52b4a69cdb2a08819bb10636f9e59bdc44e 100644 (file)
@@ -20,6 +20,14 @@ struct FrameOnDisk {
 extern std::vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
 extern std::vector<std::string> frame_filenames;  // Under frame_mu.
 
+static bool inline operator==(const FrameOnDisk &a, const FrameOnDisk &b)  // Equality of two FrameOnDisk values, decided by pts, offset, filename_idx and size.
+{
+       return a.pts == b.pts &&  // True only when all four compared fields match.
+               a.offset == b.offset &&
+               a.filename_idx == b.filename_idx &&
+               a.size == b.size;
+}
+
 // A helper class to read frames from disk. It caches the file descriptor
 // so that the kernel has a better chance of doing readahead when it sees
 // the sequential reads. (For this reason, each display has a private
index 6ee9608816f1940be13c06ba8ca9fc2edacd6afa..27e2c4a81e499ddc031b21434c05fd0490f1f37f 100644 (file)
@@ -236,7 +236,12 @@ VideoStream::VideoStream(AVFormatContext *file_avctx)
        last_frame = encode_jpeg(y.get(), cb_or_cr.get(), cb_or_cr.get(), global_flags.width, global_flags.height);
 }
 
-VideoStream::~VideoStream() {}
+VideoStream::~VideoStream()  // Hands the cached flow texture, if one is held, to compute_flow->release_texture().
+{
+       if (last_flow_tex != 0) {  // 0 is the initial value of last_flow_tex, i.e. no flow has been cached.
+               compute_flow->release_texture(last_flow_tex);
+       }
+}
 
 void VideoStream::start()
 {
@@ -287,7 +292,9 @@ void VideoStream::clear_queue()
        for (const QueuedFrame &qf : q) {
                if (qf.type == QueuedFrame::INTERPOLATED ||
                    qf.type == QueuedFrame::FADED_INTERPOLATED) {
-                       compute_flow->release_texture(qf.flow_tex);
+                       if (qf.flow_tex != 0) {
+                               compute_flow->release_texture(qf.flow_tex);
+                       }
                }
                if (qf.type == QueuedFrame::INTERPOLATED) {
                        interpolate->release_texture(qf.output_tex);
@@ -436,13 +443,30 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
        glGenerateTextureMipmap(resources->gray_tex);
        check_error();
 
-       // Compute the interpolated frame.
-       qf.flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
-       check_error();
+       GLuint flow_tex;
+       if (last_flow_tex != 0 && frame1 == last_frame1 && frame2 == last_frame2) {
+               // Reuse the flow from previous computation. This frequently happens
+               // if we slow down by more than 2x, so that there are multiple interpolated
+               // frames between each original.
+               flow_tex = last_flow_tex;
+               qf.flow_tex = 0;
+       } else {
+               // Cache miss, so release last_flow_tex.
+               qf.flow_tex = last_flow_tex;
+
+               // Compute the flow.
+               flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+               check_error();
+
+               // Store the flow texture for possible reuse next frame.
+               last_flow_tex = flow_tex;
+               last_frame1 = frame1;
+               last_frame2 = frame2;
+       }
 
        if (secondary_frame.pts != -1) {
                // Fade. First kick off the interpolation.
-               tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, global_flags.width, global_flags.height, alpha);
+               tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();
 
                // Now decode the image we are fading against.
@@ -457,7 +481,7 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
 
                interpolate_no_split->release_texture(qf.output_tex);
        } else {
-               tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, qf.flow_tex, global_flags.width, global_flags.height, alpha);
+               tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();
 
                // Subsample and split Cb/Cr.
@@ -467,6 +491,9 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
        // We could have released qf.flow_tex here, but to make sure we don't cause a stall
        // when trying to reuse it for the next frame, we can just as well hold on to it
        // and release it only when the readback is done.
+       //
+       // TODO: This is maybe less relevant now that qf.flow_tex contains the texture we used
+       // _last_ frame, not this one.
 
        // Read it down (asynchronously) to the CPU.
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
@@ -624,7 +651,9 @@ void VideoStream::encode_thread_func()
 
                        // Now JPEG encode it, and send it on to the stream.
                        vector<uint8_t> jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height);
-                       compute_flow->release_texture(qf.flow_tex);
+                       if (qf.flow_tex != 0) {
+                               compute_flow->release_texture(qf.flow_tex);
+                       }
                        if (qf.type != QueuedFrame::FADED_INTERPOLATED) {
                                interpolate->release_texture(qf.output_tex);
                                interpolate->release_texture(qf.cbcr_tex);
index d522ab1e8f5a98f9494004599b9c1aa2a74a76c0..422b1522a28a993d056d5eea2344a05ce13243c6 100644 (file)
@@ -121,7 +121,7 @@ private:
                float alpha;
                BorrowedInterpolatedFrameResources resources;
                RefCountedGLsync fence;  // Set when the interpolated image is read back to the CPU.
-               GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else.
+               GLuint flow_tex, output_tex, cbcr_tex;  // Released in the receiving thread; not really used for anything else. flow_tex will typically even be from a previous frame.
                FrameOnDisk id;
 
                std::function<void()> display_func;  // Called when the image is done decoding.
@@ -146,6 +146,10 @@ private:
        std::unique_ptr<Interpolate> interpolate, interpolate_no_split;
        std::unique_ptr<ChromaSubsampler> chroma_subsampler;
 
+       // Cached flow computation from previous frame, if any.
+       GLuint last_flow_tex = 0;
+       FrameOnDisk last_frame1, last_frame2;
+
        std::vector<uint8_t> last_frame;
 };