git.sesse.net Git - nageru/commitdiff
Add audio output when playing at 100% speed.
author    Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 30 Mar 2019 13:45:58 +0000 (14:45 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 30 Mar 2019 13:45:58 +0000 (14:45 +0100)
Fairly untested, but should work both on single-track export and
on realtime output. No audio stretching or pitch shift, so audio only
plays when we're at regular speed. Note: There's no monitor output yet,
so the Futatabi operator will be deaf. There are also no VU bars.
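
In short, the audio handling added below boils down to this decision per queued
frame (a summary sketch of the new code in video_stream.cpp, not a verbatim excerpt):

    // For each QueuedFrame coming out of the encode queue:
    if (qf.type == QueuedFrame::SILENCE) {
            // The player slept (e.g. while waiting between clips); cover the gap with silence.
            add_silence(qf.output_pts, qf.silence_length_pts);
    } else if (qf.audio.empty()) {
            // Off-speed playback, refresh frames and fades carry no PCM;
            // synthesize one frame's worth of silence instead.
            add_silence(qf.output_pts, lrint(double(TIMEBASE) / global_flags.output_framerate));
    } else {
            // At (essentially) 100% speed, pass the frame's stored 32-bit stereo PCM
            // straight through to the mux on the new audio stream.
            mux->add_packet(/* ... */);
    }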

futatabi/export.cpp
futatabi/frame_on_disk.cpp
futatabi/frame_on_disk.h
futatabi/jpeg_frame_view.cpp
futatabi/player.cpp
futatabi/player.h
futatabi/video_stream.cpp
futatabi/video_stream.h

diff --git a/futatabi/export.cpp b/futatabi/export.cpp
index 1b7c59c90ae75f853b1d9eaf6b7f27a190d06996..7833f91f1d17030433e214304b34f00caf501d57 100644
@@ -182,7 +182,7 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
                        }
                }
 
-               FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_audio=*/true);
+               FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_video=*/true, /*read_audio=*/true);
 
                // Write audio. (Before video, since that's what we expect on input.)
                if (!frame.audio.empty()) {
diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp
index 6bdaf23a25569edb97d691a26ec1f3949dc792b1..f9a5639259be7a153777feb4ca76bded039d6185 100644
@@ -4,6 +4,7 @@
 
 #include <atomic>
 #include <chrono>
+#include <assert.h>
 #include <fcntl.h>
 #include <mutex>
 #include <unistd.h>
@@ -68,8 +69,9 @@ string read_string(int fd, size_t size, off_t offset)
 
 }  // namespace
 
-FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio)
+FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_video, bool read_audio)
 {
+       assert(read_video || read_audio);
        steady_clock::time_point start = steady_clock::now();
 
        if (int(frame.filename_idx) != last_filename_idx) {
@@ -98,7 +100,9 @@ FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio)
        }
 
        Frame ret;
-       ret.video = read_string(fd, frame.size, frame.offset);
+       if (read_video) {
+               ret.video = read_string(fd, frame.size, frame.offset);
+       }
        if (read_audio) {
                ret.audio = read_string(fd, frame.audio_size, frame.offset + frame.size);
        }
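
A frame's audio is stored directly after its JPEG data in the same file, so an
audio-only read is simply a second read at frame.offset + frame.size. For
illustration, a helper along the lines of the read_string() declared above might
look like this (a sketch only, not the actual implementation in this file):

    #include <string>
    #include <unistd.h>

    // Illustrative sketch: read <size> bytes at <offset> into a string, handling short reads.
    static std::string read_string_sketch(int fd, size_t size, off_t offset)
    {
            std::string ret(size, '\0');
            size_t done = 0;
            while (done < size) {
                    ssize_t n = pread(fd, &ret[done], size - done, offset + done);
                    if (n <= 0) {
                            ret.resize(done);  // EOF or error; return what we got.
                            break;
                    }
                    done += n;
            }
            return ret;
    }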
diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h
index 360bd23ce1222271cfef1f82df5006de0b141a60..35f375bb78ef37a6522b351cea981e4a0643bf4e 100644
@@ -46,7 +46,7 @@ public:
                std::string video;
                std::string audio;
        };
-       Frame read_frame(FrameOnDisk frame, bool read_audio);
+       Frame read_frame(FrameOnDisk frame, bool read_video, bool read_audio);
 
 private:
        int fd = -1;
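
With the extra flag, call sites fetch exactly what they need; for example (taken
from the call sites changed below):

    // Audio-only read, as used for interpolated frames in video_stream.cpp:
    std::string pcm = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;

    // Video-only read, as before, e.g. in jpeg_frame_view.cpp:
    std::string jpeg = frame_reader.read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video;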
diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp
index c1afafd765ca32f803c35c23b821c949bb807e91..943b3e15ac5858886472c1866371debc9e6eb733 100644
@@ -238,7 +238,7 @@ shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi
        ++metric_jpeg_cache_miss_frames;
 
        *did_decode = true;
-       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_audio=*/false).video);
+       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video);
 
        lock_guard<mutex> lock(cache_mu);
        cache_bytes_used += frame_size(*frame);
diff --git a/futatabi/player.cpp b/futatabi/player.cpp
index 611f0abce4151ceff3a9adc8e51da2dc14e987b3..779e685ec10dc62484cd16deee7e9fe080bd7072 100644
@@ -139,7 +139,14 @@ void Player::play_playlist_once()
        }
 
        steady_clock::duration time_slept = steady_clock::now() - before_sleep;
-       pts += duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+       int64_t slept_pts = duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+       if (slept_pts > 0) {
+               if (video_stream != nullptr) {
+                       // Add silence for the time we're waiting.
+                       video_stream->schedule_silence(steady_clock::now(), pts, slept_pts, QueueSpotHolder());
+               }
+               pts += slept_pts;
+       }
 
        if (!clip_ready) {
                if (video_stream != nullptr) {
@@ -205,6 +212,10 @@ void Player::play_playlist_once()
                                break;
                        }
 
+                       // Only play audio if we're within 0.1% of normal speed. We could do
+                       // stretching or pitch shift later if it becomes needed.
+                       bool play_audio = clip->speed * master_speed >= 0.999 && clip->speed * master_speed <= 1.001;
+
                        {
                                lock_guard<mutex> lock(queue_state_mu);
                                if (splice_ready) {
@@ -349,7 +360,7 @@ void Player::play_playlist_once()
                        if (frame_lower.pts == frame_upper.pts || global_flags.interpolation_quality == 0 || video_stream == nullptr) {
                                display_single_frame(primary_stream_idx, frame_lower, secondary_stream_idx,
                                                     secondary_frame, fade_alpha, next_frame_start, /*snapped=*/false,
-                                                    subtitle);
+                                                    subtitle, play_audio);
                                continue;
                        }
 
@@ -362,7 +373,7 @@ void Player::play_playlist_once()
                                if (fabs(snap_frame.pts - in_pts) < pts_snap_tolerance) {
                                        display_single_frame(primary_stream_idx, snap_frame, secondary_stream_idx,
                                                             secondary_frame, fade_alpha, next_frame_start, /*snapped=*/true,
-                                                            subtitle);
+                                                            subtitle, play_audio);
                                        in_pts_origin += snap_frame.pts - in_pts;
                                        snapped = true;
                                        break;
@@ -418,7 +429,7 @@ void Player::play_playlist_once()
                        video_stream->schedule_interpolated_frame(
                                next_frame_start, pts, display_func, QueueSpotHolder(this),
                                frame_lower, frame_upper, alpha,
-                               secondary_frame, fade_alpha, subtitle);
+                               secondary_frame, fade_alpha, subtitle, play_audio);
                        last_pts_played = in_pts;  // Not really needed; only previews use last_pts_played.
                }
 
@@ -439,7 +450,7 @@ void Player::play_playlist_once()
        }
 }
 
-void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle)
+void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio)
 {
        auto display_func = [this, primary_stream_idx, primary_frame, secondary_frame, fade_alpha] {
                if (destination != nullptr) {
@@ -458,7 +469,7 @@ void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &pri
                        }
                        video_stream->schedule_original_frame(
                                frame_start, pts, display_func, QueueSpotHolder(this),
-                               primary_frame, subtitle);
+                               primary_frame, subtitle, play_audio);
                } else {
                        assert(secondary_frame.pts != -1);
                        // NOTE: We could be increasing unused metrics for previews, but that's harmless.
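
The ±0.1% tolerance from the hunk above, written out as a standalone helper
(a sketch; should_play_audio is a hypothetical name, not part of the patch):

    // Hypothetical helper illustrating the speed gate used in play_playlist_once() above.
    static bool should_play_audio(double clip_speed, double master_speed)
    {
            double effective = clip_speed * master_speed;
            return effective >= 0.999 && effective <= 1.001;
    }

    // e.g. should_play_audio(1.0, 1.0) == true, but should_play_audio(0.5, 1.0) == false,
    // so half-speed playback gets synthesized silence instead of pitch-shifted audio.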
diff --git a/futatabi/player.h b/futatabi/player.h
index da5a4435da936778dbec3c606c39a312e45af953..b912b8c2ef2ab9b7117ed152f5bd8589be9eefad 100644
@@ -94,7 +94,7 @@ public:
 private:
        void thread_func(AVFormatContext *file_avctx);
        void play_playlist_once();
-       void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle);
+       void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio);
        void open_output_stream();
        static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
        int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
index 06acfd2601a43ebe4ff17af8b0f7fffee788fc25..9647836feaa4294a4653bdb8eb7b84b8af77da27 100644
@@ -13,6 +13,7 @@ extern "C" {
 #include "player.h"
 #include "shared/context.h"
 #include "shared/httpd.h"
+#include "shared/shared_defs.h"
 #include "shared/mux.h"
 #include "util.h"
 #include "ycbcr_converter.h"
@@ -286,10 +287,19 @@ void VideoStream::start()
                avctx->flags = AVFMT_FLAG_CUSTOM_IO;
        }
 
+       AVCodecParameters *audio_codecpar = avcodec_parameters_alloc();
+
+       audio_codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+       audio_codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
+       audio_codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
+       audio_codecpar->channels = 2;
+       audio_codecpar->sample_rate = OUTPUT_FREQUENCY;
+
        size_t width = global_flags.width, height = global_flags.height;  // Doesn't matter for MJPEG.
-       mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", /*audio_codec_parameters=*/nullptr,
+       mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", audio_codecpar,
                          AVCOL_SPC_BT709, COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}, Mux::WITH_SUBTITLES));
 
+       avcodec_parameters_free(&audio_codecpar);
        encode_thread = thread(&VideoStream::encode_thread_func, this);
 }
 
@@ -331,12 +341,10 @@ void VideoStream::clear_queue()
 void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
                                           int64_t output_pts, function<void()> &&display_func,
                                           QueueSpotHolder &&queue_spot_holder,
-                                          FrameOnDisk frame, const string &subtitle)
+                                          FrameOnDisk frame, const string &subtitle, bool include_audio)
 {
        fprintf(stderr, "output_pts=%" PRId64 "  original      input_pts=%" PRId64 "\n", output_pts, frame.pts);
 
-       // TODO: Write audio if at the right speed.
-
        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::ORIGINAL;
@@ -344,7 +352,9 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;
-       qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame, /*read_audio=*/false).video));
+       FrameReader::Frame read_frame = frame_reader.read_frame(frame, /*read_video=*/true, include_audio);
+       qf.encoded_jpeg.reset(new string(move(read_frame.video)));
+       qf.audio = move(read_frame.audio);
 
        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
@@ -424,7 +434,8 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
                                               int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
                                               QueueSpotHolder &&queue_spot_holder,
                                               FrameOnDisk frame1, FrameOnDisk frame2,
-                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle)
+                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle,
+                                              bool play_audio)
 {
        if (secondary_frame.pts != -1) {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f  secondary_pts=%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
@@ -452,6 +463,10 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
        qf.local_pts = local_pts;
        qf.subtitle = subtitle;
 
+       if (play_audio) {
+               qf.audio = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;
+       }
+
        check_error();
 
        // Convert frame0 and frame1 to OpenGL textures.
@@ -563,6 +578,20 @@ void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
        queue_changed.notify_all();
 }
 
+void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t output_pts,
+                                   int64_t length_pts, QueueSpotHolder &&queue_spot_holder)
+{
+       QueuedFrame qf;
+       qf.type = QueuedFrame::SILENCE;
+       qf.output_pts = output_pts;
+       qf.queue_spot_holder = move(queue_spot_holder);
+       qf.silence_length_pts = length_pts;
+
+       lock_guard<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
 namespace {
 
 shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
@@ -662,6 +691,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::FADED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
@@ -678,6 +709,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
@@ -705,6 +738,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::REFRESH) {
                        AVPacket pkt;
                        av_init_packet(&pkt);
@@ -713,6 +748,10 @@ void VideoStream::encode_thread_func()
                        pkt.size = last_frame.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+
+                       add_audio_or_silence(qf);  // Definitely silence.
+               } else if (qf.type == QueuedFrame::SILENCE) {
+                       add_silence(qf.output_pts, qf.silence_length_pts);
                } else {
                        assert(false);
                }
@@ -746,3 +785,38 @@ int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType ty
        }
        return buf_size;
 }
+
+void VideoStream::add_silence(int64_t pts, int64_t length_pts)
+{
+       // At 59.94, this will never quite add up (even discounting refresh frames,
+       // which have unpredictable length), but hopefully, the player in the other
+       // end should be able to stretch silence easily enough.
+       long num_samples = lrint(length_pts * double(OUTPUT_FREQUENCY) / double(TIMEBASE)) * 2;
+       uint8_t *zero = (uint8_t *)calloc(num_samples, sizeof(int32_t));
+
+       AVPacket pkt;
+       av_init_packet(&pkt);
+       pkt.stream_index = 1;
+       pkt.data = zero;
+       pkt.size = num_samples * sizeof(int32_t);
+       pkt.flags = AV_PKT_FLAG_KEY;
+       mux->add_packet(pkt, pts, pts);
+
+       free(zero);
+}
+
+void VideoStream::add_audio_or_silence(const QueuedFrame &qf)
+{
+       if (qf.audio.empty()) {
+               int64_t frame_length = lrint(double(TIMEBASE) / global_flags.output_framerate);
+               add_silence(qf.output_pts, frame_length);
+       } else {
+               AVPacket pkt;
+               av_init_packet(&pkt);
+               pkt.stream_index = 1;
+               pkt.data = (uint8_t *)qf.audio.data();
+               pkt.size = qf.audio.size();
+               pkt.flags = AV_PKT_FLAG_KEY;
+               mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+       }
+}
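
To see why the comment in add_silence() says things never quite add up at 59.94:
one frame of silence covers TIMEBASE / framerate in pts, which converts to a
non-integer number of samples per channel. A small worked example (assuming
OUTPUT_FREQUENCY is 48000; that value is an assumption here, taken from the
usual Nageru output rate):

    #include <cmath>
    #include <cstdio>

    int main()
    {
            // Assumption for illustration: OUTPUT_FREQUENCY is 48000 Hz.
            const double output_frequency = 48000.0;
            const double fps = 60000.0 / 1001.0;  // 59.94
            const double samples_per_frame = output_frequency / fps;  // per channel
            printf("%.4f samples/channel per frame, rounded to %ld by lrint()\n",
                   samples_per_frame, std::lrint(samples_per_frame));
            // Prints 800.8000 -> 801; the ~0.2-sample error per frame is what the
            // receiving player is expected to absorb by stretching silence slightly.
            return 0;
    }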
diff --git a/futatabi/video_stream.h b/futatabi/video_stream.h
index 26cb7c808a537af72d0f01cccbc4f0331185a912..f156be9af662578ee96cabec201f3337a08b09bc 100644
@@ -47,23 +47,29 @@ public:
        void schedule_original_frame(std::chrono::steady_clock::time_point,
                                     int64_t output_pts, std::function<void()> &&display_func,
                                     QueueSpotHolder &&queue_spot_holder,
-                                    FrameOnDisk frame, const std::string &subtitle);
+                                    FrameOnDisk frame, const std::string &subtitle,
+                                    bool include_audio);
        void schedule_faded_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                  std::function<void()> &&display_func,
                                  QueueSpotHolder &&queue_spot_holder,
                                  FrameOnDisk frame1, FrameOnDisk frame2,
-                                 float fade_alpha, const std::string &subtitle);
+                                 float fade_alpha, const std::string &subtitle);  // Always no audio.
        void schedule_interpolated_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                         std::function<void(std::shared_ptr<Frame>)> &&display_func,
                                         QueueSpotHolder &&queue_spot_holder,
                                         FrameOnDisk frame1, FrameOnDisk frame2,
                                         float alpha, FrameOnDisk secondary_frame,  // Empty = no secondary (fade) frame.
-                                        float fade_alpha, const std::string &subtitle);
+                                        float fade_alpha, const std::string &subtitle,
+                                        bool include_audio);
        void schedule_refresh_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                    std::function<void()> &&display_func,
-                                   QueueSpotHolder &&queue_spot_holder, const std::string &subtitle);
+                                   QueueSpotHolder &&queue_spot_holder, const std::string &subtitle);  // Always no audio.
+       void schedule_silence(std::chrono::steady_clock::time_point, int64_t output_pts,
+                             int64_t length_pts, QueueSpotHolder &&queue_spot_holder);
 
 private:
+       struct QueuedFrame;
+
        FrameReader frame_reader;
 
        void encode_thread_func();
@@ -72,6 +78,8 @@ private:
 
        static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
        int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+       void add_silence(int64_t pts, int64_t length_pts);
+       void add_audio_or_silence(const QueuedFrame &qf);
 
        // Allocated at the very start; if we're empty, we start dropping frames
        // (so that we don't build up an infinite interpolation backlog).
@@ -110,13 +118,13 @@ private:
                std::chrono::steady_clock::time_point local_pts;
 
                int64_t output_pts;
-               enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH } type;
+               enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH, SILENCE } type;
 
                // For original frames only. Made move-only so we know explicitly
                // we don't copy these ~200 kB files around inadvertedly.
                std::unique_ptr<std::string> encoded_jpeg;
 
-               // For everything except original frames.
+               // For everything except original frames and silence.
                FrameOnDisk frame1;
 
                // For fades only (including fades against interpolated frames).
@@ -135,6 +143,13 @@ private:
 
                std::string subtitle;  // Blank for none.
 
+               // Audio, in stereo interleaved 32-bit PCM. If empty and not of type SILENCE, one frame's worth of silence samples
+               // is synthesized.
+               std::string audio;
+
+               // For silence frames only.
+               int64_t silence_length_pts;
+
                QueueSpotHolder queue_spot_holder;
        };
        std::deque<QueuedFrame> frame_queue;  // Under <queue_lock>.
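
For a sense of scale of the new QueuedFrame::audio field (a back-of-the-envelope
sketch, again assuming 48 kHz output as above):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
            // Stereo interleaved 32-bit PCM, as declared for the audio stream in video_stream.cpp.
            const long samples_per_frame = 801;  // one 59.94 fps frame at 48 kHz (see the example above)
            const long bytes = samples_per_frame * 2 * static_cast<long>(sizeof(int32_t));
            printf("%ld bytes of audio per frame\n", bytes);  // ~6.4 kB, small next to the ~200 kB JPEGs
            return 0;
    }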