git.sesse.net Git - nageru/commitdiff
Add audio output when playing at 100% speed.
author    Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 30 Mar 2019 13:45:58 +0000 (14:45 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 30 Mar 2019 13:45:58 +0000 (14:45 +0100)
Fairly untested, but should work both on single-track export and
on realtime output. No audio stretching or pitch shift, so audio only
plays when we're at regular speed. Note: There's no monitor output yet,
so the Futatabi operator will be deaf. There are also no VU bars.
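
In short, the audio handling added below boils down to this decision per queued
frame (a summary sketch of the new code in video_stream.cpp, not a verbatim excerpt):

    // For each QueuedFrame coming out of the encode queue:
    if (qf.type == QueuedFrame::SILENCE) {
            // The player slept (e.g. while waiting between clips); cover the gap with silence.
            add_silence(qf.output_pts, qf.silence_length_pts);
    } else if (qf.audio.empty()) {
            // Off-speed playback, refresh frames and fades carry no PCM;
            // synthesize one frame's worth of silence instead.
            add_silence(qf.output_pts, lrint(double(TIMEBASE) / global_flags.output_framerate));
    } else {
            // At (essentially) 100% speed, pass the frame's stored 32-bit stereo PCM
            // straight through to the mux on the new audio stream.
            mux->add_packet(/* ... */);
    }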

futatabi/export.cpp
futatabi/frame_on_disk.cpp
futatabi/frame_on_disk.h
futatabi/jpeg_frame_view.cpp
futatabi/player.cpp
futatabi/player.h
futatabi/video_stream.cpp
futatabi/video_stream.h

diff --git a/futatabi/export.cpp b/futatabi/export.cpp
index 1b7c59c90ae75f853b1d9eaf6b7f27a190d06996..7833f91f1d17030433e214304b34f00caf501d57 100644
@@ -182,7 +182,7 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
                        }
                }
 
-               FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_audio=*/true);
+               FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_video=*/true, /*read_audio=*/true);
 
                // Write audio. (Before video, since that's what we expect on input.)
                if (!frame.audio.empty()) {
diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp
index 6bdaf23a25569edb97d691a26ec1f3949dc792b1..f9a5639259be7a153777feb4ca76bded039d6185 100644
@@ -4,6 +4,7 @@
 
 #include <atomic>
 #include <chrono>
+#include <assert.h>
 #include <fcntl.h>
 #include <mutex>
 #include <unistd.h>
@@ -68,8 +69,9 @@ string read_string(int fd, size_t size, off_t offset)
 
 }  // namespace
 
-FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio)
+FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_video, bool read_audio)
 {
+       assert(read_video || read_audio);
        steady_clock::time_point start = steady_clock::now();
 
        if (int(frame.filename_idx) != last_filename_idx) {
@@ -98,7 +100,9 @@ FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio)
        }
 
        Frame ret;
-       ret.video = read_string(fd, frame.size, frame.offset);
+       if (read_video) {
+               ret.video = read_string(fd, frame.size, frame.offset);
+       }
        if (read_audio) {
                ret.audio = read_string(fd, frame.audio_size, frame.offset + frame.size);
        }
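
A frame's audio is stored directly after its JPEG data in the same file, so an
audio-only read is simply a second read at frame.offset + frame.size. For
illustration, a helper along the lines of the read_string() declared above might
look like this (a sketch only, not the actual implementation in this file):

    #include <string>
    #include <unistd.h>

    // Illustrative sketch: read <size> bytes at <offset> into a string, handling short reads.
    static std::string read_string_sketch(int fd, size_t size, off_t offset)
    {
            std::string ret(size, '\0');
            size_t done = 0;
            while (done < size) {
                    ssize_t n = pread(fd, &ret[done], size - done, offset + done);
                    if (n <= 0) {
                            ret.resize(done);  // EOF or error; return what we got.
                            break;
                    }
                    done += n;
            }
            return ret;
    }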
diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h
index 360bd23ce1222271cfef1f82df5006de0b141a60..35f375bb78ef37a6522b351cea981e4a0643bf4e 100644
@@ -46,7 +46,7 @@ public:
                std::string video;
                std::string audio;
        };
-       Frame read_frame(FrameOnDisk frame, bool read_audio);
+       Frame read_frame(FrameOnDisk frame, bool read_video, bool read_audio);
 
 private:
        int fd = -1;
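
With the extra flag, call sites fetch exactly what they need; for example (taken
from the call sites changed below):

    // Audio-only read, as used for interpolated frames in video_stream.cpp:
    std::string pcm = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;

    // Video-only read, as before, e.g. in jpeg_frame_view.cpp:
    std::string jpeg = frame_reader.read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video;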
diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp
index c1afafd765ca32f803c35c23b821c949bb807e91..943b3e15ac5858886472c1866371debc9e6eb733 100644
@@ -238,7 +238,7 @@ shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi
        ++metric_jpeg_cache_miss_frames;
 
        *did_decode = true;
-       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_audio=*/false).video);
+       shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video);
 
        lock_guard<mutex> lock(cache_mu);
        cache_bytes_used += frame_size(*frame);
diff --git a/futatabi/player.cpp b/futatabi/player.cpp
index 611f0abce4151ceff3a9adc8e51da2dc14e987b3..779e685ec10dc62484cd16deee7e9fe080bd7072 100644
@@ -139,7 +139,14 @@ void Player::play_playlist_once()
        }
 
        steady_clock::duration time_slept = steady_clock::now() - before_sleep;
-       pts += duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+       int64_t slept_pts = duration_cast<duration<size_t, TimebaseRatio>>(time_slept).count();
+       if (slept_pts > 0) {
+               if (video_stream != nullptr) {
+                       // Add silence for the time we're waiting.
+                       video_stream->schedule_silence(steady_clock::now(), pts, slept_pts, QueueSpotHolder());
+               }
+               pts += slept_pts;
+       }
 
        if (!clip_ready) {
                if (video_stream != nullptr) {
@@ -205,6 +212,10 @@ void Player::play_playlist_once()
                                break;
                        }
 
+                       // Only play audio if we're within 0.1% of normal speed. We could do
+                       // stretching or pitch shift later if it becomes needed.
+                       bool play_audio = clip->speed * master_speed >= 0.999 && clip->speed * master_speed <= 1.001;
+
                        {
                                lock_guard<mutex> lock(queue_state_mu);
                                if (splice_ready) {
@@ -349,7 +360,7 @@ void Player::play_playlist_once()
                        if (frame_lower.pts == frame_upper.pts || global_flags.interpolation_quality == 0 || video_stream == nullptr) {
                                display_single_frame(primary_stream_idx, frame_lower, secondary_stream_idx,
                                                     secondary_frame, fade_alpha, next_frame_start, /*snapped=*/false,
-                                                    subtitle);
+                                                    subtitle, play_audio);
                                continue;
                        }
 
@@ -362,7 +373,7 @@ void Player::play_playlist_once()
                                if (fabs(snap_frame.pts - in_pts) < pts_snap_tolerance) {
                                        display_single_frame(primary_stream_idx, snap_frame, secondary_stream_idx,
                                                             secondary_frame, fade_alpha, next_frame_start, /*snapped=*/true,
-                                                            subtitle);
+                                                            subtitle, play_audio);
                                        in_pts_origin += snap_frame.pts - in_pts;
                                        snapped = true;
                                        break;
@@ -418,7 +429,7 @@ void Player::play_playlist_once()
                        video_stream->schedule_interpolated_frame(
                                next_frame_start, pts, display_func, QueueSpotHolder(this),
                                frame_lower, frame_upper, alpha,
-                               secondary_frame, fade_alpha, subtitle);
+                               secondary_frame, fade_alpha, subtitle, play_audio);
                        last_pts_played = in_pts;  // Not really needed; only previews use last_pts_played.
                }
 
@@ -439,7 +450,7 @@ void Player::play_playlist_once()
        }
 }
 
-void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle)
+void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio)
 {
        auto display_func = [this, primary_stream_idx, primary_frame, secondary_frame, fade_alpha] {
                if (destination != nullptr) {
@@ -458,7 +469,7 @@ void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &pri
                        }
                        video_stream->schedule_original_frame(
                                frame_start, pts, display_func, QueueSpotHolder(this),
-                               primary_frame, subtitle);
+                               primary_frame, subtitle, play_audio);
                } else {
                        assert(secondary_frame.pts != -1);
                        // NOTE: We could be increasing unused metrics for previews, but that's harmless.
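
The ±0.1% tolerance from the hunk above, written out as a standalone helper
(a sketch; should_play_audio is a hypothetical name, not part of the patch):

    // Hypothetical helper illustrating the speed gate used in play_playlist_once() above.
    static bool should_play_audio(double clip_speed, double master_speed)
    {
            double effective = clip_speed * master_speed;
            return effective >= 0.999 && effective <= 1.001;
    }

    // e.g. should_play_audio(1.0, 1.0) == true, but should_play_audio(0.5, 1.0) == false,
    // so half-speed playback gets synthesized silence instead of pitch-shifted audio.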
diff --git a/futatabi/player.h b/futatabi/player.h
index da5a4435da936778dbec3c606c39a312e45af953..b912b8c2ef2ab9b7117ed152f5bd8589be9eefad 100644
@@ -94,7 +94,7 @@ public:
 private:
        void thread_func(AVFormatContext *file_avctx);
        void play_playlist_once();
-       void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle);
+       void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio);
        void open_output_stream();
        static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
        int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
index 06acfd2601a43ebe4ff17af8b0f7fffee788fc25..9647836feaa4294a4653bdb8eb7b84b8af77da27 100644
@@ -13,6 +13,7 @@ extern "C" {
 #include "player.h"
 #include "shared/context.h"
 #include "shared/httpd.h"
+#include "shared/shared_defs.h"
 #include "shared/mux.h"
 #include "util.h"
 #include "ycbcr_converter.h"
@@ -286,10 +287,19 @@ void VideoStream::start()
                avctx->flags = AVFMT_FLAG_CUSTOM_IO;
        }
 
+       AVCodecParameters *audio_codecpar = avcodec_parameters_alloc();
+
+       audio_codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+       audio_codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
+       audio_codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
+       audio_codecpar->channels = 2;
+       audio_codecpar->sample_rate = OUTPUT_FREQUENCY;
+
        size_t width = global_flags.width, height = global_flags.height;  // Doesn't matter for MJPEG.
-       mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", /*audio_codec_parameters=*/nullptr,
+       mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", audio_codecpar,
                          AVCOL_SPC_BT709, COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}, Mux::WITH_SUBTITLES));
 
+       avcodec_parameters_free(&audio_codecpar);
        encode_thread = thread(&VideoStream::encode_thread_func, this);
 }
 
@@ -331,12 +341,10 @@ void VideoStream::clear_queue()
 void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
                                           int64_t output_pts, function<void()> &&display_func,
                                           QueueSpotHolder &&queue_spot_holder,
-                                          FrameOnDisk frame, const string &subtitle)
+                                          FrameOnDisk frame, const string &subtitle, bool include_audio)
 {
        fprintf(stderr, "output_pts=%" PRId64 "  original      input_pts=%" PRId64 "\n", output_pts, frame.pts);
 
-       // TODO: Write audio if at the right speed.
-
        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::ORIGINAL;
@@ -344,7 +352,9 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;
-       qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame, /*read_audio=*/false).video));
+       FrameReader::Frame read_frame = frame_reader.read_frame(frame, /*read_video=*/true, include_audio);
+       qf.encoded_jpeg.reset(new string(move(read_frame.video)));
+       qf.audio = move(read_frame.audio);
 
        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
@@ -424,7 +434,8 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
                                               int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
                                               QueueSpotHolder &&queue_spot_holder,
                                               FrameOnDisk frame1, FrameOnDisk frame2,
-                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle)
+                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle,
+                                              bool play_audio)
 {
        if (secondary_frame.pts != -1) {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f  secondary_pts=%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
@@ -452,6 +463,10 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
        qf.local_pts = local_pts;
        qf.subtitle = subtitle;
 
+       if (play_audio) {
+               qf.audio = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;
+       }
+
        check_error();
 
        // Convert frame0 and frame1 to OpenGL textures.
@@ -563,6 +578,20 @@ void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
        queue_changed.notify_all();
 }
 
+void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t output_pts,
+                                   int64_t length_pts, QueueSpotHolder &&queue_spot_holder)
+{
+       QueuedFrame qf;
+       qf.type = QueuedFrame::SILENCE;
+       qf.output_pts = output_pts;
+       qf.queue_spot_holder = move(queue_spot_holder);
+       qf.silence_length_pts = length_pts;
+
+       lock_guard<mutex> lock(queue_lock);
+       frame_queue.push_back(move(qf));
+       queue_changed.notify_all();
+}
+
 namespace {
 
 shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
@@ -662,6 +691,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::FADED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
@@ -678,6 +709,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
@@ -705,6 +738,8 @@ void VideoStream::encode_thread_func()
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);
+
+                       add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::REFRESH) {
                        AVPacket pkt;
                        av_init_packet(&pkt);
@@ -713,6 +748,10 @@ void VideoStream::encode_thread_func()
                        pkt.size = last_frame.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+
+                       add_audio_or_silence(qf);  // Definitely silence.
+               } else if (qf.type == QueuedFrame::SILENCE) {
+                       add_silence(qf.output_pts, qf.silence_length_pts);
                } else {
                        assert(false);
                }
@@ -746,3 +785,38 @@ int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType ty
        }
        return buf_size;
 }
+
+void VideoStream::add_silence(int64_t pts, int64_t length_pts)
+{
+       // At 59.94, this will never quite add up (even discounting refresh frames,
+       // which have unpredictable length), but hopefully, the player in the other
+       // end should be able to stretch silence easily enough.
+       long num_samples = lrint(length_pts * double(OUTPUT_FREQUENCY) / double(TIMEBASE)) * 2;
+       uint8_t *zero = (uint8_t *)calloc(num_samples, sizeof(int32_t));
+
+       AVPacket pkt;
+       av_init_packet(&pkt);
+       pkt.stream_index = 1;
+       pkt.data = zero;
+       pkt.size = num_samples * sizeof(int32_t);
+       pkt.flags = AV_PKT_FLAG_KEY;
+       mux->add_packet(pkt, pts, pts);
+
+       free(zero);
+}
+
+void VideoStream::add_audio_or_silence(const QueuedFrame &qf)
+{
+       if (qf.audio.empty()) {
+               int64_t frame_length = lrint(double(TIMEBASE) / global_flags.output_framerate);
+               add_silence(qf.output_pts, frame_length);
+       } else {
+               AVPacket pkt;
+               av_init_packet(&pkt);
+               pkt.stream_index = 1;
+               pkt.data = (uint8_t *)qf.audio.data();
+               pkt.size = qf.audio.size();
+               pkt.flags = AV_PKT_FLAG_KEY;
+               mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+       }
+}
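
To see why the comment in add_silence() says things never quite add up at 59.94:
one frame of silence covers TIMEBASE / framerate in pts, which converts to a
non-integer number of samples per channel. A small worked example (assuming
OUTPUT_FREQUENCY is 48000; that value is an assumption here, taken from the
usual Nageru output rate):

    #include <cmath>
    #include <cstdio>

    int main()
    {
            // Assumption for illustration: OUTPUT_FREQUENCY is 48000 Hz.
            const double output_frequency = 48000.0;
            const double fps = 60000.0 / 1001.0;  // 59.94
            const double samples_per_frame = output_frequency / fps;  // per channel
            printf("%.4f samples/channel per frame, rounded to %ld by lrint()\n",
                   samples_per_frame, std::lrint(samples_per_frame));
            // Prints 800.8000 -> 801; the ~0.2-sample error per frame is what the
            // receiving player is expected to absorb by stretching silence slightly.
            return 0;
    }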
diff --git a/futatabi/video_stream.h b/futatabi/video_stream.h
index 26cb7c808a537af72d0f01cccbc4f0331185a912..f156be9af662578ee96cabec201f3337a08b09bc 100644
@@ -47,23 +47,29 @@ public:
        void schedule_original_frame(std::chrono::steady_clock::time_point,
                                     int64_t output_pts, std::function<void()> &&display_func,
                                     QueueSpotHolder &&queue_spot_holder,
-                                    FrameOnDisk frame, const std::string &subtitle);
+                                    FrameOnDisk frame, const std::string &subtitle,
+                                    bool include_audio);
        void schedule_faded_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                  std::function<void()> &&display_func,
                                  QueueSpotHolder &&queue_spot_holder,
                                  FrameOnDisk frame1, FrameOnDisk frame2,
-                                 float fade_alpha, const std::string &subtitle);
+                                 float fade_alpha, const std::string &subtitle);  // Always no audio.
        void schedule_interpolated_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                         std::function<void(std::shared_ptr<Frame>)> &&display_func,
                                         QueueSpotHolder &&queue_spot_holder,
                                         FrameOnDisk frame1, FrameOnDisk frame2,
                                         float alpha, FrameOnDisk secondary_frame,  // Empty = no secondary (fade) frame.
-                                        float fade_alpha, const std::string &subtitle);
+                                        float fade_alpha, const std::string &subtitle,
+                                        bool include_audio);
        void schedule_refresh_frame(std::chrono::steady_clock::time_point, int64_t output_pts,
                                    std::function<void()> &&display_func,
-                                   QueueSpotHolder &&queue_spot_holder, const std::string &subtitle);
+                                   QueueSpotHolder &&queue_spot_holder, const std::string &subtitle);  // Always no audio.
+       void schedule_silence(std::chrono::steady_clock::time_point, int64_t output_pts,
+                             int64_t length_pts, QueueSpotHolder &&queue_spot_holder);
 
 private:
+       struct QueuedFrame;
+
        FrameReader frame_reader;
 
        void encode_thread_func();
@@ -72,6 +78,8 @@ private:
 
        static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
        int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time);
+       void add_silence(int64_t pts, int64_t length_pts);
+       void add_audio_or_silence(const QueuedFrame &qf);
 
        // Allocated at the very start; if we're empty, we start dropping frames
        // (so that we don't build up an infinite interpolation backlog).
@@ -110,13 +118,13 @@ private:
                std::chrono::steady_clock::time_point local_pts;
 
                int64_t output_pts;
-               enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH } type;
+               enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH, SILENCE } type;
 
                // For original frames only. Made move-only so we know explicitly
                // we don't copy these ~200 kB files around inadvertedly.
                std::unique_ptr<std::string> encoded_jpeg;
 
-               // For everything except original frames.
+               // For everything except original frames and silence.
                FrameOnDisk frame1;
 
                // For fades only (including fades against interpolated frames).
@@ -135,6 +143,13 @@ private:
 
                std::string subtitle;  // Blank for none.
 
+               // Audio, in stereo interleaved 32-bit PCM. If empty and not of type SILENCE, one frame's worth of silence samples
+               // is synthesized.
+               std::string audio;
+
+               // For silence frames only.
+               int64_t silence_length_pts;
+
                QueueSpotHolder queue_spot_holder;
        };
        std::deque<QueuedFrame> frame_queue;  // Under <queue_lock>.
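
For a sense of scale of the new QueuedFrame::audio field (a back-of-the-envelope
sketch, again assuming 48 kHz output as above):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
            // Stereo interleaved 32-bit PCM, as declared for the audio stream in video_stream.cpp.
            const long samples_per_frame = 801;  // one 59.94 fps frame at 48 kHz (see the example above)
            const long bytes = samples_per_frame * 2 * static_cast<long>(sizeof(int32_t));
            printf("%ld bytes of audio per frame\n", bytes);  // ~6.4 kB, small next to the ~200 kB JPEGs
            return 0;
    }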