From adf28dcc8d96304785b05034c323e4c854c76896 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 30 Mar 2019 14:45:58 +0100 Subject: [PATCH] Add audio output when playing at 100% speed. Fairly untested, but should work both on single-track export and on realtime output. No audio stretching or pitch shift, so only plays when we're at regular speed. Note: There's no monitor output yet, so the Futatabi operator will be deaf. There are also no VU bars. --- futatabi/export.cpp | 2 +- futatabi/frame_on_disk.cpp | 8 +++- futatabi/frame_on_disk.h | 2 +- futatabi/jpeg_frame_view.cpp | 2 +- futatabi/player.cpp | 23 +++++++--- futatabi/player.h | 2 +- futatabi/video_stream.cpp | 86 +++++++++++++++++++++++++++++++++--- futatabi/video_stream.h | 27 ++++++++--- 8 files changed, 128 insertions(+), 24 deletions(-) diff --git a/futatabi/export.cpp b/futatabi/export.cpp index 1b7c59c..7833f91 100644 --- a/futatabi/export.cpp +++ b/futatabi/export.cpp @@ -182,7 +182,7 @@ void export_multitrack_clip(const string &filename, const Clip &clip) } } - FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_audio=*/true); + FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_video=*/true, /*read_audio=*/true); // Write audio. (Before video, since that's what we expect on input.) 
if (!frame.audio.empty()) { diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp index 6bdaf23..f9a5639 100644 --- a/futatabi/frame_on_disk.cpp +++ b/futatabi/frame_on_disk.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -68,8 +69,9 @@ string read_string(int fd, size_t size, off_t offset) } // namespace -FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio) +FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_video, bool read_audio) { + assert(read_video || read_audio); steady_clock::time_point start = steady_clock::now(); if (int(frame.filename_idx) != last_filename_idx) { @@ -98,7 +100,9 @@ FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio) } Frame ret; - ret.video = read_string(fd, frame.size, frame.offset); + if (read_video) { + ret.video = read_string(fd, frame.size, frame.offset); + } if (read_audio) { ret.audio = read_string(fd, frame.audio_size, frame.offset + frame.size); } diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h index 360bd23..35f375b 100644 --- a/futatabi/frame_on_disk.h +++ b/futatabi/frame_on_disk.h @@ -46,7 +46,7 @@ public: std::string video; std::string audio; }; - Frame read_frame(FrameOnDisk frame, bool read_audio); + Frame read_frame(FrameOnDisk frame, bool read_video, bool read_audio); private: int fd = -1; diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp index c1afafd..943b3e1 100644 --- a/futatabi/jpeg_frame_view.cpp +++ b/futatabi/jpeg_frame_view.cpp @@ -238,7 +238,7 @@ shared_ptr decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi ++metric_jpeg_cache_miss_frames; *did_decode = true; - shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_audio=*/false).video); + shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_video=*/true, /*read_audio=*/false).video); lock_guard lock(cache_mu); cache_bytes_used += frame_size(*frame); 
diff --git a/futatabi/player.cpp b/futatabi/player.cpp index 611f0ab..779e685 100644 --- a/futatabi/player.cpp +++ b/futatabi/player.cpp @@ -139,7 +139,14 @@ void Player::play_playlist_once() } steady_clock::duration time_slept = steady_clock::now() - before_sleep; - pts += duration_cast>(time_slept).count(); + int64_t slept_pts = duration_cast>(time_slept).count(); + if (slept_pts > 0) { + if (video_stream != nullptr) { + // Add silence for the time we're waiting. + video_stream->schedule_silence(steady_clock::now(), pts, slept_pts, QueueSpotHolder()); + } + pts += slept_pts; + } if (!clip_ready) { if (video_stream != nullptr) { @@ -205,6 +212,10 @@ void Player::play_playlist_once() break; } + // Only play audio if we're within 0.1% of normal speed. We could do + // stretching or pitch shift later if it becomes needed. + bool play_audio = clip->speed * master_speed >= 0.999 && clip->speed * master_speed <= 1.001; + { lock_guard lock(queue_state_mu); if (splice_ready) { @@ -349,7 +360,7 @@ void Player::play_playlist_once() if (frame_lower.pts == frame_upper.pts || global_flags.interpolation_quality == 0 || video_stream == nullptr) { display_single_frame(primary_stream_idx, frame_lower, secondary_stream_idx, secondary_frame, fade_alpha, next_frame_start, /*snapped=*/false, - subtitle); + subtitle, play_audio); continue; } @@ -362,7 +373,7 @@ void Player::play_playlist_once() if (fabs(snap_frame.pts - in_pts) < pts_snap_tolerance) { display_single_frame(primary_stream_idx, snap_frame, secondary_stream_idx, secondary_frame, fade_alpha, next_frame_start, /*snapped=*/true, - subtitle); + subtitle, play_audio); in_pts_origin += snap_frame.pts - in_pts; snapped = true; break; @@ -418,7 +429,7 @@ void Player::play_playlist_once() video_stream->schedule_interpolated_frame( next_frame_start, pts, display_func, QueueSpotHolder(this), frame_lower, frame_upper, alpha, - secondary_frame, fade_alpha, subtitle); + secondary_frame, fade_alpha, subtitle, play_audio); last_pts_played 
= in_pts; // Not really needed; only previews use last_pts_played. } @@ -439,7 +450,7 @@ void Player::play_playlist_once() } } -void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle) +void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio) { auto display_func = [this, primary_stream_idx, primary_frame, secondary_frame, fade_alpha] { if (destination != nullptr) { @@ -458,7 +469,7 @@ void Player::display_single_frame(int primary_stream_idx, const FrameOnDisk &pri } video_stream->schedule_original_frame( frame_start, pts, display_func, QueueSpotHolder(this), - primary_frame, subtitle); + primary_frame, subtitle, play_audio); } else { assert(secondary_frame.pts != -1); // NOTE: We could be increasing unused metrics for previews, but that's harmless. 
diff --git a/futatabi/player.h b/futatabi/player.h index da5a443..b912b8c 100644 --- a/futatabi/player.h +++ b/futatabi/player.h @@ -94,7 +94,7 @@ public: private: void thread_func(AVFormatContext *file_avctx); void play_playlist_once(); - void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle); + void display_single_frame(int primary_stream_idx, const FrameOnDisk &primary_frame, int secondary_stream_idx, const FrameOnDisk &secondary_frame, double fade_alpha, std::chrono::steady_clock::time_point frame_start, bool snapped, const std::string &subtitle, bool play_audio); void open_output_stream(); static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp index 06acfd2..9647836 100644 --- a/futatabi/video_stream.cpp +++ b/futatabi/video_stream.cpp @@ -13,6 +13,7 @@ extern "C" { #include "player.h" #include "shared/context.h" #include "shared/httpd.h" +#include "shared/shared_defs.h" #include "shared/mux.h" #include "util.h" #include "ycbcr_converter.h" @@ -286,10 +287,19 @@ void VideoStream::start() avctx->flags = AVFMT_FLAG_CUSTOM_IO; } + AVCodecParameters *audio_codecpar = avcodec_parameters_alloc(); + + audio_codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + audio_codecpar->codec_id = AV_CODEC_ID_PCM_S32LE; + audio_codecpar->channel_layout = AV_CH_LAYOUT_STEREO; + audio_codecpar->channels = 2; + audio_codecpar->sample_rate = OUTPUT_FREQUENCY; + size_t width = global_flags.width, height = global_flags.height; // Doesn't matter for MJPEG. 
- mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", /*audio_codec_parameters=*/nullptr, + mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", audio_codecpar, AVCOL_SPC_BT709, COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}, Mux::WITH_SUBTITLES)); + avcodec_parameters_free(&audio_codecpar); encode_thread = thread(&VideoStream::encode_thread_func, this); } @@ -331,12 +341,10 @@ void VideoStream::clear_queue() void VideoStream::schedule_original_frame(steady_clock::time_point local_pts, int64_t output_pts, function &&display_func, QueueSpotHolder &&queue_spot_holder, - FrameOnDisk frame, const string &subtitle) + FrameOnDisk frame, const string &subtitle, bool include_audio) { fprintf(stderr, "output_pts=%" PRId64 " original input_pts=%" PRId64 "\n", output_pts, frame.pts); - // TODO: Write audio if at the right speed. - QueuedFrame qf; qf.local_pts = local_pts; qf.type = QueuedFrame::ORIGINAL; @@ -344,7 +352,9 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts, qf.display_func = move(display_func); qf.queue_spot_holder = move(queue_spot_holder); qf.subtitle = subtitle; - qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame, /*read_audio=*/false).video)); + FrameReader::Frame read_frame = frame_reader.read_frame(frame, /*read_video=*/true, include_audio); + qf.encoded_jpeg.reset(new string(move(read_frame.video))); + qf.audio = move(read_frame.audio); lock_guard lock(queue_lock); frame_queue.push_back(move(qf)); @@ -424,7 +434,8 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts int64_t output_pts, function)> &&display_func, QueueSpotHolder &&queue_spot_holder, FrameOnDisk frame1, FrameOnDisk frame2, - float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle) + float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle, + bool play_audio) { if (secondary_frame.pts 
!= -1) { fprintf(stderr, "output_pts=%" PRId64 " interpolated input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f secondary_pts=%" PRId64 " fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha); @@ -452,6 +463,10 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts qf.local_pts = local_pts; qf.subtitle = subtitle; + if (play_audio) { + qf.audio = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio; + } + check_error(); // Convert frame0 and frame1 to OpenGL textures. @@ -563,6 +578,20 @@ void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts, queue_changed.notify_all(); } +void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t output_pts, + int64_t length_pts, QueueSpotHolder &&queue_spot_holder) +{ + QueuedFrame qf; + qf.type = QueuedFrame::SILENCE; + qf.output_pts = output_pts; + qf.queue_spot_holder = move(queue_spot_holder); + qf.silence_length_pts = length_pts; + + lock_guard lock(queue_lock); + frame_queue.push_back(move(qf)); + queue_changed.notify_all(); +} + namespace { shared_ptr frame_from_pbo(void *contents, size_t width, size_t height) @@ -662,6 +691,8 @@ void VideoStream::encode_thread_func() pkt.flags = AV_PKT_FLAG_KEY; mux->add_packet(pkt, qf.output_pts, qf.output_pts); last_frame = move(jpeg); + + add_audio_or_silence(qf); } else if (qf.type == QueuedFrame::FADED) { glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED); @@ -678,6 +709,8 @@ void VideoStream::encode_thread_func() pkt.flags = AV_PKT_FLAG_KEY; mux->add_packet(pkt, qf.output_pts, qf.output_pts); last_frame = move(jpeg); + + add_audio_or_silence(qf); } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) { glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED); @@ -705,6 +738,8 @@ void VideoStream::encode_thread_func() pkt.flags = AV_PKT_FLAG_KEY; mux->add_packet(pkt, 
qf.output_pts, qf.output_pts); last_frame = move(jpeg); + + add_audio_or_silence(qf); } else if (qf.type == QueuedFrame::REFRESH) { AVPacket pkt; av_init_packet(&pkt); @@ -713,6 +748,10 @@ void VideoStream::encode_thread_func() pkt.size = last_frame.size(); pkt.flags = AV_PKT_FLAG_KEY; mux->add_packet(pkt, qf.output_pts, qf.output_pts); + + add_audio_or_silence(qf); // Definitely silence. + } else if (qf.type == QueuedFrame::SILENCE) { + add_silence(qf.output_pts, qf.silence_length_pts); } else { assert(false); } @@ -746,3 +785,49 @@ int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType ty } return buf_size; }
+
+// Sends length_pts (in TIMEBASE units) worth of all-zero stereo 32-bit
+// samples to the mux on stream 1, timestamped at pts.
+void VideoStream::add_silence(int64_t pts, int64_t length_pts)
+{
+	// At 59.94, this will never quite add up (even discounting refresh frames,
+	// which have unpredictable length), but hopefully, the player at the other
+	// end should be able to stretch silence easily enough.
+	long num_samples = lrint(length_pts * double(OUTPUT_FREQUENCY) / double(TIMEBASE)) * 2;
+	if (num_samples <= 0) {
+		// Nothing to emit (the length rounds down to zero samples, or it
+		// was bogus and negative); don't hand the mux an empty or
+		// negative-sized packet.
+		return;
+	}
+
+	// Zero-filled, and released automatically on every exit path; this
+	// replaces an unchecked calloc()/free() pair.
+	string zero(num_samples * sizeof(int32_t), '\0');
+
+	AVPacket pkt;
+	av_init_packet(&pkt);
+	pkt.stream_index = 1;  // Same stream that add_audio_or_silence() writes to.
+	pkt.data = reinterpret_cast<uint8_t *>(&zero[0]);
+	pkt.size = zero.size();
+	pkt.flags = AV_PKT_FLAG_KEY;
+	mux->add_packet(pkt, pts, pts);  // The buffer only needs to outlive this call.
+}
+
+// Writes the audio stored in a queued frame to stream 1; if the frame
+// carries no audio, writes one output frame's worth of silence instead.
+void VideoStream::add_audio_or_silence(const QueuedFrame &qf)
+{
+	if (qf.audio.empty()) {
+		int64_t frame_length = lrint(double(TIMEBASE) / global_flags.output_framerate);
+		add_silence(qf.output_pts, frame_length);
+	} else {
+		AVPacket pkt;
+		av_init_packet(&pkt);
+		pkt.stream_index = 1;
+		pkt.data = (uint8_t *)qf.audio.data();
+		pkt.size = qf.audio.size();
+		pkt.flags = AV_PKT_FLAG_KEY;
+		mux->add_packet(pkt, qf.output_pts, qf.output_pts);
+	}
+}
diff --git a/futatabi/video_stream.h b/futatabi/video_stream.h index 26cb7c8..f156be9 100644 --- a/futatabi/video_stream.h +++ b/futatabi/video_stream.h @@ -47,23 +47,29 @@ public: void
schedule_original_frame(std::chrono::steady_clock::time_point, int64_t output_pts, std::function &&display_func, QueueSpotHolder &&queue_spot_holder, - FrameOnDisk frame, const std::string &subtitle); + FrameOnDisk frame, const std::string &subtitle, + bool include_audio); void schedule_faded_frame(std::chrono::steady_clock::time_point, int64_t output_pts, std::function &&display_func, QueueSpotHolder &&queue_spot_holder, FrameOnDisk frame1, FrameOnDisk frame2, - float fade_alpha, const std::string &subtitle); + float fade_alpha, const std::string &subtitle); // Always no audio. void schedule_interpolated_frame(std::chrono::steady_clock::time_point, int64_t output_pts, std::function)> &&display_func, QueueSpotHolder &&queue_spot_holder, FrameOnDisk frame1, FrameOnDisk frame2, float alpha, FrameOnDisk secondary_frame, // Empty = no secondary (fade) frame. - float fade_alpha, const std::string &subtitle); + float fade_alpha, const std::string &subtitle, + bool include_audio); void schedule_refresh_frame(std::chrono::steady_clock::time_point, int64_t output_pts, std::function &&display_func, - QueueSpotHolder &&queue_spot_holder, const std::string &subtitle); + QueueSpotHolder &&queue_spot_holder, const std::string &subtitle); // Always no audio. + void schedule_silence(std::chrono::steady_clock::time_point, int64_t output_pts, + int64_t length_pts, QueueSpotHolder &&queue_spot_holder); private: + struct QueuedFrame; + FrameReader frame_reader; void encode_thread_func(); @@ -72,6 +78,8 @@ private: static int write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); int write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time); + void add_silence(int64_t pts, int64_t length_pts); + void add_audio_or_silence(const QueuedFrame &qf); // Allocated at the very start; if we're empty, we start dropping frames // (so that we don't build up an infinite interpolation backlog). 
@@ -110,13 +118,13 @@ private: std::chrono::steady_clock::time_point local_pts; int64_t output_pts; - enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH } type; + enum Type { ORIGINAL, FADED, INTERPOLATED, FADED_INTERPOLATED, REFRESH, SILENCE } type; // For original frames only. Made move-only so we know explicitly // we don't copy these ~200 kB files around inadvertedly. std::unique_ptr encoded_jpeg; - // For everything except original frames. + // For everything except original frames and silence. FrameOnDisk frame1; // For fades only (including fades against interpolated frames). @@ -135,6 +143,13 @@ private: std::string subtitle; // Blank for none. + // Audio, in stereo interleaved 32-bit PCM. If empty and not of type SILENCE, one frame's worth of silence samples + // is synthesized. + std::string audio; + + // For silence frames only. + int64_t silence_length_pts; + QueueSpotHolder queue_spot_holder; }; std::deque frame_queue; // Under . -- 2.39.2