From e0cb348ca42ae7057f8f5acee92a23e7eb26075f Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Thu, 28 Mar 2019 20:25:59 +0100 Subject: [PATCH] Make multitrack export include audio. --- futatabi/export.cpp | 74 ++++++++++++++++++++++++++---------- futatabi/frame_on_disk.cpp | 43 +++++++++++++-------- futatabi/frame_on_disk.h | 7 +++- futatabi/jpeg_frame_view.cpp | 2 +- futatabi/video_stream.cpp | 4 +- nageru/defs.h | 1 - shared/shared_defs.h | 2 + 7 files changed, 92 insertions(+), 41 deletions(-) diff --git a/futatabi/export.cpp b/futatabi/export.cpp index 5b8da13..1b7c59c 100644 --- a/futatabi/export.cpp +++ b/futatabi/export.cpp @@ -6,6 +6,7 @@ #include "frame_on_disk.h" #include "player.h" #include "shared/ffmpeg_raii.h" +#include "shared/shared_defs.h" #include "shared/timebase.h" #include @@ -23,22 +24,22 @@ using namespace std; namespace { // Only used in export_cliplist_clip_multitrack_triggered. -struct BufferedJPEG { +struct BufferedFrame { int64_t pts; - unsigned stream_idx; - string jpeg; + unsigned video_stream_idx; + string data; }; -bool write_buffered_jpegs(AVFormatContext *avctx, const vector &buffered_jpegs) +bool write_buffered_frames(AVFormatContext *avctx, const vector &buffered_frames) { - for (const BufferedJPEG &jpeg : buffered_jpegs) { + for (const BufferedFrame &frame : buffered_frames) { AVPacket pkt; av_init_packet(&pkt); - pkt.stream_index = jpeg.stream_idx; - pkt.data = (uint8_t *)jpeg.jpeg.data(); - pkt.size = jpeg.jpeg.size(); - pkt.pts = jpeg.pts; - pkt.dts = jpeg.pts; + pkt.stream_index = frame.video_stream_idx; + pkt.data = (uint8_t *)frame.data.data(); + pkt.size = frame.data.size(); + pkt.pts = frame.pts; + pkt.dts = frame.pts; pkt.flags = AV_PKT_FLAG_KEY; if (av_write_frame(avctx, &pkt) < 0) { @@ -123,6 +124,23 @@ void export_multitrack_clip(const string &filename, const Clip &clip) video_streams.push_back(avstream_video); } + // Similar, for audio streams. + vector audio_streams; + for (unsigned stream_idx = 0; stream_idx <= last_stream_idx; ++stream_idx) { + AVStream *avstream_audio = avformat_new_stream(avctx, nullptr); + if (avstream_audio == nullptr) { + fprintf(stderr, "avformat_new_stream() failed\n"); + abort(); + } + avstream_audio->time_base = AVRational{ 1, TIMEBASE }; + avstream_audio->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + avstream_audio->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE; + avstream_audio->codecpar->channel_layout = AV_CH_LAYOUT_STEREO; + avstream_audio->codecpar->channels = 2; + avstream_audio->codecpar->sample_rate = OUTPUT_FREQUENCY; + audio_streams.push_back(avstream_audio); + } + if (avformat_write_header(avctx, nullptr) < 0) { QMessageBox msgbox; msgbox.setText("Writing header failed"); @@ -140,7 +158,7 @@ void export_multitrack_clip(const string &filename, const Clip &clip) // We buffer up to 1000 frames at a time, in a hope that we can reduce // the amount of seeking needed on rotational media. - vector buffered_jpegs; + vector buffered_frames; size_t frames_written = 0; while (num_streams_with_frames_left > 0) { // Find the stream with the lowest frame. Lower stream indexes win. @@ -163,21 +181,35 @@ void export_multitrack_clip(const string &filename, const Clip &clip) --num_streams_with_frames_left; } } - string jpeg = readers[first_frame_stream_idx].read_frame(first_frame); - int64_t scaled_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE }, - video_streams[first_frame_stream_idx]->time_base); - buffered_jpegs.emplace_back(BufferedJPEG{ scaled_pts, first_frame_stream_idx, std::move(jpeg) }); - if (buffered_jpegs.size() >= 1000) { - if (!write_buffered_jpegs(avctx, buffered_jpegs)) { + + FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_audio=*/true); + + // Write audio. (Before video, since that's what we expect on input.) + if (!frame.audio.empty()) { + unsigned audio_stream_idx = first_frame_stream_idx + video_streams.size(); + int64_t scaled_audio_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE }, + audio_streams[first_frame_stream_idx]->time_base); + buffered_frames.emplace_back(BufferedFrame{ scaled_audio_pts, audio_stream_idx, std::move(frame.audio) }); + } + + // Write video. + unsigned video_stream_idx = first_frame_stream_idx; + int64_t scaled_video_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE }, + video_streams[first_frame_stream_idx]->time_base); + buffered_frames.emplace_back(BufferedFrame{ scaled_video_pts, video_stream_idx, std::move(frame.video) }); + + // Flush to disk if required. + if (buffered_frames.size() >= 1000) { + if (!write_buffered_frames(avctx, buffered_frames)) { QMessageBox msgbox; msgbox.setText("Writing frames failed"); msgbox.exec(); unlink(filename.c_str()); return; } - frames_written += buffered_jpegs.size(); + frames_written += buffered_frames.size(); progress.setValue(frames_written); - buffered_jpegs.clear(); + buffered_frames.clear(); } if (progress.wasCanceled()) { unlink(filename.c_str()); @@ -185,14 +217,14 @@ void export_multitrack_clip(const string &filename, const Clip &clip) } } - if (!write_buffered_jpegs(avctx, buffered_jpegs)) { + if (!write_buffered_frames(avctx, buffered_frames)) { QMessageBox msgbox; msgbox.setText("Writing frames failed"); msgbox.exec(); unlink(filename.c_str()); return; } - frames_written += buffered_jpegs.size(); + frames_written += buffered_frames.size(); progress.setValue(frames_written); } diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp index 9a87094..6bdaf23 100644 --- a/futatabi/frame_on_disk.cpp +++ b/futatabi/frame_on_disk.cpp @@ -47,7 +47,28 @@ FrameReader::~FrameReader() } } -string FrameReader::read_frame(FrameOnDisk frame) +namespace { + +string read_string(int fd, size_t size, off_t offset) +{ + string str; + str.resize(size); + size_t str_offset = 0; + while (str_offset < size) { + int ret = pread(fd, &str[str_offset], size - str_offset, offset + str_offset); + if (ret <= 0) { + perror("pread"); + abort(); + } + + str_offset += ret; + } + return str; +} + +} // namespace + +FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio) { steady_clock::time_point start = steady_clock::now(); @@ -76,20 +97,10 @@ string FrameReader::read_frame(FrameOnDisk frame) ++metric_frame_opened_files; } - // TODO: Read the audio. - - string str; - str.resize(frame.size); - off_t offset = 0; - while (offset < frame.size) { - int ret = pread(fd, &str[offset], frame.size - offset, frame.offset + offset); - if (ret <= 0) { - string filename = frame_filenames[frame.filename_idx]; - perror("pread"); - abort(); - } - - offset += ret; + Frame ret; + ret.video = read_string(fd, frame.size, frame.offset); + if (read_audio) { + ret.audio = read_string(fd, frame.audio_size, frame.offset + frame.size); } steady_clock::time_point stop = steady_clock::now(); @@ -98,5 +109,5 @@ string FrameReader::read_frame(FrameOnDisk frame) metric_frame_read_bytes += frame.size; ++metric_frame_read_frames; - return str; + return ret; } diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h index dbe1211..360bd23 100644 --- a/futatabi/frame_on_disk.h +++ b/futatabi/frame_on_disk.h @@ -41,7 +41,12 @@ class FrameReader { public: FrameReader(); ~FrameReader(); - std::string read_frame(FrameOnDisk frame); + + struct Frame { + std::string video; + std::string audio; + }; + Frame read_frame(FrameOnDisk frame, bool read_audio); private: int fd = -1; diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp index 198affd..c1afafd 100644 --- a/futatabi/jpeg_frame_view.cpp +++ b/futatabi/jpeg_frame_view.cpp @@ -238,7 +238,7 @@ shared_ptr decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi ++metric_jpeg_cache_miss_frames; *did_decode = true; - shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec)); + shared_ptr frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_audio=*/false).video); lock_guard lock(cache_mu); cache_bytes_used += frame_size(*frame); diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp index 4b0336c..06acfd2 100644 --- a/futatabi/video_stream.cpp +++ b/futatabi/video_stream.cpp @@ -335,6 +335,8 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts, { fprintf(stderr, "output_pts=%" PRId64 " original input_pts=%" PRId64 "\n", output_pts, frame.pts); + // TODO: Write audio if at the right speed. + QueuedFrame qf; qf.local_pts = local_pts; qf.type = QueuedFrame::ORIGINAL; @@ -342,7 +344,7 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts, qf.display_func = move(display_func); qf.queue_spot_holder = move(queue_spot_holder); qf.subtitle = subtitle; - qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame))); + qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame, /*read_audio=*/false).video)); lock_guard lock(queue_lock); frame_queue.push_back(move(qf)); diff --git a/nageru/defs.h b/nageru/defs.h index a990330..6d684e7 100644 --- a/nageru/defs.h +++ b/nageru/defs.h @@ -3,7 +3,6 @@ #include -#define OUTPUT_FREQUENCY 48000 // Currently needs to be exactly 48000, since bmusb outputs in that. #define MAX_FPS 60 #define FAKE_FPS 25 // Must be an integer. #define MAX_VIDEO_CARDS 16 diff --git a/shared/shared_defs.h b/shared/shared_defs.h index 20c56bf..62b719d 100644 --- a/shared/shared_defs.h +++ b/shared/shared_defs.h @@ -1,6 +1,8 @@ #ifndef _SHARED_DEFS_H #define _SHARED_DEFS_H 1 +#define OUTPUT_FREQUENCY 48000 // Currently needs to be exactly 48000, since bmusb outputs in that. + #define MUX_OPTS { \ /* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \ { "movflags", "empty_moov+frag_keyframe+default_base_moof+skip_trailer" }, \ -- 2.39.2