From e0cb348ca42ae7057f8f5acee92a23e7eb26075f Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Thu, 28 Mar 2019 20:25:59 +0100
Subject: [PATCH] Make multitrack export include audio.

---
 futatabi/export.cpp          | 74 ++++++++++++++++++++++++++----------
 futatabi/frame_on_disk.cpp   | 43 +++++++++++++--------
 futatabi/frame_on_disk.h     |  7 +++-
 futatabi/jpeg_frame_view.cpp |  2 +-
 futatabi/video_stream.cpp    |  4 +-
 nageru/defs.h                |  1 -
 shared/shared_defs.h         |  2 +
 7 files changed, 92 insertions(+), 41 deletions(-)
diff --git a/futatabi/export.cpp b/futatabi/export.cpp
index 5b8da13..1b7c59c 100644
--- a/futatabi/export.cpp
+++ b/futatabi/export.cpp
@@ -6,6 +6,7 @@
 #include "frame_on_disk.h"
 #include "player.h"
 #include "shared/ffmpeg_raii.h"
+#include "shared/shared_defs.h"
 #include "shared/timebase.h"
 
 #include <QMessageBox>
@@ -23,22 +24,22 @@ using namespace std;
 namespace {
 
 // Only used in export_cliplist_clip_multitrack_triggered.
-struct BufferedJPEG {
+struct BufferedFrame {  // One muxer packet (video or audio) kept in memory until the next flush.
 	int64_t pts;
-	unsigned stream_idx;
-	string jpeg;
+	unsigned video_stream_idx;  // Becomes AVPacket::stream_index; despite the name, audio packets store their audio stream's index here.
+	string data;  // Packet payload, handed to av_write_frame() as-is.
 };
 
-bool write_buffered_jpegs(AVFormatContext *avctx, const vector<BufferedJPEG> &buffered_jpegs)
+bool write_buffered_frames(AVFormatContext *avctx, const vector<BufferedFrame> &buffered_frames)
 {
-	for (const BufferedJPEG &jpeg : buffered_jpegs) {
+	for (const BufferedFrame &frame : buffered_frames) {
 		AVPacket pkt;
 		av_init_packet(&pkt);
-		pkt.stream_index = jpeg.stream_idx;
-		pkt.data = (uint8_t *)jpeg.jpeg.data();
-		pkt.size = jpeg.jpeg.size();
-		pkt.pts = jpeg.pts;
-		pkt.dts = jpeg.pts;
+		pkt.stream_index = frame.video_stream_idx;
+		pkt.data = (uint8_t *)frame.data.data();
+		pkt.size = frame.data.size();
+		pkt.pts = frame.pts;
+		pkt.dts = frame.pts;
 		pkt.flags = AV_PKT_FLAG_KEY;
 
 		if (av_write_frame(avctx, &pkt) < 0) {
@@ -123,6 +124,23 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
 		video_streams.push_back(avstream_video);
 	}
 
+	// Similar, for audio streams. These must be added after all the video streams: the export loop addresses audio stream N as N + video_streams.size().
+	vector<AVStream *> audio_streams;
+	for (unsigned stream_idx = 0; stream_idx <= last_stream_idx; ++stream_idx) {
+		AVStream *avstream_audio = avformat_new_stream(avctx, nullptr);
+		if (avstream_audio == nullptr) {
+			fprintf(stderr, "avformat_new_stream() failed\n");
+			abort();
+		}
+		avstream_audio->time_base = AVRational{ 1, TIMEBASE };
+		avstream_audio->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+		avstream_audio->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;  // NOTE(review): presumably matches the audio format stored in the frame files (32-bit LE stereo) -- confirm against the writer.
+		avstream_audio->codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
+		avstream_audio->codecpar->channels = 2;
+		avstream_audio->codecpar->sample_rate = OUTPUT_FREQUENCY;
+		audio_streams.push_back(avstream_audio);
+	}
+
 	if (avformat_write_header(avctx, nullptr) < 0) {
 		QMessageBox msgbox;
 		msgbox.setText("Writing header failed");
@@ -140,7 +158,7 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
 
 	// We buffer up to 1000 frames at a time, in a hope that we can reduce
 	// the amount of seeking needed on rotational media.
-	vector<BufferedJPEG> buffered_jpegs;
+	vector<BufferedFrame> buffered_frames;
 	size_t frames_written = 0;
 	while (num_streams_with_frames_left > 0) {
 		// Find the stream with the lowest frame. Lower stream indexes win.
@@ -163,21 +181,35 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
 				--num_streams_with_frames_left;
 			}
 		}
-		string jpeg = readers[first_frame_stream_idx].read_frame(first_frame);
-		int64_t scaled_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE },
-		                                  video_streams[first_frame_stream_idx]->time_base);
-		buffered_jpegs.emplace_back(BufferedJPEG{ scaled_pts, first_frame_stream_idx, std::move(jpeg) });
-		if (buffered_jpegs.size() >= 1000) {
-			if (!write_buffered_jpegs(avctx, buffered_jpegs)) {
+
+		FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_audio=*/true);
+
+		// Write audio. (Before video, since that's what we expect on input.)
+		if (!frame.audio.empty()) {
+			unsigned audio_stream_idx = first_frame_stream_idx + video_streams.size();  // Audio streams were added after all the video streams.
+			int64_t scaled_audio_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE },
+								audio_streams[first_frame_stream_idx]->time_base);
+			buffered_frames.emplace_back(BufferedFrame{ scaled_audio_pts, audio_stream_idx, std::move(frame.audio) });
+		}
+
+		// Write video.
+		unsigned video_stream_idx = first_frame_stream_idx;
+		int64_t scaled_video_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE },
+		                                        video_streams[first_frame_stream_idx]->time_base);
+		buffered_frames.emplace_back(BufferedFrame{ scaled_video_pts, video_stream_idx, std::move(frame.video) });
+
+		// Flush to disk if required. The buffer holds packets, so a frame that has audio takes up two entries.
+		if (buffered_frames.size() >= 1000) {
+			if (!write_buffered_frames(avctx, buffered_frames)) {
 				QMessageBox msgbox;
 				msgbox.setText("Writing frames failed");
 				msgbox.exec();
 				unlink(filename.c_str());
 				return;
 			}
-			frames_written += buffered_jpegs.size();
+			frames_written += buffered_frames.size();  // NOTE(review): now counts audio packets as well as video frames -- confirm the progress maximum allows for that.
 			progress.setValue(frames_written);
-			buffered_jpegs.clear();
+			buffered_frames.clear();
 		}
 		if (progress.wasCanceled()) {
 			unlink(filename.c_str());
@@ -185,14 +217,14 @@ void export_multitrack_clip(const string &filename, const Clip &clip)
 		}
 	}
 
-	if (!write_buffered_jpegs(avctx, buffered_jpegs)) {
+	if (!write_buffered_frames(avctx, buffered_frames)) {
 		QMessageBox msgbox;
 		msgbox.setText("Writing frames failed");
 		msgbox.exec();
 		unlink(filename.c_str());
 		return;
 	}
-	frames_written += buffered_jpegs.size();
+	frames_written += buffered_frames.size();
 	progress.setValue(frames_written);
 }
 
diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp
index 9a87094..6bdaf23 100644
--- a/futatabi/frame_on_disk.cpp
+++ b/futatabi/frame_on_disk.cpp
@@ -47,7 +47,28 @@ FrameReader::~FrameReader()
 	}
 }
 
-string FrameReader::read_frame(FrameOnDisk frame)
+namespace {
+
+// Reads exactly <size> bytes from <fd>, starting at file position <offset>. Aborts on read error or premature EOF.
+string read_string(int fd, size_t size, off_t offset)
+{
+	string str(size, '\0');
+	size_t str_offset = 0;
+	while (str_offset < size) {
+		ssize_t ret = pread(fd, &str[str_offset], size - str_offset, offset + str_offset);  // pread() returns ssize_t; do not narrow to int.
+		if (ret <= 0) {
+			if (ret < 0) perror("pread");  // errno is only meaningful when pread() actually failed.
+			else fprintf(stderr, "pread: unexpected end of file\n");  // ret == 0: the file is shorter than the index claims.
+			abort();
+		}
+		str_offset += ret;
+	}
+	return str;
+}
+
+}  // namespace
+
+FrameReader::Frame FrameReader::read_frame(FrameOnDisk frame, bool read_audio)
 {
 	steady_clock::time_point start = steady_clock::now();
 
@@ -76,20 +97,10 @@ string FrameReader::read_frame(FrameOnDisk frame)
 		++metric_frame_opened_files;
 	}
 
-	// TODO: Read the audio.
-
-	string str;
-	str.resize(frame.size);
-	off_t offset = 0;
-	while (offset < frame.size) {
-		int ret = pread(fd, &str[offset], frame.size - offset, frame.offset + offset);
-		if (ret <= 0) {
-			string filename = frame_filenames[frame.filename_idx];
-			perror("pread");
-			abort();
-		}
-
-		offset += ret;
+	Frame ret;
+	ret.video = read_string(fd, frame.size, frame.offset);
+	if (read_audio) {  // NOTE(review): metric_frame_read_bytes further down still adds only frame.size, so audio bytes go uncounted.
+		ret.audio = read_string(fd, frame.audio_size, frame.offset + frame.size);  // The audio bytes directly follow the video bytes in the file.
+	}
 
 	steady_clock::time_point stop = steady_clock::now();
@@ -98,5 +109,5 @@ string FrameReader::read_frame(FrameOnDisk frame)
 	metric_frame_read_bytes += frame.size;
 	++metric_frame_read_frames;
 
-	return str;
+	return ret;
 }
diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h
index dbe1211..360bd23 100644
--- a/futatabi/frame_on_disk.h
+++ b/futatabi/frame_on_disk.h
@@ -41,7 +41,12 @@ class FrameReader {
 public:
 	FrameReader();
 	~FrameReader();
-	std::string read_frame(FrameOnDisk frame);
+
+	struct Frame {  // Result of read_frame(): the raw bytes belonging to one frame on disk.
+		std::string video;  // frame.size bytes read from frame.offset.
+		std::string audio;  // frame.audio_size bytes stored right after the video data; left empty unless read_audio was set.
+	};
+	Frame read_frame(FrameOnDisk frame, bool read_audio);
 
 private:
 	int fd = -1;
diff --git a/futatabi/jpeg_frame_view.cpp b/futatabi/jpeg_frame_view.cpp
index 198affd..c1afafd 100644
--- a/futatabi/jpeg_frame_view.cpp
+++ b/futatabi/jpeg_frame_view.cpp
@@ -238,7 +238,7 @@ shared_ptr<Frame> decode_jpeg_with_cache(FrameOnDisk frame_spec, CacheMissBehavi
 	++metric_jpeg_cache_miss_frames;
 
 	*did_decode = true;
-	shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec));
+	shared_ptr<Frame> frame = decode_jpeg(frame_reader->read_frame(frame_spec, /*read_audio=*/false).video);
 
 	lock_guard<mutex> lock(cache_mu);
 	cache_bytes_used += frame_size(*frame);
diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
index 4b0336c..06acfd2 100644
--- a/futatabi/video_stream.cpp
+++ b/futatabi/video_stream.cpp
@@ -335,6 +335,8 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
 {
 	fprintf(stderr, "output_pts=%" PRId64 "  original      input_pts=%" PRId64 "\n", output_pts, frame.pts);
 
+	// TODO: Write audio if at the right speed.
+
 	QueuedFrame qf;
 	qf.local_pts = local_pts;
 	qf.type = QueuedFrame::ORIGINAL;
@@ -342,7 +344,7 @@ void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
 	qf.display_func = move(display_func);
 	qf.queue_spot_holder = move(queue_spot_holder);
 	qf.subtitle = subtitle;
-	qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame)));
+	qf.encoded_jpeg.reset(new string(frame_reader.read_frame(frame, /*read_audio=*/false).video));
 
 	lock_guard<mutex> lock(queue_lock);
 	frame_queue.push_back(move(qf));
diff --git a/nageru/defs.h b/nageru/defs.h
index a990330..6d684e7 100644
--- a/nageru/defs.h
+++ b/nageru/defs.h
@@ -3,7 +3,6 @@
 
 #include <libavformat/version.h>
 
-#define OUTPUT_FREQUENCY 48000  // Currently needs to be exactly 48000, since bmusb outputs in that.
 #define MAX_FPS 60
 #define FAKE_FPS 25  // Must be an integer.
 #define MAX_VIDEO_CARDS 16
diff --git a/shared/shared_defs.h b/shared/shared_defs.h
index 20c56bf..62b719d 100644
--- a/shared/shared_defs.h
+++ b/shared/shared_defs.h
@@ -1,6 +1,8 @@
 #ifndef _SHARED_DEFS_H
 #define _SHARED_DEFS_H 1
 
+#define OUTPUT_FREQUENCY 48000  // Currently needs to be exactly 48000, since bmusb outputs in that.
+
 #define MUX_OPTS { \
 	/* Make seekable .mov files, and keep MP4 muxer from using unlimited amounts of memory. */ \
 	{ "movflags", "empty_moov+frag_keyframe+default_base_moof+skip_trailer" }, \
-- 
2.39.2